should be it

commit f09560c7b1 (parent a4b23fc57c)
Date: 2025-10-24 19:21:19 -05:00
14047 changed files with 3161551 additions and 1 deletion

@@ -0,0 +1,8 @@
include_directories(../../third_party/sqlite/include)
add_library(
duckdb_benchmark_micro OBJECT append.cpp append_mix.cpp bulkupdate.cpp
cast.cpp in.cpp storage.cpp)
set(BENCHMARK_OBJECT_FILES
${BENCHMARK_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_benchmark_micro>
PARENT_SCOPE)

@@ -0,0 +1,13 @@
# name: benchmark/micro/aggregate/any_value_uuid.benchmark
# description: ANY_VALUE(uuid) over a bunch of uuids
# group: [aggregate]
name Any Value (UUID)
group aggregate
load
CREATE TABLE t AS SELECT uuid() AS uuid FROM range(100000000) tbl(i);
run
SELECT ANY_VALUE(uuid) FROM t;

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/bitstring_aggregate.benchmark
# description: Count distinct values using bitstring_agg function
# group: [aggregate]
name distinct count with bitstring_agg
group aggregate
load
CREATE TABLE ints AS SELECT i % 1000 col FROM range(0, 100000000, 5) tbl(i);
run
SELECT bit_count(bitstring_agg(col)) FROM ints
result I
200
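
Why 200 is the expected result: bitstring_agg sets one bit per distinct input value and bit_count counts the set bits, while i % 1000 over a range stepping by 5 produces exactly the 200 values 0, 5, ..., 995. A down-scaled sketch of the same reasoning (editor's illustration, not part of the benchmark file):
SELECT bit_count(bitstring_agg(col)) AS via_bitstring,
       count(DISTINCT col) AS reference  -- both columns return 200
FROM (SELECT i % 1000 AS col FROM range(0, 1000, 5) tbl(i));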

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/bitwise_aggregate.benchmark
# description: BIT_AND over a bunch of bitstrings
# group: [aggregate]
name Bit string Bitwise AND (Ungrouped)
group aggregate
load
CREATE TABLE bits AS SELECT printf('%032b', 1000 + (i % 5))::BIT col FROM range(0, 1000000) tbl(i);
run
SELECT BIT_AND(col) FROM bits
result I
00000000000000000000001111101000

@@ -0,0 +1,13 @@
# name: benchmark/micro/aggregate/constant_aggregate.benchmark
# description: Aggregate over grouping keys that are constant within each vector (the key changes only every 50,000 rows)
# group: [aggregate]
name Aggregate Over Constant Vectors
group aggregate
storage persistent
load
CREATE TABLE t AS SELECT DATE '1900-01-01' + INTERVAL (i // 50000) MONTH grp, i FROM range(100_000_000) tbl(i);
run
SELECT grp, SUM(i) FROM t GROUP BY ALL ORDER BY ALL

@@ -0,0 +1,115 @@
# name: benchmark/micro/aggregate/dictionary_aggregate.benchmark
# description: Aggregate Over Dictionary Vectors
# group: [aggregate]
name Aggregate Over Dictionary Vectors
group aggregate
storage persistent
load
CREATE TABLE t AS SELECT CONCAT('thisisastringwithrepetitions', i%100) AS grp, i FROM range(100_000_000) tbl(i);
run
SELECT grp, SUM(i) FROM t GROUP BY ALL ORDER BY ALL
result II
thisisastringwithrepetitions0 49999950000000
thisisastringwithrepetitions1 49999951000000
thisisastringwithrepetitions10 49999960000000
thisisastringwithrepetitions11 49999961000000
thisisastringwithrepetitions12 49999962000000
thisisastringwithrepetitions13 49999963000000
thisisastringwithrepetitions14 49999964000000
thisisastringwithrepetitions15 49999965000000
thisisastringwithrepetitions16 49999966000000
thisisastringwithrepetitions17 49999967000000
thisisastringwithrepetitions18 49999968000000
thisisastringwithrepetitions19 49999969000000
thisisastringwithrepetitions2 49999952000000
thisisastringwithrepetitions20 49999970000000
thisisastringwithrepetitions21 49999971000000
thisisastringwithrepetitions22 49999972000000
thisisastringwithrepetitions23 49999973000000
thisisastringwithrepetitions24 49999974000000
thisisastringwithrepetitions25 49999975000000
thisisastringwithrepetitions26 49999976000000
thisisastringwithrepetitions27 49999977000000
thisisastringwithrepetitions28 49999978000000
thisisastringwithrepetitions29 49999979000000
thisisastringwithrepetitions3 49999953000000
thisisastringwithrepetitions30 49999980000000
thisisastringwithrepetitions31 49999981000000
thisisastringwithrepetitions32 49999982000000
thisisastringwithrepetitions33 49999983000000
thisisastringwithrepetitions34 49999984000000
thisisastringwithrepetitions35 49999985000000
thisisastringwithrepetitions36 49999986000000
thisisastringwithrepetitions37 49999987000000
thisisastringwithrepetitions38 49999988000000
thisisastringwithrepetitions39 49999989000000
thisisastringwithrepetitions4 49999954000000
thisisastringwithrepetitions40 49999990000000
thisisastringwithrepetitions41 49999991000000
thisisastringwithrepetitions42 49999992000000
thisisastringwithrepetitions43 49999993000000
thisisastringwithrepetitions44 49999994000000
thisisastringwithrepetitions45 49999995000000
thisisastringwithrepetitions46 49999996000000
thisisastringwithrepetitions47 49999997000000
thisisastringwithrepetitions48 49999998000000
thisisastringwithrepetitions49 49999999000000
thisisastringwithrepetitions5 49999955000000
thisisastringwithrepetitions50 50000000000000
thisisastringwithrepetitions51 50000001000000
thisisastringwithrepetitions52 50000002000000
thisisastringwithrepetitions53 50000003000000
thisisastringwithrepetitions54 50000004000000
thisisastringwithrepetitions55 50000005000000
thisisastringwithrepetitions56 50000006000000
thisisastringwithrepetitions57 50000007000000
thisisastringwithrepetitions58 50000008000000
thisisastringwithrepetitions59 50000009000000
thisisastringwithrepetitions6 49999956000000
thisisastringwithrepetitions60 50000010000000
thisisastringwithrepetitions61 50000011000000
thisisastringwithrepetitions62 50000012000000
thisisastringwithrepetitions63 50000013000000
thisisastringwithrepetitions64 50000014000000
thisisastringwithrepetitions65 50000015000000
thisisastringwithrepetitions66 50000016000000
thisisastringwithrepetitions67 50000017000000
thisisastringwithrepetitions68 50000018000000
thisisastringwithrepetitions69 50000019000000
thisisastringwithrepetitions7 49999957000000
thisisastringwithrepetitions70 50000020000000
thisisastringwithrepetitions71 50000021000000
thisisastringwithrepetitions72 50000022000000
thisisastringwithrepetitions73 50000023000000
thisisastringwithrepetitions74 50000024000000
thisisastringwithrepetitions75 50000025000000
thisisastringwithrepetitions76 50000026000000
thisisastringwithrepetitions77 50000027000000
thisisastringwithrepetitions78 50000028000000
thisisastringwithrepetitions79 50000029000000
thisisastringwithrepetitions8 49999958000000
thisisastringwithrepetitions80 50000030000000
thisisastringwithrepetitions81 50000031000000
thisisastringwithrepetitions82 50000032000000
thisisastringwithrepetitions83 50000033000000
thisisastringwithrepetitions84 50000034000000
thisisastringwithrepetitions85 50000035000000
thisisastringwithrepetitions86 50000036000000
thisisastringwithrepetitions87 50000037000000
thisisastringwithrepetitions88 50000038000000
thisisastringwithrepetitions89 50000039000000
thisisastringwithrepetitions9 49999959000000
thisisastringwithrepetitions90 50000040000000
thisisastringwithrepetitions91 50000041000000
thisisastringwithrepetitions92 50000042000000
thisisastringwithrepetitions93 50000043000000
thisisastringwithrepetitions94 50000044000000
thisisastringwithrepetitions95 50000045000000
thisisastringwithrepetitions96 50000046000000
thisisastringwithrepetitions97 50000047000000
thisisastringwithrepetitions98 50000048000000
thisisastringwithrepetitions99 50000049000000

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/group_two_string_dictionaries.benchmark
# description: Group by two string dictionary columns (test string dictionary hash cache)
# group: [aggregate]
name Group by two string dictionary columns
group aggregate
storage persistent
load
create or replace table test as
select format('{:a>256}', cast(range % 100 as varchar)) a256, a256 b256,
from range(10_000_000);
run
select a256, b256 from test group by all;
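
For reference, format('{:a>256}', x) right-aligns x in a field of width 256 padded with the fill character 'a', so each key is a fixed 256-byte string in which only the trailing digits vary. A quick sketch (not part of the benchmark file):
SELECT length(format('{:a>256}', '42')) AS len,    -- 256
       right(format('{:a>256}', '42'), 4) AS tail; -- aa42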

@@ -0,0 +1,39 @@
# name: benchmark/micro/aggregate/grouped_distinct.benchmark
# description: Distinct aggregates (SUM, COUNT, AVG, PRODUCT) over integers, grouped
# group: [aggregate]
name Integer Distinct Aggregates (Grouped)
group aggregate
load
CREATE TABLE integers AS SELECT i % 5 AS i, i % 25 as j FROM range(0, 10000000) tbl(i);
run
SELECT SUM(distinct i), COUNT(distinct i), AVG(distinct i), PRODUCT(distinct i) FROM integers group by j order by all
result IIII
0 1 0.0 0.0
0 1 0.0 0.0
0 1 0.0 0.0
0 1 0.0 0.0
0 1 0.0 0.0
1 1 1.0 1.0
1 1 1.0 1.0
1 1 1.0 1.0
1 1 1.0 1.0
1 1 1.0 1.0
2 1 2.0 2.0
2 1 2.0 2.0
2 1 2.0 2.0
2 1 2.0 2.0
2 1 2.0 2.0
3 1 3.0 3.0
3 1 3.0 3.0
3 1 3.0 3.0
3 1 3.0 3.0
3 1 3.0 3.0
4 1 4.0 4.0
4 1 4.0 4.0
4 1 4.0 4.0
4 1 4.0 4.0
4 1 4.0 4.0

@@ -0,0 +1,19 @@
# name: benchmark/micro/aggregate/ordered_first.benchmark
# description: FIRST(i ORDER BY i) over a bunch of integers
# group: [aggregate]
name Ordered First (Grouped)
group aggregate
load
CREATE TABLE t AS FROM range(10000000) tbl(i);
run
SELECT SUM(agg) FROM (
SELECT i // 2048 AS grp, FIRST(i ORDER BY i DESC) AS agg
FROM t
GROUP BY ALL
)
result I
24420932461
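
Because the aggregate orders by i DESC, FIRST picks each group's maximum, so the query is equivalent to summing MAX(i) over the 2048-row groups; the sketch below (assuming the same table t) produces the same expected result:
SELECT SUM(agg) FROM (
    SELECT MAX(i) AS agg FROM t GROUP BY i // 2048
);
-- 24420932461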

@@ -0,0 +1,12 @@
# name: benchmark/micro/aggregate/quantile/quantile.benchmark
# description: Quantile Function
# group: [quantile]
name Quantile
group quantile
load
create table quantile as select range r, random() from range(10000000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
run
SELECT quantile(r, 0.5) FROM quantile

@@ -0,0 +1,14 @@
# name: benchmark/micro/aggregate/quantile/quantile_approx.benchmark
# description: Approximate Quantile Function
# group: [quantile]
name Approximate Quantile
group quantile
load
create table quantile as select range r, random() from range(10000000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
run
SELECT approx_quantile(r, 0.5) FROM quantile

@@ -0,0 +1,13 @@
# name: benchmark/micro/aggregate/quantile/quantile_many.benchmark
# description: Quantile Function
# group: [quantile]
name Quantile Many Groups
group quantile
load
create table quantile as select range r, random() from range(10000000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
run
SELECT quantile(r, 0.5) FROM quantile GROUP BY r % 100000

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/quantile/quantile_sampling.benchmark
# description: Quantile Function With Sampling
# group: [quantile]
name Quantile With Sampling
group quantile
load
create table quantile as select range r, random() from range(10000000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
run
SELECT reservoir_quantile(r, 0.5) FROM quantile

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/simple_aggregate.benchmark
# description: SUM(i) over a bunch of integers
# group: [aggregate]
name Integer Sum (Ungrouped)
group aggregate
load
CREATE TABLE integers AS SELECT i % 5 AS i FROM range(0, 10000000) tbl(i);
run
SELECT SUM(i) FROM integers
result I
20000000

@@ -0,0 +1,15 @@
# name: benchmark/micro/aggregate/simple_distinct.benchmark
# description: Distinct aggregates (SUM, COUNT, AVG, PRODUCT) over a bunch of integers
# group: [aggregate]
name Integer Distinct Aggregates (Ungrouped)
group aggregate
load
CREATE TABLE integers AS SELECT i % 5 AS i FROM range(0, 10000000) tbl(i);
run
SELECT SUM(distinct i), COUNT(distinct i), AVG(distinct i), PRODUCT(distinct i) FROM integers
result IIII
10 5 2 0

@@ -0,0 +1,19 @@
# name: benchmark/micro/aggregate/simple_group.benchmark
# description: SUM(i) over integer, grouped by integer
# group: [aggregate]
name Integer Sum (Grouped)
group aggregate
load
CREATE TABLE integers AS SELECT i % 5 AS i, i % 100 AS j FROM range(0, 10000000) tbl(i);
run
SELECT i, SUM(j) FROM integers GROUP BY i ORDER BY i
result II
0 95000000
1 97000000
2 99000000
3 101000000
4 103000000

@@ -0,0 +1,17 @@
# name: benchmark/micro/aggregate/sorted_last.benchmark
# description: LAST with ORDER BY
# group: [aggregate]
load
SELECT SETSEED(0.8675309);
CREATE TABLE df AS
SELECT
'2019-01-01 00:00:00'::TIMESTAMP + INTERVAL (FLOOR(RANDOM() * 25000)::INT * 20) MINUTE AS time,
RANDOM()::DECIMAL(18, 17) AS value,
FLOOR(RANDOM() * 300)::INT AS id
FROM range(70000000);
run
SELECT time::DATE, id, last(value ORDER BY time ASC)
FROM df
GROUP BY ALL

@@ -0,0 +1,221 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"
#include "duckdb/main/appender.hpp"
using namespace duckdb;
//////////////
// INSERT //
//////////////
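// Each APPEND_BENCHMARK_* macro below expands into the body of a DUCKDB_BENCHMARK
// class: Load() creates the table, RunBenchmark() appends 100K integers via the
// method under test, and Cleanup() drops and recreates the table between runs.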
#define APPEND_BENCHMARK_INSERT(CREATE_STATEMENT, AUTO_COMMIT) \
void Load(DuckDBBenchmarkState *state) override { \
state->conn.Query(CREATE_STATEMENT); \
} \
void RunBenchmark(DuckDBBenchmarkState *state) override { \
if (!AUTO_COMMIT) \
state->conn.Query("BEGIN TRANSACTION"); \
for (int32_t i = 0; i < 100000; i++) { \
state->conn.Query("INSERT INTO integers VALUES (" + std::to_string(i) + ")"); \
} \
if (!AUTO_COMMIT) \
state->conn.Query("COMMIT"); \
} \
void Cleanup(DuckDBBenchmarkState *state) override { \
state->conn.Query("DROP TABLE integers"); \
Load(state); \
} \
string VerifyResult(QueryResult *result) override { \
return string(); \
} \
string BenchmarkInfo() override { \
return "Append 100K 4-byte integers to a table using a series of INSERT INTO statements"; \
}
DUCKDB_BENCHMARK(Append100KIntegersINSERT, "[append]")
APPEND_BENCHMARK_INSERT("CREATE TABLE integers(i INTEGER)", false)
FINISH_BENCHMARK(Append100KIntegersINSERT)
DUCKDB_BENCHMARK(Append100KIntegersINSERTDisk, "[append]")
APPEND_BENCHMARK_INSERT("CREATE TABLE integers(i INTEGER)", false)
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Append100KIntegersINSERTDisk)
DUCKDB_BENCHMARK(Append100KIntegersINSERTPrimary, "[append]")
APPEND_BENCHMARK_INSERT("CREATE TABLE integers(i INTEGER PRIMARY KEY)", false)
FINISH_BENCHMARK(Append100KIntegersINSERTPrimary)
DUCKDB_BENCHMARK(Append100KIntegersINSERTAutoCommit, "[append]")
APPEND_BENCHMARK_INSERT("CREATE TABLE integers(i INTEGER)", true)
FINISH_BENCHMARK(Append100KIntegersINSERTAutoCommit)
//////////////
// PREPARED //
//////////////
struct DuckDBPreparedState : public DuckDBBenchmarkState {
duckdb::unique_ptr<PreparedStatement> prepared;
DuckDBPreparedState(string path) : DuckDBBenchmarkState(path) {
}
virtual ~DuckDBPreparedState() {
}
};
#define APPEND_BENCHMARK_PREPARED(CREATE_STATEMENT) \
duckdb::unique_ptr<DuckDBBenchmarkState> CreateBenchmarkState() override { \
auto result = make_uniq<DuckDBPreparedState>(GetDatabasePath()); \
return std::move(result); \
} \
void Load(DuckDBBenchmarkState *state_p) override { \
auto state = (DuckDBPreparedState *)state_p; \
state->conn.Query(CREATE_STATEMENT); \
state->prepared = state->conn.Prepare("INSERT INTO integers VALUES ($1)"); \
} \
void RunBenchmark(DuckDBBenchmarkState *state_p) override { \
auto state = (DuckDBPreparedState *)state_p; \
state->conn.Query("BEGIN TRANSACTION"); \
for (int32_t i = 0; i < 100000; i++) { \
state->prepared->Execute(i); \
} \
state->conn.Query("COMMIT"); \
} \
void Cleanup(DuckDBBenchmarkState *state) override { \
state->conn.Query("DROP TABLE integers"); \
Load(state); \
} \
string VerifyResult(QueryResult *result) override { \
return string(); \
} \
string BenchmarkInfo() override { \
return "Append 100K 4-byte integers to a table using a series of prepared INSERT INTO statements"; \
}
DUCKDB_BENCHMARK(Append100KIntegersPREPARED, "[append]")
APPEND_BENCHMARK_PREPARED("CREATE TABLE integers(i INTEGER)")
FINISH_BENCHMARK(Append100KIntegersPREPARED)
DUCKDB_BENCHMARK(Append100KIntegersPREPAREDDisk, "[append]")
APPEND_BENCHMARK_PREPARED("CREATE TABLE integers(i INTEGER)")
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Append100KIntegersPREPAREDDisk)
DUCKDB_BENCHMARK(Append100KIntegersPREPAREDPrimary, "[append]")
APPEND_BENCHMARK_PREPARED("CREATE TABLE integers(i INTEGER PRIMARY KEY)")
FINISH_BENCHMARK(Append100KIntegersPREPAREDPrimary)
//////////////
// APPENDER //
//////////////
#define APPEND_BENCHMARK_APPENDER(CREATE_STATEMENT) \
void Load(DuckDBBenchmarkState *state) override { \
state->conn.Query(CREATE_STATEMENT); \
} \
void RunBenchmark(DuckDBBenchmarkState *state) override { \
state->conn.Query("BEGIN TRANSACTION"); \
Appender appender(state->conn, "integers"); \
for (int32_t i = 0; i < 100000; i++) { \
appender.BeginRow(); \
appender.Append<int32_t>(i); \
appender.EndRow(); \
} \
appender.Close(); \
state->conn.Query("COMMIT"); \
} \
void Cleanup(DuckDBBenchmarkState *state) override { \
state->conn.Query("DROP TABLE integers"); \
Load(state); \
} \
string VerifyResult(QueryResult *result) override { \
return string(); \
} \
string BenchmarkInfo() override { \
return "Append 100K 4-byte integers to a table using an Appender"; \
}
DUCKDB_BENCHMARK(Append100KIntegersAPPENDER, "[append]")
APPEND_BENCHMARK_APPENDER("CREATE TABLE integers(i INTEGER)")
FINISH_BENCHMARK(Append100KIntegersAPPENDER)
DUCKDB_BENCHMARK(Append100KIntegersAPPENDERDisk, "[append]")
APPEND_BENCHMARK_APPENDER("CREATE TABLE integers(i INTEGER)")
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Append100KIntegersAPPENDERDisk)
DUCKDB_BENCHMARK(Append100KIntegersAPPENDERPrimary, "[append]")
APPEND_BENCHMARK_APPENDER("CREATE TABLE integers(i INTEGER PRIMARY KEY)")
FINISH_BENCHMARK(Append100KIntegersAPPENDERPrimary)
///////////////
// COPY INTO //
///////////////
#define APPEND_BENCHMARK_COPY(CREATE_STATEMENT) \
void Load(DuckDBBenchmarkState *state) override { \
state->conn.Query("CREATE TABLE integers(i INTEGER)"); \
Appender appender(state->conn, "integers"); \
for (int32_t i = 0; i < 100000; i++) { \
appender.BeginRow(); \
appender.Append<int32_t>(i); \
appender.EndRow(); \
} \
appender.Close(); \
state->conn.Query("COPY integers TO 'integers.csv' DELIMITER '|'"); \
state->conn.Query("DROP TABLE integers"); \
state->conn.Query(CREATE_STATEMENT); \
} \
string GetQuery() override { \
return "COPY integers FROM 'integers.csv' DELIMITER '|'"; \
} \
void Cleanup(DuckDBBenchmarkState *state) override { \
state->conn.Query("DROP TABLE integers"); \
state->conn.Query(CREATE_STATEMENT); \
} \
string VerifyResult(QueryResult *result) override { \
return string(); \
} \
string BenchmarkInfo() override { \
return "Append 100K 4-byte integers to a table using the COPY INTO statement"; \
}
DUCKDB_BENCHMARK(Append100KIntegersCOPY, "[append]")
APPEND_BENCHMARK_COPY("CREATE TABLE integers(i INTEGER)")
FINISH_BENCHMARK(Append100KIntegersCOPY)
DUCKDB_BENCHMARK(Append100KIntegersCOPYDisk, "[append]")
APPEND_BENCHMARK_COPY("CREATE TABLE integers(i INTEGER)")
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Append100KIntegersCOPYDisk)
DUCKDB_BENCHMARK(Append100KIntegersCOPYPrimary, "[append]")
APPEND_BENCHMARK_COPY("CREATE TABLE integers(i INTEGER PRIMARY KEY)")
FINISH_BENCHMARK(Append100KIntegersCOPYPrimary)
DUCKDB_BENCHMARK(Write100KIntegers, "[append]")
void Load(DuckDBBenchmarkState *state) override {
state->conn.Query("CREATE TABLE integers(i INTEGER)");
Appender appender(state->conn, "integers");
for (int32_t i = 0; i < 100000; i++) {
appender.BeginRow();
appender.Append<int32_t>(i);
appender.EndRow();
}
}
string GetQuery() override {
return "COPY integers TO 'integers.csv' DELIMITER '|' HEADER";
}
string VerifyResult(QueryResult *result) override {
if (result->HasError()) {
return result->GetError();
}
return string();
}
string BenchmarkInfo() override {
return "Write 100K 4-byte integers to CSV";
}
FINISH_BENCHMARK(Write100KIntegers)

@@ -0,0 +1,89 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"
#include "duckdb/main/appender.hpp"
#include "duckdb/common/random_engine.hpp"
#include "duckdb/common/types/cast_helpers.hpp"
using namespace duckdb;
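// APPEND_MIX_BENCHMARK(PRIMARY_KEY) appends 10M rows of mixed types (INTEGER id,
// nullable CHAR(6) area code, TINYINT age and active flag) through the Appender,
// with and without a primary key on id.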
#define APPEND_MIX_BENCHMARK(PRIMARY_KEY) \
RandomEngine random; \
void Load(DuckDBBenchmarkState *state) override { \
if (!PRIMARY_KEY) \
state->conn.Query("create table IF NOT EXISTS test(id INTEGER not null, area CHAR(6), age TINYINT not " \
"null, active TINYINT not null);"); \
else \
state->conn.Query("create table IF NOT EXISTS test(id INTEGER primary key, area CHAR(6), age TINYINT not " \
"null, active TINYINT not null);"); \
} \
int32_t get_random_age() { \
/* 5, 10, 15 */ \
return 5 + 5 * random.NextRandom(0, 3); \
} \
bool get_random_bool() { \
return random.NextRandom() > 0.5; \
} \
int32_t get_random_active() { \
return int32_t(random.NextRandom(0, 2)); \
} \
void get_random_area_code(char *area_code) { \
uint32_t code = uint32_t(random.NextRandom(0, 999999)); \
auto endptr = area_code + 6; \
NumericHelper::FormatUnsigned(code, endptr); \
} \
void RunBenchmark(DuckDBBenchmarkState *state) override { \
state->conn.Query("BEGIN TRANSACTION"); \
Appender appender(state->conn, "test"); \
for (int32_t i = 0; i < 10000000; i++) { \
appender.BeginRow(); \
appender.Append<int32_t>(i); \
if (get_random_bool()) { \
char area_code[6] = {'0', '0', '0', '0', '0', '0'}; \
get_random_area_code(area_code); \
appender.Append<string_t>(string_t(area_code, 6)); \
} else { \
appender.Append<std::nullptr_t>(nullptr); \
} \
appender.Append<int32_t>(get_random_age()); \
appender.Append<int32_t>(get_random_active()); \
appender.EndRow(); \
} \
appender.Close(); \
state->conn.Query("COMMIT"); \
} \
void Cleanup(DuckDBBenchmarkState *state) override { \
state->conn.Query("DROP INDEX IF EXISTS pk_index"); \
state->conn.Query("DROP TABLE IF EXISTS test"); \
Load(state); \
} \
string VerifyResult(QueryResult *result) override { \
return string(); \
} \
string BenchmarkInfo() override { \
return "Append 10M rows to a table using an Appender"; \
} \
optional_idx Timeout(const BenchmarkConfiguration &config) override { \
return 600; \
}
DUCKDB_BENCHMARK(Appender10MRows, "[append_mix]")
APPEND_MIX_BENCHMARK(false);
FINISH_BENCHMARK(Appender10MRows)
DUCKDB_BENCHMARK(Appender10MRowsPrimaryKey, "[append_mix]")
APPEND_MIX_BENCHMARK(true);
FINISH_BENCHMARK(Appender10MRowsPrimaryKey)
DUCKDB_BENCHMARK(Appender10MRowsDisk, "[append_mix]")
APPEND_MIX_BENCHMARK(false);
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Appender10MRowsDisk)
DUCKDB_BENCHMARK(Appender10MRowsDiskPrimaryKey, "[append_mix]")
APPEND_MIX_BENCHMARK(true);
bool InMemory() override {
return false;
}
FINISH_BENCHMARK(Appender10MRowsDiskPrimaryKey)

@@ -0,0 +1,15 @@
# name: benchmark/micro/arithmetic/multiplications.benchmark
# description: Integer multiplications between 10000000 values
# group: [arithmetic]
name Integer Multiplication
group arithmetic
load
CREATE TABLE integers AS SELECT ((i * 9582398353) % 100)::INTEGER AS i, ((i * 847892347987) % 100)::INTEGER AS j FROM range(0, 10000000) tbl(i);
run
SELECT MIN((i * j) + (i * j) + (i * j) + (i * j)) FROM integers
result I
0

@@ -0,0 +1,15 @@
# name: benchmark/micro/array/large_array_dense_select.benchmark
# description: Array Dense Select: select 33% of all arrays, randomly split
# group: [array]
name Large Array Dense Select
group array
load
CREATE TABLE arrays AS SELECT i%2000 as sparse_id, i%3 as dense_id, [i + x for x in range(1024)]::INT[1024] arr FROM range(10000000) tbl(i);
run
SELECT SUM(LIST_SUM(arr)) FROM arrays WHERE dense_id=0;
result I
17068414293682176

@@ -0,0 +1,15 @@
# name: benchmark/micro/array/large_array_sparse_select.benchmark
# description: Array Sparse Select: select only one out of every 2000 large arrays
# group: [array]
name Large Array Sparse Select
group array
load
CREATE TABLE arrays AS SELECT i%2000 as id, [i + x for x in range(1024)]::INT[1024] arr FROM range(10000000) tbl(i);
run
SELECT SUM(LIST_SUM(arr)) FROM arrays WHERE id=88;
result I
25597949440000

@@ -0,0 +1,16 @@
# name: benchmark/micro/array/small_array_dense_select.benchmark
# description: Array Dense Select: select 33% of all small arrays
# group: [array]
name Small Array Dense Select
group array
load
CREATE TABLE arrays AS SELECT i%3 as dense_id, [i + x for x in range(5)]::INT[5] arr FROM range(100000000) tbl(i);
run
SELECT SUM(LIST_SUM(arr)) FROM arrays WHERE dense_id=0
result I
8333333750000005

@@ -0,0 +1,15 @@
# name: benchmark/micro/array/small_array_sparse_select.benchmark
# description: Array Sparse Select: select only one out of every 2000 small arrays
# group: [array]
name Small Array Sparse Select
group array
load
CREATE TABLE arrays AS SELECT i%2000 as id, [i + x for x in range(5)]::INT[5] arr FROM range(100000000) tbl(i);
run
SELECT SUM(LIST_SUM(arr)) FROM arrays WHERE id=88;
result I
12499772500000

@@ -0,0 +1,86 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"
#include "duckdb/main/appender.hpp"
#include <random>
using namespace duckdb;
#define GROUP_ROW_COUNT 1000000
#define GROUP_COUNT 5
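// 1M rows spread evenly over the 5 group values 0..4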
DUCKDB_BENCHMARK(BulkUpdate, "[bulkupdate]")
int64_t sum = 0;
int64_t count = 0;
void Load(DuckDBBenchmarkState *state) override {
state->conn.Query("CREATE TABLE integers(i INTEGER);");
Appender appender(state->conn, "integers");
// insert the elements into the database
for (size_t i = 0; i < GROUP_ROW_COUNT; i++) {
appender.BeginRow();
appender.Append<int32_t>(i % GROUP_COUNT);
appender.EndRow();
sum += i % GROUP_COUNT;
count++;
}
}
void RunBenchmark(DuckDBBenchmarkState *state) override {
state->conn.Query("BEGIN TRANSACTION");
state->conn.Query("UPDATE integers SET i = i + 1");
state->result = state->conn.Query("SELECT SUM(i) FROM integers");
state->conn.Query("ROLLBACK");
}
string VerifyResult(QueryResult *result) override {
auto &materialized = (MaterializedQueryResult &)*result;
Value val = materialized.GetValue(0, 0);
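// UPDATE i = i + 1 adds exactly 1 to each of the 'count' rows, so the new sum is sum + count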
if (val != Value::BIGINT(sum + count)) {
return string("Value " + val.ToString() + " does not match expected value " + std::to_string(sum + count));
}
return string();
}
string BenchmarkInfo() override {
return "Run a bulk update followed by an aggregate";
}
FINISH_BENCHMARK(BulkUpdate)
DUCKDB_BENCHMARK(BulkDelete, "[bulkupdate]")
int64_t sum = 0;
int64_t count = 0;
void Load(DuckDBBenchmarkState *state) override {
state->conn.Query("CREATE TABLE integers(i INTEGER);");
Appender appender(state->conn, "integers");
// insert the elements into the database
for (size_t i = 0; i < GROUP_ROW_COUNT; i++) {
appender.BeginRow();
appender.Append<int32_t>(i % GROUP_COUNT);
appender.EndRow();
sum += i % GROUP_COUNT;
if ((i % GROUP_COUNT) == 1) {
count++;
}
}
}
void RunBenchmark(DuckDBBenchmarkState *state) override {
state->conn.Query("BEGIN TRANSACTION");
state->conn.Query("DELETE FROM integers WHERE i=1");
state->result = state->conn.Query("SELECT SUM(i) FROM integers");
state->conn.Query("ROLLBACK");
}
string VerifyResult(QueryResult *result) override {
auto &materialized = (MaterializedQueryResult &)*result;
Value val = materialized.GetValue(0, 0);
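// deleting the rows with i = 1 removes 'count' ones from the total, leaving sum - count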
if (val != Value::BIGINT(sum - count)) {
return string("Value " + val.ToString() + " does not match expected value " + std::to_string(sum - count));
}
return string();
}
string BenchmarkInfo() override {
return "Run a bulk delete followed by an aggregate";
}
FINISH_BENCHMARK(BulkDelete)

File diff suppressed because it is too large

@@ -0,0 +1,15 @@
# name: benchmark/micro/case/integer_case_alternate.benchmark
# description: Case benchmark
# group: [case]
name Simple case with integers
group case
load
CREATE TABLE integers AS SELECT * FROM range(100000000) tbl(i);
run
SELECT SUM(CASE WHEN i%2=0 THEN 1 ELSE 0 END) FROM integers;
result I
50000000

@@ -0,0 +1,15 @@
# name: benchmark/micro/case/integer_case_predictable.benchmark
# description: Case benchmark
# group: [case]
name Predictable case with integers
group case
load
CREATE TABLE integers AS SELECT * FROM range(100000000) tbl(i);
run
SELECT SUM(CASE WHEN i<50000000 THEN 1 ELSE 0 END) FROM integers;
result I
50000000

@@ -0,0 +1,92 @@
#include "benchmark_runner.hpp"
#include "duckdb_benchmark_macro.hpp"
#include "duckdb/main/appender.hpp"
#include <random>
using namespace duckdb;
#define CAST_COUNT 10000000
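// 10M rows per cast benchmark; the seeded std::mt19937 keeps the generated values deterministic across runs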
DUCKDB_BENCHMARK(CastDateToString, "[cast]")
void Load(DuckDBBenchmarkState *state) override {
std::uniform_int_distribution<> year_dist(1990, 2010), day_dist(1, 28), month_dist(1, 12);
std::mt19937 gen;
gen.seed(42);
state->conn.Query("CREATE TABLE dates(d DATE);");
Appender appender(state->conn, "dates");
// insert the elements into the database
for (int i = 0; i < CAST_COUNT; i++) {
appender.AppendRow(Value::DATE(year_dist(gen), month_dist(gen), day_dist(gen)));
}
}
string GetQuery() override {
return "SELECT CAST(d AS VARCHAR) FROM dates";
}
string VerifyResult(QueryResult *result) override {
if (result->HasError()) {
return result->GetError();
}
return string();
}
string BenchmarkInfo() override {
return "Cast date to string";
}
FINISH_BENCHMARK(CastDateToString)
DUCKDB_BENCHMARK(CastTimeToString, "[cast]")
void Load(DuckDBBenchmarkState *state) override {
std::uniform_int_distribution<> hour_dist(0, 23), min_dist(0, 59);
std::mt19937 gen;
gen.seed(42);
state->conn.Query("CREATE TABLE times(d TIME);");
Appender appender(state->conn, "times");
// insert the elements into the database
for (int i = 0; i < CAST_COUNT; i++) {
appender.AppendRow(Value::TIME(hour_dist(gen), min_dist(gen), min_dist(gen), 0));
}
}
string GetQuery() override {
return "SELECT CAST(d AS VARCHAR) FROM times";
}
string VerifyResult(QueryResult *result) override {
if (result->HasError()) {
return result->GetError();
}
return string();
}
string BenchmarkInfo() override {
return "Cast time to string";
}
FINISH_BENCHMARK(CastTimeToString)
DUCKDB_BENCHMARK(CastTimestampToString, "[cast]")
void Load(DuckDBBenchmarkState *state) override {
std::uniform_int_distribution<> year_dist(1990, 2010), day_dist(1, 28), month_dist(1, 12);
std::uniform_int_distribution<> hour_dist(0, 23), min_dist(0, 59);
std::mt19937 gen;
gen.seed(42);
state->conn.Query("CREATE TABLE timestamps(d TIMESTAMP);");
Appender appender(state->conn, "timestamps");
// insert the elements into the database
for (int i = 0; i < CAST_COUNT; i++) {
appender.AppendRow(Value::TIMESTAMP(year_dist(gen), month_dist(gen), day_dist(gen), hour_dist(gen),
min_dist(gen), min_dist(gen), 0));
}
}
string GetQuery() override {
return "SELECT CAST(d AS VARCHAR) FROM timestamps";
}
string VerifyResult(QueryResult *result) override {
if (result->HasError()) {
return result->GetError();
}
return string();
}
string BenchmarkInfo() override {
return "Cast timestamp to string";
}
FINISH_BENCHMARK(CastTimestampToString)

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_constant_string_to_enum.benchmark
# description: Benchmark casting enums to varchar and the comparisons
# group: [cast]
name Cast VARCHAR -> ENUM
group cast
load
create type integer_enum as enum('0', '1', '2', '3', '4');
create table integers as select cast((i%5)::varchar as integer_enum) as enum_val, (i%7) as integer_val, (i%7)::varchar as string_val from range(100000000) t(i);
# constant string value should cast to enum now.
run
select count(*) FROM integers WHERE enum_val='1';
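
The comment above refers to the literal '1' being cast once to integer_enum, so the filter compares enums directly instead of decoding every enum_val to VARCHAR; roughly (a sketch, not part of the benchmark file):
select count(*) FROM integers WHERE enum_val = '1'::integer_enum; -- constant cast, enum-to-enum compare
-- as opposed to: ... WHERE enum_val::VARCHAR = '1'               -- would decode the enum for every row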

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_date_string.benchmark
# description: Cast date values to string
# group: [cast]
name Cast DATE -> VARCHAR
group cast
load
CREATE TABLE dates AS SELECT DATE '1992-01-01' + interval (i % 10000) days AS d FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(d AS VARCHAR)) FROM dates
result I
1992-01-01 00:00:00

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_decimal_double_1.benchmark
# description: Cast DECIMAL values to DOUBLE, range 0-1e30, converted to DECIMAL(38,8); the full precision range is used
# group: [cast]
name Cast DECIMAL -> DOUBLE
group cast
load
CREATE TABLE doubles AS SELECT (random()*1e30)::DECIMAL(38,8) AS d FROM range(0, 10000000) tbl(i);
run
SELECT SUM(CAST(d AS DOUBLE)) > 0.0 FROM doubles
result I
true

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_decimal_double_2.benchmark
# description: Cast DECIMAL values to DOUBLE, range 0-1e15, converted to DECIMAL(38,8); the top part of the range is empty
# group: [cast]
name Cast DECIMAL -> DOUBLE
group cast
load
CREATE TABLE doubles AS SELECT (random()*1e15)::DECIMAL(38,8) AS d FROM range(0, 10000000) tbl(i);
run
SELECT SUM(CAST(d AS DOUBLE)) > 0.0 FROM doubles
result I
true

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_decimal_double_3.benchmark
# description: Cast DECIMAL values to DOUBLE, range 0-1, converted to DECIMAL(38,8); the range is mostly empty
# group: [cast]
name Cast DECIMAL -> DOUBLE
group cast
load
CREATE TABLE doubles AS SELECT (random())::DECIMAL(38,8) AS d FROM range(0, 10000000) tbl(i);
run
SELECT SUM(CAST(d AS DOUBLE)) > 0.0 FROM doubles
result I
true

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_decimal_double_4.benchmark
# description: Cast DECIMAL values to DOUBLE, range 0-1e28, converted to DECIMAL(38,0)
# group: [cast]
name Cast DECIMAL -> DOUBLE
group cast
load
CREATE TABLE doubles AS SELECT (random()*1e28)::DECIMAL(38,0) AS d FROM range(0, 10000000) tbl(i);
run
SELECT SUM(CAST(d AS DOUBLE)) > 0.0 FROM doubles
result I
true

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_double_string.benchmark
# description: Cast double values to string
# group: [cast]
name Cast DOUBLE -> VARCHAR
group cast
load
CREATE TABLE doubles AS SELECT (i*0.8)::DOUBLE d FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(d AS VARCHAR)) FROM doubles
result I
0.0

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_enum_string.benchmark
# description: Benchmark casting enums to varchar and the comparisons
# group: [cast]
name Cast ENUM -> VARCHAR
group cast
load
create type integer_enum as enum('0', '1', '2', '3', '4');
create table integers as select cast((i%5)::varchar as integer_enum) as enum_val, (i%7) as integer_val, (i%7)::varchar as string_val from range(100000000) t(i);
# cast both should auto-cast to varchar
run
select count(*) FROM integers WHERE enum_val = integer_val;
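
Since there is no direct comparison between an enum and an INTEGER, both sides are implicitly cast to VARCHAR, which is what this benchmark measures; the predicate is roughly equivalent to this explicit form (editor's sketch):
select count(*) FROM integers WHERE enum_val::VARCHAR = integer_val::VARCHAR;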

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_huge_hugeint_string_benchmark.benchmark
# description: Cast huge HUGEINT values to string
# group: [cast]
name Cast HUGEINT -> VARCHAR (Big HUGEINT)
group cast
load
CREATE TABLE integers AS SELECT '1e18'::HUGEINT*i::HUGEINT*i::HUGEINT AS i FROM range(0, 10000000, 1) tbl(i);
run
SELECT MAX(i::VARCHAR) FROM integers
result I
99999980000001000000000000000000

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_huge_string_hugeint.benchmark
# description: Cast strings of huge HUGEINT values back to HUGEINT
# group: [cast]
name Cast VARCHAR -> HUGEINT (Big HUGEINT)
group cast
load
CREATE TABLE strings AS SELECT ('1e18'::HUGEINT*i::HUGEINT*i::HUGEINT)::VARCHAR AS s FROM range(0, 10000000, 1) tbl(i);
run
SELECT MAX(s::HUGEINT) FROM strings
result I
99999980000001000000000000000000

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_hugeint_string.benchmark
# description: Cast small HUGEINT values to string
# group: [cast]
name Cast HUGEINT -> VARCHAR
group cast
load
CREATE TABLE integers AS SELECT i::HUGEINT AS i FROM range(0, 10000000, 1) tbl(i);
run
SELECT MAX(CAST(i AS VARCHAR)) FROM integers
result I
9999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_int32_int64.benchmark
# description: Cast INTEGER to BIGINT
# group: [cast]
name Cast INTEGER -> BIGINT
group cast
load
CREATE TABLE integers AS SELECT i::INTEGER i FROM range(0, 50000000) tbl(i);
run
SELECT MAX(CAST(i AS BIGINT)) FROM integers
result I
49999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_int64_int32.benchmark
# description: Cast BIGINT to INTEGER
# group: [cast]
name Cast BIGINT -> INTEGER
group cast
load
CREATE TABLE integers AS SELECT i::BIGINT i FROM range(0, 50000000) tbl(i);
run
SELECT MAX(CAST(i AS INTEGER)) FROM integers
result I
49999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_int_string.benchmark
# description: Cast integer values to string
# group: [cast]
name Cast INTEGER -> VARCHAR
group cast
load
CREATE TABLE integers AS SELECT i::INTEGER i FROM range(0, 10000000) tbl(i);
run
SELECT MAX(CAST(i AS VARCHAR)) FROM integers
result I
9999999

@@ -0,0 +1,19 @@
# name: benchmark/micro/cast/cast_lineitem_json_to_variant.benchmark
# description: Cast all columns of the lineitem table sf1 as a JSON column to a VARIANT column
# group: [cast]
name Lineitem JSON To Variant
group tpch
subgroup sf1
require tpch
require json
load
CALL dbgen(sf=1);
CREATE TABLE lineitem_struct AS SELECT lineitem lineitem_struct FROM lineitem;
CREATE TABLE lineitem_json AS SELECT lineitem::JSON lineitem_json FROM lineitem;
run
SELECT lineitem_json::VARIANT FROM lineitem_json

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_list_string.benchmark
# description: Cast list values to string
# group: [cast]
name Cast LIST -> VARCHAR
group cast
load
CREATE TABLE lists AS SELECT [i, i+1, NULL, i+2] l FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(l AS VARCHAR)) FROM lists;
result I
[0, 1, NULL, 2]

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_double.benchmark
# description: Cast string values to double
# group: [cast]
name Cast VARCHAR -> DOUBLE
group cast
load
CREATE TABLE varchars AS SELECT i::VARCHAR v FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(v AS DOUBLE)) FROM varchars
result I
0

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_hugeint.benchmark
# description: Cast string values to HUGEINT
# group: [cast]
name Cast VARCHAR -> HUGEINT
group cast
load
CREATE TABLE strings AS SELECT i::VARCHAR AS s FROM range(0, 10000000, 1) tbl(i);
run
SELECT MAX(s::HUGEINT) FROM strings
result I
9999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_int.benchmark
# description: Cast string values to integers
# group: [cast]
name Cast VARCHAR -> INTEGER
group cast
load
CREATE TABLE varchars AS SELECT i::VARCHAR v FROM range(0, 10000000) tbl(i);
run
SELECT MAX(CAST(v AS INTEGER)) FROM varchars
result I
9999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_list.benchmark
# description: Cast string values to list
# group: [cast]
name Cast VARCHAR -> INT LIST
group cast
load
CREATE TABLE varchars AS SELECT [i, NULL, i+2]::VARCHAR col FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(col AS INT[])[1]) FROM varchars;
result I
0

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_map.benchmark
# description: Cast string values to map
# group: [cast]
name Cast VARCHAR -> MAP
group cast
load
CREATE TABLE intMap AS SELECT map([i], [i+1])::VARCHAR col FROM range(0, 10000000) tbl(i);
run
SELECT MAX(CAST(col AS MAP(INT, INT))) FROM intMap;
result I
{9999999=10000000}

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_struct.benchmark
# description: Cast string values to struct
# group: [cast]
name Cast VARCHAR -> STRUCT
group cast
load
CREATE TABLE structs AS SELECT {'key_A': i, 'key_B': i + 1}::VARCHAR v FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(v AS STRUCT(key_A INT, key_B INT))) FROM structs;
result I
{'key_A': 0, 'key_B': 1}

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_string_struct_missing_val.benchmark
# description: Cast string values to struct, with some missing values
# group: [cast]
name Cast VARCHAR -> STRUCT
group cast
load
CREATE TABLE structs AS SELECT {'key_C': i, 'key_A': i + 1}::VARCHAR v FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(v AS STRUCT(key_A INT, key_B INT, key_C INT))) FROM structs;
result I
{'key_A': 1, 'key_B': NULL, 'key_C': 0}

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_struct_string.benchmark
# description: Cast struct values to string
# group: [cast]
name Cast STRUCT -> VARCHAR
group cast
load
CREATE TABLE structs AS SELECT {'i': i} s FROM range(0, 100000000) tbl(i);
run
SELECT MIN(CAST(s AS VARCHAR)) FROM structs;
result I
{'i': 0}

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/cast_timestamp_string.benchmark
# description: Cast timestamp values to string
# group: [cast]
name Cast TIMESTAMP -> VARCHAR
group cast
load
CREATE TABLE timestamps AS SELECT TIMESTAMP '1992-01-01 12:00:00' + (i % 10000) * interval '1' day AS d FROM generate_series(0, 10000000, 1) tbl(i);
run
SELECT MIN(CAST(d AS STRING)) FROM timestamps;
result I
1992-01-01 12:00:00

@@ -0,0 +1,17 @@
# name: benchmark/micro/cast/cast_varcharlist_string.benchmark
# description: Cast varchar list values to string
# group: [cast]
name Cast VARCHAR[] -> VARCHAR
group cast
load
CREATE TABLE lists AS SELECT ['red', 'green', ' test ', 'blue', NULL, 'null'] l FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(l AS VARCHAR)) FROM lists;
#[red, green, test , blue, NULL, null]
result I
[red, green, ' test ', blue, NULL, 'null']

@@ -0,0 +1,18 @@
# name: benchmark/micro/cast/cast_varcharmap_string.benchmark
# description: Cast map values to string
# group: [cast]
name Cast MAP<VARCHAR,VARCHAR> -> VARCHAR
group cast
load
CREATE TABLE maps AS SELECT MAP(
['simple', 'needs space', 'has,comma', 'null', 'has:colon', 'quoted'],
['red', ' needs quotes ', 'no,escape needed', NULL, 'null', 'contains''quote']
) m FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(m AS VARCHAR)) FROM maps;
result I
{simple=red, needs space=' needs quotes ', 'has,comma'='no,escape needed', 'null'=NULL, 'has:colon'='null', quoted='contains\'quote'}

@@ -0,0 +1,22 @@
# name: benchmark/micro/cast/cast_varcharstruct_string.benchmark
# description: Cast struct values with varchar fields to string
# group: [cast]
name Cast STRUCT<VARCHAR> -> VARCHAR
group cast
load
CREATE TABLE structs AS SELECT {
'simple': 'red',
'needs_space': ' leading space ',
'null_field': NULL,
'null_text': 'null',
'special:char': 'value:with:colons',
'quoted''field': 'text with ''quotes'''
} s FROM range(0, 10000000) tbl(i);
run
SELECT MIN(CAST(s AS VARCHAR)) FROM structs;
result I
{'simple': red, 'needs_space': ' leading space ', 'null_field': NULL, 'null_text': 'null', 'special:char': 'value:with:colons', 'quoted\'field': 'text with \'quotes\''}

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/format_int_string.benchmark
# description: Use format to convert integer values to string
# group: [cast]
name Format INTEGER -> VARCHAR
group cast
load
CREATE TABLE integers AS SELECT i::INTEGER i FROM range(0, 10000000) tbl(i);
run
SELECT MAX(format('{}', i)) FROM integers
result I
9999999

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/strftime.benchmark
# description: Use strftime to convert dates to strings
# group: [cast]
name StrfTime for DATE -> STRING
group cast
load
CREATE TABLE dates AS SELECT DATE '1992-01-01' + i::INTEGER AS d FROM range(0, 10000000) tbl(i);
run
SELECT MIN(strftime(d, '%Y/%m/%d')) FROM dates
result I
10000/01/01

@@ -0,0 +1,15 @@
# name: benchmark/micro/cast/strptime.benchmark
# description: Use strptime to convert strings to dates
# group: [cast]
name StrpTime for STRING -> DATE
group cast
load
CREATE TABLE dates AS SELECT strftime(DATE '1992-01-01' + i::INTEGER, '%Y/%m/%d') AS d FROM range(0, 1000000) tbl(i);
run
SELECT MIN(strptime(d, '%Y/%m/%d')) FROM dates
result I
1992-01-01

File diff suppressed because it is too large

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/alp/alp_read.benchmark
# description: Scanning a large amount of doubles
# group: [alp]
name Alp Scan
group alp
storage persistent
require parquet
require httpfs
load
DROP TABLE IF EXISTS temperatures;
PRAGMA force_compression='alp';
CREATE TABLE temperatures (
temperature DOUBLE
);
INSERT INTO temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
checkpoint;
run
select avg(temperature) from temperatures;
result I
56.028391124637494

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_read_best_case.benchmark
# description: ALP's best case scenario is when it finds low-precision decimals within a limited absolute range
# group: [alp]
name Alp Scan
group alp
storage persistent
load
DROP TABLE IF EXISTS alp_random_doubles;
PRAGMA force_compression='alp';
create table alp_random_doubles as select round(random(), 1)::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alp_random_doubles;
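
That the forced compression actually took effect can be checked per segment with pragma_storage_info, mirroring the assert used in dictionary_read_best_case further down (a sketch, not part of the benchmark file):
select distinct compression from pragma_storage_info('alp_random_doubles')
where segment_type = 'DOUBLE';
-- expected: ALP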

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_read_worst_case.benchmark
# description: ALP's slowest scenario is when it finds high-precision decimals; here ALP achieves no compression and every value is encoded as an exception
# group: [alp]
name Alp Scan
group alp
storage persistent
load
DROP TABLE IF EXISTS alp_random_doubles;
PRAGMA force_compression='alp';
create table alp_random_doubles as select random()::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alp_random_doubles;

@@ -0,0 +1,27 @@
# name: benchmark/micro/compression/alp/alp_store.benchmark
# description: Scanning a large amount of doubles
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
require parquet
require httpfs
load
PRAGMA force_compression='uncompressed';
DROP TABLE IF EXISTS temperatures_uncompressed;
CREATE TABLE temperatures_uncompressed (
temperature DOUBLE
);
INSERT INTO temperatures_uncompressed SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
CREATE TABLE temperatures_alp (
temperature DOUBLE
);
PRAGMA force_compression='alp';
checkpoint;
run
INSERT INTO temperatures_alp SELECT * FROM temperatures_uncompressed;
checkpoint;

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_store_best_case.benchmark
# description: ALP's best case scenario is when it finds low-precision decimals within a limited absolute range.
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
load
PRAGMA force_compression='alp';
DROP TABLE IF EXISTS alp_random_doubles;
run
create table alp_random_doubles as select round(random(), 1)::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_store_worst_case.benchmark
# description: ALP's slowest scenario is when it finds high-precision decimals; here ALP achieves no compression and every value is encoded as an exception
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
load
PRAGMA force_compression='alp';
DROP TABLE IF EXISTS alp_random_doubles;
run
create table alp_random_doubles as select random()::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/alprd/alprd_read.benchmark
# description: Scanning a large amount of doubles
# group: [alprd]
name Alprd Scan
group alprd
storage persistent
require parquet
require httpfs
load
DROP TABLE IF EXISTS temperatures;
PRAGMA force_compression='alprd';
CREATE TABLE temperatures (
temperature DOUBLE
);
INSERT INTO temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
checkpoint;
run
select avg(temperature) from temperatures;
result I
56.028391124637494

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alprd/alprd_read_best_case.benchmark
# description: ALPRD best case scenario is when all the floats share their front bits
# group: [alprd]
name Alprd Scan
group alprd
storage persistent
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
create table alprd_random_doubles as select (random() + 10)::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alprd_random_doubles;

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alprd/alprd_read_worst_case.benchmark
# description: ALPRD's worst case scenario is when all the floats have unique front bits; multiplying by different powers of two ensures unique front bits
# group: [alprd]
name Alprd Scan
group alprd
storage persistent
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
create table alprd_random_doubles as select (random() * pow(2, (i % 1000)) * (CASE WHEN i%2=0 THEN 1 ELSE -1 END))::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alprd_random_doubles;

@@ -0,0 +1,27 @@
# name: benchmark/micro/compression/alprd/alprd_store.benchmark
# description: Scanning a large amount of doubles
# group: [alprd]
name Alprd Insert
group alprd
storage persistent
require_reinit
require parquet
require httpfs
load
PRAGMA force_compression='uncompressed';
DROP TABLE IF EXISTS temperatures_uncompressed;
CREATE TABLE temperatures_uncompressed (
temperature DOUBLE
);
INSERT INTO temperatures_uncompressed SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
CREATE TABLE temperatures_alprd (
temperature DOUBLE
);
PRAGMA force_compression='alprd';
checkpoint;
run
INSERT INTO temperatures_alprd SELECT * FROM temperatures_uncompressed;
checkpoint;

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/alprd/alprd_store_best_case.benchmark
# description: ALPRD best case scenario is when all the floats share their front bits.
# group: [alprd]
name Alprd Insert
group alprd
storage persistent
require_reinit
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
checkpoint;
run
create table alprd_random_doubles as select (random() + 10)::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/alprd/alprd_store_worst_case.benchmark
# description: ALPRD's worst case scenario is when all the floats have unique front bits; multiplying by different powers of two ensures unique front bits
# group: [alprd]
name Alprd Insert
group alprd
storage persistent
require_reinit
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
checkpoint;
run
create table alprd_random_doubles as select (random() * pow(2, (i % 1000)) * (CASE WHEN i%2=0 THEN 1 ELSE -1 END))::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_constant.benchmark
# description: Scanning 1GB of ints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking]
name Bitpacking Scan Constant Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
CREATE TABLE integers AS SELECT (i/119000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1049.9202

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_constant_delta.benchmark
# description: Scanning 1GB of ints compressed mostly with the CONSTANT_DELTA bitpacking mode
# group: [bitpacking]
name Bitpacking Scan Constant Delta Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant_delta';
CREATE TABLE integers AS SELECT i::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
124999999.5
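
CONSTANT_DELTA applies when a segment is an exact arithmetic sequence, as the raw range() output is, so each segment stores only a start value and a stride. The expected average is (0 + 249999999) / 2 = 124999999.5; a down-scaled check (editor's sketch):
select avg(i) from range(0, 1000) tbl(i); -- (0 + 999) / 2 = 499.5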

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_dfor.benchmark
# description: Scanning 1GB of ints compressed mostly with the Delta FOR bitpacking mode
# group: [bitpacking]
name Bitpacking Scan Delta For Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='delta_for';
CREATE TABLE integers AS SELECT (i%4000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1991999.5

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_for.benchmark
# description: Scanning 1GB of ints compressed mostly with the FOR bitpacking mode
# group: [bitpacking]
name Bitpacking Scan For Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='for';
CREATE TABLE integers AS SELECT (i%4000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1991999.5

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_constant.benchmark
# description: Storing 1GB of ints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking]
name Bitpacking Insert Constant Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i/119000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_constant_delta.benchmark
# description: Storing 1GB of ints compressed mostly with the CONSTANT DELTA bitpacking mode
# group: [bitpacking]
name Bitpacking Insert Constant Delta Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant_delta';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i%250000000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_dfor.benchmark
# description: Storing 1GB of ints compressed mostly with the DELTA FOR bitpacking mode
# group: [bitpacking]
name Bitpacking Insert Delta For Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='delta_for';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT CASE WHEN i%2=0 THEN 0 ELSE 2048 END AS i FROM range(0, 250000000) tbl(i);
checkpoint;

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_for.benchmark
# description: Storing 1GB of ints compressed mostly with the FOR bitpacking mode
# group: [bitpacking]
name Bitpacking Insert For Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='for';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i%250000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking_hugeint/bitpacking_hugeint_read_constant.benchmark
# description: Scanning hugeints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking_hugeint]
name Bitpacking Scan Constant Mode Hugeint
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
CREATE TABLE integers AS SELECT (i/119000)::HUGEINT as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1049.9202

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking_hugeint/bitpacking_hugeint_store_constant.benchmark
# description: Storing hugeints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking_hugeint]
name Bitpacking Insert Constant Mode Hugeint
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i/119000)::HUGEINT as i FROM range(0, 250000000) tbl(i);
checkpoint;

@@ -0,0 +1,19 @@
# name: benchmark/micro/compression/dictionary/dictionary_read.benchmark
# description: Scanning strings at ~4.3x compression
# group: [dictionary]
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;
result I
599.500000
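
The expected value follows from the generator: 100 + (i % 1000) cycles uniformly through 100..1099, whose mean is 599.5. A down-scaled check over complete cycles (a sketch, not part of the benchmark file):
select avg(100 + (i % 1000)) from range(0, 2000) tbl(i); -- 599.5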

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_best_case.benchmark
# description: Scanning strings at best case compression of only 2 unique values
# group: [dictionary]
name Dictionary Compression Scan
group dictionary
storage persistent v1.3.0
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT (100 + (i%2))::VARCHAR AS i FROM range(0, 200) tbl(i);
checkpoint;
assert I
select compression from pragma_storage_info('test') where segment_type in ('VARCHAR')
----
DICT_FSST
run
select avg(i::INT) from test;
result I
100.500000

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_worst_case.benchmark
# description: Scanning data that is uncompressible with dictionary encoding
# group: [dictionary]
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT i::VARCHAR AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_worst_case_with_null.benchmark
# description: Scanning data that is uncompressible with dictionary encoding
# group: [dictionary]
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT if((i % 200) = 0, NULL, i::VARCHAR) AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/dictionary/dictionary_store.benchmark
# description: Storing strings compressed at ~4.3x compression
# group: [dictionary]
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dict_fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 100_000_000) tbl(i);
checkpoint;
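
The ~4.3x figure these dictionary benchmarks refer to can be eyeballed by checking the database footprint and the per-column segment mix after the checkpoint. A rough sketch, assuming a persistent database file:

PRAGMA database_size;
SELECT compression, count(*) AS segments
FROM pragma_storage_info('test')
GROUP BY compression;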

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_store_worst_case.benchmark
# description: Storing a column containing only unique strings.
# group: [dictionary]
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dict_fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT i::VARCHAR AS i FROM range(0, 50_000_000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_store_worst_case_with_null.benchmark
# description: Storing a column containing only unique strings and interspersed NULLs.
# group: [dictionary]
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dictionary';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT if((i % 200) = 0, NULL, i::VARCHAR) AS i FROM range(0, 50_000_000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/fsst/fsst_late_decompression.benchmark
# description: Using a filter on another column to make use of late decompression
# group: [fsst]
name fsst late decompression benefit
group fsst
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT i as id, (100 + (i%2))::VARCHAR AS value FROM range(0, 50000000) tbl(i);
checkpoint;
SET enable_fsst_vectors=false;
run
select avg(value::INT) from test where id%10=0;
result I
100.500000
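
The run above pins enable_fsst_vectors=false, so strings are decompressed as they are scanned; the benefit being measured is that the filter on the uncompressed id column is applied first, and only the ~10% of rows that survive it ever pay for string decompression. A hedged comparison sketch (same query, with compressed FSST vectors flowing through the pipeline instead):

SET enable_fsst_vectors=true;
SELECT avg(value::INT) FROM test WHERE id%10=0;
SET enable_fsst_vectors=false; -- restore the benchmark's configuration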

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/compression/fsst/fsst_read.benchmark
# description: Scanning strings at ~3.35x compression
# group: [fsst]
name fsst Compression Scan
group fsst
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 50000000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;
result I
599.500000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_read_worst_case.benchmark
# description: Scanning data that is not compressible with FSST encoding; note that the compression ratio is still ~1.9x due to bitpacking
# group: [fsst]
name fsst Compression Scan
group fsst
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT gen_random_uuid()::VARCHAR AS i FROM range(0, 20000000) tbl(i);
checkpoint;
run
select max(i[2]) from test;
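
The max(i[2]) projection is just a cheap way to touch every string: bracket indexing on a VARCHAR returns the character at a 1-based position, so each value must be materialized without the cost of a full cast. For illustration:

SELECT 'abcdef'[2]; -- 'b'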

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_store.benchmark
# description: Storing strings compressed at ~3.3x compression
# group: [fsst]
name fsst Compression Write
group fsst
storage persistent
require_reinit
load
PRAGMA force_compression='fsst';
run
CREATE TABLE test_compressed AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 2500000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_store_worst_case.benchmark
# description: Storing a column containing only unique strings.
# group: [fsst]
name fsst Compression Write
group fsst
storage persistent
require_reinit
load
PRAGMA force_compression='fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT gen_random_uuid()::VARCHAR AS i FROM range(0, 2000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/fsst/select_fsst_selective.benchmark
# description: FSST String selective filter
# group: [fsst]
name Select FSST String Columns (0.1% selectivity)
group fsst
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION FSST, str_val_nulls VARCHAR USING COMPRESSION FSST);
INSERT INTO string_values SELECT i % 1000, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=77;
result IIII
100000 1631748 1087835 66667
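
Because filter_val is stored uncompressed and the predicate keeps only 0.1% of rows, the FSST columns should only be materialized for matching positions. Whether the filter is actually pushed into the scan can be checked from the plan; a small sketch:

EXPLAIN SELECT COUNT(*), SUM(strlen(str_val))
FROM string_values WHERE filter_val=77;
-- look for the filter_val predicate listed inside the SEQ_SCAN node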

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/fsst/select_fsst_unselective.benchmark
# description: FSST String unselective filter
# group: [fsst]
name Select FSST String Columns (33% selectivity)
group fsst
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION FSST, str_val_nulls VARCHAR USING COMPRESSION FSST);
INSERT INTO string_values SELECT i % 3, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=1;
result IIII
33333333 543915346 543915346 33333333

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/rle/select_rle_selective.benchmark
# description: RLE selective filter
# group: [rle]
name Select RLE Columns (0.1% selectivity)
group rle
storage persistent
load
CREATE TABLE rle_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, rle_val INTEGER USING COMPRESSION RLE, rle_val_nulls INTEGER USING COMPRESSION RLE);
INSERT INTO rle_values SELECT i % 1000, i // 10 rle_val, case when i%9=0 then null else i // 10 end rle_val_null FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(rle_val), SUM(rle_val_nulls), COUNT(rle_val_nulls) FROM rle_values WHERE filter_val=77;
result IIII
100000 499995700000 444441733323 88889
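
The i // 10 expression gives every rle_val a run length of exactly 10, while the i%9 pattern punches NULLs into the validity mask at a different period, so the two RLE columns stress different run shapes. A scaled-down look at the run structure:

SELECT rle_val, count(*) AS run_length
FROM (SELECT i // 10 AS rle_val FROM range(0, 100) t(i))
GROUP BY rle_val
ORDER BY rle_val;
-- each value appears 10 times consecutively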

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/rle/select_rle_unselective.benchmark
# description: RLE unselective filter
# group: [rle]
name Select RLE Columns (33% selectivity)
group rle
storage persistent
load
CREATE TABLE rle_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, rle_val INTEGER USING COMPRESSION RLE, rle_val_nulls INTEGER USING COMPRESSION RLE);
INSERT INTO rle_values SELECT i % 3, i // 10 rle_val, case when i%9=0 then null else i // 10 end rle_val_null FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(rle_val), SUM(rle_val_nulls), COUNT(rle_val_nulls) FROM rle_values WHERE filter_val=1;
result IIII
33333333 166666646666667 166666646666667 33333333

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_array_read.benchmark
# description: Scanning 1GB of mostly-NULL ints whose validity mask is compressed with Roaring array containers
# group: [roaring]
name Roaring Scan Array Container
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i%25=0 then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
10000000
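
With one non-NULL value per 25 rows, the validity mask is only ~4% dense, which (in classic Roaring terms) is the regime where array containers, storing the positions of set bits, win over bitmap containers. The density behind that choice can be checked directly against the loaded table:

SELECT count(a) AS non_null, count(*) AS total,
       count(a) / count(*)::DOUBLE AS density
FROM tbl;
-- expected: 10000000, 250000000, 0.04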

Some files were not shown because too many files have changed in this diff.