Files
email-tracker/external/duckdb/test/sql/sample/test_sample.test_slow
2025-10-24 19:21:19 -05:00

236 lines
4.3 KiB
Plaintext

# name: test/sql/sample/test_sample.test_slow
# description: Test SAMPLE keyword
# group: [sample]

# Records in this file are separated by blank lines; a record's SQL or
# expected-result section runs until the next blank line.

# Turn on the enable_verification pragma before any sampling query runs.
statement ok
PRAGMA enable_verification;

# Fixture: a two-column table with three rows, used by the row-count checks.
statement ok
CREATE TABLE test (a INTEGER, b INTEGER);

statement ok
INSERT INTO test VALUES (11, 22), (12, 21), (13, 22)

# test various limits using count
# `test` holds three rows, so COUNT(*) shows how many rows the sample kept.

query I
SELECT COUNT(*) FROM test USING SAMPLE 0
----
0

# a bare number and the explicit ROWS form both request that many rows
query I
SELECT COUNT(*) FROM test USING SAMPLE 1
----
1

query I
SELECT COUNT(*) FROM test USING SAMPLE 1 ROWS
----
1

query I
SELECT COUNT(*) FROM test USING SAMPLE 3
----
3

# sample size exceeds input: all three rows are returned
query I
SELECT COUNT(*) FROM test USING SAMPLE 10
----
3

# specify the sampling method explicitly
query I
SELECT COUNT(*) FROM test USING SAMPLE 3 (reservoir)
----
3

# specify the sampling method together with a seed
query I
SELECT COUNT(*) FROM test USING SAMPLE 3 (reservoir, 3)
----
3

# The sample is larger than the table, so every row comes back; ORDER BY pins
# the output order. Expected columns are tab-separated.
query II
SELECT * FROM test USING SAMPLE 10 ORDER BY a, b
----
11	22
12	21
13	22

# sample on a larger data set
query I
SELECT COUNT(*) FROM range(10000) USING SAMPLE 5
----
5

# sample on a large data set over RESERVOIR_THRESHOLD = 100000
# first with a sample size above that threshold ...
query I
SELECT COUNT(*) FROM range(2000000) USING SAMPLE 1000100
----
1000100

# ... then with a tiny sample size on the same 2,000,000-row input
query I
SELECT COUNT(*) FROM range(2000000) USING SAMPLE 2
----
2

# test sample with multiple columns
# we insert the same data in the entire column: all 1000 rows are identical,
# so the expected output does not depend on which rows the sample picks
statement ok
CREATE TABLE test2 AS SELECT i a, i::VARCHAR b, CONCAT(i, ' - ', i) c FROM repeat(1, 1000) tbl(i)

# Expected columns are tab-separated; the third value ("1 - 1") itself
# contains spaces.
query III
SELECT a, b, c FROM test2 USING SAMPLE 3;
----
1	1	1 - 1
1	1	1 - 1
1	1	1 - 1

# sample in scalar subqueries
query I
SELECT (SELECT COUNT(*) FROM test USING SAMPLE 1);
----
1

# correlated variant: the one-row sample count (1) is added to each outer
# value of tbl.i (0, 1, 2)
query I
SELECT (SELECT COUNT(*) + tbl.i FROM test USING SAMPLE 1) FROM range(3) tbl(i) ORDER BY i;
----
1
2
3

# NOTE(review): none of the `statement error` records below pins an expected
# error message after `----`; consider adding the message text once confirmed.

# negative sample size not allowed
statement error
SELECT COUNT(*) FROM test USING SAMPLE -1
----

# must be a number: a string literal is rejected
statement error
SELECT COUNT(*) FROM test USING SAMPLE 'hello'
----

# must be a number: a DATE literal is rejected
statement error
SELECT COUNT(*) FROM test USING SAMPLE DATE '1992-01-01'
----

# we can also use postgres/sqlserver-style tablesample syntax
# Fixture: 200 rows, so 10% of the input is 20 rows.
statement ok
create table integers as select i from range(200) tbl(i);

# with no unit, the argument is a number of rows (10 of the 200 rows)
query I
select count(*) from integers tablesample reservoir(10);
----
10

# '%' or 'percent' turns the argument into a percentage of the input
query I
select count(*) from integers tablesample reservoir(10%);
----
20

query I
select count(*) from integers tablesample reservoir(10 percent);
----
20

# an explicit 'rows' unit behaves like the unit-less form
query I
select count(*) from integers tablesample reservoir(10 rows);
----
10

# we can also use the default sampling method
query I
select count(*) from integers tablesample(10 rows);
----
10

# we can use our sampling syntax here as well
query I
select count(*) from integers tablesample 10;
----
10

query I
select count(*) from integers tablesample 10 rows (reservoir);
----
10

query I
select count(*) from integers tablesample 10 rows (reservoir, 250);
----
10

# we can also use this with table-producing functions
query I
select count(*) from range(200) tablesample reservoir(10%);
----
20

# and subqueries
query I
select count(*) from (select * from range(200)) tbl(i) tablesample reservoir(10%);
----
20

# specifying a seed leads to repeatable behavior
# Each query carries a label and no expected values: queries that share a
# label are checked against each other, so every loop iteration has to
# return the same rows for the same seed (250).
loop i 0 10

query I nosort reservoirseed
select * from range(100) tablesample reservoir(10 rows) repeatable(250)
----

query I nosort bernoulliseed
select * from range(100) tablesample bernoulli(10%) repeatable(250)
----

query I nosort systemseed
select * from range(100) tablesample system(10%) repeatable(250)
----

endloop

# fractional percentages on a 1000-row input: 0.01% is expected to keep no
# rows, 0.1% exactly one row
query I
select count(*) from range(1000) using sample reservoir(0.01%);
----
0

query I
select count(*) from range(1000) using sample reservoir(0.1%);
----
1

# cannot use bernoulli or system sampling with X number of rows
statement error
select * from integers using sample bernoulli(5 rows);
----

statement error
select * from integers using sample system(5 rows);
----

# sample_size is out of range
statement error
select * from integers using sample 10000%;
----

# REPEATABLE (0) on the 200-row `integers` table: the two queries under each
# label run the same statement with the same seed.
# NOTE(review): within each label only one of the two queries lists expected
# values (the first for repeatable_seed_0, the second for repeatable_seed_1);
# confirm this asymmetry is intentional.

query I noresult repeatable_seed_0
select i from integers using sample (1 rows) repeatable (0);
----
96

query I noresult repeatable_seed_0
select i from integers using sample (1 rows) repeatable (0);
----

query I noresult repeatable_seed_1
select i from integers using sample reservoir(1%) repeatable (0) order by i;
----

query I noresult repeatable_seed_1
select i from integers using sample reservoir(1%) repeatable (0) order by i;
----
58
127