should be it
This commit is contained in:
235
external/duckdb/test/sql/sample/test_sample.test_slow
vendored
Normal file
235
external/duckdb/test/sql/sample/test_sample.test_slow
vendored
Normal file
@@ -0,0 +1,235 @@
# name: test/sql/sample/test_sample.test_slow
# description: Test SAMPLE keyword
# group: [sample]

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE test (a INTEGER, b INTEGER);

statement ok
INSERT INTO test VALUES (11, 22), (12, 21), (13, 22)

# test various sample sizes (in rows) using count
query I
SELECT COUNT(*) FROM test USING SAMPLE 0
----
0

query I
SELECT COUNT(*) FROM test USING SAMPLE 1
----
1

query I
SELECT COUNT(*) FROM test USING SAMPLE 1 ROWS
----
1

query I
SELECT COUNT(*) FROM test USING SAMPLE 3
----
3

# sample size exceeds input: all 3 rows of the table are returned
query I
SELECT COUNT(*) FROM test USING SAMPLE 10
----
3

# specify the sampling method explicitly
query I
SELECT COUNT(*) FROM test USING SAMPLE 3 (reservoir)
----
3

# specify the sampling method and a seed
query I
SELECT COUNT(*) FROM test USING SAMPLE 3 (reservoir, 3)
----
3

# sampling more rows than exist returns the table contents unchanged
query II
SELECT * FROM test USING SAMPLE 10 ORDER BY a, b
----
11	22
12	21
13	22

# sample on a larger data set
query I
SELECT COUNT(*) FROM range(10000) USING SAMPLE 5
----
5

# sample on a large data set over RESERVOIR_THRESHOLD = 100000
query I
SELECT COUNT(*) FROM range(2000000) USING SAMPLE 1000100
----
1000100

# small sample taken from the same large input
query I
SELECT COUNT(*) FROM range(2000000) USING SAMPLE 2
----
2

# test sample with multiple columns
# every row holds the same values, so any sample yields the same output
statement ok
CREATE TABLE test2 AS SELECT i a, i::VARCHAR b, CONCAT(i, ' - ', i) c FROM repeat(1, 1000) tbl(i)

query III
SELECT a, b, c FROM test2 USING SAMPLE 3;
----
1	1	1 - 1
1	1	1 - 1
1	1	1 - 1

# sample in scalar subqueries
query I
SELECT (SELECT COUNT(*) FROM test USING SAMPLE 1);
----
1

# sample in a correlated scalar subquery
query I
SELECT (SELECT COUNT(*) + tbl.i FROM test USING SAMPLE 1) FROM range(3) tbl(i) ORDER BY i;
----
1
2
3

# negative sample size not allowed
statement error
SELECT COUNT(*) FROM test USING SAMPLE -1
----

# sample size must be a number
statement error
SELECT COUNT(*) FROM test USING SAMPLE 'hello'
----

statement error
SELECT COUNT(*) FROM test USING SAMPLE DATE '1992-01-01'
----

# we can also use postgres/sqlserver-style tablesample syntax
statement ok
create table integers as select i from range(200) tbl(i);

# default is sample_size, which follows postgres syntax rules
# (a bare number is taken as a row count: 10 -> 10 rows)
query I
select count(*) from integers tablesample reservoir(10);
----
10

# 10% of the 200 rows in integers
query I
select count(*) from integers tablesample reservoir(10%);
----
20

query I
select count(*) from integers tablesample reservoir(10 percent);
----
20

query I
select count(*) from integers tablesample reservoir(10 rows);
----
10

# we can also omit the method name and use the default sampling method
query I
select count(*) from integers tablesample(10 rows);
----
10

# the "size (method, seed)" form accepted by USING SAMPLE works after tablesample as well
query I
select count(*) from integers tablesample 10;
----
10

query I
select count(*) from integers tablesample 10 rows (reservoir);
----
10

query I
select count(*) from integers tablesample 10 rows (reservoir, 250);
----
10

# we can also use this with table-producing functions
query I
select count(*) from range(200) tablesample reservoir(10%);
----
20

# and subqueries
query I
select count(*) from (select * from range(200)) tbl(i) tablesample reservoir(10%);
----
20

# specifying a seed leads to repeatable behavior
# each labelled query is re-run on every iteration; results sharing a label must match
loop i 0 10

query I nosort reservoirseed
select * from range(100) tablesample reservoir(10 rows) repeatable(250)
----

query I nosort bernoulliseed
select * from range(100) tablesample bernoulli(10%) repeatable(250)
----

query I nosort systemseed
select * from range(100) tablesample system(10%) repeatable(250)
----

endloop

# fractional percentages: 0.01% of 1000 rows yields 0 rows, 0.1% yields 1 row
query I
select count(*) from range(1000) using sample reservoir(0.01%);
----
0

query I
select count(*) from range(1000) using sample reservoir(0.1%);
----
1

# cannot use bernoulli or system sampling with X number of rows
statement error
select * from integers using sample bernoulli(5 rows);
----

statement error
select * from integers using sample system(5 rows);
----

# sample_size is out of range
statement error
select * from integers using sample 10000%;
----

# the same seeded query, run twice under one label, must return the same result
query I noresult repeatable_seed_0
select i from integers using sample (1 rows) repeatable (0);
----
96

query I noresult repeatable_seed_0
select i from integers using sample (1 rows) repeatable (0);
----

# same check for a seeded percentage sample
query I noresult repeatable_seed_1
select i from integers using sample reservoir(1%) repeatable (0) order by i;
----

query I noresult repeatable_seed_1
select i from integers using sample reservoir(1%) repeatable (0) order by i;
----
58
127
Reference in New Issue
Block a user