external/duckdb/test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow (vendored, new file)
@@ -0,0 +1,182 @@
# name: test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow
# description: Test batch CSV write with mixed batches
# group: [batched_write]

require parquet

statement ok
PRAGMA enable_verification

statement ok
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)

statement ok
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)

statement ok
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)

statement ok
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)

# create views that read the batches
statement ok
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])

statement ok
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;

statement ok
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);

# empty table
statement ok
CREATE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
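
# quick arithmetic on the views: v2 keeps the 50 even-indexed blocks of 10000 rows
# (500000 rows); v3 adds the odd-indexed blocks inside (200000, 400000) and
# (600000, 800000) for 20 more blocks (700000 rows); v4 keeps even values with
# 998 < i < 1000, and the only candidate, i=999, is odd, so v4 is empty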

loop i 0 2

query I
COPY v1 TO '__TEST_DIR__/mixed_batches_v1.csv' (HEADER)
----
1000000

query I
CREATE TABLE mixed_batches_v1 AS FROM '__TEST_DIR__/mixed_batches_v1.csv'
----
1000000

foreach table v1 mixed_batches_v1

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
499999500000 0 999999 1000000 1000000
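
# the expected SUM checks out: 0 + 1 + ... + 999999 = 999999 * 1000000 / 2 = 499999500000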

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002

endloop

# now do the same, but filter out half of the values
query I
COPY v2 TO '__TEST_DIR__/mixed_batches_v2.csv' (HEADER)
----
500000

query I
CREATE TABLE mixed_batches_v2 AS FROM '__TEST_DIR__/mixed_batches_v2.csv'
----
500000

foreach table v2 mixed_batches_v2

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
247499750000 0 989999 500000 500000
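
# the expected SUM checks out: each kept block k contributes 10^8 * k + 49995000,
# and summing over even k in 0..98 gives 2450 * 10^8 + 50 * 49995000 = 247499750000;
# the last even block starts at 980000, hence MAX = 989999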

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
189998
189999
200000
200001
200002

endloop

# do it again, but this time only filter out SOME small batches
query I
COPY v3 TO '__TEST_DIR__/mixed_batches_v3.csv' (HEADER)
----
700000

query I
CREATE TABLE mixed_batches_v3 AS FROM '__TEST_DIR__/mixed_batches_v3.csv'
----
700000

foreach table v3 mixed_batches_v3

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
348499650000 0 989999 700000 700000
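
# the expected SUM checks out: v2's total plus the 20 extra odd blocks (k in 21..39
# and 61..79, which sum to 1000): 247499750000 + 1000 * 10^8 + 20 * 49995000 = 348499650000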

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 9999
----
9999
20000
20001
20002
20003

endloop

# now with an empty table
query I
COPY v4 TO '__TEST_DIR__/mixed_batches_v4.csv' (HEADER)
----
0

query I
CREATE TABLE mixed_batches_v4 AS SELECT i::BIGINT as i FROM read_csv_auto('__TEST_DIR__/mixed_batches_v4.csv')
----
0

foreach table v4 mixed_batches_v4

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
NULL NULL NULL 0 0
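
# over zero input rows, SUM/MIN/MAX return NULL while COUNT returns 0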

query I
SELECT * FROM ${table} LIMIT 5
----

endloop

statement ok
DROP TABLE mixed_batches_v1

statement ok
DROP TABLE mixed_batches_v2

statement ok
DROP TABLE mixed_batches_v3

statement ok
DROP TABLE mixed_batches_v4

statement ok
drop view if exists v2;

statement ok
drop view if exists v3;

statement ok
drop view if exists v4;

# create views that read the batches using unions
statement ok
CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet'
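
# same rows as before, but now produced by four scans glued together with UNION ALL,
# so the writer sees a different batch pattern on the second loop iteration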

statement ok
CREATE OR REPLACE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;

statement ok
CREATE OR REPLACE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);

statement ok
CREATE OR REPLACE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0

endloop

external/duckdb/test/sql/copy/csv/batched_write/batch_csv_write.test_slow (vendored, new file)
@@ -0,0 +1,89 @@
# name: test/sql/copy/csv/batched_write/batch_csv_write.test_slow
# description: Batched copy to file
# group: [batched_write]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE integers AS SELECT i, i // 5 AS j FROM range(1000000) t(i);

statement ok
COPY integers TO '__TEST_DIR__/batched_integers.csv' (HEADER);

statement ok
CREATE TABLE integers_copied AS FROM '__TEST_DIR__/batched_integers.csv'

query IIIII
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers_copied
----
499999500000 99999500000 1000000 1000000 1000000
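
# SUM(j) checks out: j = i // 5 takes each value k in 0..199999 exactly five times,
# so SUM(j) = 5 * (199999 * 200000 / 2) = 99999500000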

query II
SELECT * FROM integers_copied LIMIT 5
----
0 0
1 0
2 0
3 0
4 0

query II
SELECT * FROM integers_copied LIMIT 5 OFFSET 99997
----
99997 19999
99998 19999
99999 19999
100000 20000
100001 20000

query II
SELECT * FROM integers_copied QUALIFY i<=lag(i) over ()
----
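
# the QUALIFY query returns every row whose i is not larger than the previous row's i;
# an empty result confirms the copied rows came back in their original ascending order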

# now with filters
statement ok
CREATE VIEW v1 AS SELECT * FROM integers WHERE (i%2=0 AND i<300000) OR (i BETWEEN 500000 AND 700000)

statement ok
COPY v1 TO '__TEST_DIR__/batched_integers_filters.csv' (HEADER);

statement ok
CREATE TABLE integers_filtered AS FROM '__TEST_DIR__/batched_integers_filters.csv'


foreach table v1 integers_filtered

query IIIII
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM ${table}
----
142500450000 28499950000 350001 350001 350001
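
# the row count checks out: 150000 even values below 300000 plus the 200001 values
# in [500000, 700000] = 350001 rows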

query II
SELECT * FROM ${table} LIMIT 5
----
0 0
2 0
4 0
6 1
8 1

query II
SELECT * FROM ${table} LIMIT 5 OFFSET 99997
----
199994 39998
199996 39999
199998 39999
200000 40000
200002 40000

query II
SELECT * FROM ${table} LIMIT 5 OFFSET 300000
----
650000 130000
650001 130000
650002 130000
650003 130000
650004 130000

endloop

external/duckdb/test/sql/copy/csv/batched_write/batch_json_mixed_batches.test_slow (vendored, new file)
@@ -0,0 +1,161 @@
# name: test/sql/copy/csv/batched_write/batch_json_mixed_batches.test_slow
# description: Test batch JSON write with mixed batches
# group: [batched_write]

require parquet

require json

statement ok
PRAGMA enable_verification

statement ok
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)

statement ok
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)

statement ok
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)

statement ok
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)

# create views that read the batches
statement ok
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])

statement ok
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;

statement ok
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);

# empty table
statement ok
CREATE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0

foreach ARRAY_SETTING TRUE FALSE
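
# cover both JSON output shapes: ARRAY TRUE writes a single JSON array of records,
# ARRAY FALSE writes newline-delimited JSON (one record per line)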

query I
COPY v1 TO '__TEST_DIR__/mixed_batches_v1.json' (ARRAY ${ARRAY_SETTING})
----
1000000

query I
CREATE TABLE mixed_batches_v1 AS FROM '__TEST_DIR__/mixed_batches_v1.json'
----
1000000

foreach table v1 mixed_batches_v1

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
499999500000 0 999999 1000000 1000000

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002

endloop

# now do the same, but filter out half of the values
query I
COPY v2 TO '__TEST_DIR__/mixed_batches_v2.json' (ARRAY ${ARRAY_SETTING})
----
500000

query I
CREATE TABLE mixed_batches_v2 AS FROM '__TEST_DIR__/mixed_batches_v2.json'
----
500000

foreach table v2 mixed_batches_v2

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
247499750000 0 989999 500000 500000

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
189998
189999
200000
200001
200002

endloop

# do it again, but this time only filter out SOME small batches
query I
COPY v3 TO '__TEST_DIR__/mixed_batches_v3.json' (ARRAY ${ARRAY_SETTING})
----
700000

query I
CREATE TABLE mixed_batches_v3 AS FROM '__TEST_DIR__/mixed_batches_v3.json'
----
700000

foreach table v3 mixed_batches_v3

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
348499650000 0 989999 700000 700000

query I
SELECT * FROM ${table} LIMIT 5 OFFSET 9999
----
9999
20000
20001
20002
20003

endloop

query I
COPY v4 TO '__TEST_DIR__/mixed_batches_v4.json' (ARRAY ${ARRAY_SETTING})
----
0

query I
CREATE TABLE mixed_batches_v4 AS SELECT i::BIGINT as i FROM '__TEST_DIR__/mixed_batches_v4.json' t(i)
----
0
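
# the explicit i::BIGINT cast pins the column type: with zero rows in the file there
# is presumably nothing for the JSON reader to infer a schema from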

foreach table v4 mixed_batches_v4

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
NULL NULL NULL 0 0

query I
SELECT * FROM ${table} LIMIT 5
----

endloop

statement ok
DROP TABLE mixed_batches_v1

statement ok
DROP TABLE mixed_batches_v2

statement ok
DROP TABLE mixed_batches_v3

statement ok
DROP TABLE mixed_batches_v4

endloop

external/duckdb/test/sql/copy/csv/batched_write/csv_write_memory_limit.test_slow (vendored, new file)
@@ -0,0 +1,32 @@
# name: test/sql/copy/csv/batched_write/csv_write_memory_limit.test_slow
# description: Verify data is streamed and memory limit is not exceeded in CSV write
# group: [batched_write]

require parquet

require 64bit

statement ok
PRAGMA enable_verification

# 100M rows, 2 BIGINT columns = 1.6GB uncompressed
statement ok
COPY (SELECT i, i // 5 AS j FROM range(100000000) t(i)) TO '__TEST_DIR__/large_integers.parquet'

# set a memory limit of 300MB
statement ok
SET memory_limit='300MB'
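
# the size claim checks out: 100000000 rows * 2 columns * 8 bytes = 1.6GB, more than
# five times the 300MB limit, so the write below can only succeed by streaming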

# stream from the parquet file to a CSV file
query I
COPY '__TEST_DIR__/large_integers.parquet' TO '__TEST_DIR__/large_integers.csv'
----
100000000

# verify that the file is correctly written
statement ok
SET memory_limit='-1'

query II
SELECT * FROM '__TEST_DIR__/large_integers.parquet' EXCEPT FROM '__TEST_DIR__/large_integers.csv'
----
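
# an empty EXCEPT result means every distinct row of the parquet file also appears
# in the CSV; combined with the row count returned by the COPY above, the two files
# hold the same data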