# (file-viewer metadata, not part of the test: 72 lines, 2.0 KiB, SQL)
# name: test/sql/copy/per_thread_output.test
# description: test PER_THREAD_OUTPUT parameter for COPY
# group: [copy]

require parquet

statement ok
PRAGMA verify_parallelism;

statement ok
PRAGMA threads=4;

# source table: 10000 rows, two identical integer columns
statement ok
CREATE TABLE bigdata AS SELECT i AS col_a, i AS col_b FROM range(0,10000) tbl(i);

# Parquet: write the table twice over (2 x 10000 rows) into a target directory
statement ok
COPY (FROM bigdata UNION ALL FROM bigdata) TO '__TEST_DIR__/per_thread_output' (FORMAT PARQUET, PER_THREAD_OUTPUT TRUE);

# all rows must be readable back across the written files
query I
SELECT COUNT(*) FROM PARQUET_SCAN('__TEST_DIR__/per_thread_output/*.parquet');
----
20000

# we now lazily create the files to prevent empty files from being created
# despite setting 4 threads, this may result in less than 4 files
# instead of checking we have file 0, 1, 2, and 3 like we did previously,
# we just check if we created more than one file that matches the glob pattern
query I
SELECT COUNT(*) > 1 f FROM GLOB('__TEST_DIR__/per_thread_output/data_*.parquet') ORDER BY f;
----
true

# CSV also works
statement ok
COPY (FROM bigdata UNION ALL FROM bigdata) TO '__TEST_DIR__/per_thread_output_csv' (FORMAT CSV, PER_THREAD_OUTPUT TRUE);

query I
SELECT COUNT(*) FROM read_csv('__TEST_DIR__/per_thread_output_csv/*.csv', columns={'col_a': 'INT', 'col_b' : 'INT'});
----
20000

# same multi-file check as for Parquet: at least two data_*.csv files
query I
SELECT COUNT(*) >= 2 f FROM GLOB('__TEST_DIR__/per_thread_output_csv/data_*.csv') ORDER BY f;
----
true

# with a trailing slash
statement ok
COPY (FROM bigdata) TO '__TEST_DIR__/per_thread_output2/' (FORMAT PARQUET, PER_THREAD_OUTPUT TRUE);

query I
SELECT COUNT(*) FROM PARQUET_SCAN('__TEST_DIR__/per_thread_output2/*.parquet');
----
10000

# repeating the same COPY into the directory written above is expected to fail
statement error
COPY (FROM bigdata) TO '__TEST_DIR__/per_thread_output2/' (FORMAT PARQUET, PER_THREAD_OUTPUT TRUE);
----
IO Error

# we have not added anything
query I
SELECT COUNT(*) FROM PARQUET_SCAN('__TEST_DIR__/per_thread_output2/*.parquet');
----
10000

# combining PER_THREAD_OUTPUT with USE_TMP_FILE is expected to be rejected
statement error
COPY (FROM bigdata) TO '__TEST_DIR__/per_thread_output3' (FORMAT PARQUET, PER_THREAD_OUTPUT TRUE, USE_TMP_FILE TRUE);
----
Not implemented Error