# name: test/sql/copy/format_uuid.test
# description: FILENAME_PATTERN placeholders ({i}, {uuid}, {uuidv7}) for partitioned and per-thread COPY ... TO
# group: [copy]

require parquet

require vector_size 512

statement ok
PRAGMA verify_parallelism;

statement ok
PRAGMA threads=4;

# Source tables: testN holds ten rows with b = i*N and c = i^N, so the values
# read back from a partition reveal which table a file was written from.
statement ok
CREATE TABLE test2 as
SELECT i as a, (i*2) as b, power(i,2) as c
from range(0,10) tbl(i);

statement ok
CREATE TABLE test3 as
SELECT i as a, (i*3) as b, power(i,3) as c
from range(0,10) tbl(i);

statement ok
CREATE TABLE test4 as
SELECT i as a, (i*4) as b, power(i,4) as c
from range(0,10) tbl(i);

statement ok
CREATE TABLE test5 as
SELECT i as a, (i*5) as b, power(i,5) as c
from range(0,10) tbl(i);

# Larger table (10000 rows), used only by the PER_THREAD_OUTPUT test.
statement ok
CREATE TABLE testpto as
SELECT i as a, (i*10) as b, (i*100) as c
from range(0,10000) tbl(i);

# Sanity check of the generated data before anything is written.
query III
SELECT * FROM test2;
----
0	0	0
1	2	1
2	4	4
3	6	9
4	8	16
5	10	25
6	12	36
7	14	49
8	16	64
9	18	81

# "{uuid}" at the end of the pattern: the placeholder is replaced with a unique string.
statement ok
COPY test2 TO '__TEST_DIR__/part' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    FILENAME_PATTERN "leading_{uuid}"
);

# The glob only matches a UUID-shaped name whose third group starts with 4.
# The partition column a comes back as the last column.
query III
SELECT * FROM '__TEST_DIR__/part/a=9/leading_????????-????-4???-????-????????????.parquet';
----
18	81	9

# "{uuid}" at the start of the pattern.
statement ok
COPY test2 TO '__TEST_DIR__/trailing' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    FILENAME_PATTERN "{uuid}_trailing"
);

# Again the file name must contain a UUID-shaped part.
query III
SELECT * FROM '__TEST_DIR__/trailing/a=9/????????-????-4???-????-????????????_trailing.parquet';
----
18	81	9

# Pattern made of "{i}" only: the glob expects a one-character base name.
statement ok
COPY test2 TO '__TEST_DIR__/only_offset' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    FILENAME_PATTERN "{i}"
);

query III
SELECT * FROM '__TEST_DIR__/only_offset/a=9/?.parquet';
----
18	81	9

# Append workflow: write into the already populated partition directories,
# using overwrite_or_ignore and a pattern that yields a different file name.
statement ok
COPY test3 TO '__TEST_DIR__/part' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    overwrite_or_ignore TRUE,
    FILENAME_PATTERN "leading_{uuid}_trailing"
);

# Only the new file carries the "_trailing" suffix.
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/leading_????????-????-4???-????-????????????_trailing.parquet';
----
27	729	9

# Without the suffix in the glob, both the test2 and the test3 file match.
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/leading_????????-????-4???-????-????????????*.parquet';
----
18	81	9
27	729	9

# No FILENAME_PATTERN given at all.
statement ok
COPY test4 TO '__TEST_DIR__/part' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    overwrite_or_ignore TRUE
);

# The default file name is "data_{i}".
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/data_[0-9]*.parquet';
----
36	6561	9

# The partition directory now holds the files written from test2, test3 and test4.
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/*.parquet';
----
18	81	9
27	729	9
36	6561	9

# FILENAME_PATTERN that contains neither "{i}" nor "{uuid}".
statement ok
COPY test5 TO '__TEST_DIR__/part' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    overwrite_or_ignore TRUE,
    FILENAME_PATTERN "basename"
);

# The file is expected under the given name followed by at least one digit.
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/basename[0-9]*.parquet';
----
45	59049	9

# The partition directory holds the files written from test2 through test5.
query III sort
SELECT * FROM '__TEST_DIR__/part/a=9/*.parquet';
----
18	81	9
27	729	9
36	6561	9
45	59049	9

# Writing into the existing directory without overwrite_or_ignore must fail.
statement error
COPY test5 TO '__TEST_DIR__/part' (
    FORMAT PARQUET,
    PARTITION_BY (a)
);
----
Directory

# FILENAME_PATTERN without a value is rejected.
statement error
COPY test4 TO '__TEST_DIR__/to_be_overwritten' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    FILENAME_PATTERN
);
----
Copy option "FILENAME_PATTERN" requires an argument of type VARCHAR

# An unquoted value is accepted as the pattern.
statement ok
COPY test4 TO '__TEST_DIR__/to_be_overwritten' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    FILENAME_PATTERN a_file_name
);

# Exactly one file with that prefix exists in the partition.
query I
SELECT COUNT(*) FROM GLOB('__TEST_DIR__/to_be_overwritten/a=9/a_file_name*');
----
1

query III sort
SELECT * FROM '__TEST_DIR__/to_be_overwritten/a=9/a_file_name*.parquet';
----
36	6561	9

# "{i}" and "{uuid}" combined in one pattern.
statement ok
COPY test5 TO '__TEST_DIR__/incorrect_pos' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    filename_pattern "a_name{i}_with_{uuid}_numbers"
);

# Both placeholders are expected to be substituted where they appear in the pattern.
query III sort
SELECT * FROM '__TEST_DIR__/incorrect_pos/a=5/a_name?_with_????????-????-4???-????-????????????_numbers.parquet';
----
25	3125	5

# "{uuidv7}": the glob expects the third UUID group to start with 7.
statement ok
COPY test5 TO '__TEST_DIR__/uuid_v7' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    filename_pattern "a_name_with_{uuidv7}"
);

query III sort
SELECT * FROM '__TEST_DIR__/uuid_v7/a=5/a_name_with_????????-????-7???-????-????????????.parquet';
----
25	3125	5

# "{uuid}" (v4) and "{uuidv7}" in the same pattern.
statement ok
COPY test5 TO '__TEST_DIR__/mixed_uuid_types' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    filename_pattern "a_name{uuid}_with_{uuidv7}_numbers"
);

query III sort
SELECT * FROM '__TEST_DIR__/mixed_uuid_types/a=5/a_name????????-????-4???-????-????????????_with_????????-????-7???-????-????????????_numbers.parquet';
----
25	3125	5

# "{uuid}" together with PER_THREAD_OUTPUT instead of PARTITION_BY.
statement ok
COPY testpto TO '__TEST_DIR__/pto' (
    FORMAT PARQUET,
    PER_THREAD_OUTPUT TRUE,
    OVERWRITE_OR_IGNORE TRUE,
    FILENAME_PATTERN "basename_{uuid}"
);

# All 10000 rows are readable through the UUID glob.
query I sort
SELECT COUNT(*) FROM '__TEST_DIR__/pto/basename_????????-????-4???-????-????????????.parquet';
----
10000

# Only a lower bound on the number of files is asserted.
query I
SELECT COUNT(*) >= 1 FROM GLOB('__TEST_DIR__/pto/basename_????????-????-4???-????-????????????.parquet');
----
true

# The remaining tests run single-threaded.
# NOTE(review): presumably so that each partition ends up with a single data_0 file - confirm.
statement ok
PRAGMA threads=1;

# OVERWRITE_OR_IGNORE given without a value; the original note says it then defaults to true.
# NOTE(review): 'to_be_overwritten2' is not written earlier in this file, so nothing
# appears to be overwritten at this point - confirm the intent.
statement ok
COPY test5 TO '__TEST_DIR__/to_be_overwritten2' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    OVERWRITE_OR_IGNORE
);

query I
SELECT COUNT(*) FROM GLOB('__TEST_DIR__/to_be_overwritten2/a=9/*');
----
1

query III sort
SELECT * FROM '__TEST_DIR__/to_be_overwritten2/a=9/data_0*.parquet';
----
45	59049	9

# overwrite_or_ignore TRUE combined with a placeholder-free pattern.
# NOTE(review): 'directory0' is not referenced earlier in this file, so no existing
# file seems to be involved here - confirm the intent.
statement ok
COPY test5 TO '__TEST_DIR__/directory0' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    overwrite_or_ignore TRUE,
    FILENAME_PATTERN "my_filename"
);

query I
SELECT COUNT(*) FROM GLOB('__TEST_DIR__/directory0/a=7/*');
----
1

# The single file is named after the pattern with a 0 appended.
query III sort
SELECT * FROM '__TEST_DIR__/directory0/a=7/my_filename0.parquet';
----
35	16807	7

# With WRITE_PARTITION_COLUMNS the partition column is stored in the file,
# so `select *` returns the columns in table order (a first).
statement ok
COPY test5 TO '__TEST_DIR__/to_be_overwritten2' (
    FORMAT PARQUET,
    PARTITION_BY (a),
    OVERWRITE_OR_IGNORE,
    WRITE_PARTITION_COLUMNS
);

# Still a single file in the partition after the second write to this directory.
query I
SELECT COUNT(*) FROM GLOB('__TEST_DIR__/to_be_overwritten2/a=9/*');
----
1

query III sort
SELECT * FROM '__TEST_DIR__/to_be_overwritten2/a=9/data_0*.parquet';
----
9	45	59049