should be it
This commit is contained in:
30
external/duckdb/test/sql/copy/parquet/afl.test
vendored
Normal file
30
external/duckdb/test/sql/copy/parquet/afl.test
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
# name: test/sql/copy/parquet/afl.test
|
||||
# description: Read afl-generated parquet files
|
||||
# group: [parquet]
|
||||
|
||||
mode skip
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
foreach i 1 2 6
|
||||
|
||||
statement error
|
||||
select * from parquet_scan('data/parquet-testing/afl/${i}.parquet')
|
||||
----
|
||||
Invalid dictionary page header
|
||||
|
||||
endloop
|
||||
|
||||
|
||||
foreach i 3 4 5 7
|
||||
|
||||
|
||||
statement error
|
||||
select * from parquet_scan('data/parquet-testing/afl/3.parquet')
|
||||
----
|
||||
Invalid Error: Parquet file is likely corrupted, missing dictionary
|
||||
|
||||
endloop
|
||||
32
external/duckdb/test/sql/copy/parquet/alltypes-dictionaries.test
vendored
Normal file
32
external/duckdb/test/sql/copy/parquet/alltypes-dictionaries.test
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# name: test/sql/copy/parquet/alltypes-dictionaries.test
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
|
||||
foreach type TINYINT SMALLINT INTEGER BIGINT HUGEINT UTINYINT USMALLINT UINTEGER UBIGINT UHUGEINT FLOAT DOUBLE VARCHAR
|
||||
|
||||
statement ok
|
||||
copy (select (r1.range * 10)::${type} r from range(10) r1, range(1000) r2) to '__TEST_DIR__/dict-${type}.parquet' (row_group_size 2048);
|
||||
|
||||
query I
|
||||
select first(encodings) from parquet_metadata('__TEST_DIR__/dict-${type}.parquet') group by encodings;
|
||||
----
|
||||
RLE_DICTIONARY
|
||||
|
||||
query I
|
||||
SELECT COUNT(*) from '__TEST_DIR__/dict-${type}.parquet' WHERE r='20'
|
||||
----
|
||||
1000
|
||||
|
||||
query III
|
||||
select column_id, BOOL_AND(bloom_filter_offset > 4), BOOL_AND(bloom_filter_length > 1) from parquet_metadata('__TEST_DIR__/dict-${type}.parquet') group by column_id order by column_id;
|
||||
----
|
||||
0 true true
|
||||
|
||||
#query I
|
||||
#SELECT bloom_filter_excludes FROM parquet_bloom_probe('__TEST_DIR__/dict-${type}.parquet', 'r', '11');
|
||||
#----
|
||||
#true
|
||||
|
||||
endloop
|
||||
31
external/duckdb/test/sql/copy/parquet/attach_parquet.test
vendored
Normal file
31
external/duckdb/test/sql/copy/parquet/attach_parquet.test
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
# name: test/sql/copy/parquet/attach_parquet.test
|
||||
# description: Attach a Parquet file
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
require skip_reload
|
||||
|
||||
statement ok
|
||||
COPY (SELECT 42 val) TO '__TEST_DIR__/file.parquet';
|
||||
|
||||
statement ok
|
||||
ATTACH '__TEST_DIR__/file.parquet' AS attached_parquet
|
||||
|
||||
statement ok
|
||||
USE attached_parquet
|
||||
|
||||
query I
|
||||
SELECT * FROM file
|
||||
----
|
||||
42
|
||||
|
||||
query I
|
||||
SELECT * FROM attached_parquet
|
||||
----
|
||||
42
|
||||
|
||||
statement error
|
||||
ATTACH 'duckdb:__TEST_DIR__/file.parquet' AS duck_attach
|
||||
----
|
||||
not a valid DuckDB database file
|
||||
14
external/duckdb/test/sql/copy/parquet/auto_glob_directory.test
vendored
Normal file
14
external/duckdb/test/sql/copy/parquet/auto_glob_directory.test
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# name: test/sql/copy/parquet/auto_glob_directory.test
|
||||
# description: Test auto globbing a directory
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
COPY (SELECT i%2 AS grp, i FROM range(1000) t(i)) TO '__TEST_DIR__/partitioned_glob.parquet' (PARTITION_BY (grp));
|
||||
|
||||
query II
|
||||
SELECT grp, COUNT(*) FROM '__TEST_DIR__/partitioned_glob.parquet' GROUP BY ALL ORDER BY ALL
|
||||
----
|
||||
0 500
|
||||
1 500
|
||||
13
external/duckdb/test/sql/copy/parquet/aws2.test
vendored
Normal file
13
external/duckdb/test/sql/copy/parquet/aws2.test
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# name: test/sql/copy/parquet/aws2.test
|
||||
# description: Read a file created by AWS (#3981)
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query I
|
||||
SELECT * FROM 'data/parquet-testing/aws2.parquet'
|
||||
----
|
||||
READY
|
||||
22
external/duckdb/test/sql/copy/parquet/aws_kinesis.test
vendored
Normal file
22
external/duckdb/test/sql/copy/parquet/aws_kinesis.test
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
# name: test/sql/copy/parquet/aws_kinesis.test
|
||||
# description: Read a file created by AWS Kinesis Firehose DeliveryStreams (#3981)
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
||||
SELECT * FROM 'data/parquet-testing/aws_kinesis.parquet'
|
||||
----
|
||||
2022 11 22 2022-11-22 00:01:00.871 2022-11-22 00:01:01 -129 Hamburg NULL Germany 53.6304 9.98823 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL http://localhost:3000/ / t=tr&ts=1669075260871&u=http%253A%252F%252Flocalhost%253A3000%252F&hn=localhost&pa=%252F&en=tabVisible&pr=%257B%257D 495 200 Hit 0 3320 NULL tabVisible NULL NULL NULL de3bc04229406da23ee45e234a42a66cc542b335517ab585ca43f55cd2dcf781 3bc2c0a60f9f2dd212db07ed80e817f9dd43aa999d16b0b2b8db91ab092a8102 ab40d20596d7595049399578929ffc598abdb8f539bdfa7637cb509f8613dcc7 track
|
||||
2022 11 22 2022-11-22 00:01:07.67 2022-11-22 00:01:10 -2330 Hamburg NULL Germany 53.6304 9.98823 NULL NULL Chrome 107.0.0.0 Mac OS 10.15.7 Europe/Berlin de-DE NULL NULL 3440x1440 1356x902 24.0 MacIntel 8.0 8.0 NULL NULL NULL NULL NULL http://localhost:3000/azure /azure t=pv&ts=1669075267670&u=http%253A%252F%252Flocalhost%253A3000%252Fazure&hn=localhost&pa=%252Fazure&ua=Mozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_7)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F107.0.0.0%2520Safari%252F537.36&iw=1356&ih=902&ti=Map%2520the%2520Cloud&w=3440&h=1440&d=24&l=de-DE&p=MacIntel&m=8&c=8&tz=Europe%252FBerlin 291 200 Hit 1 3320 NULL NULL NULL NULL NULL cb7736f1c3ce9b9a21bb9a7b17edfd9507e7c6261ded2a8ebc7f19189d6de8c6 4e5aea60a9b73aa0f2aa69967e9de9a58b3341d5bc342f94d2eef07859b658da ab40d20596d7595049399578929ffc598abdb8f539bdfa7637cb509f8613dcc7 pageview
|
||||
2022 11 22 2022-11-22 00:01:13.175 2022-11-22 00:01:16 -2825 Hamburg NULL Germany 53.6304 9.98823 NULL NULL Chrome 107.0.0.0 Mac OS 10.15.7 Europe/Berlin de-DE NULL NULL 3440x1440 1356x902 24.0 MacIntel 8.0 8.0 NULL NULL NULL NULL NULL http://localhost:3000/aws /aws t=pv&ts=1669075273175&u=http%253A%252F%252Flocalhost%253A3000%252Faws&hn=localhost&pa=%252Faws&ua=Mozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_7)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F107.0.0.0%2520Safari%252F537.36&iw=1356&ih=902&ti=Map%2520the%2520Cloud%2520-%2520Azure%2520Services%2520%2526%2520Regions&w=3440&h=1440&d=24&l=de-DE&p=MacIntel&m=8&c=8&tz=Europe%252FBerlin 315 200 Hit 0 3320 NULL NULL NULL NULL NULL 4326dbc4bbfbef6aec0584b3d6437625551ab22323ed0f81ff79ab54bcfb97db cf7ee5dae81cbe75b0e78aaf200b8f1fb93349dc33fe65d3623df79ff31c53fd ab40d20596d7595049399578929ffc598abdb8f539bdfa7637cb509f8613dcc7 pageview
|
||||
2022 11 22 2022-11-22 00:01:20.2 2022-11-22 00:01:21 -800 Hamburg NULL Germany 53.6304 9.98823 NULL NULL Chrome 107.0.0.0 Mac OS 10.15.7 Europe/Berlin de-DE NULL NULL 3440x1440 1356x902 24.0 MacIntel 8.0 8.0 NULL NULL NULL NULL NULL http://localhost:3000/googlecloud /googlecloud t=pv&ts=1669075280200&u=http%253A%252F%252Flocalhost%253A3000%252Fgooglecloud&hn=localhost&pa=%252Fgooglecloud&ua=Mozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_7)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F107.0.0.0%2520Safari%252F537.36&iw=1356&ih=902&ti=Map%2520the%2520Cloud%2520-%2520AWS%2520Services%2520%2526%2520Regions&w=3440&h=1440&d=24&l=de-DE&p=MacIntel&m=8&c=8&tz=Europe%252FBerlin 325 200 Hit 0 3320 NULL NULL NULL NULL NULL 949aa1b5dc5869b315686bb8ab1e211469af10ad8c5cc23c9fec3a43b3f39e5d ee17f38d66e51fa2647ad07e21c847d82221baad23a587a51868d9ee5bf642f4 ab40d20596d7595049399578929ffc598abdb8f539bdfa7637cb509f8613dcc7 pageview
|
||||
|
||||
|
||||
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
|
||||
SELECT * FROM 'data/parquet-testing/aws_kinesis.parquet' WHERE event_timestamp=TIMESTAMP '2022-11-22 00:01:13.175';
|
||||
----
|
||||
2022 11 22 2022-11-22 00:01:13.175 2022-11-22 00:01:16 -2825 Hamburg NULL Germany 53.6304 9.98823 NULL NULL Chrome 107.0.0.0 Mac OS 10.15.7 Europe/Berlin de-DE NULL NULL 3440x1440 1356x902 24.0 MacIntel 8.0 8.0 NULL NULL NULL NULL NULL http://localhost:3000/aws /aws t=pv&ts=1669075273175&u=http%253A%252F%252Flocalhost%253A3000%252Faws&hn=localhost&pa=%252Faws&ua=Mozilla%252F5.0%2520(Macintosh%253B%2520Intel%2520Mac%2520OS%2520X%252010_15_7)%2520AppleWebKit%252F537.36%2520(KHTML%252C%2520like%2520Gecko)%2520Chrome%252F107.0.0.0%2520Safari%252F537.36&iw=1356&ih=902&ti=Map%2520the%2520Cloud%2520-%2520Azure%2520Services%2520%2526%2520Regions&w=3440&h=1440&d=24&l=de-DE&p=MacIntel&m=8&c=8&tz=Europe%252FBerlin 315 200 Hit 0 3320 NULL NULL NULL NULL NULL 4326dbc4bbfbef6aec0584b3d6437625551ab22323ed0f81ff79ab54bcfb97db cf7ee5dae81cbe75b0e78aaf200b8f1fb93349dc33fe65d3623df79ff31c53fd ab40d20596d7595049399578929ffc598abdb8f539bdfa7637cb509f8613dcc7 pageview
|
||||
28
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage.test_slow
vendored
Normal file
28
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage.test_slow
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
# name: test/sql/copy/parquet/batched_write/batch_memory_usage.test_slow
|
||||
# description: Batched Parquet write memory usage
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
set seed 0.72
|
||||
|
||||
statement ok
|
||||
COPY (SELECT uuid()::VARCHAR as varchar, uuid() AS uuid FROM range(10000000) t(i)) TO '__TEST_DIR__/random_uuids.parquet'
|
||||
|
||||
# copy from one parquet file to another in a memory constrained environment
|
||||
statement ok
|
||||
SET memory_limit='650MB'
|
||||
|
||||
statement ok
|
||||
COPY '__TEST_DIR__/random_uuids.parquet' TO '__TEST_DIR__/random_uuids_copy.parquet';
|
||||
|
||||
# ensure the parquet files hold the same content
|
||||
statement ok
|
||||
SET memory_limit='2GB';
|
||||
|
||||
# ensure the parquet files hold the same content in the same order
|
||||
query III
|
||||
SELECT *, row_number() OVER () as rownum FROM '__TEST_DIR__/random_uuids.parquet'
|
||||
EXCEPT
|
||||
SELECT *, row_number() OVER () as rownum FROM '__TEST_DIR__/random_uuids_copy.parquet';
|
||||
----
|
||||
40
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage_mixed_batches.test_slow
vendored
Normal file
40
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage_mixed_batches.test_slow
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# name: test/sql/copy/parquet/batched_write/batch_memory_usage_mixed_batches.test_slow
|
||||
# description: Batched Parquet write memory usage with mixed batches
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mem_usage_mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mem_usage_mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mem_usage_mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mem_usage_mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM parquet_scan([
|
||||
'__TEST_DIR__/mem_usage_mix_batches_small.parquet',
|
||||
'__TEST_DIR__/mem_usage_mix_batches_large.parquet',
|
||||
'__TEST_DIR__/mem_usage_mix_batches_odd.parquet',
|
||||
'__TEST_DIR__/mem_usage_mix_batches_odd_again.parquet'])
|
||||
|
||||
statement ok
|
||||
SET memory_limit='500MB'
|
||||
|
||||
statement ok
|
||||
COPY v1 TO '__TEST_DIR__/mem_usage_mix_result.parquet'
|
||||
|
||||
# ensure the parquet files hold the same content in the same order
|
||||
statement ok
|
||||
SET memory_limit='2GB';
|
||||
|
||||
query II
|
||||
SELECT *, row_number() OVER () as rownum FROM v1
|
||||
EXCEPT
|
||||
SELECT *, row_number() OVER () as rownum FROM '__TEST_DIR__/mem_usage_mix_result.parquet';
|
||||
----
|
||||
16
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage_small.test_slow
vendored
Normal file
16
external/duckdb/test/sql/copy/parquet/batched_write/batch_memory_usage_small.test_slow
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
# name: test/sql/copy/parquet/batched_write/batch_memory_usage_small.test_slow
|
||||
# description: Batched Parquet write memory usage
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
set seed 0.72
|
||||
|
||||
statement ok
|
||||
COPY (SELECT uuid()::VARCHAR as varchar, uuid() AS uuid FROM range(10000000) t(i)) TO '__TEST_DIR__/random_uuids.parquet'
|
||||
|
||||
statement ok
|
||||
SET memory_limit='750MB'
|
||||
|
||||
statement ok
|
||||
COPY '__TEST_DIR__/random_uuids.parquet' TO '__TEST_DIR__/random_uuids_copy.parquet';
|
||||
88
external/duckdb/test/sql/copy/parquet/batched_write/batched_parquet_write.test_slow
vendored
Normal file
88
external/duckdb/test/sql/copy/parquet/batched_write/batched_parquet_write.test_slow
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
# name: test/sql/copy/parquet/batched_write/batched_parquet_write.test_slow
|
||||
# description: Batched copy to file
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers AS SELECT i, i // 5 AS j FROM range(1000000) t(i) ;
|
||||
|
||||
statement ok
|
||||
COPY integers TO '__TEST_DIR__/batched_integers.parquet';
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_copied AS FROM '__TEST_DIR__/batched_integers.parquet'
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers_copied
|
||||
----
|
||||
499999500000 99999500000 1000000 1000000 1000000
|
||||
|
||||
query II
|
||||
SELECT * FROM integers_copied ORDER BY i LIMIT 5
|
||||
----
|
||||
0 0
|
||||
1 0
|
||||
2 0
|
||||
3 0
|
||||
4 0
|
||||
|
||||
query II
|
||||
SELECT * FROM integers_copied ORDER BY i LIMIT 5 OFFSET 99997
|
||||
----
|
||||
99997 19999
|
||||
99998 19999
|
||||
99999 19999
|
||||
100000 20000
|
||||
100001 20000
|
||||
|
||||
query II
|
||||
SELECT * FROM integers_copied QUALIFY i<=lag(i) over ()
|
||||
----
|
||||
|
||||
# now with filters
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM integers WHERE (i%2=0 AND i<300000) OR (i BETWEEN 500000 AND 700000)
|
||||
|
||||
statement ok
|
||||
COPY v1 TO '__TEST_DIR__/batched_integers_filters.parquet';
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_filtered AS FROM '__TEST_DIR__/batched_integers_filters.parquet'
|
||||
|
||||
|
||||
foreach table v1 integers_filtered
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM ${table}
|
||||
----
|
||||
142500450000 28499950000 350001 350001 350001
|
||||
|
||||
query II
|
||||
SELECT * FROM ${table} ORDER BY i LIMIT 5
|
||||
----
|
||||
0 0
|
||||
2 0
|
||||
4 0
|
||||
6 1
|
||||
8 1
|
||||
|
||||
query II
|
||||
SELECT * FROM ${table} ORDER BY i LIMIT 5 OFFSET 99997
|
||||
----
|
||||
199994 39998
|
||||
199996 39999
|
||||
199998 39999
|
||||
200000 40000
|
||||
200002 40000
|
||||
|
||||
query II
|
||||
SELECT * FROM ${table} ORDER BY i LIMIT 5 OFFSET 300000
|
||||
----
|
||||
650000 130000
|
||||
650001 130000
|
||||
650002 130000
|
||||
650003 130000
|
||||
650004 130000
|
||||
|
||||
endloop
|
||||
34
external/duckdb/test/sql/copy/parquet/batched_write/lineitem_memory_usage.test_slow
vendored
Normal file
34
external/duckdb/test/sql/copy/parquet/batched_write/lineitem_memory_usage.test_slow
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# name: test/sql/copy/parquet/batched_write/lineitem_memory_usage.test_slow
|
||||
# description: Batched lineitem write memory usage
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
require tpch
|
||||
|
||||
load __TEST_DIR__/lineitem_memory_test.db
|
||||
|
||||
statement ok
|
||||
CALL dbgen(sf=1)
|
||||
|
||||
statement ok
|
||||
COPY lineitem TO '__TEST_DIR__/lineitem_memory_usage.parquet'
|
||||
|
||||
restart
|
||||
|
||||
# copy from one parquet file to another in a memory constrained environment
|
||||
statement ok
|
||||
SET memory_limit='500MB'
|
||||
|
||||
statement ok
|
||||
COPY '__TEST_DIR__/lineitem_memory_usage.parquet' TO '__TEST_DIR__/lineitem_memory_usage_copy.parquet';
|
||||
|
||||
# ensure the parquet files hold the same content in the same order
|
||||
statement ok
|
||||
SET memory_limit='2GB';
|
||||
|
||||
query IIIIIIIIIIIIIIIII
|
||||
SELECT *, row_number() OVER () as rownum FROM '__TEST_DIR__/lineitem_memory_usage.parquet'
|
||||
EXCEPT
|
||||
SELECT *, row_number() OVER () as rownum FROM '__TEST_DIR__/lineitem_memory_usage_copy.parquet';
|
||||
----
|
||||
33
external/duckdb/test/sql/copy/parquet/batched_write/parquet_verify_row_group_size.test_slow
vendored
Normal file
33
external/duckdb/test/sql/copy/parquet/batched_write/parquet_verify_row_group_size.test_slow
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
# name: test/sql/copy/parquet/batched_write/parquet_verify_row_group_size.test_slow
|
||||
# description: Verify row group size is respected
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
loop i 0 2
|
||||
|
||||
foreach row_group_size 777 9999 83838 143431 333333
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers AS SELECT i, i // 5 AS j FROM range(1000000) t(i) ;
|
||||
|
||||
statement ok
|
||||
COPY integers TO '__TEST_DIR__/row_group_size.parquet' (ROW_GROUP_SIZE ${row_group_size});
|
||||
|
||||
statement ok
|
||||
select row_group_num_rows from parquet_metadata('__TEST_DIR__/row_group_size.parquet');
|
||||
|
||||
query I
|
||||
select abs(median(row_group_num_rows)-${row_group_size})<2048 from parquet_metadata('__TEST_DIR__/row_group_size.parquet');
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers
|
||||
|
||||
endloop
|
||||
|
||||
statement ok
|
||||
SET threads=1
|
||||
|
||||
endloop
|
||||
181
external/duckdb/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow
vendored
Normal file
181
external/duckdb/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
# name: test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow
|
||||
# description: Test batch Parquet write with mixed batches
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
|
||||
|
||||
# create views that read the batches
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
# empty table
|
||||
statement ok
|
||||
CREATE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
|
||||
|
||||
loop i 0 2
|
||||
|
||||
query I
|
||||
COPY v1 TO '__TEST_DIR__/mixed_batches_v1.parquet'
|
||||
----
|
||||
1000000
|
||||
|
||||
query I
|
||||
CREATE TABLE mixed_batches_v1 AS FROM '__TEST_DIR__/mixed_batches_v1.parquet'
|
||||
----
|
||||
1000000
|
||||
|
||||
foreach table v1 mixed_batches_v1
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query I
|
||||
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
endloop
|
||||
|
||||
# now do the same, but filter out half of the values
|
||||
query I
|
||||
COPY v2 TO '__TEST_DIR__/mixed_batches_v2.parquet'
|
||||
----
|
||||
500000
|
||||
|
||||
query I
|
||||
CREATE TABLE mixed_batches_v2 AS FROM '__TEST_DIR__/mixed_batches_v2.parquet'
|
||||
----
|
||||
500000
|
||||
|
||||
foreach table v2 mixed_batches_v2
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query I
|
||||
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
|
||||
----
|
||||
189998
|
||||
189999
|
||||
200000
|
||||
200001
|
||||
200002
|
||||
|
||||
endloop
|
||||
|
||||
# do it again, but this time only filter out SOME small batches
|
||||
query I
|
||||
COPY v3 TO '__TEST_DIR__/mixed_batches_v3.parquet'
|
||||
----
|
||||
700000
|
||||
|
||||
query I
|
||||
CREATE TABLE mixed_batches_v3 AS FROM '__TEST_DIR__/mixed_batches_v3.parquet'
|
||||
----
|
||||
700000
|
||||
|
||||
foreach table v3 mixed_batches_v3
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
|
||||
----
|
||||
348499650000 0 989999 700000 700000
|
||||
|
||||
query I
|
||||
SELECT * FROM ${table} LIMIT 5 OFFSET 9999
|
||||
----
|
||||
9999
|
||||
20000
|
||||
20001
|
||||
20002
|
||||
20003
|
||||
|
||||
endloop
|
||||
|
||||
# now with an empty table
|
||||
query I
|
||||
COPY v4 TO '__TEST_DIR__/mixed_batches_v4.parquet'
|
||||
----
|
||||
0
|
||||
|
||||
query I
|
||||
CREATE TABLE mixed_batches_v4 AS FROM '__TEST_DIR__/mixed_batches_v4.parquet'
|
||||
----
|
||||
0
|
||||
|
||||
foreach table v4 mixed_batches_v4
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
|
||||
----
|
||||
NULL NULL NULL 0 0
|
||||
|
||||
query I
|
||||
SELECT * FROM ${table} LIMIT 5
|
||||
----
|
||||
|
||||
endloop
|
||||
|
||||
statement ok
|
||||
DROP TABLE mixed_batches_v1
|
||||
|
||||
statement ok
|
||||
DROP TABLE mixed_batches_v2
|
||||
|
||||
statement ok
|
||||
DROP TABLE mixed_batches_v3
|
||||
|
||||
statement ok
|
||||
DROP TABLE mixed_batches_v4
|
||||
|
||||
# Drop the VIEWs that depend on V1
|
||||
|
||||
statement ok
|
||||
DROP VIEW IF EXISTS v2
|
||||
|
||||
statement ok
|
||||
DROP VIEW IF EXISTS v3
|
||||
|
||||
statement ok
|
||||
DROP VIEW IF EXISTS v4
|
||||
|
||||
# create views that read the batches using unions
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet'
|
||||
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
|
||||
|
||||
endloop
|
||||
55
external/duckdb/test/sql/copy/parquet/batched_write/tpch_sf1_parquet.test_slow
vendored
Normal file
55
external/duckdb/test/sql/copy/parquet/batched_write/tpch_sf1_parquet.test_slow
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
# name: test/sql/copy/parquet/batched_write/tpch_sf1_parquet.test_slow
|
||||
# description: Test TPC-H SF1 with Parquet
|
||||
# group: [batched_write]
|
||||
|
||||
require tpch
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
CALL dbgen(sf=1, suffix='_original');
|
||||
|
||||
foreach tpch_tbl orders customer lineitem nation part partsupp region supplier
|
||||
|
||||
statement ok
|
||||
COPY ${tpch_tbl}_original TO '__TEST_DIR__/${tpch_tbl}.parquet';
|
||||
|
||||
statement ok
|
||||
CREATE VIEW ${tpch_tbl} AS FROM read_parquet('__TEST_DIR__/${tpch_tbl}.parquet');
|
||||
|
||||
endloop
|
||||
|
||||
# verify the data was written/read in the correct order
|
||||
query IIIIIIIIIIIIIIII
|
||||
select * from lineitem qualify l_orderkey<lag(l_orderkey) over ();
|
||||
----
|
||||
|
||||
|
||||
loop i 1 9
|
||||
|
||||
query I
|
||||
PRAGMA tpch(${i})
|
||||
----
|
||||
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv
|
||||
|
||||
endloop
|
||||
|
||||
loop i 10 23
|
||||
|
||||
query I
|
||||
PRAGMA tpch(${i})
|
||||
----
|
||||
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv
|
||||
|
||||
endloop
|
||||
|
||||
query IIIIIIIIIIIIIIII
|
||||
SELECT MAX(COLUMNS(*)) FROM (FROM lineitem LIMIT 100000 OFFSET 5000000)
|
||||
----
|
||||
5099235 199996 10000 7 50.00 104649.50 0.10 0.08 R O 1998-11-30 1998-10-30 1998-12-22 TAKE BACK RETURN TRUCK zzle. express, bold deposits was. slyly e
|
||||
|
||||
query IIIIIIIIIIIIIIII
|
||||
select * from lineitem order by l_extendedprice desc, l_shipdate limit 2;
|
||||
----
|
||||
2513090 199999 5038 4 50.00 104949.50 0.02 0.04 A F 1993-10-05 1993-10-17 1993-10-28 TAKE BACK RETURN FOB - ironic, pending pinto be
|
||||
82823 199998 5037 2 50.00 104899.50 0.04 0.05 A F 1992-04-30 1992-07-05 1992-05-29 COLLECT COD SHIP orbits. bold fox
|
||||
51
external/duckdb/test/sql/copy/parquet/batched_write/varying_source_target_row_groups.test_slow
vendored
Normal file
51
external/duckdb/test/sql/copy/parquet/batched_write/varying_source_target_row_groups.test_slow
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# name: test/sql/copy/parquet/batched_write/varying_source_target_row_groups.test_slow
|
||||
# description: Verify source-target row group size pairs
|
||||
# group: [batched_write]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers AS SELECT i, i // 5 AS j FROM range(1000000) t(i) ;
|
||||
|
||||
foreach src_size 777 9999 83838 143431 333333
|
||||
|
||||
foreach tgt_size 777 9999 83838 143431 333333
|
||||
|
||||
statement ok
|
||||
SET threads=1
|
||||
|
||||
statement ok
|
||||
COPY integers TO '__TEST_DIR__/src_size.parquet' (ROW_GROUP_SIZE ${src_size});
|
||||
|
||||
statement ok
|
||||
SET threads=4
|
||||
|
||||
query I
|
||||
select abs(median(row_group_num_rows)-${src_size})<2048 from parquet_metadata('__TEST_DIR__/src_size.parquet');
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
COPY '__TEST_DIR__/src_size.parquet' TO '__TEST_DIR__/tgt_size.parquet' (ROW_GROUP_SIZE ${tgt_size});
|
||||
|
||||
query I
|
||||
select abs(median(row_group_num_rows)-${tgt_size})<2048 from parquet_metadata('__TEST_DIR__/tgt_size.parquet');
|
||||
----
|
||||
true
|
||||
|
||||
# verify the groups are actually written in the same order and contain the same data
|
||||
query III
|
||||
SELECT *, row_number() OVER () FROM integers
|
||||
EXCEPT
|
||||
SELECT *, row_number() OVER () FROM '__TEST_DIR__/src_size.parquet'
|
||||
----
|
||||
|
||||
query III
|
||||
SELECT *, row_number() OVER () FROM '__TEST_DIR__/src_size.parquet'
|
||||
EXCEPT
|
||||
SELECT *, row_number() OVER () FROM '__TEST_DIR__/tgt_size.parquet'
|
||||
----
|
||||
|
||||
endloop
|
||||
|
||||
endloop
|
||||
18
external/duckdb/test/sql/copy/parquet/bigdecimal.test
vendored
Normal file
18
external/duckdb/test/sql/copy/parquet/bigdecimal.test
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
# name: test/sql/copy/parquet/bigdecimal.test
|
||||
# description: Read a file created by Google BigQuery with a BIGDECIMAL column (i.e. DECIMAL(77,38))
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query I
|
||||
FROM 'data/parquet-testing/bigdecimal.parquet'
|
||||
----
|
||||
0.5
|
||||
-0.5
|
||||
1.2345678912345679e+26
|
||||
-1.2345678912345679e+26
|
||||
5.7896044618658096e+38
|
||||
-5.7896044618658096e+38
|
||||
293
external/duckdb/test/sql/copy/parquet/bloom_filters.test
vendored
Normal file
293
external/duckdb/test/sql/copy/parquet/bloom_filters.test
vendored
Normal file
@@ -0,0 +1,293 @@
|
||||
# name: test/sql/copy/parquet/bloom_filters.test
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
copy (select
|
||||
(r1.range*10)::BIGINT r,
|
||||
r::smallint r_int16,
|
||||
r::integer r_int32,
|
||||
r::double r_double,
|
||||
r::float r_float,
|
||||
'string_' || r::VARCHAR r_string,
|
||||
('blob_' || r::VARCHAR)::BLOB r_blob
|
||||
from range(100) r1, range(1000) order by r) to '__TEST_DIR__/bloom1.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000);
|
||||
|
||||
# we don't check the actual offsets since they might change due to filters being moved around
|
||||
query III
|
||||
select column_id, BOOL_AND(bloom_filter_offset > 4), BOOL_AND(bloom_filter_length > 1) from parquet_metadata('__TEST_DIR__/bloom1.parquet') group by column_id order by column_id;
|
||||
----
|
||||
0 true true
|
||||
1 true true
|
||||
2 true true
|
||||
3 true true
|
||||
4 true true
|
||||
5 true true
|
||||
6 true true
|
||||
|
||||
# this value is not in the domain but within min/max
|
||||
query I
|
||||
SELECT BOOL_AND(bloom_filter_excludes) FROM parquet_bloom_probe('__TEST_DIR__/bloom1.parquet', 'r', '201');
|
||||
----
|
||||
true
|
||||
|
||||
# this value is outside min/max
|
||||
query I
|
||||
SELECT BOOL_AND(bloom_filter_excludes) FROM parquet_bloom_probe('__TEST_DIR__/bloom1.parquet', 'r', '112121212');
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
CREATE MACRO assert_bloom_filter_hit(file, col, val) AS TABLE
|
||||
SELECT COUNT(*) > 0 AND COUNT(*) < MAX(row_group_id+1) FROM parquet_bloom_probe(file, col, val) WHERE NOT bloom_filter_excludes;
|
||||
|
||||
# this in-domain should only be in a subset of row groups since its ordered
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r', '200');
|
||||
----
|
||||
true
|
||||
|
||||
# same dance but with probe not being a string
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r', 200);
|
||||
----
|
||||
true
|
||||
|
||||
# non-existent file
|
||||
statement error
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom10000.parquet', 'r', '200');
|
||||
----
|
||||
No files found
|
||||
|
||||
# non-existent column
|
||||
statement error
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r2', '200');
|
||||
----
|
||||
Column r2 not found
|
||||
|
||||
# NULL colname
|
||||
statement error
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', NULL, '200');
|
||||
----
|
||||
Can't have NULL parameters
|
||||
|
||||
# NULL probe
|
||||
statement error
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r', NULL);
|
||||
----
|
||||
Can't have NULL parameters
|
||||
|
||||
statement error
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r', 'a');
|
||||
----
|
||||
Failed to cast value
|
||||
|
||||
# more types
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_int16', 200);
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_int32', 200);
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_float', 200);
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_double', 200);
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_string', 'string_200');
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
FROM assert_bloom_filter_hit('__TEST_DIR__/bloom1.parquet', 'r_blob', 'blob_200'::BLOB);
|
||||
----
|
||||
true
|
||||
|
||||
|
||||
|
||||
# some tests for dictionary_size_limit
|
||||
|
||||
# no bloom filter, dict limit too low
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom2.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 10);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom2.parquet') order by row_group_id;
|
||||
----
|
||||
0 false false
|
||||
|
||||
# no bloom filter - disabled explicitly
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/disable_bloom_filter.parquet' (format parquet, ROW_GROUP_SIZE 10000, write_bloom_filter false);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/disable_bloom_filter.parquet') order by row_group_id;
|
||||
----
|
||||
0 false false
|
||||
|
||||
# still no bloom filter, limit off-by-one
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom3.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 99);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom3.parquet') order by row_group_id;
|
||||
----
|
||||
0 false false
|
||||
|
||||
|
||||
# should have a filter here!
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom4.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 100);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom4.parquet') order by row_group_id;
|
||||
----
|
||||
0 true true
|
||||
|
||||
|
||||
# should have a filter here, too
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom5.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom5.parquet') order by row_group_id;
|
||||
----
|
||||
0 true true
|
||||
|
||||
|
||||
# lets mess with the false positive ratio and measue bf size
|
||||
|
||||
# the default 0.01
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom6.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0.01);
|
||||
|
||||
|
||||
query II
|
||||
select row_group_id, bloom_filter_length from parquet_metadata('__TEST_DIR__/bloom6.parquet') order by row_group_id;
|
||||
----
|
||||
0 144
|
||||
|
||||
|
||||
# higher prob: 0.5 should lead to a smaller filter
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom7.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0.5);
|
||||
|
||||
|
||||
query II
|
||||
select row_group_id, bloom_filter_length from parquet_metadata('__TEST_DIR__/bloom7.parquet') order by row_group_id;
|
||||
----
|
||||
0 80
|
||||
|
||||
|
||||
# lower prob: 0.001 should lead to a bigger filter
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom8.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0.001);
|
||||
|
||||
|
||||
query II
|
||||
select row_group_id, bloom_filter_length from parquet_metadata('__TEST_DIR__/bloom8.parquet') order by row_group_id;
|
||||
----
|
||||
0 272
|
||||
|
||||
|
||||
# even lower prob: 0.0001 should lead to an even bigger filter
|
||||
statement ok
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom8.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0.0001);
|
||||
|
||||
|
||||
query II
|
||||
select row_group_id, bloom_filter_length from parquet_metadata('__TEST_DIR__/bloom8.parquet') order by row_group_id;
|
||||
----
|
||||
0 528
|
||||
|
||||
|
||||
# some error cases for the new parameters
|
||||
|
||||
statement error
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom8.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit -1, bloom_filter_false_positive_ratio 0.0001);
|
||||
----
|
||||
dictionary_size_limit must be greater than 0
|
||||
|
||||
|
||||
statement error
|
||||
copy (select (r1.range*10)::BIGINT r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom8.parquet' (format parquet, ROW_GROUP_SIZE 10000, dictionary_size_limit 1000, bloom_filter_false_positive_ratio 0);
|
||||
----
|
||||
bloom_filter_false_positive_ratio must be greater than 0
|
||||
|
||||
# some tests for string_dictionary_page_size_limit
|
||||
|
||||
# no bloom filter, limit too low
|
||||
statement ok
|
||||
copy (select (r1.range*10)::VARCHAR r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 10);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom9.parquet') order by row_group_id;
|
||||
----
|
||||
0 false false
|
||||
|
||||
# big enough
|
||||
statement ok
|
||||
copy (select (r1.range*10)::VARCHAR r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 100000);
|
||||
|
||||
query III
|
||||
select row_group_id, bloom_filter_offset IS NOT NULL, bloom_filter_length IS NOT NULL from parquet_metadata('__TEST_DIR__/bloom9.parquet') order by row_group_id;
|
||||
----
|
||||
0 true true
|
||||
|
||||
# too big
|
||||
statement error
|
||||
copy (select (r1.range*10)::VARCHAR r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 4294967295);
|
||||
----
|
||||
Binder Error
|
||||
|
||||
# cannot be 0
|
||||
statement error
|
||||
copy (select (r1.range*10)::VARCHAR r,
|
||||
from range(100) r1, range(100) order by r) to '__TEST_DIR__/bloom9.parquet' (format parquet, ROW_GROUP_SIZE 10000, string_dictionary_page_size_limit 0);
|
||||
----
|
||||
Binder Error
|
||||
|
||||
# test some repeated large strings
|
||||
# this should give dictionary
|
||||
statement ok
|
||||
copy (select repeat('abc', 500_000) || (range % 10) s from range(100)) to '__TEST_DIR__/my.parquet';
|
||||
|
||||
query I
|
||||
select encodings from parquet_metadata('__TEST_DIR__/my.parquet');
|
||||
----
|
||||
RLE_DICTIONARY
|
||||
|
||||
# this cannot do dictionary because the strings exceed the limit
|
||||
statement ok
|
||||
copy (select repeat('abc', 500_000) || (range % 10) s from range(100)) to '__TEST_DIR__/my.parquet' (STRING_DICTIONARY_PAGE_SIZE_LIMIT 4_000_000);
|
||||
|
||||
query I
|
||||
select encodings = 'RLE_DICTIONARY' from parquet_metadata('__TEST_DIR__/my.parquet');
|
||||
----
|
||||
false
|
||||
BIN
external/duckdb/test/sql/copy/parquet/broken/broken_structure.parquet
vendored
Normal file
BIN
external/duckdb/test/sql/copy/parquet/broken/broken_structure.parquet
vendored
Normal file
Binary file not shown.
1
external/duckdb/test/sql/copy/parquet/broken/firstmarker.parquet
vendored
Normal file
1
external/duckdb/test/sql/copy/parquet/broken/firstmarker.parquet
vendored
Normal file
@@ -0,0 +1 @@
|
||||
PAR1
|
||||
BIN
external/duckdb/test/sql/copy/parquet/broken/footerlengthzero.parquet
vendored
Normal file
BIN
external/duckdb/test/sql/copy/parquet/broken/footerlengthzero.parquet
vendored
Normal file
Binary file not shown.
BIN
external/duckdb/test/sql/copy/parquet/broken/garbledfooter.parquet
vendored
Normal file
BIN
external/duckdb/test/sql/copy/parquet/broken/garbledfooter.parquet
vendored
Normal file
Binary file not shown.
1
external/duckdb/test/sql/copy/parquet/broken/hugefooter.parquet
vendored
Normal file
1
external/duckdb/test/sql/copy/parquet/broken/hugefooter.parquet
vendored
Normal file
@@ -0,0 +1 @@
|
||||
PAR1<EFBFBD><EFBFBD><EFBFBD><EFBFBD>PAR1
|
||||
1
external/duckdb/test/sql/copy/parquet/broken/missingmagicatend.parquet
vendored
Normal file
1
external/duckdb/test/sql/copy/parquet/broken/missingmagicatend.parquet
vendored
Normal file
@@ -0,0 +1 @@
|
||||
PAR1iojqerwiojqwqhqrwiuhRIUHQWriuwHQRW
|
||||
1
external/duckdb/test/sql/copy/parquet/broken/missingmagicatfront.parquet
vendored
Normal file
1
external/duckdb/test/sql/copy/parquet/broken/missingmagicatfront.parquet
vendored
Normal file
@@ -0,0 +1 @@
|
||||
RJIOJWRIOJQWriojqrqwJRWPAR1
|
||||
1
external/duckdb/test/sql/copy/parquet/broken/twomarkers.parquet
vendored
Normal file
1
external/duckdb/test/sql/copy/parquet/broken/twomarkers.parquet
vendored
Normal file
@@ -0,0 +1 @@
|
||||
PAR1PAR1
|
||||
43
external/duckdb/test/sql/copy/parquet/broken_parquet.test
vendored
Normal file
43
external/duckdb/test/sql/copy/parquet/broken_parquet.test
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
# name: test/sql/copy/parquet/broken_parquet.test
|
||||
# description: Read several broken parquet files
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/missingmagicatfront.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/missingmagicatend.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/firstmarker.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/twomarkers.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/footerlengthzero.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/hugefooter.parquet')
|
||||
----
|
||||
|
||||
statement error
|
||||
select count(*) from parquet_scan('test/sql/copy/parquet/broken/garbledfooter.parquet')
|
||||
----
|
||||
|
||||
mode skip
|
||||
|
||||
statement error
|
||||
from parquet_scan('test/sql/copy/parquet/broken/broken_structure.parquet')
|
||||
----
|
||||
Parquet file is likely corrupted
|
||||
13
external/duckdb/test/sql/copy/parquet/byte_stream_split.test
vendored
Normal file
13
external/duckdb/test/sql/copy/parquet/byte_stream_split.test
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# name: test/sql/copy/parquet/byte_stream_split.test
|
||||
# description: Read a Parquet file with floats and doubles encoded using the byte stream split encoding
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query III
|
||||
SELECT * FROM 'data/parquet-testing/byte_stream_split.parquet'
|
||||
----
|
||||
<FILE>:data/parquet-testing/byte_stream_split.csv
|
||||
10
external/duckdb/test/sql/copy/parquet/case_insensitive_replacement.test
vendored
Normal file
10
external/duckdb/test/sql/copy/parquet/case_insensitive_replacement.test
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# name: test/sql/copy/parquet/case_insensitive_replacement.test
|
||||
# description: Issue #2543: Case insensitive replacement scan
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT data FROM 'data/parquet-testing/CASE_INSENSITIVE.PARQUET'
|
||||
----
|
||||
\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F
|
||||
15
external/duckdb/test/sql/copy/parquet/copy_option_suggestion.test
vendored
Normal file
15
external/duckdb/test/sql/copy/parquet/copy_option_suggestion.test
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# name: test/sql/copy/parquet/copy_option_suggestion.test
|
||||
# description: Test suggestion of unknown copy options
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement error
|
||||
copy (select 42) to 'file.parquet' (partition_b (a));
|
||||
----
|
||||
partition_by
|
||||
|
||||
statement error
|
||||
copy (select 42) to 'file.csv' (partition_b (a));
|
||||
----
|
||||
partition_by
|
||||
61
external/duckdb/test/sql/copy/parquet/copy_preserve_order.test_slow
vendored
Normal file
61
external/duckdb/test/sql/copy/parquet/copy_preserve_order.test_slow
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
# name: test/sql/copy/parquet/copy_preserve_order.test_slow
|
||||
# description: Test order preservation with the PRESERVE_ORDER flag
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_order_preserving.db
|
||||
|
||||
# test the PRESERVE_ORDER option
|
||||
statement ok
|
||||
SET preserve_insertion_order=false
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT * FROM range(10000000) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
COPY integers TO '__TEST_DIR__/force_order_preserve.parquet' (PRESERVE_ORDER);
|
||||
----
|
||||
10000000
|
||||
|
||||
statement ok
|
||||
CREATE VIEW integers2 AS FROM '__TEST_DIR__/force_order_preserve.parquet'
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers
|
||||
----
|
||||
49999995000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers2
|
||||
----
|
||||
49999995000000
|
||||
|
||||
# verify the file was written in the correct order - for this we set the preserve_insertion_order back to true
|
||||
statement ok
|
||||
SET preserve_insertion_order=true
|
||||
|
||||
query I
|
||||
SELECT * FROM '__TEST_DIR__/force_order_preserve.parquet' LIMIT 5
|
||||
----
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
|
||||
query I
|
||||
SELECT * FROM '__TEST_DIR__/force_order_preserve.parquet' LIMIT 5 OFFSET 777778
|
||||
----
|
||||
777778
|
||||
777779
|
||||
777780
|
||||
777781
|
||||
777782
|
||||
|
||||
statement error
|
||||
COPY integers TO '__TEST_DIR__/force_order_preserve_2.parquet' (PRESERVE_ORDER, PARTITION_BY (i), WRITE_PARTITION_COLUMNS);
|
||||
----
|
||||
PRESERVE_ORDER is not supported with these parameters
|
||||
18
external/duckdb/test/sql/copy/parquet/corrupt_stats.test
vendored
Normal file
18
external/duckdb/test/sql/copy/parquet/corrupt_stats.test
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
# name: test/sql/copy/parquet/corrupt_stats.test
|
||||
# description: Issue #14430: group by a timestamp column in a parquet file can cause the process to crash
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement error
|
||||
SELECT a FROM 'data/parquet-testing/corrupt_stats.parquet' GROUP BY a;
|
||||
----
|
||||
This likely means that the statistics in your data source are corrupt
|
||||
|
||||
statement ok
|
||||
PRAGMA disable_optimizer
|
||||
|
||||
query I
|
||||
SELECT a FROM 'data/parquet-testing/corrupt_stats.parquet' GROUP BY a;
|
||||
----
|
||||
2021-01-01 12:00:00
|
||||
28
external/duckdb/test/sql/copy/parquet/decimal_filter.test
vendored
Normal file
28
external/duckdb/test/sql/copy/parquet/decimal_filter.test
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
# name: test/sql/copy/parquet/decimal_filter.test
|
||||
# description: Decimal filter pushdown into Parquet
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
pragma enable_verification
|
||||
|
||||
query IIII
|
||||
select * from 'data/parquet-testing/decimals.parquet'
|
||||
----
|
||||
0.1 0.1 0.1 0.1
|
||||
-0.1 -0.1 -0.1 -0.1
|
||||
|
||||
loop i 1 5
|
||||
|
||||
query IIII
|
||||
select * from 'data/parquet-testing/decimals.parquet' WHERE l${i}=0.1
|
||||
----
|
||||
0.1 0.1 0.1 0.1
|
||||
|
||||
query IIII
|
||||
select * from 'data/parquet-testing/decimals.parquet' WHERE l${i}=-0.1
|
||||
----
|
||||
-0.1 -0.1 -0.1 -0.1
|
||||
|
||||
endloop
|
||||
2016
external/duckdb/test/sql/copy/parquet/delta_byte_array.test
vendored
Normal file
2016
external/duckdb/test/sql/copy/parquet/delta_byte_array.test
vendored
Normal file
File diff suppressed because it is too large
Load Diff
14
external/duckdb/test/sql/copy/parquet/describe_parquet.test
vendored
Normal file
14
external/duckdb/test/sql/copy/parquet/describe_parquet.test
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# name: test/sql/copy/parquet/describe_parquet.test
|
||||
# description: Test DESCRIBE on a parquet file
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query IIIIII nosort describeresult
|
||||
DESCRIBE 'data/parquet-testing/delta_byte_array.parquet'
|
||||
|
||||
query IIIIII nosort describeresult
|
||||
DESCRIBE "data/parquet-testing/delta_byte_array.parquet"
|
||||
|
||||
query IIIIII nosort describeresult
|
||||
DESCRIBE FROM read_parquet("data/parquet-testing/delta_byte_array.parquet")
|
||||
76
external/duckdb/test/sql/copy/parquet/dictionary_compression_ratio_threshold.test
vendored
Normal file
76
external/duckdb/test/sql/copy/parquet/dictionary_compression_ratio_threshold.test
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
# name: test/sql/copy/parquet/dictionary_compression_ratio_threshold.test
|
||||
# description: Test Parquet dictionary_compression_ratio_threshold parameter
|
||||
# group: [parquet]
|
||||
|
||||
# the setting dictionary_compression_ratio_threshold is DEPRECATED, the tests are here to make sure it can be set without issue
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test AS SELECT 'thisisaverylongstringbutitrepeatsmanytimessoitshighlycompressible' || (range % 10) i FROM range(100000)
|
||||
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet' (dictionary_compression_ratio_threshold -2)
|
||||
|
||||
|
||||
# default is 1.0
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet'
|
||||
|
||||
# dictionary compression is applied so page offset is non-null
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
false
|
||||
|
||||
# -1 to disable
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet' (dictionary_compression_ratio_threshold -1)
|
||||
|
||||
# ignored, still dictionary compression
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
false
|
||||
|
||||
# the data compresses more than 10x
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet' (dictionary_compression_ratio_threshold 10)
|
||||
|
||||
# dictionary compression should be enabled
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
false
|
||||
|
||||
# compresses less than 20x
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet' (dictionary_compression_ratio_threshold 20)
|
||||
|
||||
# dictionary compression still enabled, setting is deprecated
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
false
|
||||
|
||||
# create table with all uniques
|
||||
statement ok
|
||||
CREATE OR REPLACE TABLE test AS SELECT 'coolstring' || range i FROM range(100000)
|
||||
|
||||
# should still have compression, setting is deprecated
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet'
|
||||
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
true
|
||||
|
||||
# but if we set our threshold to 0 we create a dictionary anyway
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/dictionary_compression_ratio_threshold.parquet' (dictionary_compression_ratio_threshold 0)
|
||||
|
||||
query I
|
||||
SELECT dictionary_page_offset IS NULL FROM parquet_metadata('__TEST_DIR__/dictionary_compression_ratio_threshold.parquet')
|
||||
----
|
||||
true
|
||||
14
external/duckdb/test/sql/copy/parquet/enum_converted_type.test
vendored
Normal file
14
external/duckdb/test/sql/copy/parquet/enum_converted_type.test
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# name: test/sql/copy/parquet/enum_converted_type.test
|
||||
# description: Test enum converted type
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query IIIIIII
|
||||
select * from 'data/parquet-testing/enum.parquet';
|
||||
----
|
||||
1 0 t1 test_span 1612550512340953 500000 [{'key': service_name, 'v_type': STRING, 'v_str': test_service, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': http_method, 'v_type': STRING, 'v_str': POST, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': method, 'v_type': STRING, 'v_str': callbacks.flannel, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': boolean, 'v_type': BOOL, 'v_str': '', 'v_bool': true, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': int, 'v_type': INT64, 'v_str': '', 'v_bool': false, 'v_int64': 1000, 'v_float64': 1001.2, 'v_binary': ''}, {'key': float, 'v_type': FLOAT64, 'v_str': '', 'v_bool': false, 'v_int64': 1000, 'v_float64': 1001.2, 'v_binary': ''}, {'key': binary, 'v_type': BINARY, 'v_str': ignored, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': binaryTagValue}, {'key': type, 'v_type': STRING, 'v_str': msg_type, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}]
|
||||
2 1 t1 test_span 1612550512340954 500001 [{'key': service_name, 'v_type': STRING, 'v_str': test_service, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': http_method, 'v_type': STRING, 'v_str': POST, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': method, 'v_type': STRING, 'v_str': callbacks.flannel, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': boolean, 'v_type': BOOL, 'v_str': '', 'v_bool': true, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}, {'key': int, 'v_type': INT64, 'v_str': '', 'v_bool': false, 'v_int64': 1000, 'v_float64': 1001.2, 'v_binary': ''}, {'key': float, 'v_type': FLOAT64, 'v_str': '', 'v_bool': false, 'v_int64': 1000, 'v_float64': 1001.2, 'v_binary': ''}, {'key': binary, 'v_type': BINARY, 'v_str': ignored, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': binaryTagValue}, {'key': type, 'v_type': STRING, 'v_str': msg_type, 'v_bool': false, 'v_int64': 0, 'v_float64': 0.0, 'v_binary': ''}]
|
||||
17
external/duckdb/test/sql/copy/parquet/file_metadata.test
vendored
Normal file
17
external/duckdb/test/sql/copy/parquet/file_metadata.test
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
# name: test/sql/copy/parquet/file_metadata.test
# group: [parquet]

require parquet

statement ok
SET parquet_metadata_cache = true;

query IIIIIIIII
SELECT * FROM parquet_file_metadata('data/parquet-testing/arrow/alltypes_dictionary.parquet')
----
data/parquet-testing/arrow/alltypes_dictionary.parquet impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9) 2 1 1 NULL NULL 1698 723

query IIIIIIIII
SELECT * FROM parquet_file_metadata('data/parquet-testing/arrow/alltypes_dictionary.parquet')
----
data/parquet-testing/arrow/alltypes_dictionary.parquet impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9) 2 1 1 NULL NULL 1698 723
10
external/duckdb/test/sql/copy/parquet/fixed.test
vendored
Normal file
10
external/duckdb/test/sql/copy/parquet/fixed.test
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# name: test/sql/copy/parquet/fixed.test
# description: Strings in fixed length binary arrays
# group: [parquet]

require parquet

query I
SELECT data FROM parquet_scan('data/parquet-testing/fixed.parquet')
----
\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F
38
external/duckdb/test/sql/copy/parquet/float16.test
vendored
Normal file
38
external/duckdb/test/sql/copy/parquet/float16.test
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# name: test/sql/copy/parquet/float16.test
# description: Test reading half-floats
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

query III
select type, type_length, logical_type from parquet_schema('data/parquet-testing/float16.parquet') where name = 'x'
----
FIXED_LEN_BYTE_ARRAY 2 Float16Type()

query I
select typeof(x) from read_parquet('data/parquet-testing/float16.parquet') limit 1;
----
FLOAT

query I
select x from read_parquet('data/parquet-testing/float16.parquet') order by x;
----
-inf
0.0
0.5
1.0
1.5
inf
nan
-nan

query I
select x from read_parquet('data/parquet-testing/float16.parquet') where x > 1.1 order by x;
----
1.5
inf
nan
-nan
37
external/duckdb/test/sql/copy/parquet/hive_partitioning_struct.test
vendored
Normal file
37
external/duckdb/test/sql/copy/parquet/hive_partitioning_struct.test
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
# name: test/sql/copy/parquet/hive_partitioning_struct.test
# description: Test hive partitioning and struct pushdown
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

statement ok
COPY (
SELECT
i//50 id,
{'a': i, 'b': 21} s
FROM range(100) t(i)
) TO '__TEST_DIR__/hive_partitioned_struct_col' (FORMAT PARQUET, PARTITION_BY (id))

query II
SELECT * FROM read_parquet('__TEST_DIR__/hive_partitioned_struct_col/**/*.parquet', hive_partitioning=1) WHERE s.a=42
----
{'a': 42, 'b': 21} 0

query I
SELECT s.a FROM read_parquet('__TEST_DIR__/hive_partitioned_struct_col/**/*.parquet', hive_partitioning=1) WHERE s.a=42
----
42

# what if the hive types themselves are structs?
statement ok
COPY (SELECT i id, {'a': i//2} s FROM range(100) t(i)) TO '__TEST_DIR__/hive_partitioned_struct' (FORMAT PARQUET, PARTITION_BY (s))

query II
SELECT * FROM read_parquet('__TEST_DIR__/hive_partitioned_struct/**/*.parquet', hive_partitioning=1, hive_types={'s': 'STRUCT(a INT)'}) WHERE s.a=42 ORDER BY ALL
----
84 {'a': 42}
85 {'a': 42}
62
external/duckdb/test/sql/copy/parquet/hive_timestamps.test
vendored
Normal file
62
external/duckdb/test/sql/copy/parquet/hive_timestamps.test
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
# name: test/sql/copy/parquet/hive_timestamps.test
# description: Prefer strict hive timestamps to dates
# group: [parquet]

require parquet

# requires notwindows for embedded spaces in the path
require notwindows

statement ok
PRAGMA enable_verification

set seed 0.8675309

statement ok
CREATE TABLE raw_data (
ts TIMESTAMP_S NOT NULL,
hits INTEGER NOT NULL
);

statement ok
INSERT INTO raw_data
SELECT *, (random() * 500)::INTEGER
FROM RANGE(TIMESTAMP '2023-11-01', TIMESTAMP '2023-11-06', INTERVAL 1 MINUTE);

statement ok
CREATE TABLE timeseries AS (
SELECT DATE_TRUNC('hour', ts) AS bucket, SUM(hits)::BIGINT AS total
FROM raw_data
GROUP BY bucket
);

query II
SELECT *
FROM timeseries
ORDER BY ALL
LIMIT 5
----
2023-11-01 00:00:00 15127
2023-11-01 01:00:00 16634
2023-11-01 02:00:00 14676
2023-11-01 03:00:00 14493
2023-11-01 04:00:00 13288

statement ok
COPY (
SELECT * FROM timeseries
) TO '__TEST_DIR__/hive' (
FORMAT 'PARQUET', COMPRESSION 'SNAPPY', PARTITION_BY (bucket), OVERWRITE_OR_IGNORE
);

query II
SELECT bucket, total
FROM read_parquet('__TEST_DIR__/hive/*/*.parquet')
ORDER BY ALL
LIMIT 5
----
2023-11-01 00:00:00 15127
2023-11-01 01:00:00 16634
2023-11-01 02:00:00 14676
2023-11-01 03:00:00 14493
2023-11-01 04:00:00 13288
60
external/duckdb/test/sql/copy/parquet/incorrect_converted_type.test
vendored
Normal file
60
external/duckdb/test/sql/copy/parquet/incorrect_converted_type.test
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
# name: test/sql/copy/parquet/incorrect_converted_type.test
# description: Test parquet files with incorrect converted type annotations
# group: [parquet]

require parquet

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_bigint.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_date.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_int.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_smallint.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_timestamp.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_timestamp_ms.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_tinyint.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_ubigint.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_uinteger.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_usmallint.parquet';
----
<REGEX>:.*IO Error.*converted type.*

statement error
SELECT * FROM 'data/parquet-testing/broken/broken_utinyint.parquet';
----
<REGEX>:.*IO Error.*converted type.*
24
external/duckdb/test/sql/copy/parquet/infer_copy_format.test
vendored
Normal file
24
external/duckdb/test/sql/copy/parquet/infer_copy_format.test
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# name: test/sql/copy/parquet/infer_copy_format.test
# description: Infer COPY TO format test
# group: [parquet]

require parquet

statement ok
CREATE TABLE integers AS SELECT * FROM range(6) tbl(i);

statement ok
COPY integers TO '__TEST_DIR__/integers.parquet';

query I
SELECT SUM(i) FROM '__TEST_DIR__/integers.parquet';
----
15

statement ok
COPY integers TO '__TEST_DIR__/integers.csv';

query I
SELECT SUM(i) FROM '__TEST_DIR__/integers.csv' tbl(i);
----
15
24
external/duckdb/test/sql/copy/parquet/json_parquet.test
vendored
Normal file
24
external/duckdb/test/sql/copy/parquet/json_parquet.test
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# name: test/sql/copy/parquet/json_parquet.test
# description: Test JSON Parquet
# group: [parquet]

require parquet

require json

statement ok
CREATE TABLE json_tbl AS FROM 'data/parquet-testing/json_convertedtype.parquet';

query I
SELECT json_extract(TX_JSON[1], 'block_hash') FROM json_tbl
----
"0x95cc694a09424ba463e4b1b704b86a56a41521473b3b4875691383c3d5c799b3"
"0x5aa34b59d13fc0c6c199c67451c5643ecfd905ee1ac940478b1e700203c707be"
"0x987d9d3a51a630337cdbd78858676ca6237ea692306ebd3586b4c3cb79e3762c"
"0x5f90325321b570ba4ade766478df3c73a5b89336c2b57b8fa8e64d2f937639d9"
"0x48cb55cb6814a86cbba8e5e859df11bc8f01f772a3972fb5c22a31d1339c24e4"
"0x059f581a8c9b196e0d11b462be0e194eb837922e17409c94c6888b72d0001b49"
"0xa3855d62087826f46f97d6c90854c223f19bf95b2df0d2a446e5f00efbae8b80"
"0x5193e56b142dfca5873d7272ea4892a83a74b3d4faa99f8e49ff83d7d4b53e0d"
"0x099041d5e2b624f8be592db1d624998e06495f680c6134bf1d7401d919bd0af0"
"0x2e5b481bba4f1596484c65ac5c36075d83eb0f85d10ee509d808d23f2f2af8e0"
54
external/duckdb/test/sql/copy/parquet/kv_metadata.test
vendored
Normal file
54
external/duckdb/test/sql/copy/parquet/kv_metadata.test
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
# name: test/sql/copy/parquet/kv_metadata.test
# group: [parquet]

require parquet

# Test basic roundtrip
statement ok
COPY (SELECT 1, 'foo') TO '__TEST_DIR__/kv_metadata_test.parquet' (FORMAT PARQUET, KV_METADATA {foo: 'bar', baz: 42, quz: '\xC3\xB6\xC3\xA4\xC3\xA5'::BLOB});

query II
SELECT key::VARCHAR, value::VARCHAR FROM parquet_kv_metadata('__TEST_DIR__/kv_metadata_test.parquet');
----
foo bar
baz 42
quz \xC3\xB6\xC3\xA4\xC3\xA5

query II
SELECT * FROM '__TEST_DIR__/kv_metadata_test.parquet'
----
1 foo

# Test decoding blobs
query II
SELECT key::VARCHAR, decode(value) FROM parquet_kv_metadata('__TEST_DIR__/kv_metadata_test.parquet') WHERE key = 'quz';
----
quz öäå

# Test invalid metadata
statement error
COPY (SELECT 1, 'foo') TO '__TEST_DIR__/kv_metadata_test_fail.parquet' (FORMAT PARQUET, KV_METADATA 'foobar');
----
Expected kv_metadata argument to be a STRUCT

# Test no kv
statement ok
COPY (SELECT 3, 'baz') TO '__TEST_DIR__/kv_metadata_test3.parquet' (FORMAT PARQUET);

query II
SELECT key::VARCHAR, value::VARCHAR FROM parquet_kv_metadata('__TEST_DIR__/kv_metadata_test3.parquet');
----

# Test globbing
statement ok
COPY (SELECT 2, 'bar') TO '__TEST_DIR__/kv_metadata_test2.parquet' (FORMAT PARQUET, KV_METADATA {a: 'b', c: 'd'});

query III
SELECT replace(replace(file_name, '\', '/'),replace('__TEST_DIR__/', '\', '/'), '') AS file_name, key::VARCHAR, value::VARCHAR FROM parquet_kv_metadata('__TEST_DIR__/kv_metadata_tes*') ORDER BY 1, 2;
----
kv_metadata_test.parquet baz 42
kv_metadata_test.parquet foo bar
kv_metadata_test.parquet quz \xC3\xB6\xC3\xA4\xC3\xA5
kv_metadata_test2.parquet a b
kv_metadata_test2.parquet c d
33
external/duckdb/test/sql/copy/parquet/lineitem_arrow.test
vendored
Normal file
33
external/duckdb/test/sql/copy/parquet/lineitem_arrow.test
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
# name: test/sql/copy/parquet/lineitem_arrow.test
# description: Issue #2261: TPC-H Q6 fails on Parquet input
# group: [parquet]

require tpch

require parquet

statement ok
CREATE TABLE lineitem AS SELECT * FROM 'data/parquet-testing/arrow/lineitem-arrow.parquet'

query I nosort q01
PRAGMA tpch(1)
----

query I nosort q06
PRAGMA tpch(6)
----

statement ok
DROP TABLE lineitem

statement ok
CREATE VIEW lineitem AS SELECT * FROM 'data/parquet-testing/arrow/lineitem-arrow.parquet'

query I nosort q01
PRAGMA tpch(1)
----

query I nosort q06
PRAGMA tpch(6)
----
68
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_integer_types.test
vendored
Normal file
68
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_integer_types.test
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
# name: test/sql/copy/parquet/multi_file/multi_file_filter_integer_types.test
|
||||
# description: Test multi file filters
|
||||
# group: [multi_file]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
COPY (FROM (VALUES ('f1', 42::INT), ('f1', 8::INT), ('f1', NULL::INT)) t(f, i)) TO '__TEST_DIR__/multi_file_filter_f1.parquet'
|
||||
|
||||
statement ok
|
||||
COPY (FROM (VALUES (42::BIGINT, 'f2'), (124::BIGINT, 'f2'), (NULL::BIGINT, 'f2')) t(i, f)) TO '__TEST_DIR__/multi_file_filter_f2.parquet'
|
||||
|
||||
# the schema of the file depends on the first file read
|
||||
statement ok
|
||||
CREATE VIEW integer_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f1.parquet', '__TEST_DIR__/multi_file_filter_f2.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW bigint_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f2.parquet', '__TEST_DIR__/multi_file_filter_f1.parquet'])
|
||||
|
||||
# equality
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i='042'
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM bigint_file_first
|
||||
WHERE i='042'
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
|
||||
# greater than
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i>10
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
f2 124
|
||||
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM bigint_file_first
|
||||
WHERE i>'10'
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
f2 124
|
||||
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i IS NULL
|
||||
----
|
||||
f1 NULL
|
||||
f2 NULL
|
||||
69
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_mixed.test
vendored
Normal file
69
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_mixed.test
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
# name: test/sql/copy/parquet/multi_file/multi_file_filter_mixed.test
|
||||
# description: Test multi file filters
|
||||
# group: [multi_file]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
COPY (FROM (VALUES ('f1', 42), ('f1', 8), ('f1', NULL)) t(f, i)) TO '__TEST_DIR__/multi_file_filter_f1.parquet'
|
||||
|
||||
statement ok
|
||||
COPY (FROM (VALUES ('042', 'f2'), ('124', 'f2'), (NULL, 'f2')) t(i, f)) TO '__TEST_DIR__/multi_file_filter_f2.parquet'
|
||||
|
||||
# the schema of the file depends on the first file read
|
||||
statement ok
|
||||
CREATE VIEW integer_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f1.parquet', '__TEST_DIR__/multi_file_filter_f2.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW string_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f2.parquet', '__TEST_DIR__/multi_file_filter_f1.parquet'])
|
||||
|
||||
# equality
|
||||
# casting to integer - this works (since '042' = 42)
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i='042'
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
|
||||
# casting to string - we only get '042' now
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM string_file_first
|
||||
WHERE i='042'
|
||||
----
|
||||
f2 042
|
||||
|
||||
# greater than
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i>10
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
f2 42
|
||||
f2 124
|
||||
|
||||
# for strings, '8' is bigger than '10' (since '8' is bigger than '1')
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM string_file_first
|
||||
WHERE i>'10'
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
f1 8
|
||||
f2 124
|
||||
|
||||
query II
|
||||
SELECT f, i
|
||||
FROM integer_file_first
|
||||
WHERE i IS NULL
|
||||
----
|
||||
f1 NULL
|
||||
f2 NULL
|
||||
81
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_struct.test
vendored
Normal file
81
external/duckdb/test/sql/copy/parquet/multi_file/multi_file_filter_struct.test
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
# name: test/sql/copy/parquet/multi_file/multi_file_filter_struct.test
|
||||
# description: Test multi file filters on structs
|
||||
# group: [multi_file]
|
||||
|
||||
require parquet
|
||||
|
||||
# statement ok
|
||||
# PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
COPY (SELECT {'f': f, 'i': i} struct_val FROM (VALUES ('f1', 42::INT), ('f1', 8::INT), ('f1', NULL::INT)) t(f, i)) TO '__TEST_DIR__/multi_file_filter_f1.parquet'
|
||||
|
||||
statement ok
|
||||
COPY (SELECT {'i': i, 'f2': f} struct_val FROM (VALUES (42::BIGINT, 'f2'), (124::BIGINT, 'f2'), (NULL::BIGINT, 'f2')) t(i, f)) TO '__TEST_DIR__/multi_file_filter_f2.parquet'
|
||||
|
||||
# the schema of the file depends on the first file read
|
||||
statement ok
|
||||
CREATE VIEW integer_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f1.parquet', '__TEST_DIR__/multi_file_filter_f2.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW bigint_file_first AS FROM read_parquet(['__TEST_DIR__/multi_file_filter_f2.parquet', '__TEST_DIR__/multi_file_filter_f1.parquet'])
|
||||
|
||||
# projection pushdown
|
||||
query I
|
||||
SELECT struct_val.i
|
||||
FROM integer_file_first
|
||||
ORDER BY ALL
|
||||
----
|
||||
8
|
||||
42
|
||||
42
|
||||
124
|
||||
NULL
|
||||
NULL
|
||||
|
||||
# equality
|
||||
query II
|
||||
SELECT struct_val.f, struct_val.i
|
||||
FROM integer_file_first
|
||||
WHERE struct_val.i='042'
|
||||
----
|
||||
f1 42
|
||||
NULL 42
|
||||
|
||||
query I
|
||||
SELECT struct_val.i
|
||||
FROM bigint_file_first
|
||||
WHERE struct_val.i='042'
|
||||
ORDER BY ALL
|
||||
----
|
||||
42
|
||||
42
|
||||
|
||||
# greater than
|
||||
query II
|
||||
SELECT struct_val.f, struct_val.i
|
||||
FROM integer_file_first
|
||||
WHERE struct_val.i>10
|
||||
ORDER BY ALL
|
||||
----
|
||||
f1 42
|
||||
NULL 42
|
||||
NULL 124
|
||||
|
||||
query I
|
||||
SELECT struct_val.i
|
||||
FROM bigint_file_first
|
||||
WHERE struct_val.i>'10'
|
||||
ORDER BY ALL
|
||||
----
|
||||
42
|
||||
42
|
||||
124
|
||||
|
||||
query II
|
||||
SELECT struct_val.f, struct_val.i
|
||||
FROM integer_file_first
|
||||
WHERE struct_val.i IS NULL
|
||||
----
|
||||
f1 NULL
|
||||
NULL NULL
|
||||
24
external/duckdb/test/sql/copy/parquet/multi_file_conversion_error.test
vendored
Normal file
24
external/duckdb/test/sql/copy/parquet/multi_file_conversion_error.test
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# name: test/sql/copy/parquet/multi_file_conversion_error.test
# description: Test multi-file conversion error
# group: [parquet]

require parquet

statement ok
copy (select 42 as a) to '__TEST_DIR__/conversion_error1.parquet';

statement ok
copy (select blob 'hello world' as a) to '__TEST_DIR__/conversion_error2.parquet';

statement error
SELECT * FROM read_parquet(['__TEST_DIR__/conversion_error1.parquet', '__TEST_DIR__/conversion_error2.parquet'])
----
failed to cast column "a" from type BLOB to INTEGER

statement ok
CREATE TABLE integers(i INT);

statement error
COPY integers FROM '__TEST_DIR__/conversion_error*.parquet'
----
column "a" has type BLOB, but we are trying to load it into column "i" with type INTEGER
49
external/duckdb/test/sql/copy/parquet/parallel_parquet_glob.test
vendored
Normal file
49
external/duckdb/test/sql/copy/parquet/parallel_parquet_glob.test
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
# name: test/sql/copy/parquet/parallel_parquet_glob.test
|
||||
# description: Test parallel reads on multiple parquet files
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
PRAGMA threads=4
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/glob/t?.parquet')
|
||||
----
|
||||
2
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/glob/*')
|
||||
----
|
||||
2
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/glob/*.parquet')
|
||||
----
|
||||
2
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/g*/*.parquet')
|
||||
----
|
||||
3
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/g*/t1.parquet')
|
||||
----
|
||||
2
|
||||
|
||||
statement ok
|
||||
SET parquet_metadata_cache=true
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/g*/t1.parquet')
|
||||
----
|
||||
2
|
||||
|
||||
query I
|
||||
select count(*) from parquet_scan('data/parquet-testing/g*/t1.parquet')
|
||||
----
|
||||
2
|
||||
285
external/duckdb/test/sql/copy/parquet/parquet2.test
vendored
Normal file
285
external/duckdb/test/sql/copy/parquet/parquet2.test
vendored
Normal file
@@ -0,0 +1,285 @@
|
||||
# name: test/sql/copy/parquet/parquet2.test
|
||||
# description: Issue #2261: TPC-H Q6 fails on Parquet input
|
||||
# group: [parquet]
|
||||
|
||||
# Here's how we generate this
|
||||
# from pyspark.sql import SparkSession
|
||||
# from pyspark.sql.types import *
|
||||
#
|
||||
# spark = SparkSession.builder.master("local").config("spark.hadoop.parquet.writer.version", "v2").getOrCreate()
|
||||
# sc = spark.sparkContext
|
||||
#
|
||||
# ref = spark.range(42, 10000, 2).toDF("id").orderBy(rand())
|
||||
# ref.show(10)
|
||||
#
|
||||
# ref.write.parquet("p2.parquet")
|
||||
|
||||
require parquet
|
||||
|
||||
|
||||
query I
|
||||
SELECT id FROM 'data/parquet-testing/p2.parquet' offset 4968;
|
||||
----
|
||||
1436
|
||||
2596
|
||||
4774
|
||||
4402
|
||||
5378
|
||||
5372
|
||||
8658
|
||||
808
|
||||
5876
|
||||
7214
|
||||
9816
|
||||
|
||||
|
||||
query I
|
||||
SELECT id FROM 'data/parquet-testing/p2.parquet' limit 10;
|
||||
----
|
||||
2644
|
||||
8534
|
||||
3276
|
||||
5264
|
||||
5766
|
||||
6018
|
||||
2080
|
||||
576
|
||||
1350
|
||||
9312
|
||||
|
||||
|
||||
|
||||
query I
|
||||
SELECT id FROM 'data/parquet-testing/p2.parquet' limit 100;
|
||||
----
|
||||
2644
|
||||
8534
|
||||
3276
|
||||
5264
|
||||
5766
|
||||
6018
|
||||
2080
|
||||
576
|
||||
1350
|
||||
9312
|
||||
8898
|
||||
1126
|
||||
6704
|
||||
2836
|
||||
390
|
||||
4440
|
||||
7582
|
||||
4386
|
||||
4482
|
||||
6866
|
||||
7814
|
||||
7246
|
||||
8998
|
||||
8454
|
||||
2004
|
||||
7770
|
||||
7590
|
||||
9092
|
||||
7586
|
||||
4762
|
||||
5672
|
||||
6782
|
||||
3968
|
||||
8102
|
||||
726
|
||||
3384
|
||||
3232
|
||||
9628
|
||||
4460
|
||||
556
|
||||
1368
|
||||
560
|
||||
4116
|
||||
4294
|
||||
988
|
||||
1404
|
||||
8380
|
||||
862
|
||||
9172
|
||||
3964
|
||||
5728
|
||||
8018
|
||||
8052
|
||||
8786
|
||||
8828
|
||||
8140
|
||||
4044
|
||||
324
|
||||
7102
|
||||
5898
|
||||
6848
|
||||
174
|
||||
5240
|
||||
4834
|
||||
1354
|
||||
5080
|
||||
2386
|
||||
7402
|
||||
8508
|
||||
2006
|
||||
1270
|
||||
4936
|
||||
4682
|
||||
436
|
||||
6056
|
||||
7772
|
||||
2792
|
||||
982
|
||||
7028
|
||||
8964
|
||||
6632
|
||||
4062
|
||||
8260
|
||||
9494
|
||||
6260
|
||||
8850
|
||||
9238
|
||||
7968
|
||||
9430
|
||||
8156
|
||||
9388
|
||||
478
|
||||
4478
|
||||
3400
|
||||
370
|
||||
130
|
||||
552
|
||||
7614
|
||||
1234
|
||||
5302
|
||||
|
||||
|
||||
|
||||
query I
|
||||
SELECT id_with_null FROM 'data/parquet-testing/p2.parquet' limit 100;
|
||||
----
|
||||
2644
|
||||
8534
|
||||
3276
|
||||
5264
|
||||
5766
|
||||
6018
|
||||
NULL
|
||||
576
|
||||
NULL
|
||||
9312
|
||||
8898
|
||||
1126
|
||||
6704
|
||||
2836
|
||||
NULL
|
||||
NULL
|
||||
7582
|
||||
4386
|
||||
4482
|
||||
6866
|
||||
7814
|
||||
7246
|
||||
8998
|
||||
8454
|
||||
2004
|
||||
NULL
|
||||
NULL
|
||||
9092
|
||||
7586
|
||||
4762
|
||||
5672
|
||||
6782
|
||||
3968
|
||||
8102
|
||||
726
|
||||
3384
|
||||
3232
|
||||
9628
|
||||
NULL
|
||||
556
|
||||
1368
|
||||
NULL
|
||||
4116
|
||||
4294
|
||||
988
|
||||
1404
|
||||
NULL
|
||||
862
|
||||
9172
|
||||
3964
|
||||
5728
|
||||
8018
|
||||
8052
|
||||
8786
|
||||
8828
|
||||
NULL
|
||||
4044
|
||||
324
|
||||
7102
|
||||
5898
|
||||
6848
|
||||
174
|
||||
NULL
|
||||
4834
|
||||
1354
|
||||
NULL
|
||||
2386
|
||||
7402
|
||||
8508
|
||||
2006
|
||||
NULL
|
||||
4936
|
||||
4682
|
||||
436
|
||||
6056
|
||||
7772
|
||||
2792
|
||||
982
|
||||
7028
|
||||
8964
|
||||
6632
|
||||
4062
|
||||
NULL
|
||||
9494
|
||||
NULL
|
||||
NULL
|
||||
9238
|
||||
7968
|
||||
NULL
|
||||
8156
|
||||
9388
|
||||
478
|
||||
4478
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
552
|
||||
7614
|
||||
1234
|
||||
5302
|
||||
|
||||
|
||||
query IIIIIIII
|
||||
select min(id), max(id), sum(id), count(id), min(id_with_null), max(id_with_null), sum(id_with_null), count(id_with_null) from 'data/parquet-testing/p2.parquet'
|
||||
----
|
||||
42 9998 24994580 4979 42 9998 19999680 3984
|
||||
|
||||
|
||||
query IIII
|
||||
select min(id_int), max(id_int), sum(id_int), count(id_int) from 'data/parquet-testing/p2.parquet'
|
||||
----
|
||||
42 9998 19999680 3984
|
||||
|
||||
|
||||
# from bug 2882
|
||||
query I
|
||||
select * from 'data/parquet-testing/7-set.snappy.arrow2.parquet';
|
||||
----
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
27
external/duckdb/test/sql/copy/parquet/parquet2strings.test
vendored
Normal file
27
external/duckdb/test/sql/copy/parquet/parquet2strings.test
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# name: test/sql/copy/parquet/parquet2strings.test
|
||||
# description: Issue #2261: TPC-H Q6 fails on Parquet input
|
||||
# group: [parquet]
|
||||
|
||||
# Here's how we generate this
|
||||
# from pyspark.sql import SparkSession
|
||||
# from pyspark.sql.types import *
|
||||
# from pyspark.sql.functions import *
|
||||
|
||||
# spark = SparkSession.builder.master("local").config("spark.hadoop.parquet.writer.version", "v2").getOrCreate()
|
||||
# sc = spark.sparkContext
|
||||
# ref = spark.range(42, 10000, 2).toDF("id").orderBy(rand())
|
||||
# ref2 = ref.selectExpr("*", "repeat('XYZ', id%5) || cast(id as string) id_string")
|
||||
|
||||
# ref2.show(10)
|
||||
# ref2.write.parquet("p2strings.parquet")
|
||||
# ref2.write.csv("p2strings.csv")
|
||||
|
||||
# for now
|
||||
mode skip
|
||||
|
||||
require parquet
|
||||
|
||||
|
||||
query I
|
||||
SELECT id_string FROM 'data/parquet-testing/p2strings.parquet' limit 10;
|
||||
----
|
||||
19
external/duckdb/test/sql/copy/parquet/parquet_10148.test
vendored
Normal file
19
external/duckdb/test/sql/copy/parquet/parquet_10148.test
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
# name: test/sql/copy/parquet/parquet_10148.test
|
||||
# description: Issue #10148: Wide decimal values in stats
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT CDCONO FROM 'data/parquet-testing/bug10148-wide-decimal-stats.parquet'
|
||||
----
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
11
external/duckdb/test/sql/copy/parquet/parquet_10279.test
vendored
Normal file
11
external/duckdb/test/sql/copy/parquet/parquet_10279.test
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
# name: test/sql/copy/parquet/parquet_10279.test
|
||||
# description: Issue #10279: Data loss with parquet INT64 and DELTA encoding
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query IIIIIIII
|
||||
SELECT * FROM 'data/parquet-testing/issue10279_delta_encoding.parquet'
|
||||
----
|
||||
MIN_VALUE false -128 -32768 -2147483648 -9223372036854775808 1e-45 5e-324
|
||||
MAX_VALUE true 127 32767 2147483647 9223372036854775807 3.4028235e+38 1.7976931348623157e+308
|
||||
12
external/duckdb/test/sql/copy/parquet/parquet_12621.test
vendored
Normal file
12
external/duckdb/test/sql/copy/parquet/parquet_12621.test
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# name: test/sql/copy/parquet/parquet_12621.test
|
||||
# description: Issue #12621: Parquet read : Invalid decimal encoding in Parquet file
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
select *
|
||||
from read_parquet('data/parquet-testing/issue12621.parquet')
|
||||
limit 1;
|
||||
----
|
||||
0.0000
|
||||
36
external/duckdb/test/sql/copy/parquet/parquet_13053_duplicate_column_names.test
vendored
Normal file
36
external/duckdb/test/sql/copy/parquet/parquet_13053_duplicate_column_names.test
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# name: test/sql/copy/parquet/parquet_13053_duplicate_column_names.test
|
||||
# description: Issue #13053: Parquet reader can't deal with duplicate column names
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
# original names
|
||||
query I
|
||||
select name from parquet_schema( 'data/parquet-testing/bug13053.parquet') offset 1;
|
||||
----
|
||||
column
|
||||
COLUMN
|
||||
Column
|
||||
|
||||
# renamed names
|
||||
query I
|
||||
SELECT column_name FROM (DESCRIBE FROM 'data/parquet-testing/bug13053.parquet')
|
||||
----
|
||||
column
|
||||
COLUMN_1
|
||||
Column_2
|
||||
|
||||
# case where _1 is already a column, maybe bit ugly but fine and consistent with CSV reader
|
||||
query I
|
||||
select name from parquet_schema( 'data/parquet-testing/bug13053-2.parquet') offset 1;
|
||||
----
|
||||
column
|
||||
column_1
|
||||
column
|
||||
|
||||
query I
|
||||
SELECT column_name FROM (DESCRIBE FROM 'data/parquet-testing/bug13053-2.parquet')
|
||||
----
|
||||
column
|
||||
column_1
|
||||
column_1_1
|
||||
16
external/duckdb/test/sql/copy/parquet/parquet_1554.test
vendored
Normal file
16
external/duckdb/test/sql/copy/parquet/parquet_1554.test
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
# name: test/sql/copy/parquet/parquet_1554.test
|
||||
# description: Unclear what went wrong here in the past, but it's fixed, and let's make sure it never happens again
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT COUNT(backlink_count) FROM parquet_scan('data/parquet-testing/bug1554.parquet') WHERE http_status_code=200
|
||||
----
|
||||
0
|
||||
|
||||
query II
|
||||
SELECT http_status_code, COUNT(backlink_count) FROM parquet_scan('data/parquet-testing/bug1554.parquet') GROUP BY http_status_code ORDER BY http_status_code
|
||||
----
|
||||
200 0
|
||||
301 0
|
||||
54
external/duckdb/test/sql/copy/parquet/parquet_1588.test
vendored
Normal file
54
external/duckdb/test/sql/copy/parquet/parquet_1588.test
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
# name: test/sql/copy/parquet/parquet_1588.test
|
||||
# description: Test boolean filters
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
pragma enable_verification
|
||||
|
||||
# pandas equivalent:
|
||||
# df = pandas.read_parquet('data/parquet-testing/bug1588.parquet')
|
||||
# df[(df.has_image_link == 1) & ((df.has_image_alt_text == 1) | df.is_image_alt_text_empty == 1)]
|
||||
|
||||
statement ok
|
||||
create table some_bools (val boolean);
|
||||
|
||||
statement ok
|
||||
insert into some_bools values (TRUE)
|
||||
|
||||
query I
|
||||
select count(*) from some_bools where val = 1;
|
||||
----
|
||||
1
|
||||
|
||||
query I
|
||||
select count(*) from some_bools where val = '1'::bool;
|
||||
----
|
||||
1
|
||||
|
||||
|
||||
query I
|
||||
SELECT has_image_link FROM parquet_scan('data/parquet-testing/bug1588.parquet') where has_image_link = 1
|
||||
----
|
||||
1
|
||||
1
|
||||
1
|
||||
|
||||
query I
|
||||
SELECT COUNT(*) FROM parquet_scan('data/parquet-testing/bug1588.parquet') WHERE has_image_link = 1
|
||||
----
|
||||
3
|
||||
|
||||
query I
|
||||
SELECT COUNT(*) FROM parquet_scan('data/parquet-testing/bug1588.parquet') WHERE has_image_link = '1'::bool
|
||||
----
|
||||
3
|
||||
|
||||
|
||||
# original query for the lolz
|
||||
query I
|
||||
SELECT COUNT(*) FROM parquet_scan('data/parquet-testing/bug1588.parquet') WHERE (has_image_link = 1 AND (has_image_alt_text = 0 OR is_image_alt_text_empty = 1))
|
||||
----
|
||||
2
|
||||
|
||||
16
external/duckdb/test/sql/copy/parquet/parquet_1589.test
vendored
Normal file
16
external/duckdb/test/sql/copy/parquet/parquet_1589.test
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
# name: test/sql/copy/parquet/parquet_1589.test
|
||||
# description: Test boolean filters
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
pragma enable_verification
|
||||
|
||||
query I
|
||||
SELECT backlink_count FROM parquet_scan('data/parquet-testing/bug1589.parquet') LIMIT 1
|
||||
----
|
||||
NULL
|
||||
|
||||
statement ok
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/bug1589.parquet')
|
||||
23
external/duckdb/test/sql/copy/parquet/parquet_1618_struct_strings.test
vendored
Normal file
23
external/duckdb/test/sql/copy/parquet/parquet_1618_struct_strings.test
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# name: test/sql/copy/parquet/parquet_1618_struct_strings.test
|
||||
# description: Unclear what went wrong here in the past, but it's fixed, and let's make sure it never happens again
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT "inner"['str_field'] FROM parquet_scan('data/parquet-testing/bug1618_struct_strings.parquet')
|
||||
----
|
||||
hello
|
||||
NULL
|
||||
|
||||
query I
|
||||
SELECT "inner"['f64_field'] FROM parquet_scan('data/parquet-testing/bug1618_struct_strings.parquet')
|
||||
----
|
||||
NULL
|
||||
1.23
|
||||
|
||||
query I
|
||||
SELECT "inner" FROM parquet_scan('data/parquet-testing/bug1618_struct_strings.parquet')
|
||||
----
|
||||
{'str_field': hello, 'f64_field': NULL}
|
||||
{'str_field': NULL, 'f64_field': 1.23}
|
||||
23
external/duckdb/test/sql/copy/parquet/parquet_1619.test
vendored
Normal file
23
external/duckdb/test/sql/copy/parquet/parquet_1619.test
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# name: test/sql/copy/parquet/parquet_1619.test
|
||||
# description: Error: Not implemented Error: Expr of type 347 not implemented
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
select struct_extract("inner", 'f64_field') from parquet_scan('data/parquet-testing/struct.parquet');
|
||||
----
|
||||
NULL
|
||||
1.23
|
||||
|
||||
query I
|
||||
select ("inner")."f64_field" from parquet_scan('data/parquet-testing/struct.parquet');
|
||||
----
|
||||
NULL
|
||||
1.23
|
||||
|
||||
query I
|
||||
select "inner"['f64_field'] from parquet_scan('data/parquet-testing/struct.parquet');
|
||||
----
|
||||
NULL
|
||||
1.23
|
||||
14
external/duckdb/test/sql/copy/parquet/parquet_1723.test_slow
vendored
Normal file
14
external/duckdb/test/sql/copy/parquet/parquet_1723.test_slow
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# name: test/sql/copy/parquet/parquet_1723.test_slow
|
||||
# description: CREATE TABLE from parquet crashes latest bleeding edge
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I nosort query
|
||||
select * from 'data/parquet-testing/leftdate3_192_loop_1.parquet'
|
||||
|
||||
statement ok
|
||||
create table test as select * from 'data/parquet-testing/leftdate3_192_loop_1.parquet'
|
||||
|
||||
query I nosort query
|
||||
select * from test
|
||||
15
external/duckdb/test/sql/copy/parquet/parquet_2267.test
vendored
Normal file
15
external/duckdb/test/sql/copy/parquet/parquet_2267.test
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# name: test/sql/copy/parquet/parquet_2267.test
|
||||
# description: Issue #2267: "Struct child row count mismatch" on parquet read
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/bug2267.parquet')
|
||||
----
|
||||
[{'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df}, {'disabledPlans': [8a256a2b-b617-496d-b51b-e76466e88db0, 41781fb2-bc02-4b7c-bd55-b576c07bb09d, eec0eb4f-6444-4f95-aba0-50c24d67f998], 'skuId': 84a661c4-e949-4bd2-a560-ed7766fcaf2b}, {'disabledPlans': [], 'skuId': b05e124f-c7cc-45a0-a6aa-8cf78c946968}, {'disabledPlans': [], 'skuId': f30db892-07e9-47e9-837c-80727f46fd3d}]
|
||||
|
||||
query I
|
||||
SELECT assignedLicenses[1] FROM parquet_scan('data/parquet-testing/bug2267.parquet')
|
||||
----
|
||||
{'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df}
|
||||
78
external/duckdb/test/sql/copy/parquet/parquet_3896.test
vendored
Normal file
78
external/duckdb/test/sql/copy/parquet/parquet_3896.test
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
# name: test/sql/copy/parquet/parquet_3896.test
|
||||
# description: Issue #3896: Error reading parquet file: Struct child row count mismatch
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
# single struct with map and scalar key
|
||||
statement ok
|
||||
CREATE VIEW v1 AS
|
||||
SELECT map([2], [{'key1': map([3,4],[1,2]), 'key2':2}]) AS x
|
||||
|
||||
query I nosort mapres1
|
||||
SELECT * FROM v1;
|
||||
----
|
||||
|
||||
statement ok
|
||||
COPY v1
|
||||
TO '__TEST_DIR__/map.parquet' (FORMAT 'parquet');
|
||||
|
||||
query I nosort mapres1
|
||||
SELECT * FROM '__TEST_DIR__/map.parquet';
|
||||
----
|
||||
|
||||
# multiple struct with map and scalar key
|
||||
statement ok
|
||||
CREATE VIEW v2 AS
|
||||
SELECT map([2], [{'key1': map([3,4],[1,2]), 'key2':2}]) AS x
|
||||
UNION ALL
|
||||
SELECT map([2], [{'key1': map([3,4],[1,2]), 'key2':2}])
|
||||
|
||||
query I nosort mapres2
|
||||
SELECT * FROM v2;
|
||||
----
|
||||
|
||||
statement ok
|
||||
COPY v2
|
||||
TO '__TEST_DIR__/map.parquet' (FORMAT 'parquet');
|
||||
|
||||
query I nosort mapres2
|
||||
SELECT * FROM '__TEST_DIR__/map.parquet';
|
||||
----
|
||||
|
||||
# struct with struct of lists and scalar key
|
||||
statement ok
|
||||
CREATE VIEW v3 AS
|
||||
SELECT {'key': [2], 'val': [{'key1': {'key': [3,4], 'val': [1,2]}, 'key2':2}]} AS x
|
||||
|
||||
query I nosort structres1
|
||||
SELECT * FROM v3;
|
||||
----
|
||||
|
||||
statement ok
|
||||
COPY v3
|
||||
TO '__TEST_DIR__/map.parquet' (FORMAT 'parquet');
|
||||
|
||||
query I nosort structres1
|
||||
SELECT * FROM '__TEST_DIR__/map.parquet';
|
||||
----
|
||||
|
||||
# struct with struct of lists and scalar list key
|
||||
statement ok
|
||||
CREATE VIEW v4 AS
|
||||
SELECT {'key': [2], 'val': [{'key1': {'key': [3,4], 'val': [1,2]}, 'key2':[2]}]} AS x
|
||||
|
||||
query I nosort structres2
|
||||
SELECT * FROM v4;
|
||||
----
|
||||
|
||||
statement ok
|
||||
COPY v4
|
||||
TO '__TEST_DIR__/map.parquet' (FORMAT 'parquet');
|
||||
|
||||
query I nosort structres2
|
||||
SELECT * FROM '__TEST_DIR__/map.parquet';
|
||||
----
|
||||
16
external/duckdb/test/sql/copy/parquet/parquet_3989.test
vendored
Normal file
16
external/duckdb/test/sql/copy/parquet/parquet_3989.test
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
# name: test/sql/copy/parquet/parquet_3989.test
|
||||
# description: Issue #3989: Skipping more than 1024 values on list column fails
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
CREATE TABLE lists as SELECT i as id, [i] as list from range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
COPY lists to '__TEST_DIR__/list_bug_test.parquet';
|
||||
|
||||
query I
|
||||
SELECT list from '__TEST_DIR__/list_bug_test.parquet' where id = 5000;
|
||||
----
|
||||
[5000]
|
||||
10
external/duckdb/test/sql/copy/parquet/parquet_4442.test
vendored
Normal file
10
external/duckdb/test/sql/copy/parquet/parquet_4442.test
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# name: test/sql/copy/parquet/parquet_4442.test
|
||||
# description: Issue #4442: Parquet reader converts timestamp to i64 *sometimes*
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query IIIIIIIIIIIIIIIII
|
||||
SELECT * FROM 'data/parquet-testing/bug4442.parquet'
|
||||
----
|
||||
12 5184 1 22 2011-10-06 22:21:49.58+00 outbound 323020033 {} 2100 33 0 7 10 0 1317427200000 1317939709580 11
|
||||
8
external/duckdb/test/sql/copy/parquet/parquet_4859.test
vendored
Normal file
8
external/duckdb/test/sql/copy/parquet/parquet_4859.test
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
# name: test/sql/copy/parquet/parquet_4859.test
# description: Issue #4859: Structs in structs lost type info in recursive call to TypeHasExactRowCount
# group: [parquet]

require parquet

statement ok
select "repositoryTopics.edges" from "data/parquet-testing/bug4859.parquet"
11
external/duckdb/test/sql/copy/parquet/parquet_4903.test
vendored
Normal file
11
external/duckdb/test/sql/copy/parquet/parquet_4903.test
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
# name: test/sql/copy/parquet/parquet_4903.test
|
||||
# description: Issue #4442: Parquet reader converts timestamp to i64 *sometimes*
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
# file is corrupt
|
||||
statement error
|
||||
SELECT type_param_constraints FROM 'data/parquet-testing/bug4903.parquet' limit 10
|
||||
----
|
||||
<REGEX>:.*Binder Error.*not found in FROM clause.*
|
||||
24
external/duckdb/test/sql/copy/parquet/parquet_5209.test
vendored
Normal file
24
external/duckdb/test/sql/copy/parquet/parquet_5209.test
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# name: test/sql/copy/parquet/parquet_5209.test
|
||||
# description: Issue #5209: Parquet writer did not set total_uncompressed_size in column chunk statistics
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
require vector_size 2048
|
||||
|
||||
statement ok
|
||||
SET threads=1;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_5209 AS SELECT range FROM range(10000);
|
||||
|
||||
statement ok
|
||||
COPY test_5209 TO '__TEST_DIR__/test_5209.parquet' (ROW_GROUP_SIZE 1000, PARQUET_VERSION 'V1');
|
||||
|
||||
query III
|
||||
SELECT SUM(total_compressed_size) > 10000,
|
||||
SUM(total_uncompressed_size) > 10000,
|
||||
SUM(total_uncompressed_size) > SUM(total_compressed_size)
|
||||
FROM parquet_metadata('__TEST_DIR__/test_5209.parquet');
|
||||
----
|
||||
1 1 1
|
||||
13
external/duckdb/test/sql/copy/parquet/parquet_6044.test
vendored
Normal file
13
external/duckdb/test/sql/copy/parquet/parquet_6044.test
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# name: test/sql/copy/parquet/parquet_6044.test
|
||||
# description: Issue #6044: node: assertion failure when calling parquet_metadata
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
copy (select 0.9 AS a) to '__TEST_DIR__/tiny_decimal.parquet' (format 'parquet', codec 'zstd');
|
||||
|
||||
query I
|
||||
SELECT * FROM '__TEST_DIR__/tiny_decimal.parquet'
|
||||
----
|
||||
0.9
|
||||
22
external/duckdb/test/sql/copy/parquet/parquet_6580.test
vendored
Normal file
22
external/duckdb/test/sql/copy/parquet/parquet_6580.test
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
# name: test/sql/copy/parquet/parquet_6580.test
|
||||
# description: Issue #6580: Error when reading Parquet INT96 timestamps that are far in the past
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query IIII
|
||||
select *, make_timestamp(dt2*1000*1000)
|
||||
from read_parquet('data/parquet-testing/bug4903.parquet')
|
||||
where dt2 <= -9214570800
|
||||
limit 10
|
||||
----
|
||||
1678-01-01 00:00:00 -9214570800 1677-12-31 21:00:00 1677-12-31 21:00:00
|
||||
1677-01-01 00:00:00 -9246106800 1676-12-31 21:00:00 1676-12-31 21:00:00
|
||||
1676-01-01 00:00:00 -9277729200 1675-12-31 21:00:00 1675-12-31 21:00:00
|
||||
1675-01-01 00:00:00 -9309265200 1674-12-31 21:00:00 1674-12-31 21:00:00
|
||||
1674-01-01 00:00:00 -9340801200 1673-12-31 21:00:00 1673-12-31 21:00:00
|
||||
1673-01-01 00:00:00 -9372337200 1672-12-31 21:00:00 1672-12-31 21:00:00
|
||||
1672-01-01 00:00:00 -9403959600 1671-12-31 21:00:00 1671-12-31 21:00:00
|
||||
1671-01-01 00:00:00 -9435495600 1670-12-31 21:00:00 1670-12-31 21:00:00
|
||||
1670-01-01 00:00:00 -9467031600 1669-12-31 21:00:00 1669-12-31 21:00:00
|
||||
1669-01-01 00:00:00 -9498567600 1668-12-31 21:00:00 1668-12-31 21:00:00
|
||||
25
external/duckdb/test/sql/copy/parquet/parquet_6630_union_by_name.test
vendored
Normal file
25
external/duckdb/test/sql/copy/parquet/parquet_6630_union_by_name.test
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# name: test/sql/copy/parquet/parquet_6630_union_by_name.test
|
||||
# description: Issue #6630: Segmentation Fault when using union_by_name with read_parquet
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query II
|
||||
select
|
||||
distinct name,
|
||||
true as is_suspended_or_cancelled
|
||||
from read_parquet('data/parquet-testing/issue6630_*.parquet',union_by_name=True)
|
||||
where "timestamp" between '2023-01-26 20:00:00' and '2023-01-28 04:00:00'
|
||||
and (suspended = true or cancelled <> '' or state='SUSPENDED')
|
||||
and actual_time is null;
|
||||
----
|
||||
|
||||
query II
|
||||
select
|
||||
distinct name,
|
||||
true as is_suspended_or_cancelled
|
||||
from read_parquet('data/parquet-testing/issue6630_*.parquet', union_by_name=False)
|
||||
where "timestamp" between '2023-01-26 20:00:00' and '2023-01-28 04:00:00'
|
||||
and (suspended = true or cancelled <> '' or state='SUSPENDED')
|
||||
and actual_time is null;
|
||||
----
|
||||
48
external/duckdb/test/sql/copy/parquet/parquet_6933.test
vendored
Normal file
48
external/duckdb/test/sql/copy/parquet/parquet_6933.test
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
# name: test/sql/copy/parquet/parquet_6933.test
|
||||
# description: Issue #6933: Segfault when using parquet_metadata_cache alongside union_by_name for parquet files
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
CREATE TABLE table1 (
|
||||
name VARCHAR,
|
||||
);
|
||||
|
||||
statement ok
|
||||
INSERT INTO table1 VALUES ('Test value 1!');
|
||||
|
||||
statement ok
|
||||
INSERT INTO table1 VALUES ('Test value 2!');
|
||||
|
||||
statement ok
|
||||
COPY table1 TO '__TEST_DIR__/output1.parquet' (FORMAT PARQUET);
|
||||
|
||||
statement ok
|
||||
CREATE TABLE table2 (
|
||||
name VARCHAR,
|
||||
number INTEGER,
|
||||
);
|
||||
|
||||
statement ok
|
||||
INSERT INTO table2 VALUES ('Other test value', 1);
|
||||
|
||||
statement ok
|
||||
INSERT INTO table2 VALUES ('Other test value', 2);
|
||||
|
||||
statement ok
|
||||
COPY table2 TO '__TEST_DIR__/output2.parquet' (FORMAT PARQUET);
|
||||
|
||||
statement ok
|
||||
set parquet_metadata_cache=true;
|
||||
|
||||
query II
|
||||
SELECT name, number FROM read_parquet(['__TEST_DIR__/output*.parquet'], union_by_name=True) ORDER BY name, number
|
||||
----
|
||||
Other test value 1
|
||||
Other test value 2
|
||||
Test value 1! NULL
|
||||
Test value 2! NULL
|
||||
11
external/duckdb/test/sql/copy/parquet/parquet_6990.test_slow
vendored
Normal file
11
external/duckdb/test/sql/copy/parquet/parquet_6990.test_slow
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
# name: test/sql/copy/parquet/parquet_6990.test_slow
|
||||
# description: Issue #6990: Reading parquet file causes a segfault
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
SELECT * FROM 'data/parquet-testing/issue6990.parquet';
|
||||
15
external/duckdb/test/sql/copy/parquet/parquet_arrow_timestamp.test
vendored
Normal file
15
external/duckdb/test/sql/copy/parquet/parquet_arrow_timestamp.test
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# name: test/sql/copy/parquet/parquet_arrow_timestamp.test
|
||||
# description: Test loading a timestamp column from an arrow-parquet generated file
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query T
|
||||
select * from parquet_scan('data/parquet-testing/timestamp.parquet') order by 1
|
||||
----
|
||||
2020-10-05 17:21:49.48844
|
||||
|
||||
query T
|
||||
select * from parquet_scan('data/parquet-testing/timestamp-ms.parquet') order by 1
|
||||
----
|
||||
2020-10-05 17:21:49
|
||||
15
external/duckdb/test/sql/copy/parquet/parquet_blob.test
vendored
Normal file
15
external/duckdb/test/sql/copy/parquet/parquet_blob.test
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# name: test/sql/copy/parquet/parquet_blob.test
|
||||
# description: Test parquet file with blob content
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query III
|
||||
select * from parquet_scan('data/parquet-testing/blob.parquet')
|
||||
----
|
||||
1 \x04\x00 str1
|
||||
2 \x04\x00\x80 str2
|
||||
3 \x03\xFF\x00\xFF str3
|
||||
90
external/duckdb/test/sql/copy/parquet/parquet_blob_string.test
vendored
Normal file
90
external/duckdb/test/sql/copy/parquet/parquet_blob_string.test
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
# name: test/sql/copy/parquet/parquet_blob_string.test
|
||||
# description: Test binary_as_string BLOB Function
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
query I
|
||||
SELECT typeof(#1) FROM parquet_scan('data/parquet-testing/binary_string.parquet',binary_as_string=False) limit 1
|
||||
----
|
||||
BLOB
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/binary_string.parquet',binary_as_string=False)
|
||||
----
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
|
||||
query I
|
||||
SELECT typeof(#1) FROM parquet_scan('data/parquet-testing/binary_string.parquet',binary_as_string=True) limit 1
|
||||
----
|
||||
VARCHAR
|
||||
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/binary_string.parquet',binary_as_string=True)
|
||||
----
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
|
||||
query I
|
||||
SELECT converted_type FROM parquet_schema('data/parquet-testing/binary_string.parquet')
|
||||
----
|
||||
NULL
|
||||
NULL
|
||||
|
||||
statement error
|
||||
SET binary_as_sting=true
|
||||
----
|
||||
|
||||
statement ok
|
||||
SET binary_as_string=true
|
||||
|
||||
query I
|
||||
SELECT typeof(#1) FROM parquet_scan('data/parquet-testing/binary_string.parquet') limit 1
|
||||
----
|
||||
VARCHAR
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/binary_string.parquet')
|
||||
----
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
|
||||
statement ok
|
||||
SET binary_as_string=false
|
||||
|
||||
query I
|
||||
SELECT typeof(#1) FROM parquet_scan('data/parquet-testing/binary_string.parquet') limit 1
|
||||
----
|
||||
BLOB
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/binary_string.parquet')
|
||||
----
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
|
||||
# Preference goes to variable set in scan
|
||||
statement ok
|
||||
PRAGMA binary_as_string=1
|
||||
|
||||
query I
|
||||
SELECT typeof(#1) FROM parquet_scan('data/parquet-testing/binary_string.parquet' ,binary_as_string=False) limit 1
|
||||
----
|
||||
BLOB
|
||||
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/binary_string.parquet')
|
||||
----
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
43
external/duckdb/test/sql/copy/parquet/parquet_copy_type_mismatch.test
vendored
Normal file
43
external/duckdb/test/sql/copy/parquet/parquet_copy_type_mismatch.test
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
# name: test/sql/copy/parquet/parquet_copy_type_mismatch.test
|
||||
# description: Test error message when COPY FROM finds a type mismatch
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
PRAGMA enable_verification
|
||||
|
||||
statement ok
|
||||
SET storage_compatibility_version='v1.1.0'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers(i INTEGER);
|
||||
|
||||
statement ok
|
||||
COPY (SELECT DATE '1992-01-01' d) TO '__TEST_DIR__/single_date.parquet' (FORMAT parquet);
|
||||
|
||||
statement error
|
||||
COPY integers FROM '__TEST_DIR__/single_date.parquet'
|
||||
----
|
||||
the column "d" has type DATE, but we are trying to load it into column "i" with type INTEGER
|
||||
|
||||
statement ok
|
||||
COPY (SELECT DATE '1992-01-01' d, 42 k) TO '__TEST_DIR__/too_many_columns.parquet' (FORMAT parquet);
|
||||
|
||||
statement error
|
||||
COPY integers FROM '__TEST_DIR__/too_many_columns.parquet'
|
||||
----
|
||||
Table schema: i INTEGER
|
||||
|
||||
# multiple files with different schema
|
||||
statement ok
|
||||
COPY (SELECT 42 i) TO '__TEST_DIR__/f2.parquet' (FORMAT parquet);
|
||||
|
||||
statement ok
|
||||
COPY (SELECT date '1992-01-01' d, 84 i) TO '__TEST_DIR__/f1.parquet' (FORMAT parquet);
|
||||
|
||||
# result here depends on globbing order
|
||||
statement maybe
|
||||
COPY integers FROM '__TEST_DIR__/f*.parquet' (FORMAT parquet);
|
||||
----
|
||||
column count mismatch: expected 1 columns but found 2
|
||||
10
external/duckdb/test/sql/copy/parquet/parquet_corrupt_stats.test
vendored
Normal file
10
external/duckdb/test/sql/copy/parquet/parquet_corrupt_stats.test
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# name: test/sql/copy/parquet/parquet_corrupt_stats.test
# description: Test reading a Parquet file with stats that are out-of-range of the type
# group: [parquet]

require parquet

query I
FROM 'data/parquet-testing/out_of_range_stats.parquet'
----
255
15
external/duckdb/test/sql/copy/parquet/parquet_count_star.test
vendored
Normal file
15
external/duckdb/test/sql/copy/parquet/parquet_count_star.test
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# name: test/sql/copy/parquet/parquet_count_star.test
|
||||
# description: Test count star
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
query I
|
||||
SELECT COUNT(*) FROM 'data/parquet-testing/out_of_range_stats.parquet'
|
||||
----
|
||||
1
|
||||
|
||||
query I
|
||||
select COUNT(*) from parquet_scan('data/parquet-testing/glob*/t?.parquet')
|
||||
----
|
||||
3
|
||||
33
external/duckdb/test/sql/copy/parquet/parquet_encoding_skip.test
vendored
Normal file
33
external/duckdb/test/sql/copy/parquet/parquet_encoding_skip.test
vendored
Normal file
@@ -0,0 +1,33 @@
# name: test/sql/copy/parquet/parquet_encoding_skip.test
# description: Test skipping of various encodings
# group: [parquet]

require parquet

foreach parquet_version v1 v2

# primitives
statement ok
COPY (SELECT i id, i AS bigint, i::SMALLINT AS tinyint, i::DOUBLE dbl, 'prefix_' || i::VARCHAR str, 'constant' const_str FROM range(3000) t(i)) TO '__TEST_DIR__/skip.parquet' (PARQUET_VERSION '${parquet_version}');

query IIIIII
SELECT * FROM '__TEST_DIR__/skip.parquet' WHERE id>2995
----
2996 2996 2996 2996.0 prefix_2996 constant
2997 2997 2997 2997.0 prefix_2997 constant
2998 2998 2998 2998.0 prefix_2998 constant
2999 2999 2999 2999.0 prefix_2999 constant

# nested types
statement ok
COPY (SELECT i id, [i, i + 1, i + 2] l, {'a': i, 'l': [i, i + 1, i + 2]} struct_1, [{'a': i}, {'a': i + 1}, {'a': i + 2}] struct_2 FROM range(3000) t(i)) TO '__TEST_DIR__/skip_nested.parquet' (PARQUET_VERSION '${parquet_version}');

query IIII
SELECT * FROM '__TEST_DIR__/skip_nested.parquet' WHERE id>2995
----
2996 [2996, 2997, 2998] {'a': 2996, 'l': [2996, 2997, 2998]} [{'a': 2996}, {'a': 2997}, {'a': 2998}]
2997 [2997, 2998, 2999] {'a': 2997, 'l': [2997, 2998, 2999]} [{'a': 2997}, {'a': 2998}, {'a': 2999}]
2998 [2998, 2999, 3000] {'a': 2998, 'l': [2998, 2999, 3000]} [{'a': 2998}, {'a': 2999}, {'a': 3000}]
2999 [2999, 3000, 3001] {'a': 2999, 'l': [2999, 3000, 3001]} [{'a': 2999}, {'a': 3000}, {'a': 3001}]

endloop
94
external/duckdb/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow
vendored
Normal file
94
external/duckdb/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow
vendored
Normal file
@@ -0,0 +1,94 @@
# name: test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow
# description: Test Parquet encryption with OpenSSL for TPC-H
# group: [parquet]

require parquet

require httpfs

require tpch

statement ok
CALL dbgen(sf=1)

statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
EXPORT DATABASE '__TEST_DIR__/tpch_encrypted' (FORMAT 'parquet', ENCRYPTION_CONFIG {footer_key: 'key128'})

load :memory:

# re-add key upon loading the DB again
statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
IMPORT DATABASE '__TEST_DIR__/tpch_encrypted'

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

# now again without importing the DB, just with views, so we can test projection/filter pushdown
load :memory:

# re-add key upon loading the DB again
statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
CREATE VIEW lineitem AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/lineitem.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW orders AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/orders.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW partsupp AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/partsupp.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW part AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/part.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW customer AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/customer.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW supplier AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/supplier.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW nation AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/nation.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW region AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/region.parquet', encryption_config={footer_key: 'key128'});

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop
98
external/duckdb/test/sql/copy/parquet/parquet_encryption.test
vendored
Normal file
98
external/duckdb/test/sql/copy/parquet/parquet_encryption.test
vendored
Normal file
@@ -0,0 +1,98 @@
# name: test/sql/copy/parquet/parquet_encryption.test
# description: Test Parquet encryption
# group: [parquet]

require parquet

# parquet keys are not persisted across restarts
require noforcestorage

statement ok
PRAGMA enable_verification

# AES key must have one of the three specified lengths or be valid Base64
statement error
PRAGMA add_parquet_key('my_cool_key', '42')
----
Invalid Input Error: Invalid AES key. Not a plain AES key NOR a base64 encoded string

# Valid Base64 AES key must have one of the three specified lengths
statement error
PRAGMA add_parquet_key('my_invalid_duck_key', 'ZHVjaw==')
----
Invalid Input Error: Invalid AES key. Must have a length of 128, 192, or 256 bits (16, 24, or 32 bytes)

# we don't support this yet
statement error
COPY (SELECT 42 i) to '__TEST_DIR__/encrypted.parquet' (ENCRYPTION_CONFIG {column_keys: {key_name: ['col0', 'col1']}})
----
Not implemented Error: Parquet encryption_config column_keys not yet implemented

statement error
COPY (SELECT 42 i) to '__TEST_DIR__/encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'nonexistant'})
----
Binder Error: No key with name "nonexistant" exists. Add it with PRAGMA add_parquet_key('<key_name>','<key>');

# add keys of 3 different lengths
statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
PRAGMA add_parquet_key('key192', '012345678911234501234567')

statement ok
PRAGMA add_parquet_key('key256', '01234567891123450123456789112345')
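
# Illustrative note (not part of the upstream test): the keys above are plain ASCII strings, so
# 16 characters = 128 bits, 24 characters = 192 bits and 32 characters = 256 bits, which matches
# the three key sizes accepted by the error message above.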

# test all valid AES key lengths
foreach key_len 128 192 256

statement ok
COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'})

query I
SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}.parquet', encryption_config={footer_key: 'key${key_len}'})
----
42

statement ok
CREATE OR REPLACE TABLE test (i INTEGER)

statement ok
COPY test FROM '__TEST_DIR__/encrypted${key_len}.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'})

query I
SELECT * FROM test
----
42

endloop

# what happens if we don't try to decrypt even if the file is encrypted?
statement error
SELECT * FROM read_parquet('__TEST_DIR__/encrypted128.parquet')
----
Invalid Input Error

# what if we try to decrypt with the wrong key?
statement error
SELECT * FROM read_parquet('__TEST_DIR__/encrypted128.parquet', encryption_config={footer_key: 'key192'})
----
Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key?

# what if we don't encrypt, but try to decrypt?
statement ok
COPY (SELECT 42 i) to '__TEST_DIR__/unencrypted.parquet'

statement error
SELECT * FROM read_parquet('__TEST_DIR__/unencrypted.parquet', encryption_config={footer_key: 'key256'})
----
Invalid Input Error

# Use Base64 encoded key
statement ok
PRAGMA add_parquet_key('key256base64', 'MDEyMzQ1Njc4OTExMjM0NTAxMjM0NTY3ODkxMTIzNDU=')

query I
SELECT * FROM read_parquet('__TEST_DIR__/encrypted256.parquet', encryption_config={footer_key: 'key256base64'})
----
42
92
external/duckdb/test/sql/copy/parquet/parquet_encryption_tpch.test_slow
vendored
Normal file
92
external/duckdb/test/sql/copy/parquet/parquet_encryption_tpch.test_slow
vendored
Normal file
@@ -0,0 +1,92 @@
# name: test/sql/copy/parquet/parquet_encryption_tpch.test_slow
# description: Test Parquet encryption for TPC-H
# group: [parquet]

require parquet

require tpch

statement ok
CALL dbgen(sf=1)

statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
EXPORT DATABASE '__TEST_DIR__/tpch_encrypted' (FORMAT 'parquet', ENCRYPTION_CONFIG {footer_key: 'key128'})

load :memory:

# re-add key upon loading the DB again
statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
IMPORT DATABASE '__TEST_DIR__/tpch_encrypted'

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

# now again without importing the DB, just with views, so we can test projection/filter pushdown
load :memory:

# re-add key upon loading the DB again
statement ok
PRAGMA add_parquet_key('key128', '0123456789112345')

statement ok
CREATE VIEW lineitem AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/lineitem.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW orders AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/orders.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW partsupp AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/partsupp.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW part AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/part.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW customer AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/customer.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW supplier AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/supplier.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW nation AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/nation.parquet', encryption_config={footer_key: 'key128'});

statement ok
CREATE VIEW region AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/region.parquet', encryption_config={footer_key: 'key128'});

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop
13
external/duckdb/test/sql/copy/parquet/parquet_enum_test.test
vendored
Normal file
13
external/duckdb/test/sql/copy/parquet/parquet_enum_test.test
vendored
Normal file
@@ -0,0 +1,13 @@
# name: test/sql/copy/parquet/parquet_enum_test.test
# description: Test parquet file with enum content
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

query IIIIIIIIIIIIIIIIIIIIII
SELECT * FROM parquet_scan('data/parquet-testing/adam_genotypes.parquet')
----
{'referenceName': NULL, 'start': NULL, 'end': NULL, 'names': [name], 'splitFromMultiAllelic': false, 'referenceAllele': NULL, 'alternateAllele': NULL, 'quality': NULL, 'filtersApplied': NULL, 'filtersPassed': NULL, 'filtersFailed': [], 'annotation': NULL} NULL NULL NULL NULL NULL NULL NULL [] NULL NULL NULL NULL NULL NULL [] [] [] false false NULL NULL
34
external/duckdb/test/sql/copy/parquet/parquet_expression_filter.test
vendored
Normal file
34
external/duckdb/test/sql/copy/parquet/parquet_expression_filter.test
vendored
Normal file
@@ -0,0 +1,34 @@
# name: test/sql/copy/parquet/parquet_expression_filter.test
# description: Test expression filters on Parquet
# group: [parquet]

require parquet

statement ok
CREATE TABLE tbl AS
SELECT i, 'thisisalongstring'||(i%5000)::VARCHAR AS str
FROM range(100000) t(i);

statement ok
COPY tbl TO '__TEST_DIR__/parquet_expr.parquet'

statement ok
CREATE VIEW parq AS FROM '__TEST_DIR__/parquet_expr.parquet'

query I
SELECT COUNT(*) FROM parq
WHERE least(str, 'thisisalongstring50') = str
----
88940

query I
SELECT COUNT(*) FROM parq
WHERE least(str, 'thisisalongstring50') = str AND str >= 'this'
----
88940

query I
SELECT COUNT(*) FROM parq
WHERE least(str, 'thisisalongstring50') = str AND str >= 'thisisalongstring2000' AND str <= 'thisisalongstring4000'
----
44460
58
external/duckdb/test/sql/copy/parquet/parquet_external_access.test
vendored
Normal file
58
external/duckdb/test/sql/copy/parquet/parquet_external_access.test
vendored
Normal file
@@ -0,0 +1,58 @@
# name: test/sql/copy/parquet/parquet_external_access.test
# description: Test that enable_external_access blocks Parquet reads
# group: [parquet]

require parquet

# we can still read parquet files while external access is enabled
statement ok
CREATE TABLE lineitem AS SELECT * FROM 'data/parquet-testing/arrow/lineitem-arrow.parquet'

statement ok
SET enable_external_access=false;

# we cannot read parquet files
statement error
SELECT * FROM 'data/parquet-testing/arrow/lineitem-arrow.parquet'
----
<REGEX>:.*Permission Error: Cannot access file.*

# or their metadata
statement error
SELECT * FROM parquet_metadata('data/parquet-testing/arrow/lineitem-arrow.parquet')
----
<REGEX>:.*Permission Error: Cannot access file.*

statement error
SELECT * FROM parquet_schema('data/parquet-testing/arrow/lineitem-arrow.parquet')
----
<REGEX>:.*Permission Error: Cannot access file.*

# also not in a list
statement error
SELECT * FROM parquet_scan(['data/parquet-testing/arrow/lineitem-arrow.parquet', 'data/parquet-testing/arrow/lineitem-arrow.parquet'])
----
<REGEX>:.*Permission Error: Cannot access file.*

# neither can we glob
statement error
SELECT * FROM glob('data/parquet-testing/arrow/lineitem-arrow.parquet')
----
<REGEX>:.*Permission Error: Cannot access file.*

# or copy to/from...
statement error
COPY lineitem FROM 'data/parquet-testing/arrow/lineitem-arrow.parquet'
----
<REGEX>:.*Permission Error: Cannot access file.*

statement error
COPY lineitem TO '__TEST_DIR__/lineitem.parquet'
----
<REGEX>:.*Permission Error: Cannot access file.*

# we also can't just enable external access again
statement error
SET enable_external_access=true;
----
<REGEX>:.*Invalid Input Error: Cannot change.*while database is running.*
107
external/duckdb/test/sql/copy/parquet/parquet_filename.test
vendored
Normal file
107
external/duckdb/test/sql/copy/parquet/parquet_filename.test
vendored
Normal file
@@ -0,0 +1,107 @@
# name: test/sql/copy/parquet/parquet_filename.test
# description: Test the filename option of the parquet reader
# group: [parquet]

require parquet

# Simple glob with filenames, note that we replace \ for / to make tests pass on windows
query III
select i, j, replace(filename, '\', '/') from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) order by i;
----
1 a data/parquet-testing/glob/t1.parquet
2 b data/parquet-testing/glob/t2.parquet
3 c data/parquet-testing/glob2/t1.parquet

# Filter on filename col
query III
select i, j, replace(filename, '\', '/') as file from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where file='data/parquet-testing/glob2/t1.parquet';
----
3 c data/parquet-testing/glob2/t1.parquet

# filter on multiple vector sizes of rows
query I
SELECT count(filename) FROM parquet_scan('data/parquet-testing/p2.parquet', FILENAME=1) where id < 1000;
----
479

# filter pushdown on filename
query I
SELECT count(id) FROM parquet_scan('data/parquet-testing/p2.parquet', FILENAME=1) where filename >= 'data';
----
4979

# Filter on non-filename col
query I
select replace(filename, '\', '/') from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where i=2;
----
data/parquet-testing/glob/t2.parquet

statement ok
CREATE TABLE test_csv AS SELECT 1 as id, 'test_csv_content' as filename;

statement ok
COPY test_csv TO '__TEST_DIR__/filename_as_column.csv' WITH HEADER;

# This currently fails with a binder error
statement error
SELECT id, filename FROM read_csv_auto('__TEST_DIR__/filename_as_column.csv', FILENAME=1);
----

# Parquet filename name conflict
statement ok
CREATE TABLE test AS SELECT 1 as id, 'test' as filename;

statement ok
COPY test TO '__TEST_DIR__/filename_as_column.parquet';

# we currently don't support filename as a column name when using the filename option
statement error
SELECT * FROM parquet_scan('__TEST_DIR__/filename_as_column.parquet', FILENAME=1);
----

# Now also test copy
statement ok
CREATE TABLE test_copy (i INT, j VARCHAR, filename VARCHAR);

statement ok
INSERT INTO test_copy FROM read_parquet('data/parquet-testing/glob/t1.parquet', filename=1, binary_as_string=1);

query III
SELECT i, j, replace(filename, '\', '/') FROM test_copy
----
1 a data/parquet-testing/glob/t1.parquet

statement ok
INSERT INTO test_copy FROM read_parquet('data/parquet-testing/glob/t1.parquet', filename=1);

query III
SELECT i, j, replace(filename, '\', '/') FROM test_copy
----
1 a data/parquet-testing/glob/t1.parquet
1 a data/parquet-testing/glob/t1.parquet

statement error
COPY test_copy FROM 'data/parquet-testing/glob/t1.parquet';
----
column count mismatch

# Multiple row groups in same file
statement ok
CREATE TABLE test_table_large AS SELECT * FROM range(0,10000) tbl(i);

statement ok
COPY test_table_large TO '__TEST_DIR__/test_table_large.parquet' (ROW_GROUP_SIZE 1000);

query II
SELECT sum(i), max(regexp_replace(filename, '^.*/', '')) FROM parquet_scan('__TEST_DIR__/test_table_large.parquet', FILENAME=1) where i>5000;
----
37492500 test_table_large.parquet

# Same file twice
query III
SELECT i, j, replace(filename, '\', '/') as file FROM parquet_scan(['data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t2.parquet'], FILENAME=1) where file like '%t1%'
----
1 a data/parquet-testing/glob/t1.parquet
1 a data/parquet-testing/glob/t1.parquet
102
external/duckdb/test/sql/copy/parquet/parquet_filename_filter.test
vendored
Normal file
102
external/duckdb/test/sql/copy/parquet/parquet_filename_filter.test
vendored
Normal file
@@ -0,0 +1,102 @@
# name: test/sql/copy/parquet/parquet_filename_filter.test
# description: Test the filename filter pushdown
# group: [parquet]

require parquet

query III
select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value1';
----

# requires notwindows for windows-style path backslash reasons
require notwindows

query III
select i, j, filename from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) order by i;
----
1 a data/parquet-testing/glob/t1.parquet
2 b data/parquet-testing/glob/t2.parquet
3 c data/parquet-testing/glob2/t1.parquet

query III
select i, j, filename as file from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where file='data/parquet-testing/glob2/t1.parquet' or file='data/parquet-testing/glob/t2.parquet' order by i;
----
2 b data/parquet-testing/glob/t2.parquet
3 c data/parquet-testing/glob2/t1.parquet

query III
select i, j, filename as file from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where file='data/parquet-testing/glob2/t1.parquet' and i=3 order by i;
----
3 c data/parquet-testing/glob2/t1.parquet

query III
select i, j, filename as file from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where file='data/parquet-testing/glob2/t1.parquet' and i=2 order by i;
----

# This query should trigger the file skipping mechanism, which prevents reading metadata for files that are not scanned
query IIII
select id, value, date, filename from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, FILENAME=1) order by id;
----
1 value1 2012-01-01 data/parquet-testing/hive-partitioning/different_order/date=2012-01-01/part=a/test.parquet
2 value2 2013-01-01 data/parquet-testing/hive-partitioning/different_order/part=b/date=2013-01-01/test.parquet

# These queries test that the file skipping mechanism works even for complex filters on multiple filename-based filters
query IIII
select id, value, date, filename from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, FILENAME=1) where concat(date,filename)='2013-01-01data/parquet-testing/hive-partitioning/different_order/part=b/date=2013-01-01/test.parquet';
----
2 value2 2013-01-01 data/parquet-testing/hive-partitioning/different_order/part=b/date=2013-01-01/test.parquet

query IIII
select id, value, date, filename from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, FILENAME=1) where concat(date,filename)='2012-01-01data/parquet-testing/hive-partitioning/different_order/date=2012-01-01/part=a/test.parquet';
----
1 value1 2012-01-01 data/parquet-testing/hive-partitioning/different_order/date=2012-01-01/part=a/test.parquet

# Ensure we don't somehow end up mixing things up
query III
select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where f='value2';
----
2 value2 2013-01-01

query III
select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where f='value1';
----
1 value1 2012-01-01

query III
select id, value as f, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where filename='value1';
----

# These tests confirm that the ParquetScanStats will properly handle the pruned files list

statement ok
SET parquet_metadata_cache=true;

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%mismatching_count%' and id > 1;
----
2 value2

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%mismatching_count%' and id > 1;
----
2 value2

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%mismatching_count%' and value = 'value1';
----
1 value1

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%mismatching_count%' and value = 'value2';
----
2 value2

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%simple%' and value = 'value1';
----
1 value1

query II
select id, value from parquet_scan('data/parquet-testing/hive-partitioning/*/*/*/test.parquet', FILENAME=1) where filename like '%simple%' and value = 'value2';
----
2 value2
42
external/duckdb/test/sql/copy/parquet/parquet_filter_bug1391.test
vendored
Normal file
42
external/duckdb/test/sql/copy/parquet/parquet_filter_bug1391.test
vendored
Normal file
@@ -0,0 +1,42 @@
# name: test/sql/copy/parquet/parquet_filter_bug1391.test
# description: Test basic parquet reading
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE VIEW tbl AS SELECT * FROM PARQUET_SCAN('data/parquet-testing/filter_bug1391.parquet');

query I
SELECT ORGUNITID FROM tbl LIMIT 10
----
98
13
175
200
262
206
204
131
181
269

query I
SELECT COUNT(*) FROM tbl;
----
9789

query I
SELECT COUNT(*) FROM tbl
WHERE Namevalidfrom <= '2017-03-01'
AND Namevalidto >= '2017-03-01'
AND Parentnamevalidfrom <= '2017-03-01'
AND Parentnamevalidto >= '2017-03-01'
AND CustomerCode = 'CODE';
----
8722
28
external/duckdb/test/sql/copy/parquet/parquet_fixed_length_blob_dict.test
vendored
Normal file
28
external/duckdb/test/sql/copy/parquet/parquet_fixed_length_blob_dict.test
vendored
Normal file
@@ -0,0 +1,28 @@
# name: test/sql/copy/parquet/parquet_fixed_length_blob_dict.test
# description: Parquet file with dictionary of fixed length byte arrays
# group: [parquet]

require parquet

query IIIIIIIIIIII
SELECT
MIN(sfc_key), MAX(sfc_key),
MIN(gps_time), MAX(gps_time),
MIN(intensity), MAX(intensity),
MIN(classification), MAX(classification),
MIN(return_number), MAX(return_number),
MIN(number_of_returns), MAX(number_of_returns)
FROM parquet_scan('data/parquet-testing/sorted.zstd_18_131072_small.parquet')
----
\x00\xA0e\xFB\xF8|\xF0\xA8_t\x16\x9A
\x03,\xDF$)\xF5\x13\x11\x9B\x11k\x10
205949378.92443183
205949634.3036811
4
1035
1
9
1
5
1
7
100
external/duckdb/test/sql/copy/parquet/parquet_glob.test
vendored
Normal file
100
external/duckdb/test/sql/copy/parquet/parquet_glob.test
vendored
Normal file
@@ -0,0 +1,100 @@
# name: test/sql/copy/parquet/parquet_glob.test
# description: Test basic globbing of parquet files
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

query II
select * from parquet_scan('data/parquet-testing/glob*/t?.parquet') order by i
----
1 a
2 b
3 c
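
# For reference (illustrative note, not part of the upstream test): in these patterns '*' matches
# any run of characters within a path component, '?' matches a single character, and '[0-9]'
# matches one character from the listed set, so 'glob*/t?.parquet' picks up glob/t1.parquet,
# glob/t2.parquet and glob2/t1.parquet.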

query II
select * from parquet_scan('data/parquet-testing/glob/t[0-9].parquet') order by i
----
1 a
2 b

query II
select * from parquet_scan('data/parquet-testing/glob/*') order by i
----
1 a
2 b

query II
select * from parquet_scan('data/parquet-testing/glob/*.parquet') order by i
----
1 a
2 b

query II
select * from parquet_scan('data/parquet-testing/g*/*.parquet') order by i
----
1 a
2 b
3 c

query II
select * from parquet_scan('data/parquet-testing/g*/t1.parquet') order by i
----
1 a
3 c

# abs path
query II
select * from parquet_scan('__WORKING_DIRECTORY__/data/parquet-testing/g*/t1.parquet') order by i
----
1 a
3 c

# backslashes instead of forward slashes
query II
select * from parquet_scan('data\parquet-testing\g*\t1.parquet') order by i
----
1 a
3 c

# Double partial matches
query II rowsort
FROM parquet_scan('data/parquet-testing/glob3/*/dir/*.parquet');
----
1 a
3 c

statement error
select count(*) from parquet_scan('')
----
<REGEX>:.*IO Error.*can only be set for.*

# schema mismatch in parquet glob
statement error
select * from parquet_scan('data/parquet-testing/*.parquet')
----
<REGEX>:.*Invalid Input Error: Failed to read file.*

# parquet glob with COPY FROM
statement ok
CREATE TABLE vals (i INTEGER, j BLOB)

statement ok
COPY vals FROM 'data/parquet-testing/glob/t?.parquet' (FORMAT PARQUET);

query II
SELECT * FROM vals ORDER BY 1
----
1 a
2 b

# failed to copy: incorrect types found in parquet file
statement ok
CREATE TABLE vals2 (i INTEGER, j INTEGER)

statement error
COPY vals2 FROM '*/sql/*/parquet/*/glob/t?.parquet' (FORMAT PARQUET);
----
<REGEX>:.*IO Error: No files found that match the pattern.*
17
external/duckdb/test/sql/copy/parquet/parquet_go.test
vendored
Normal file
17
external/duckdb/test/sql/copy/parquet/parquet_go.test
vendored
Normal file
@@ -0,0 +1,17 @@
# name: test/sql/copy/parquet/parquet_go.test
# description: Issue #5744: Fail to import .parquet file created with parquet-go
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

query II
SELECT * FROM 'data/parquet-testing/parquet_go.parquet'
----
John Hello World
John Hello World
John Hello World
John Hello World
John Hello World
196
external/duckdb/test/sql/copy/parquet/parquet_hive.test
vendored
Normal file
196
external/duckdb/test/sql/copy/parquet/parquet_hive.test
vendored
Normal file
@@ -0,0 +1,196 @@
# name: test/sql/copy/parquet/parquet_hive.test
# description: Test the automatic parsing of the hive partitioning scheme
# group: [parquet]

require parquet

# test parsing hive partitioning scheme
query IIII
select id, value, part, date from parquet_scan('data/parquet-testing/hive-partitioning/simple/*/*/test.parquet', HIVE_PARTITIONING=1) order by id
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01
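
# For orientation (illustrative note, not part of the upstream test): hive partitioning encodes
# column values in the directory names, e.g. the 'different_order' files scanned below live at
#   data/parquet-testing/hive-partitioning/different_order/date=2012-01-01/part=a/test.parquet
#   data/parquet-testing/hive-partitioning/different_order/part=b/date=2013-01-01/test.parquet
# so the 'part' and 'date' columns are parsed from the path rather than read from the Parquet files.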

# As long as the names match, we don't really mind since everything is a string anyway
query IIII
select id, value, part, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) order by id
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01

# Filter should work too
query II
select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2013-01-01';
----
2 2013-01-01

query II
select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2012-01-01';
----
1 2012-01-01

query II
select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2018-01-01';
----

query IIII
select id, value, part, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where part='a' OR part='b' order by id;
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01

query II
select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2013-01-01' and id = 2;
----
2 2013-01-01

query II
select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2013-01-01' and id = 1;
----

# This query should trigger the file skipping mechanism, which prevents reading metadata for files that are not scanned
query III
select id, value, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2012-01-01' and id = 1;
----
1 value1 2012-01-01

query III
select id, value, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where date = '2012-01-01' or id <= 2 order by id;
----
1 value1 2012-01-01
2 value2 2013-01-01

# If the key names don't add up, there's nothing we can do
statement error
select * from parquet_scan('data/parquet-testing/hive-partitioning/mismatching_names/*/*/test.parquet', HIVE_PARTITIONING=1)
----
Hive partition mismatch

statement error
select * from parquet_scan('data/parquet-testing/hive-partitioning/mismatching_count/*/*/test.parquet', HIVE_PARTITIONING=1) WHERE part=b
----
Hive partition mismatch

statement error
select * from parquet_scan('data/parquet-testing/hive-partitioning/mismatching_names/*/*/test.parquet', HIVE_PARTITIONING=1, UNION_BY_NAME=1)
----
Hive partition mismatch

# Verify that filters are pushed down into the parquet scan (only files matching the filter are read)
query II
EXPLAIN select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where date = '2013-01-01';
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(date = '2013.*-01.*-01'\).*

query II
EXPLAIN select id, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where date = '2018-01-01';
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(date = '2018.*-01.*-01'\).*

# No Parquet Scan Filters should be applied here
query II
EXPLAIN select id, value, part, date from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where part='a' OR part='b' order by id;
----
physical_plan <!REGEX>:.*PARQUET_SCAN.*File Filters:.*

query II
EXPLAIN select id, date from parquet_scan('data/parquet-testing/hive-partitioning/simple/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where date = '2012-01-01' and id < 10;
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(date = '2012.*-01.*-01'\).*

query II
EXPLAIN select id, date from parquet_scan('data/parquet-testing/hive-partitioning/simple/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where date = '2013-01-01' and id < 10;
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(date = '2013.*-01.*-01'\).*

# Complex filter filtering first file
query IIII
select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where concat(date_cast::VARCHAR, part) == '2013-01-01b';
----
2 value2 b 2013-01-01

# Complex filter filtering first file, filter should be pruned completely
query II
explain select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where concat(date_cast::VARCHAR, part) == '2013-01-01b';
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(concat\(CAST.*\(CAST.*\(date AS.*DATE\) AS VARCHAR\), part\).*= '2013-01-01b'\).*

# Complex filter filtering second file
query IIII
select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1) where concat(date_cast::VARCHAR, part) == '2012-01-01a';
----
1 value1 a 2012-01-01

# Complex filter filtering second file, filter should be pruned completely
query II
explain select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where concat(date_cast::VARCHAR, part) == '2012-01-01a';
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(concat\(CAST.*\(CAST.*\(date AS.*DATE\) AS VARCHAR\), part\).*= '2012-01-01a'\).*

# Currently, complex filters combining hive columns and regular columns can prevent filter pushdown in some situations
# TODO: we want to support filter pushdown here too
query II
explain select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where (date_cast=CAST('2013-01-01' as DATE) AND (value='value1' OR concat(date_cast::VARCHAR, part) == '2013-01-01b'));
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(CAST\(date AS DATE\) =.*'2013.*-01-01'::DATE\).*

# Idem
query II
explain select id, value, part, CAST(date AS DATE) as date_cast from parquet_scan('data/parquet-testing/hive-partitioning/different_order/*/*/test.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where (date_cast=CAST('2012-01-01' as DATE) AND (value='value2' OR concat(date_cast::VARCHAR, part) == '2012-01-01a'));
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(CAST\(date AS DATE\) =.*'2012.*-01-01'::DATE\).*

# Confirm that hive partitions override existing columns

# Without hive partitioning we just read the files, note the mismatch here between the hive partition in the filename and the col in the file
query III
SELECT a, b, replace(filename, '\', '/') filename FROM parquet_scan('data/parquet-testing/hive-partitioning/hive_col_also_in_file/*/test.parquet', HIVE_PARTITIONING=0, FILENAME=1) order by filename;
----
1 2 data/parquet-testing/hive-partitioning/hive_col_also_in_file/a=5/test.parquet
3 4 data/parquet-testing/hive-partitioning/hive_col_also_in_file/a=6/test.parquet

# Hive col from path overrides col in file
query III
SELECT a, b, replace(filename, '\', '/') filename FROM parquet_scan('data/parquet-testing/hive-partitioning/hive_col_also_in_file/*/test.parquet', HIVE_PARTITIONING=1, FILENAME=1) order by filename;
----
5 2 data/parquet-testing/hive-partitioning/hive_col_also_in_file/a=5/test.parquet
6 4 data/parquet-testing/hive-partitioning/hive_col_also_in_file/a=6/test.parquet

# Test handling missing files
query IIII
select id, value, part, date
from parquet_scan('data/parquet-testing/hive-partitioning/missing/*/*/test.parquet', HIVE_PARTITIONING=1)
order by id
----
3 value3 c 2014-01-01
4 value4 d 2015-01-01


# check cases where there are file filters AND table filters
statement ok
Create table t1 (a int, b int, c int);

foreach i 0 1 2 3 4 5 6 7 8 9

statement ok
insert into t1 (select range, ${i}*10, ${i}*100 from range(0,10));

endloop

statement ok
COPY (SELECT * FROM t1) TO '__TEST_DIR__/hive_filters' (FORMAT PARQUET, PARTITION_BY c);

statement ok
COPY (SELECT * FROM t1) TO '__TEST_DIR__/hive_filters_2' (FORMAT PARQUET, PARTITION_BY (c, b));

# There should be Table Filters (id < 50) and regular filters
query II
EXPLAIN select a from parquet_scan('__TEST_DIR__/hive_filters/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c::INT=500 and a::INT < 4;
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters:.*a<4.*File Filters:.*\(CAST\(c AS.*INTEGER\) = 500\).*

# unsatisfiable file filters also show up
query II
EXPLAIN select a from parquet_scan('__TEST_DIR__/hive_filters_2/*/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c::INT > 500 and c::INT < 500;
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(CAST\(c AS.*INTEGER\).*BETWEEN.*500 AND 500\).*
46
external/duckdb/test/sql/copy/parquet/parquet_hive2.test
vendored
Normal file
46
external/duckdb/test/sql/copy/parquet/parquet_hive2.test
vendored
Normal file
@@ -0,0 +1,46 @@
# name: test/sql/copy/parquet/parquet_hive2.test
# description: Test generating hive partitioning scheme
# group: [parquet]

require parquet

# See https://github.com/duckdb/duckdb/pull/9473#issuecomment-1786231577

statement ok
create or replace table orders(m int,v int,j int);

statement ok
insert into orders select i%12+1,i,j from range(360)t(i),range(1000)s(j);

statement ok
copy (select 2000+(v//12)y,m,v,j from orders) TO '__TEST_DIR__/orders_m' (FORMAT PARQUET, PARTITION_BY (m));

query IIII
SELECT AVG(y), AVG(m), AVG(v), AVG(j) FROM '__TEST_DIR__/orders_m/**/*.parquet'
----
2014.5 6.5 179.5 499.5

statement ok
copy (select 2000+(v//12)y,m,v,j from orders) TO '__TEST_DIR__/orders_y' (FORMAT PARQUET, PARTITION_BY (y));

query IIII
SELECT AVG(y), AVG(m), AVG(v), AVG(j) FROM '__TEST_DIR__/orders_y/**/*.parquet'
----
2014.5 6.5 179.5 499.5

statement ok
copy (select 2000+(v//12)y,m,v,j from orders) TO '__TEST_DIR__/orders_ym' (FORMAT PARQUET,PARTITION_BY (y,m));

query IIII
SELECT AVG(y), AVG(m), AVG(v), AVG(j) FROM '__TEST_DIR__/orders_ym/**/*.parquet'
----
2014.5 6.5 179.5 499.5

# random shuffle
statement ok
copy (select 2000+(v//12)y,m,v,j from orders order by random()) TO '__TEST_DIR__/orders_ym_rand' (FORMAT PARQUET,PARTITION_BY (y,m));

query IIII
SELECT AVG(y), AVG(m), AVG(v), AVG(j) FROM '__TEST_DIR__/orders_ym_rand/**/*.parquet'
----
2014.5 6.5 179.5 499.5
37
external/duckdb/test/sql/copy/parquet/parquet_hive_empty.test
vendored
Normal file
37
external/duckdb/test/sql/copy/parquet/parquet_hive_empty.test
vendored
Normal file
@@ -0,0 +1,37 @@
# name: test/sql/copy/parquet/parquet_hive_empty.test
# description: Test empty partitioning values
# group: [parquet]

require parquet

query II
select *
from parquet_scan('data/parquet-testing/hive-partitioning/empty_string/*/*.parquet')
ORDER BY ALL
----
a a
b (empty)
c NULL

# filter on hive partitioning with NULL values
query II
select *
from parquet_scan('data/parquet-testing/hive-partitioning/empty_string/*/*.parquet')
WHERE key IS NULL
----
c NULL


query II
select *
from parquet_scan('data/parquet-testing/hive-partitioning/empty_string/*/*.parquet')
WHERE key='a'
----
a a

query II
select *
from parquet_scan('data/parquet-testing/hive-partitioning/empty_string/*/*.parquet')
WHERE key=''
----
b (empty)
51
external/duckdb/test/sql/copy/parquet/parquet_hive_null.test
vendored
Normal file
51
external/duckdb/test/sql/copy/parquet/parquet_hive_null.test
vendored
Normal file
@@ -0,0 +1,51 @@
# name: test/sql/copy/parquet/parquet_hive_null.test
# description: Test NULL partitioning values
# group: [parquet]

require parquet

statement ok
create table test as select i%5 as a, i%2 as b from range(0,10) tbl(i);

statement ok
copy (FROM test UNION ALL select 'NULL' as a, 'NULL' as b) to '__TEST_DIR__/null-parquet' (PARTITION_BY (a,b), FORMAT 'parquet', WRITE_PARTITION_COLUMNS);

query II
select *
from parquet_scan('__TEST_DIR__/null-parquet/**/*.parquet', hive_partitioning=1, hive_types={'a': INT})
ORDER BY ALL
----
0 0
0 1
1 0
1 1
2 0
2 1
3 0
3 1
4 0
4 1
NULL NULL

statement ok
create table test2 as select i%5 as a, i%2 as b, i as c from range(0,10) tbl(i);

statement ok
copy (FROM test2 UNION ALL select 'NULL' as a, 'NULL' as b, 'NULL' as c) to '__TEST_DIR__/null-parquet' (PARTITION_BY (a,b), FORMAT 'parquet', OVERWRITE);

query III
select *
from parquet_scan('__TEST_DIR__/null-parquet/**/*.parquet', hive_partitioning=1, hive_types={'a': INT})
ORDER BY ALL
----
0 0 0
1 1 1
2 2 0
3 3 1
4 4 0
5 0 1
6 1 0
7 2 1
8 3 0
9 4 1
NULL NULL NULL
139
external/duckdb/test/sql/copy/parquet/parquet_late_materialization.test
vendored
Normal file
139
external/duckdb/test/sql/copy/parquet/parquet_late_materialization.test
vendored
Normal file
@@ -0,0 +1,139 @@
# name: test/sql/copy/parquet/parquet_late_materialization.test
# description: Test Top N Optimization for Parquet
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

statement ok
COPY (SELECT i, i + 1 AS j, i + 2 AS k, -i AS l FROM range(10) t(i)) TO '__TEST_DIR__/late_materialization.parquet';

statement ok
CREATE VIEW test AS FROM '__TEST_DIR__/late_materialization.parquet';

statement ok
SET explain_output='optimized_only'

# Top N optimization
# this gets turned into a row-id join
query II
explain SELECT * FROM test ORDER BY j DESC LIMIT 2;
----
logical_opt <REGEX>:.*COMPARISON_JOIN.*

query IIII
SELECT * FROM test ORDER BY j DESC LIMIT 2;
----
9 10 11 -9
8 9 10 -8
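
# Roughly what the optimization does (informal sketch, not literal plan output): the Top-N is
# evaluated over just the ordering column plus a row id, and the surviving row ids are then joined
# back against the Parquet scan to fetch the remaining columns, so the wide columns only get
# materialized for the rows that make it into the LIMIT.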

query II
explain SELECT * FROM test ORDER BY j, i LIMIT 2;
----
logical_opt <REGEX>:.*COMPARISON_JOIN.*

query IIII
SELECT * FROM test ORDER BY j, i LIMIT 2;
----
0 1 2 0
1 2 3 -1

# this does not, we cannot remove any columns by turning it into a row-id join
query II
explain SELECT i FROM test ORDER BY i LIMIT 2;
----
logical_opt <!REGEX>:.*COMPARISON_JOIN.*

# we cannot do this with volatile expressions
query II
explain SELECT * FROM (SELECT i + random() AS i, j, k, l FROM test) ORDER BY i LIMIT 2;
----
logical_opt <!REGEX>:.*COMPARISON_JOIN.*

# top-n with expressions
query IIII
SELECT * FROM (SELECT -i i, -j j, -k k, -l l FROM test) ORDER BY -j DESC LIMIT 2
----
-9 -10 -11 9
-8 -9 -10 8

# multiple layers
query IIII
SELECT * FROM (SELECT 100 + i i, 1000 + j j, 10000 + k k, 100000 + l l FROM (SELECT -i i, -j j, -k k, -l l FROM test)) ORDER BY j DESC LIMIT 2
----
100 999 9998 100000
99 998 9997 100001

# limit + offset
query II
explain SELECT * FROM test LIMIT 2 OFFSET 2;
----
logical_opt <REGEX>:.*COMPARISON_JOIN.*

query IIII
SELECT * FROM test LIMIT 2 OFFSET 2;
----
2 3 4 -2
3 4 5 -3

# sample
query II
explain SELECT * FROM test USING SAMPLE 2 ROWS
----
logical_opt <REGEX>:.*COMPARISON_JOIN.*

# we can only use joins when we are sampling rows
query II
explain SELECT * FROM test USING SAMPLE 10%
----
logical_opt <!REGEX>:.*COMPARISON_JOIN.*

# order on expression
query IIII
SELECT * FROM test ORDER BY -j DESC LIMIT 2
----
0 1 2 0
1 2 3 -1

# projection in subquery
query IIII
SELECT * FROM (SELECT -i i, -j j, -k k, -l l FROM test) ORDER BY -j DESC LIMIT 2
----
-9 -10 -11 9
-8 -9 -10 8

# filter after top-n
query IIII
SELECT * FROM (
SELECT * FROM test ORDER BY j DESC LIMIT 2
) WHERE i=8
----
8 9 10 -8

query I
SELECT l FROM (
SELECT * FROM test ORDER BY j DESC LIMIT 2
) WHERE k=10
----
-8

# now with varchar columns
statement ok
COPY (SELECT i, printf('%02d', i + 1) AS j, printf('%02d', i + 2) AS k, -i AS l FROM range(10) t(i)) TO '__TEST_DIR__/late_materialization_varchar.parquet';

statement ok
CREATE OR REPLACE VIEW test AS FROM '__TEST_DIR__/late_materialization_varchar.parquet';

query IIII
SELECT * FROM test ORDER BY j DESC LIMIT 2;
----
9 10 11 -9
8 09 10 -8

query IIII
SELECT j, k, l, i FROM test WHERE i > 5 ORDER BY j DESC LIMIT 2;
----
10 11 -9 9
09 10 -8 8
55
external/duckdb/test/sql/copy/parquet/parquet_list.test
vendored
Normal file
55
external/duckdb/test/sql/copy/parquet/parquet_list.test
vendored
Normal file
@@ -0,0 +1,55 @@
# name: test/sql/copy/parquet/parquet_list.test
# description: Test list syntax for reading multiple files
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

# standard list syntax
query I
select count(*) from parquet_scan(['data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t2.parquet']);
----
2

# glob inside a list
query I
select count(*) from parquet_scan(['data/parquet-testing/glob/*.parquet', 'data/parquet-testing/glob/t2.parquet']);
----
3

# read the same file multiple times
query I
select count(*) from parquet_scan(['data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet']);
----
5

# file does not exist
statement error
select count(*) from parquet_scan(['data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t2.parquet', 'this/file/doesnot/exist/hopefully.parquet']);
----

# empty list
statement error
select count(*) from parquet_scan([]::varchar[]);
----
at least one file

# null inside a list
statement error
select count(*) from parquet_scan([NULL]);
----
NULL

# null list
statement error
select count(*) from parquet_scan(NULL::VARCHAR[]);
----
NULL

# null varchar
statement error
select count(*) from parquet_scan(NULL::VARCHAR);
----
NULL