should be it

external/duckdb/test/sql/copy/parquet/writer/list_of_bools.test_slow (new file, vendored)
@@ -0,0 +1,91 @@
# name: test/sql/copy/parquet/writer/list_of_bools.test_slow
# description: Parquet write list of bools
# group: [writer]

require parquet

# big list of bools
statement ok
CREATE TABLE list_of_bools AS
SELECT LIST(i%2==0) l FROM range(1373) tbl(i)
UNION ALL
SELECT [true, false, NULL, false, true]
UNION ALL
SELECT []
UNION ALL
SELECT NULL
UNION ALL
SELECT LIST(i%3==0) l FROM range(9937) tbl(i)
UNION ALL
SELECT [true, false, NULL, false, true]

query III
SELECT COUNT(*), COUNT(b), SUM(CASE WHEN b THEN 1 ELSE 0 END)
FROM (SELECT unnest(l) b FROM list_of_bools)
----
11320 11318 4004

statement ok
COPY list_of_bools TO '__TEST_DIR__/list_of_bools.parquet' (FORMAT PARQUET)

query III
SELECT COUNT(*), COUNT(b), SUM(CASE WHEN b THEN 1 ELSE 0 END)
FROM (SELECT unnest(l) b FROM '__TEST_DIR__/list_of_bools.parquet')
----
11320 11318 4004

# many lists of integers
statement ok
CREATE TABLE many_ints AS
SELECT [1, 0, 1] AS l FROM range(1373)
UNION ALL
SELECT []
UNION ALL
SELECT NULL
UNION ALL
SELECT [1, 0, NULL, 0, 1]
UNION ALL
SELECT [1, 0, NULL, 1] l FROM range(9937) tbl(i)

query III
SELECT COUNT(*), COUNT(b), SUM(b)
FROM (SELECT unnest(l) b FROM many_ints)
----
43872 33934 22622

statement ok
COPY many_ints TO '__TEST_DIR__/many_ints.parquet' (FORMAT PARQUET)

query III
SELECT COUNT(*), COUNT(b), SUM(b)
FROM (SELECT unnest(l) b FROM '__TEST_DIR__/many_ints.parquet')
----
43872 33934 22622

# many lists of bools
statement ok
CREATE TABLE many_bools AS
SELECT [true, false, true] AS l FROM range(1373)
UNION ALL
SELECT []
UNION ALL
SELECT NULL
UNION ALL
SELECT [true, false, NULL, false, true]
UNION ALL
SELECT [true, false, NULL, true] l FROM range(9937) tbl(i)

query III
SELECT COUNT(*), COUNT(b), SUM(CASE WHEN b THEN 1 ELSE 0 END)
FROM (SELECT unnest(l) b FROM many_bools)
----
43872 33934 22622

statement ok
COPY many_bools TO '__TEST_DIR__/many_bools.parquet' (FORMAT PARQUET)

query III
SELECT COUNT(*), COUNT(b), SUM(CASE WHEN b THEN 1 ELSE 0 END)
FROM (SELECT unnest(l) b FROM '__TEST_DIR__/many_bools.parquet')
----
43872 33934 22622

external/duckdb/test/sql/copy/parquet/writer/parquet_large_blobs.test_slow (new file, vendored)
@@ -0,0 +1,20 @@
# name: test/sql/copy/parquet/writer/parquet_large_blobs.test_slow
# description: Test writing of large blobs into parquet files
# group: [writer]

require parquet

statement ok
CREATE TABLE large_strings AS SELECT repeat('duckduck', 10000+i) i FROM range(4000) tbl(i);

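# queries sharing a nosort label must produce identical results; the same query is repeated after the COPY below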
query III nosort minmaxstrlen
SELECT MIN(strlen(i)), MAX(strlen(i)), AVG(strlen(i)) FROM large_strings;

statement ok
COPY large_strings TO '__TEST_DIR__/largestrings.parquet' (FORMAT PARQUET);

statement ok
SELECT * FROM parquet_metadata('__TEST_DIR__/largestrings.parquet');

query III nosort minmaxstrlen
SELECT MIN(strlen(i)), MAX(strlen(i)), AVG(strlen(i)) FROM large_strings;

external/duckdb/test/sql/copy/parquet/writer/parquet_test_all_types.test (new file, vendored)
@@ -0,0 +1,58 @@
# name: test/sql/copy/parquet/writer/parquet_test_all_types.test
# description: Parquet test_all_types function
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

# intervals are saved with ms precision -> truncate microsecond precision to millisecond
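# bit and "union" are excluded below (assumption: these types are not supported by the Parquet writer)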
statement ok
CREATE TABLE all_types AS
SELECT * EXCLUDE (bit, "union") REPLACE (
    case when extract(month from interval) <> 0 then interval '1 month 1 day 12:13:34.123' else interval end AS interval
)
FROM test_all_types();

statement ok
COPY all_types TO "__TEST_DIR__/all_types.parquet" (FORMAT PARQUET);

# we have to make some replacements to get result equivalence
# hugeint/uhugeint is stored as double -> we have to cast
# TIME WITH TIME ZONE loses the offset
query I nosort alltypes
SELECT * REPLACE (
    hugeint::DOUBLE AS hugeint,
    uhugeint::DOUBLE AS uhugeint,
    time_tz::TIME::TIMETZ AS time_tz
)
FROM all_types
----

query I nosort alltypes
SELECT *
FROM '__TEST_DIR__/all_types.parquet'
----

foreach type TINYINT SMALLINT INT BIGINT UTINYINT USMALLINT UINT UBIGINT HUGEINT UHUGEINT FLOAT DOUBLE

query II
explain select "${type}" from '__TEST_DIR__/all_types.parquet'
WHERE "${type}" IN (127);
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters.*

endloop

query II
explain select "VARCHAR" from '__TEST_DIR__/all_types.parquet'
WHERE "VARCHAR" IN ('🦆🦆🦆🦆🦆🦆');
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters.*

query II
explain select "bool" from '__TEST_DIR__/all_types.parquet'
WHERE "bool" IN (true);
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters.*

external/duckdb/test/sql/copy/parquet/writer/parquet_write_booleans.test (new file, vendored)
@@ -0,0 +1,34 @@
# name: test/sql/copy/parquet/writer/parquet_write_booleans.test
# description: Parquet bools round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE bools(b BOOL)

statement ok
INSERT INTO bools SELECT CASE WHEN i%2=0 THEN NULL ELSE i%7=0 OR i%3=0 END b FROM range(10000) tbl(i);

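# expected counts: of the 5000 odd values, 2143 are divisible by 3 or 7 (inclusion-exclusion: 1667 + 714 - 238);
# the ELSE branch of the false_count sum also counts NULLs, giving 10000 - 2143 = 7857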
query IIIIII
SELECT COUNT(*), COUNT(b), BOOL_AND(b), BOOL_OR(b), SUM(CASE WHEN b THEN 1 ELSE 0 END) true_count, SUM(CASE WHEN b THEN 0 ELSE 1 END) false_count
FROM bools
----
10000 5000 False True 2143 7857

statement ok
COPY bools TO '__TEST_DIR__/bools.parquet' (FORMAT 'parquet');

query IIIIII
SELECT COUNT(*), COUNT(b), BOOL_AND(b), BOOL_OR(b), SUM(CASE WHEN b THEN 1 ELSE 0 END) true_count, SUM(CASE WHEN b THEN 0 ELSE 1 END) false_count
FROM '__TEST_DIR__/bools.parquet'
----
10000 5000 False True 2143 7857

query I
SELECT typeof(b) FROM '__TEST_DIR__/bools.parquet' LIMIT 1
----
BOOLEAN

external/duckdb/test/sql/copy/parquet/writer/parquet_write_compression_level.test (new file, vendored)
@@ -0,0 +1,49 @@
# name: test/sql/copy/parquet/writer/parquet_write_compression_level.test
# description: Parquet compression level
# group: [writer]

require parquet

# NOTE: since updating ZSTD, compression levels between -131072 and 22 are supported,
# and this test has been updated accordingly

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE integers AS FROM range(100) t(i)

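# COMPRESSION_LEVEL without CODEC ZSTD is rejected (assumption: the option is only supported for the ZSTD codec)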
statement error
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', COMPRESSION_LEVEL 10);
----
only supported

statement ok
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL 0);

statement error
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL 23);
----
level must be between

statement ok
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL -131072);

statement error
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL -131073);
----
level must be between

statement ok
COPY integers TO '__TEST_DIR__/compress_level.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL 1);

statement ok
COPY integers TO '__TEST_DIR__/compress_level2.parquet' (FORMAT 'parquet', CODEC ZSTD, COMPRESSION_LEVEL 22);

query I nosort clevel
SELECT * FROM '__TEST_DIR__/compress_level.parquet'
----

query I nosort clevel
SELECT * FROM '__TEST_DIR__/compress_level2.parquet'
----

external/duckdb/test/sql/copy/parquet/writer/parquet_write_date.test (new file, vendored)
@@ -0,0 +1,35 @@
# name: test/sql/copy/parquet/writer/parquet_write_date.test
# description: Parquet dates round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE dates(d DATE)

statement ok
INSERT INTO dates VALUES (DATE '1992-01-01'), (DATE '1900-01-01'), (NULL), (DATE '2020-09-27')

query I nosort date_scan
SELECT * FROM dates
----

statement ok
COPY dates TO '__TEST_DIR__/dates.parquet' (FORMAT 'parquet');

query I nosort date_scan
SELECT * FROM '__TEST_DIR__/dates.parquet'
----

query I
SELECT typeof(d) FROM '__TEST_DIR__/dates.parquet' LIMIT 1
----
DATE

query I
SELECT * FROM '__TEST_DIR__/dates.parquet' WHERE d='1992-01-01'
----
1992-01-01

external/duckdb/test/sql/copy/parquet/writer/parquet_write_decimals.test (new file, vendored)
@@ -0,0 +1,87 @@
# name: test/sql/copy/parquet/writer/parquet_write_decimals.test
# description: Parquet decimal types round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE decimals(
    dec4 DECIMAL(4,1),
    dec9 DECIMAL(9,2),
    dec18 DECIMAL(18,3),
    dec38 DECIMAL(38,4)
);

statement ok
INSERT INTO decimals VALUES (
    -999.9,
    -9999999.99,
    -999999999999999.999,
    -999999999999999999999999999999999.9999
), (
    NULL, NULL, NULL, NULL
), (
    42, 42, 42, 42
), (
    -42, -42, -42, -42
), (
    0, 0, 0, 0
), (
    999.9,
    9999999.99,
    999999999999999.999,
    999999999999999999999999999999999.9999
);

statement ok
COPY decimals TO '__TEST_DIR__/decimals.parquet';

query IIII nosort decimal_scan
SELECT * FROM decimals;

query IIII nosort decimal_scan
SELECT * FROM '__TEST_DIR__/decimals.parquet';

query IIII
SELECT stats_min, stats_max, stats_min_value, stats_max_value FROM parquet_metadata('__TEST_DIR__/decimals.parquet');
----
-999.9 999.9 -999.9 999.9
-9999999.99 9999999.99 -9999999.99 9999999.99
-999999999999999.999 999999999999999.999 -999999999999999.999 999999999999999.999
-999999999999999999999999999999999.9999 999999999999999999999999999999999.9999 -999999999999999999999999999999999.9999 999999999999999999999999999999999.9999

# filter pushdown
statement ok
DELETE FROM decimals WHERE dec4<-42 OR dec4>42

statement ok
COPY decimals TO '__TEST_DIR__/decimals.parquet';

foreach dec_column dec4 dec9 dec18 dec38

query IIII
SELECT * FROM '__TEST_DIR__/decimals.parquet' WHERE ${dec_column}=42
----
42 42 42 42

query IIII
SELECT * FROM '__TEST_DIR__/decimals.parquet' WHERE ${dec_column}=-43
----

query IIII
SELECT * FROM '__TEST_DIR__/decimals.parquet' WHERE ${dec_column}=43
----

endloop

# check statistics
statement ok
PRAGMA disable_verification

query IIII
SELECT stats(dec4), stats(dec9), stats(dec18), stats(dec38) FROM '__TEST_DIR__/decimals.parquet' LIMIT 1
----
[Min: -42.0, Max: 42.0][Has Null: true, Has No Null: true] [Min: -42.00, Max: 42.00][Has Null: true, Has No Null: true] [Min: -42.000, Max: 42.000][Has Null: true, Has No Null: true] [Min: -42.0000, Max: 42.0000][Has Null: true, Has No Null: true]

external/duckdb/test/sql/copy/parquet/writer/parquet_write_enums.test (new file, vendored)
@@ -0,0 +1,141 @@
# name: test/sql/copy/parquet/writer/parquet_write_enums.test
# description: ENUM tests
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

# standard enum
statement ok
CREATE TYPE mood AS ENUM ('joy', 'ok', 'happy');

statement ok
CREATE TABLE enums(m mood);

statement ok
INSERT INTO enums VALUES
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'), ('joy')

statement ok
COPY enums TO '__TEST_DIR__/enums.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/enums.parquet'
----
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
joy

# enum with null values
statement ok
UPDATE enums SET m=NULL WHERE m='joy'

statement ok
COPY enums TO '__TEST_DIR__/enums.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/enums.parquet'
----
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
NULL

# all values are null
statement ok
UPDATE enums SET m=NULL

statement ok
COPY enums TO '__TEST_DIR__/enums.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/enums.parquet'
----
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL

external/duckdb/test/sql/copy/parquet/writer/parquet_write_field_id.test (new file, vendored)
@@ -0,0 +1,365 @@
# name: test/sql/copy/parquet/writer/parquet_write_field_id.test
# description: Parquet writer FIELD_IDS tests
# group: [writer]

require parquet

# need to supply an argument
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS)
----
Binder Error

# j is not present so we can't have a field id
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {j:42})
----
Binder Error

# this should work
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

# needs to be castable to integer, so this works
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:'42'})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

# but this doesn't
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:'abc'})
----
Invalid Input Error

# we can do casts
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42::hugeint})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

# wrong casts should lead to ConversionException
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:1024::utinyint})
----
Conversion Error

# field id can't be a colref
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:i})
----
Could not convert string 'i' to INT32

# this shouldn't work
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'oops')
----
Binder Error

# can't have duplicate field id keys
statement error
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42,i:43})
----
Binder Error

# can't have duplicate field id values either
statement error
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42,j:41+1})
----
Binder Error

# we don't have to supply a field_id for all columns
statement ok
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'j'
----
NULL

# but we can
statement ok
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:42,j:43})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'j'
----
43

# we can also specify the col like this
statement ok
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42},j:{__duckdb_field_id:43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'j'
----
43

# i is not a nested type, so we can't specify nested field ids
statement error
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,j:43}})
----
Binder Error

# we tested a non-nested column type, now do all the nested types so we test all the code paths

# list
statement ok
copy (select range(range, range + 3) as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,element:43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'element'
----
43

# we don't have to specify a field_id for the top-level list, we can also just specify for the nested children
statement ok
copy (select range(range, range + 3) as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{element:43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
NULL

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'element'
----
43

# list child is always called "element"
statement error
copy (select range(range, range + 3) as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,elem:43}})
----
Binder Error: Column name "elem" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this column is a partition column. Available column names: [element]

# struct
statement ok
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,f:43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i' and num_children > 0
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'f'
----
43

# struct does not have child "g"
statement error
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,g:43}})
----
Binder Error: Column name "g" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this column is a partition column. Available column names: [f]

# map
statement ok
copy (select map {range : 10 - range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,key:43,value:44}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i' and num_children > 0
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'key'
----
43

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'value'
----
44

# map type children need to be called "key" and "value"
statement error
copy (select map {range : 10 - range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42,k:43,v:44}})
----
Binder Error: Column name "k" specified in FIELD_IDS not found.

# test auto-generation (flat)
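# 'auto' assigns field ids sequentially in depth-first schema order (as the results below show)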
statement ok
copy (select range as i, range as j from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'auto')

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
0

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'j'
----
1

# big nestedness
statement ok
set variable field_id_values={i:{__duckdb_field_id:42,key:43,value:{__duckdb_field_id:44,element:{__duckdb_field_id:45,j:46}}}}

statement ok
copy (select map {'my_key' : [{j : 42}]} as i) to '__TEST_DIR__/my.parquet' (FIELD_IDS getvariable('field_id_values'))

query II
select name, field_id from parquet_schema('__TEST_DIR__/my.parquet') where name in ('i', 'key', 'value', 'element', 'j') order by field_id
----
i 42
key 43
value 44
element 45
j 46

# we can't specify "f" in the first level struct
statement error
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{__duckdb_field_id:42}, f:43})
----
Binder Error

# needs to be called exactly "__duckdb_field_id"
statement error
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i:{field_id:42, f:43}})
----
Binder Error

# test auto-generation (list)
statement ok
copy (select range(range, range + 3) as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'auto')

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
0

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'element'
----
1

# test auto-generation (struct)
statement ok
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'auto')

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i' and num_children > 0
----
0

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'f'
----
1

# test auto-generation (map)
statement ok
copy (select map {range : 10 - range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'auto')

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i' and num_children > 0
----
0

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'key'
----
1

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'value'
----
2

# test auto-generation (big nestedness)
statement ok
copy (select map {'my_key' : [{j : 42}]} as i) to '__TEST_DIR__/my.parquet' (FIELD_IDS 'auto')

query II
select name, field_id from parquet_schema('__TEST_DIR__/my.parquet') where name in ('i', 'key', 'value', 'element', 'j') order by field_id
----
i 0
key 1
value 2
element 3
j 4

# cannot have a column named "__duckdb_field_id"
statement error
copy (select range as __duckdb_field_id from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {__duckdb_field_id : 42})
----
Binder Error

statement error
copy (select {__duckdb_field_id : range} as __duckdb_field_id from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {__duckdb_field_id : {__duckdb_field_id : 42}})
----
Binder Error

# we should be case insensitive here (it's just DuckDB col names / struct col names)
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {"I" : 42})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i'
----
42

statement ok
copy (select range as "I" from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i : 42})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'I'
----
42

statement ok
copy (select {f : range} as i from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {"I" : {__duckdb_field_id: 42, "F": 43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'i' and num_children > 0
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'f'
----
43

statement ok
copy (select {"F" : range} as "I" from range(10)) to '__TEST_DIR__/my.parquet' (FIELD_IDS {i : {__duckdb_field_id: 42, f: 43}})

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'I' and num_children > 0
----
42

query I
select field_id from parquet_schema('__TEST_DIR__/my.parquet') where name = 'F'
----
43

external/duckdb/test/sql/copy/parquet/writer/parquet_write_home_directory.test (new file, vendored)
@@ -0,0 +1,63 @@
# name: test/sql/copy/parquet/writer/parquet_write_home_directory.test
# description: Parquet writer home directory
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

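# '~' in paths expands to the home_directory setting, which is redirected to __TEST_DIR__ here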
statement ok
SET home_directory='__TEST_DIR__'

statement ok
CREATE TABLE integers AS SELECT * FROM range(10)

statement ok
COPY integers TO '__TEST_DIR__/integers.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '~/integers.parquet'
----
0
1
2
3
4
5
6
7
8
9

statement ok
CREATE TABLE integers_load(i INTEGER);

statement ok
COPY integers_load FROM '~/integers.parquet'

query I
SELECT * FROM integers_load
----
0
1
2
3
4
5
6
7
8
9

# glob from home directory
statement ok
COPY integers TO '__TEST_DIR__/homedir_integers1.parquet'

statement ok
COPY integers TO '__TEST_DIR__/homedir_integers2.parquet'

query I
SELECT COUNT(*) FROM '~/homedir_integers*.parquet'
----
20

external/duckdb/test/sql/copy/parquet/writer/parquet_write_hugeint.test (new file, vendored)
@@ -0,0 +1,31 @@
# name: test/sql/copy/parquet/writer/parquet_write_hugeint.test
# description: Parquet hugeint round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE hugeints(h HUGEINT)

statement ok
INSERT INTO hugeints VALUES (-1180591620717411303424), (0), (NULL), (1180591620717411303424)

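# HUGEINT has no native Parquet type; the writer stores it as DOUBLE (verified by the typeof check below)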
statement ok
COPY hugeints TO '__TEST_DIR__/hugeints.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/hugeints.parquet'
----
-1180591620717411303424
0
NULL
1180591620717411303424

query I
SELECT typeof(h) FROM '__TEST_DIR__/hugeints.parquet' LIMIT 1
----
DOUBLE

external/duckdb/test/sql/copy/parquet/writer/parquet_write_interval.test (new file, vendored)
@@ -0,0 +1,39 @@
# name: test/sql/copy/parquet/writer/parquet_write_interval.test
# description: Parquet interval round trip
# group: [writer]

statement ok
SET default_null_order='nulls_first';

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE IF NOT EXISTS intervals (i interval);

statement ok
INSERT INTO intervals VALUES
(interval '1' day),
(interval '00:00:01'),
(NULL),
(interval '0' month),
(interval '1' month)

statement ok
COPY intervals TO '__TEST_DIR__/intervals.parquet'

query I
SELECT * FROM '__TEST_DIR__/intervals.parquet' ORDER BY 1
----
NULL
00:00:00
00:00:01
1 day
1 month

statement error
COPY (SELECT -interval '1 day') TO '__TEST_DIR__/intervals.parquet'
----
<REGEX>:.*IO Error.*do not support negative intervals.*

external/duckdb/test/sql/copy/parquet/writer/parquet_write_issue_5779.test (new file, vendored)
@@ -0,0 +1,74 @@
# name: test/sql/copy/parquet/writer/parquet_write_issue_5779.test
# description: Fix #5779: write subsection of list vector to Parquet
# group: [writer]

require parquet

statement ok
CREATE TABLE empty_lists(i INTEGER[]);

statement ok
INSERT INTO empty_lists SELECT [] FROM range(10) UNION ALL SELECT [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

statement ok
COPY (SELECT * FROM empty_lists LIMIT 10) TO '__TEST_DIR__/emptylist_int.parquet';

query I
SELECT * FROM '__TEST_DIR__/emptylist_int.parquet'
----
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]

statement ok
CREATE TABLE empty_lists_varchar(i VARCHAR[]);

statement ok
INSERT INTO empty_lists_varchar SELECT [] FROM range(10) UNION ALL SELECT ['hello', 'world', 'this', 'is', 'a', 'varchar', 'list']

statement ok
COPY (SELECT * FROM empty_lists_varchar LIMIT 10) TO '__TEST_DIR__/emptylist_varchar.parquet';

query I
SELECT * FROM '__TEST_DIR__/emptylist_varchar.parquet'
----
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]

statement ok
CREATE TABLE empty_list_nested(i INT[][]);

statement ok
INSERT INTO empty_list_nested SELECT [] FROM range(10) UNION ALL SELECT [[1, 2, 3], [4, 5], [6, 7, 8]]

statement ok
COPY (SELECT * FROM empty_list_nested LIMIT 10) TO '__TEST_DIR__/empty_list_nested.parquet';

query I
SELECT * FROM '__TEST_DIR__/empty_list_nested.parquet'
----
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]

external/duckdb/test/sql/copy/parquet/writer/parquet_write_memory_limit.test_slow (new file, vendored)
@@ -0,0 +1,35 @@
# name: test/sql/copy/parquet/writer/parquet_write_memory_limit.test_slow
# description: Verify data is streamed and memory limit is not exceeded in Parquet write
# group: [writer]

require parquet

require 64bit

load __TEST_DIR__/parquet_write_memory_limit.db

# 100M rows, 2 BIGINT columns = 1.6GB uncompressed
statement ok
COPY (SELECT i, i // 5 AS j FROM range(100000000) t(i)) TO '__TEST_DIR__/large_integers.parquet'

statement ok
SET memory_limit='0.3GB'

# we need to do this, otherwise we buffer a lot more data in a BatchedDataCollection;
# by disabling order preservation we can immediately flush the ColumnDataCollections
statement ok
set preserve_insertion_order=false

# stream from one parquet file to another
query I
COPY '__TEST_DIR__/large_integers.parquet' TO '__TEST_DIR__/large_integers2.parquet'
----
100000000

# verify that the file is correctly written
statement ok
SET memory_limit='-1'

query II
SELECT * FROM '__TEST_DIR__/large_integers.parquet' EXCEPT FROM '__TEST_DIR__/large_integers2.parquet'
----

external/duckdb/test/sql/copy/parquet/writer/parquet_write_memory_usage.test (new file, vendored)
@@ -0,0 +1,28 @@
# name: test/sql/copy/parquet/writer/parquet_write_memory_usage.test
# description: Parquet writer memory usage
# group: [writer]

require parquet

load __TEST_DIR__/parquet_write_memory_usage.db

statement ok
set threads=1

foreach memory_limit,row_group_size 0.8mb,20480 1.6mb,40960

statement ok
set memory_limit='${memory_limit}'

statement ok
copy (select * from range(163840)) to '__TEST_DIR__/parquet_write_memory_usage.parquet' (row_group_size ${row_group_size})

statement ok
set memory_limit='4gb'

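# Gauss sum: 0 + 1 + ... + (n-1) = n(n-1)/2, so this checks that every row round-tripped intact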
query T
select sum(range) = (count(*) * (count(*) - 1)) // 2 from '__TEST_DIR__/parquet_write_memory_usage.parquet'
----
true

endloop

external/duckdb/test/sql/copy/parquet/writer/parquet_write_signed.test (new file, vendored)
@@ -0,0 +1,75 @@
# name: test/sql/copy/parquet/writer/parquet_write_signed.test
# description: Parquet signed types round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE values_TINYINT AS SELECT d::TINYINT d FROM (VALUES
    (-128), (42), (NULL), (127)) tbl (d);

statement ok
CREATE TABLE values_SMALLINT AS SELECT d::SMALLINT d FROM (VALUES
    (-32768), (42), (NULL), (32767)) tbl (d);

statement ok
CREATE TABLE values_INTEGER AS SELECT d::INTEGER d FROM (VALUES
    (-2147483648), (42), (NULL), (2147483647)) tbl (d);

statement ok
CREATE TABLE values_BIGINT AS SELECT d::BIGINT d FROM (VALUES
    (-9223372036854775808), (42), (NULL), (9223372036854775807)) tbl (d);

foreach type TINYINT SMALLINT INTEGER BIGINT

statement ok
CREATE OR REPLACE TABLE signed(d ${type})

statement ok
INSERT INTO signed SELECT * FROM values_${type}

statement ok
COPY signed TO '__TEST_DIR__/signed.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/signed.parquet' EXCEPT SELECT * FROM signed
----

query I
SELECT * FROM signed EXCEPT SELECT * FROM '__TEST_DIR__/signed.parquet'
----

query I
SELECT * FROM '__TEST_DIR__/signed.parquet' WHERE d=42
----
42

query I
SELECT COUNT(*) FROM '__TEST_DIR__/signed.parquet' WHERE d>42
----
1

query I
SELECT COUNT(*) FROM '__TEST_DIR__/signed.parquet' WHERE d>=42
----
2

query I
SELECT COUNT(*) FROM '__TEST_DIR__/signed.parquet' WHERE d<42
----
1

query I
SELECT COUNT(*) FROM '__TEST_DIR__/signed.parquet' WHERE d<=42
----
2

query I
SELECT typeof(d)='${type}' FROM '__TEST_DIR__/signed.parquet' LIMIT 1
----
true

endloop

external/duckdb/test/sql/copy/parquet/writer/parquet_write_string_distinct.test (new file, vendored)
@@ -0,0 +1,16 @@
# name: test/sql/copy/parquet/writer/parquet_write_string_distinct.test
# description: Write distinct stats for strings
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
COPY (SELECT 'hello' FROM range(10)) TO '__TEST_DIR__/string_dict.parquet';

query I
SELECT stats_distinct_count FROM parquet_metadata('__TEST_DIR__/string_dict.parquet');
----
1

external/duckdb/test/sql/copy/parquet/writer/parquet_write_strings.test (new file, vendored)
@@ -0,0 +1,263 @@
# name: test/sql/copy/parquet/writer/parquet_write_strings.test
# description: Strings tests
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE strings(s VARCHAR);

statement ok
INSERT INTO strings VALUES
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'),
('happy'), ('happy'), ('joy'), ('joy'), ('surprise');

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT encodings FROM parquet_metadata('__TEST_DIR__/strings.parquet')
----
RLE_DICTIONARY

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
happy
happy
joy
joy
surprise

query I
SELECT stats_distinct_count FROM parquet_metadata('__TEST_DIR__/strings.parquet')
----
3

# strings with null values
statement ok
UPDATE strings SET s=NULL WHERE s='joy'

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
happy
happy
NULL
NULL
surprise

# all values are null
statement ok
UPDATE strings SET s=NULL

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL
NULL

# empty table
statement ok
DELETE FROM strings

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----

# non-dictionary table, also no distinct count
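# (assumption: with mostly-unique values the writer falls back from dictionary to PLAIN encoding and omits the distinct count)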
statement ok
DELETE FROM strings

statement ok
INSERT INTO strings VALUES
('0'), ('1'), ('2'), ('3'), ('4'), ('5'), ('6'), ('7'), ('8'), ('9'),
('10'), ('11'), ('12'), ('13'), ('14'), ('15'), ('16'), ('17'), ('18'), ('19'),
('20'), ('21'), ('22'), ('23'), ('24'), ('25'), ('26'), ('27'), ('28'), ('29')

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT encodings FROM parquet_metadata('__TEST_DIR__/strings.parquet')
----
PLAIN

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

query I
SELECT stats_distinct_count FROM parquet_metadata('__TEST_DIR__/strings.parquet')
----
NULL

# non-dictionary table with null
statement ok
DELETE FROM strings

statement ok
INSERT INTO strings VALUES
('0'), ('1'), ('2'), (NULL), ('4'), ('5'), ('6'), (NULL), ('8'), ('9'),
('10'), ('11'), ('12'), ('13'), ('14'), ('15'), ('16'), ('17'), ('18'), ('19'),
('20'), (NULL), ('22'), ('23'), ('24'), ('25'), (NULL), ('27'), ('28'), ('29')

statement ok
COPY strings TO '__TEST_DIR__/strings.parquet' (FORMAT PARQUET);

query I
SELECT * FROM '__TEST_DIR__/strings.parquet'
----
0
1
2
NULL
4
5
6
NULL
8
9
10
11
12
13
14
15
16
17
18
19
20
NULL
22
23
24
25
NULL
27
28
29

external/duckdb/test/sql/copy/parquet/writer/parquet_write_timestamp.test (new file, vendored)
@@ -0,0 +1,79 @@
# name: test/sql/copy/parquet/writer/parquet_write_timestamp.test
# description: Parquet timestamp round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

foreach type TIMESTAMP TIMESTAMP_MS TIMESTAMP_S

statement ok
CREATE OR REPLACE TABLE timestamps(d ${type})

statement ok
INSERT INTO timestamps VALUES
(TIMESTAMP '1992-01-01 12:03:27'),
(TIMESTAMP '1900-01-01 03:08:47'),
(NULL),
(TIMESTAMP '2020-09-27 13:12:01')

query I nosort ts_scan
SELECT * FROM timestamps
----

statement ok
COPY timestamps TO '__TEST_DIR__/timestamps.parquet' (FORMAT 'parquet');

query I nosort ts_scan
SELECT * FROM '__TEST_DIR__/timestamps.parquet'
----

query I
SELECT * FROM '__TEST_DIR__/timestamps.parquet' WHERE d='1992-01-01 12:03:27'
----
1992-01-01 12:03:27

query I
SELECT typeof(d) FROM '__TEST_DIR__/timestamps.parquet' LIMIT 1
----
TIMESTAMP

endloop

# Nanoseconds are their own type
statement ok
CREATE OR REPLACE TABLE timestamps(d TIMESTAMP_NS)

statement ok
INSERT INTO timestamps VALUES
('1992-01-01 12:03:27.123456789'),
('1900-01-01 03:08:47.987654321'),
(NULL),
('2020-09-27 13:12:01')

query I nosort ns_scan
SELECT * FROM timestamps
----
1992-01-01 12:03:27.123456789
1900-01-01 03:08:47.987654321
NULL
2020-09-27 13:12:01

statement ok
COPY timestamps TO '__TEST_DIR__/timestamps.parquet' (FORMAT 'parquet');

query I nosort ns_scan
SELECT * FROM '__TEST_DIR__/timestamps.parquet'
----

query I
SELECT * FROM '__TEST_DIR__/timestamps.parquet' WHERE d='1992-01-01 12:03:27.123456789'
----
1992-01-01 12:03:27.123456789

query I
SELECT typeof(d) FROM '__TEST_DIR__/timestamps.parquet' LIMIT 1
----
TIMESTAMP_NS

external/duckdb/test/sql/copy/parquet/writer/parquet_write_tpcds.test_slow (new file, vendored)
@@ -0,0 +1,67 @@
# name: test/sql/copy/parquet/writer/parquet_write_tpcds.test_slow
# description: Parquet TPC-DS tests
# group: [writer]

require parquet

require tpcds

# answers are generated from postgres
# hence check with NULLS LAST flag
statement ok
PRAGMA default_null_order='NULLS LAST'

statement ok
CREATE SCHEMA tpcds;

statement ok
CALL dsdgen(sf=1, schema='tpcds');

foreach tbl call_center catalog_page catalog_returns catalog_sales customer customer_demographics customer_address date_dim household_demographics inventory income_band item promotion reason ship_mode store store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site

statement ok
COPY tpcds.${tbl} TO '__TEST_DIR__/${tbl}.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD');

statement ok
CREATE VIEW ${tbl} AS SELECT * FROM parquet_scan('__TEST_DIR__/${tbl}.parquet');

endloop

# too slow queries:
# 64, 85

loop i 1 9

query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/0${i}.csv

endloop

loop i 10 64

query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv

endloop

loop i 65 85

query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv

endloop

loop i 86 99

query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv

endloop

external/duckdb/test/sql/copy/parquet/writer/parquet_write_tpch.test_slow (new file, vendored)
@@ -0,0 +1,41 @@
# name: test/sql/copy/parquet/writer/parquet_write_tpch.test_slow
# description: Parquet TPC-H tests
# group: [writer]

require parquet

require tpch

statement ok
CREATE SCHEMA tpch;

statement ok
CALL dbgen(sf=1, schema='tpch');

foreach tbl lineitem nation orders supplier part partsupp region customer

statement ok
COPY tpch.${tbl} TO '__TEST_DIR__/${tbl}.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD');

statement ok
CREATE VIEW ${tbl} AS SELECT * FROM parquet_scan('__TEST_DIR__/${tbl}.parquet');

endloop

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

external/duckdb/test/sql/copy/parquet/writer/parquet_write_tpch_nested.test_slow (new file, vendored)
@@ -0,0 +1,84 @@
# name: test/sql/copy/parquet/writer/parquet_write_tpch_nested.test_slow
# description: Parquet TPC-H tests
# group: [writer]

require parquet

require tpch

statement ok
CREATE SCHEMA tpch;

statement ok
CALL dbgen(sf=0.1, schema='tpch');

# transform lineitem into a list of structs
statement ok
CREATE VIEW lineitem_array_view AS SELECT LIST({'l_orderkey': l_orderkey,
    'l_partkey': l_partkey,
    'l_suppkey': l_suppkey,
    'l_linenumber': l_linenumber,
    'l_quantity': l_quantity,
    'l_extendedprice': l_extendedprice,
    'l_discount': l_discount,
    'l_tax': l_tax,
    'l_returnflag': l_returnflag,
    'l_linestatus': l_linestatus,
    'l_shipdate': l_shipdate,
    'l_commitdate': l_commitdate,
    'l_receiptdate': l_receiptdate,
    'l_shipinstruct': l_shipinstruct,
    'l_shipmode': l_shipmode,
    'l_comment': l_comment}) lineitem_array FROM tpch.lineitem

statement ok
COPY lineitem_array_view TO '__TEST_DIR__/lineitem.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD');

statement ok
CREATE VIEW lineitem AS SELECT
    s.l_orderkey AS l_orderkey,
    s.l_partkey AS l_partkey,
    s.l_suppkey AS l_suppkey,
    s.l_linenumber AS l_linenumber,
    s.l_quantity AS l_quantity,
    s.l_extendedprice AS l_extendedprice,
    s.l_discount AS l_discount,
    s.l_tax AS l_tax,
    s.l_returnflag AS l_returnflag,
    s.l_linestatus AS l_linestatus,
    s.l_shipdate AS l_shipdate,
    s.l_commitdate AS l_commitdate,
    s.l_receiptdate AS l_receiptdate,
    s.l_shipinstruct AS l_shipinstruct,
    s.l_shipmode AS l_shipmode,
    s.l_comment AS l_comment
FROM (SELECT UNNEST(lineitem_array) s FROM parquet_scan('__TEST_DIR__/lineitem.parquet'));

foreach tbl nation orders supplier part partsupp region customer

statement ok
COPY tpch.${tbl} TO '__TEST_DIR__/${tbl}.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD');

statement ok
CREATE VIEW ${tbl} AS SELECT * FROM parquet_scan('__TEST_DIR__/${tbl}.parquet');

endloop

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q${i}.csv

endloop

external/duckdb/test/sql/copy/parquet/writer/parquet_write_uhugeint.test (new file, vendored)
@@ -0,0 +1,31 @@
# name: test/sql/copy/parquet/writer/parquet_write_uhugeint.test
# description: Parquet uhugeint round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE hugeints(h UHUGEINT)

statement ok
INSERT INTO hugeints VALUES (0), (1), (NULL), (1180591620717411303424)

statement ok
COPY hugeints TO '__TEST_DIR__/hugeints.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/hugeints.parquet'
----
0
1
NULL
1180591620717411303424

query I
SELECT typeof(h) FROM '__TEST_DIR__/hugeints.parquet' LIMIT 1
----
DOUBLE
75
external/duckdb/test/sql/copy/parquet/writer/parquet_write_unsigned.test
vendored
Normal file
75
external/duckdb/test/sql/copy/parquet/writer/parquet_write_unsigned.test
vendored
Normal file
@@ -0,0 +1,75 @@
# name: test/sql/copy/parquet/writer/parquet_write_unsigned.test
# description: Parquet unsigned types round trip
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE values_UTINYINT AS SELECT d::UTINYINT d FROM (VALUES
    (0), (42), (NULL), (255)) tbl (d);

statement ok
CREATE TABLE values_USMALLINT AS SELECT d::USMALLINT d FROM (VALUES
    (0), (42), (NULL), (65535)) tbl (d);

statement ok
CREATE TABLE values_UINTEGER AS SELECT d::UINTEGER d FROM (VALUES
    (0), (42), (NULL), (4294967295)) tbl (d);

statement ok
CREATE TABLE values_UBIGINT AS SELECT d::UBIGINT d FROM (VALUES
    (0), (42), (NULL), (18446744073709551615)) tbl (d);
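
# for each unsigned type: the values must round-trip exactly, comparison filters must
# work against the written file, and the logical type must be preserved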
foreach type UTINYINT USMALLINT UINTEGER UBIGINT

statement ok
CREATE OR REPLACE TABLE unsigned(d ${type})

statement ok
INSERT INTO unsigned SELECT * FROM values_${type}

statement ok
COPY unsigned TO '__TEST_DIR__/unsigned.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/unsigned.parquet' EXCEPT SELECT * FROM unsigned
----

query I
SELECT * FROM unsigned EXCEPT SELECT * FROM '__TEST_DIR__/unsigned.parquet'
----

query I
SELECT * FROM '__TEST_DIR__/unsigned.parquet' WHERE d=42
----
42

query I
SELECT COUNT(*) FROM '__TEST_DIR__/unsigned.parquet' WHERE d>42
----
1

query I
SELECT COUNT(*) FROM '__TEST_DIR__/unsigned.parquet' WHERE d>=42
----
2

query I
SELECT COUNT(*) FROM '__TEST_DIR__/unsigned.parquet' WHERE d<42
----
1

query I
SELECT COUNT(*) FROM '__TEST_DIR__/unsigned.parquet' WHERE d<=42
----
2

query I
SELECT typeof(d)='${type}' FROM '__TEST_DIR__/unsigned.parquet' LIMIT 1
----
true

endloop
82
external/duckdb/test/sql/copy/parquet/writer/parquet_write_uuid.test
vendored
Normal file
@@ -0,0 +1,82 @@
# name: test/sql/copy/parquet/writer/parquet_write_uuid.test
# description: Parquet UUID round trip
# group: [writer]

statement ok
SET default_null_order='nulls_first';

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE IF NOT EXISTS uuid (u uuid);

statement ok
INSERT INTO uuid VALUES
    ('A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11'),
    (NULL),
    ('47183823-2574-4bfd-b411-99ed177d3e43'),
    ('{10203040506070800102030405060708}'),
    ('A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11'),
    (NULL),
    ('00112233-4455-6677-8899-aabbccddeeff'),
    ('47183823-2574-4bfd-b411-99ed177d3e43'),
    ('{10203040506070800102030405060708}'),
    ('00000000-0000-0000-0000-000000000000'),
    ('00000000-0000-0000-0000-000000000001'),
    ('00000000-0000-0000-8000-000000000001'),
    ('80000000-0000-0000-0000-000000000000'),
    ('80000000-0000-0000-8000-000000000000'),
    ('80000000-0000-0000-8fff-ffffffffffff'),
    ('80000000-0000-0000-ffff-ffffffffffff'),
    ('8fffffff-ffff-ffff-0000-000000000000'),
    ('8fffffff-ffff-ffff-8000-000000000000'),
    ('8fffffff-ffff-ffff-8fff-ffffffffffff'),
    ('8fffffff-ffff-ffff-ffff-ffffffffffff'),
    ('ffffffff-ffff-ffff-ffff-ffffffffffff');

statement ok
COPY uuid TO '__TEST_DIR__/uuid.parquet'

query I
SELECT * FROM '__TEST_DIR__/uuid.parquet' ORDER BY 1
----
NULL
NULL
00000000-0000-0000-0000-000000000000
00000000-0000-0000-0000-000000000001
00000000-0000-0000-8000-000000000001
00112233-4455-6677-8899-aabbccddeeff
10203040-5060-7080-0102-030405060708
10203040-5060-7080-0102-030405060708
47183823-2574-4bfd-b411-99ed177d3e43
47183823-2574-4bfd-b411-99ed177d3e43
80000000-0000-0000-0000-000000000000
80000000-0000-0000-8000-000000000000
80000000-0000-0000-8fff-ffffffffffff
80000000-0000-0000-ffff-ffffffffffff
8fffffff-ffff-ffff-0000-000000000000
8fffffff-ffff-ffff-8000-000000000000
8fffffff-ffff-ffff-8fff-ffffffffffff
8fffffff-ffff-ffff-ffff-ffffffffffff
a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
ffffffff-ffff-ffff-ffff-ffffffffffff
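
# parquet_metadata() exposes both the legacy (stats_min/stats_max) and the newer
# (stats_min_value/stats_max_value) statistics fields; for UUIDs all four are written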
query IIII
SELECT stats_min, stats_max, stats_min_value, stats_max_value FROM parquet_metadata('__TEST_DIR__/uuid.parquet')
----
00000000-0000-0000-0000-000000000000 ffffffff-ffff-ffff-ffff-ffffffffffff 00000000-0000-0000-0000-000000000000 ffffffff-ffff-ffff-ffff-ffffffffffff

statement ok
CREATE TABLE uuid2 AS SELECT uuid '47183823-2574-4bfd-b411-99ed177d3e43' uuid_val union all select uuid '00112233-4455-6677-8899-aabbccddeeff';

statement ok
COPY uuid2 TO '__TEST_DIR__/uuid2.parquet'

query IIII
SELECT stats_min, stats_max, stats_min_value, stats_max_value FROM parquet_metadata('__TEST_DIR__/uuid2.parquet')
----
00112233-4455-6677-8899-aabbccddeeff 47183823-2574-4bfd-b411-99ed177d3e43 00112233-4455-6677-8899-aabbccddeeff 47183823-2574-4bfd-b411-99ed177d3e43
40
external/duckdb/test/sql/copy/parquet/writer/parquet_zstd_sequence.test_slow
vendored
Normal file
@@ -0,0 +1,40 @@
# name: test/sql/copy/parquet/writer/parquet_zstd_sequence.test_slow
# description: Test ZSTD-compressed writing of large sequence strings and lists of them
# group: [writer]

require parquet

require 64bit

statement ok
COPY (SELECT * FROM read_csv_auto('data/csv/sequences.csv.gz', delim=',', header=True) LIMIT 25000) TO '__TEST_DIR__/duckseq.parquet' (FORMAT 'PARQUET', CODEC 'ZSTD', ROW_GROUP_SIZE 25000);

query IIIIII
select count(*), min(strain), max(strain), min(strlen(sequence)), max(strlen(sequence)), avg(strlen(sequence))
from '__TEST_DIR__/duckseq.parquet';
----
25000 AUS/NT01/2020 canine/HKG/20-03695/2020 17340 30018 29855.647080
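
# wrap the scanned rows in lists: the LIST() aggregate yields a single row, flanked by
# 3 + 3 rows of empty/NULL lists, so the written file holds 7 rows in total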
statement ok
COPY
(
    SELECT lstrain::VARCHAR[] lstrain, lsequence::VARCHAR[] lsequence FROM (VALUES ([], []), (NULL, NULL), ([], [])) tbl(lstrain, lsequence)
    UNION ALL
    SELECT * FROM (
        SELECT LIST(strain) AS lstrain, LIST(sequence) AS lsequence FROM '__TEST_DIR__/duckseq.parquet' LIMIT 10000
    )
    UNION ALL
    SELECT * FROM (VALUES ([], []), (NULL, NULL), ([], []))
)
TO '__TEST_DIR__/duckseq2.parquet' (FORMAT 'PARQUET', CODEC 'ZSTD');

query I
SELECT COUNT(*) FROM '__TEST_DIR__/duckseq2.parquet'
----
7

query IIIIII nosort querylabel
select count(*), min(strain), max(strain), min(strlen(sequence)), max(strlen(sequence)), avg(strlen(sequence))
from (SELECT UNNEST(lstrain) AS strain, UNNEST(lsequence) AS sequence FROM '__TEST_DIR__/duckseq2.parquet');
----
100000 ARG/Cordoba-1006-155/2020 tiger/NY/040420/2020 17340 30643 29821.264410
24
external/duckdb/test/sql/copy/parquet/writer/partition_without_hive.test
vendored
Normal file
@@ -0,0 +1,24 @@
# name: test/sql/copy/parquet/writer/partition_without_hive.test
# description: Test writing partitioned files WITHOUT hive partitioning
# group: [writer]

require parquet

statement ok
CREATE TABLE t1(part_key INT, val INT);

statement ok
INSERT INTO t1 SELECT i%2, i FROM range(10) t(i);
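
# HIVE_FILE_PATTERN false writes plain data_N.parquet files instead of part_key=...
# directories; WRITE_PARTITION_COLUMNS keeps the partition column in the files,
# making the round trip lossless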
statement ok
COPY t1 TO '__TEST_DIR__/hive_filters' (FORMAT PARQUET, PARTITION_BY part_key, HIVE_FILE_PATTERN false, WRITE_PARTITION_COLUMNS true);

query I
SELECT file.replace('__TEST_DIR__', '').replace('\', '/') FROM GLOB('__TEST_DIR__/hive_filters/*.parquet') ORDER BY ALL
----
/hive_filters/data_0.parquet
/hive_filters/data_1.parquet

query II
FROM '__TEST_DIR__/hive_filters/*.parquet' EXCEPT ALL FROM t1
----
106
external/duckdb/test/sql/copy/parquet/writer/row_group_size_bytes.test
vendored
Normal file
@@ -0,0 +1,106 @@
# name: test/sql/copy/parquet/writer/row_group_size_bytes.test
# description: Parquet writer ROW_GROUP_SIZE_BYTES tests
# group: [writer]

require parquet

require vector_size 1024

# different vector sizes result in different numbers of rows
require no_vector_verification

statement ok
SET preserve_insertion_order=false

statement error
copy (select 42) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE_BYTES)
----

# we can use human-readable memory limits
statement ok
copy (
    select range c0,
           range c1,
           range c2,
           range c3,
           range c4,
           range c5,
           range c6,
           range c7,
    from range(50000)
) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE_BYTES '1mb')

query T
select max(row_group_num_rows) from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
16384
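
# eight BIGINT range columns are roughly 8 * 8 = 64 bytes per row, so the '1mb' cap
# flushes row groups at about 16k rows (16384 * 64 bytes = 1 MiB)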
# also verify that row_group_bytes is actually populated in the metadata
query T
select min(row_group_bytes) != 0 from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
1

# plain integer byte counts work as well
# halving the byte limit should halve the maximum row group size
statement ok
copy (
    select range c0,
           range c1,
           range c2,
           range c3,
           range c4,
           range c5,
           range c6,
           range c7,
    from range(50000)
) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE_BYTES 500000)

query T
select max(row_group_num_rows) from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
8192

# either limit is checked, so we should get row groups of 10240 rows even though we set a 1GB byte limit
statement ok
copy (
    select range c0,
           range c1,
           range c2,
           range c3,
           range c4,
           range c5,
           range c6,
           range c7,
    from range(50000)
) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE 10000, ROW_GROUP_SIZE_BYTES '1GB')

query T
select max(row_group_num_rows) from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
10240

# these strings take around 16 + 50 = 66 bytes per string, so 2048 * 66 = 135168 bytes per chunk
# if we set the limit to 200000, then we should get row groups of 4096 rows
statement ok
copy (
    select range || repeat('0', 50) c0
    from range(50000)
) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE_BYTES 200000)

query T
select max(row_group_num_rows) from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
4096

# if we set it to 650000 we should get row groups of 10240 rows
statement ok
copy (
    select range || repeat('0', 50) c0
    from range(50000)
) to '__TEST_DIR__/tbl.parquet' (ROW_GROUP_SIZE_BYTES 650000)

query T
select max(row_group_num_rows) from parquet_metadata('__TEST_DIR__/tbl.parquet')
----
10240
74
external/duckdb/test/sql/copy/parquet/writer/skip_empty_write.test
vendored
Normal file
@@ -0,0 +1,74 @@
# name: test/sql/copy/parquet/writer/skip_empty_write.test
# description: Parquet writer WRITE_EMPTY_FILE false option
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE empty_tbl(i INT, j VARCHAR);

statement ok
CREATE TABLE tbl AS FROM range(10000) t(i) UNION ALL SELECT 100000

# basic usage
statement ok
copy (select 42 where 42=84) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false)

query I
SELECT COUNT(*) FROM glob('__TEST_DIR__/empty.parquet')
----
0
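
# when the file is skipped entirely, RETURN_STATS and RETURN_FILES have nothing to
# report; this must hold whether or not insertion order is preserved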
foreach preserve_order true false

statement ok
SET preserve_insertion_order=${preserve_order}

# no file name returned
query IIIIII
copy (select 42 where 42=84) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS)
----

# now with a table
query IIIIII
copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS)
----

query II
copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_FILES)
----
0 []

query IIIIII
copy (from tbl where i = 20000) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS)
----

endloop

# write_empty_file with file_size_bytes
query I
copy (select 42 where 42=84) to '__TEST_DIR__/empty_file_size_bytes/' (FORMAT PARQUET, WRITE_EMPTY_FILE false, FILENAME_PATTERN '{uuidv7}.parquet', FILE_SIZE_BYTES 128)
----
0

query I
SELECT COUNT(*) FROM glob('__TEST_DIR__/empty_file_size_bytes/*.parquet')
----
0

statement ok
copy tbl to '__TEST_DIR__/empty_row_groups_per_file.parquet' (WRITE_EMPTY_FILE false, ROW_GROUPS_PER_FILE 1)

# these combinations are not allowed
statement error
copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, PARTITION_BY (i))
----
Can't combine

statement error
copy tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, PER_THREAD_OUTPUT)
----
Can't combine
75
external/duckdb/test/sql/copy/parquet/writer/test_copy_overwrite_parquet.test
vendored
Normal file
@@ -0,0 +1,75 @@
# name: test/sql/copy/parquet/writer/test_copy_overwrite_parquet.test
# description: Test copy statement with file overwrite on parquet
# group: [writer]

require parquet

# create a table and insert some values
statement ok
CREATE TABLE test (a INTEGER, b VARCHAR(10));

statement ok
INSERT INTO test VALUES (1, 'hello'), (2, 'world '), (3, ' xx');

query IT
SELECT * FROM test ORDER BY 1;
----
1 hello
2 world
3 xx

# copy to the parquet file
query I
COPY test TO '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET)
----
3

# now copy to the file again
query I
COPY (SELECT * FROM test LIMIT 2) TO '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET);
----
2

# reload the data from the file: it should only have two rows
statement ok
DELETE FROM test;

query I
COPY test FROM '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET);
----
2

query IT
SELECT * FROM test ORDER BY 1;
----
1 hello
2 world

# a query that returns an error must not export anything to the file
statement error
COPY (SELECT i FROM range(1) tbl(i) UNION ALL SELECT concat('hello', i)::INT i FROM range(1) tbl(i)) to '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET);
----
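
# the failed COPY above was staged in a temporary file (USE_TMP_FILE defaults to on
# when overwriting), so the original overwrite.parquet should be untouched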
statement ok
DELETE FROM test;

query I
COPY test FROM '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET);
----
2

# this test should still pass as data was not overwritten
query IT
SELECT * FROM test ORDER BY 1;
----
1 hello
2 world

# Test USE_TMP_FILE flag with parquet
statement error
COPY (SELECT i FROM range(1) tbl(i) UNION ALL SELECT concat('hello', i)::INT i FROM range(1) tbl(i)) to '__TEST_DIR__/overwrite.parquet' (FORMAT PARQUET, USE_TMP_FILE FALSE);
----

statement error
SELECT * FROM '__TEST_DIR__/overwrite.parquet';
----
37
external/duckdb/test/sql/copy/parquet/writer/test_parquet_write.test
vendored
Normal file
@@ -0,0 +1,37 @@
# name: test/sql/copy/parquet/writer/test_parquet_write.test
# description: Parquet basic write
# group: [writer]

require parquet

statement ok
COPY (SELECT 42) TO '__TEST_DIR__/scalar.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/scalar.parquet');
----
42

# empty result set, single thread
statement ok
CREATE TABLE empty(i INTEGER)

statement ok
COPY (SELECT * FROM empty) TO '__TEST_DIR__/empty.parquet' (FORMAT 'parquet')

query I
SELECT COUNT(*) FROM parquet_scan('__TEST_DIR__/empty.parquet')
----
0

statement ok
SET threads=4;
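
# PER_THREAD_OUTPUT writes one file per thread into the target directory; even with
# four threads and an empty source, the glob scan must still return zero rows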
# empty result set, multi thread
statement ok
COPY (SELECT * FROM empty) TO '__TEST_DIR__/empty_multithread' (FORMAT 'parquet', PER_THREAD_OUTPUT True)

query I
SELECT COUNT(*) FROM parquet_scan('__TEST_DIR__/empty_multithread/*.parquet')
----
0
77
external/duckdb/test/sql/copy/parquet/writer/test_parquet_write_complex.test
vendored
Normal file
@@ -0,0 +1,77 @@
# name: test/sql/copy/parquet/writer/test_parquet_write_complex.test
# description: Parquet read and re-write various files
# group: [writer]

require parquet

# alltypes_dictionary: scan as parquet
query I nosort alltypes_dictionary
SELECT * FROM parquet_scan('data/parquet-testing/arrow/alltypes_dictionary.parquet');
----

# rewrite the file
statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/arrow/alltypes_dictionary.parquet')) TO '__TEST_DIR__/alltypes_dictionary.parquet' (FORMAT 'PARQUET')

# verify that the rewritten file has the same values again
query I nosort alltypes_dictionary
SELECT * FROM parquet_scan('__TEST_DIR__/alltypes_dictionary.parquet');
----

# bug687_nulls.parquet
query I nosort bug687_nulls
SELECT * FROM parquet_scan('data/parquet-testing/bug687_nulls.parquet') LIMIT 10;
----

statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/bug687_nulls.parquet')) TO '__TEST_DIR__/bug687_nulls.parquet' (FORMAT 'PARQUET')

query I nosort bug687_nulls
SELECT * FROM parquet_scan('__TEST_DIR__/bug687_nulls.parquet') LIMIT 10;
----

# Issue #1637: booleans encoded incorrectly
statement ok
COPY (SELECT true as x UNION ALL SELECT true) TO '__TEST_DIR__/bug1637_booleans.parquet' (FORMAT 'PARQUET');

# Prior to the #1637 fix, duckdb wrote a parquet file containing true, false
query I
SELECT COUNT(*) FROM parquet_scan('__TEST_DIR__/bug1637_booleans.parquet') WHERE x;
----
2

# userdata1.parquet
query I nosort userdata1.parquet
SELECT * FROM parquet_scan('data/parquet-testing/userdata1.parquet') ORDER BY 1 LIMIT 10;
----

statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/userdata1.parquet')) TO '__TEST_DIR__/userdata1.parquet' (FORMAT 'PARQUET')

query I nosort userdata1.parquet
SELECT * FROM parquet_scan('__TEST_DIR__/userdata1.parquet') ORDER BY 1 LIMIT 10;
----
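
# all codec variants below reuse the 'userdata1.parquet' query label, so each re-read
# is checked against the very first scan of the original file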
# gzip codec
statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/userdata1.parquet')) TO '__TEST_DIR__/userdata1-gzip.parquet' (FORMAT 'PARQUET', CODEC 'GZIP')

query I nosort userdata1.parquet
SELECT * FROM parquet_scan('__TEST_DIR__/userdata1-gzip.parquet') ORDER BY 1 LIMIT 10;
----

# uncompressed codec
statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/userdata1.parquet')) TO '__TEST_DIR__/userdata1-uncompressed.parquet' (FORMAT 'PARQUET', CODEC 'UNCOMPRESSED')

query I nosort userdata1.parquet
SELECT * FROM parquet_scan('__TEST_DIR__/userdata1-uncompressed.parquet') ORDER BY 1 LIMIT 10;
----

# zstd codec
statement ok
COPY (SELECT * FROM parquet_scan('data/parquet-testing/userdata1.parquet')) TO '__TEST_DIR__/userdata1-zstd.parquet' (FORMAT 'PARQUET', CODEC 'ZSTD')

query I nosort userdata1.parquet
SELECT * FROM parquet_scan('__TEST_DIR__/userdata1-zstd.parquet') ORDER BY 1 LIMIT 10;
----
22
external/duckdb/test/sql/copy/parquet/writer/write_big_list.test_slow
vendored
Normal file
@@ -0,0 +1,22 @@
# name: test/sql/copy/parquet/writer/write_big_list.test_slow
# description: Parquet write big list
# group: [writer]

require parquet

# big list (> vector size)
statement ok
CREATE TABLE big_list AS SELECT LIST(CASE WHEN i%2=0 THEN NULL ELSE i END) l FROM range(20000) tbl(i);
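
# even entries are NULL, so the checksum sums the odd numbers: 1 + 3 + ... + 19999 = 10000^2 = 100000000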
query I
SELECT SUM(i) FROM (SELECT UNNEST(l) FROM big_list) t(i)
----
100000000

statement ok
COPY big_list TO '__TEST_DIR__/big_list.parquet' (FORMAT 'parquet');

query I
SELECT SUM(i) FROM (SELECT UNNEST(l) FROM '__TEST_DIR__/big_list.parquet') t(i)
----
100000000
137
external/duckdb/test/sql/copy/parquet/writer/write_complex_nested.test
vendored
Normal file
@@ -0,0 +1,137 @@
# name: test/sql/copy/parquet/writer/write_complex_nested.test
# description: Parquet write complex structures
# group: [writer]

require parquet

# struct of lists
statement ok
CREATE TABLE struct_of_lists AS SELECT * FROM (VALUES
    ({'a': [1, 2, 3], 'b': ['hello', 'world']}),
    ({'a': [4, NULL, 5], 'b': ['duckduck', 'goose']}),
    ({'a': NULL, 'b': ['longlonglonglonglonglong', NULL, NULL]}),
    (NULL),
    ({'a': [], 'b': []}),
    ({'a': [1, 2, 3], 'b': NULL})
) tbl(i);

statement ok
COPY struct_of_lists TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
{'a': [1, 2, 3], 'b': [hello, world]}
{'a': [4, NULL, 5], 'b': [duckduck, goose]}
{'a': NULL, 'b': [longlonglonglonglonglong, NULL, NULL]}
NULL
{'a': [], 'b': []}
{'a': [1, 2, 3], 'b': NULL}

# list of structs
statement ok
CREATE TABLE list_of_structs AS SELECT * FROM (VALUES
    ([{'a': 1, 'b': 100}, NULL, {'a': 2, 'b': 101}]),
    (NULL),
    ([]),
    ([{'a': NULL, 'b': 102}, {'a': 3, 'b': NULL}, NULL])
) tbl(i);

statement ok
COPY list_of_structs TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
[{'a': 1, 'b': 100}, NULL, {'a': 2, 'b': 101}]
NULL
[]
[{'a': NULL, 'b': 102}, {'a': 3, 'b': NULL}, NULL]

# list of structs of structs
statement ok
CREATE TABLE list_of_struct_of_structs AS SELECT * FROM (VALUES
    ([{'a': {'x': 33}, 'b': {'y': 42, 'z': 99}}, NULL, {'a': {'x': NULL}, 'b': {'y': 43, 'z': 100}}]),
    (NULL),
    ([]),
    ([{'a': NULL, 'b': {'y': NULL, 'z': 101}}, {'a': {'x': 34}, 'b': {'y': 43, 'z': NULL}}]),
    ([{'a': NULL, 'b': NULL}])
) tbl(i);

statement ok
COPY list_of_struct_of_structs TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
[{'a': {'x': 33}, 'b': {'y': 42, 'z': 99}}, NULL, {'a': {'x': NULL}, 'b': {'y': 43, 'z': 100}}]
NULL
[]
[{'a': NULL, 'b': {'y': NULL, 'z': 101}}, {'a': {'x': 34}, 'b': {'y': 43, 'z': NULL}}]
[{'a': NULL, 'b': NULL}]

# list of lists
# no empty lists or nulls
statement ok
CREATE TABLE list_of_lists_simple AS SELECT * FROM (VALUES
    ([[1, 2, 3], [4, 5]]),
    ([[6, 7]]),
    ([[8, 9, 10], [11, 12]])
) tbl(i);

statement ok
COPY list_of_lists_simple TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
[[1, 2, 3], [4, 5]]
[[6, 7]]
[[8, 9, 10], [11, 12]]

# list of lists with nulls and empty lists
statement ok
CREATE TABLE list_of_lists AS SELECT * FROM (VALUES
    ([[1, 2, 3], [4, 5], [], [6, 7]]),
    ([[8, NULL, 10], NULL, []]),
    ([]),
    (NULL),
    ([[11, 12, 13, 14], [], NULL, [], [], [15], [NULL, NULL, NULL]])
) tbl(i);

statement ok
COPY list_of_lists TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
[[1, 2, 3], [4, 5], [], [6, 7]]
[[8, NULL, 10], NULL, []]
[]
NULL
[[11, 12, 13, 14], [], NULL, [], [], [15], [NULL, NULL, NULL]]
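
# [LIST(i)] folds every row of list_of_lists into a single value, stacking two more
# nesting levels on top of the existing two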
# list of lists of lists of lists
statement ok
CREATE TABLE list_of_lists_of_lists_of_lists AS
SELECT [LIST(i)] i FROM list_of_lists
UNION ALL
SELECT NULL
UNION ALL
SELECT [NULL]
UNION ALL
SELECT [[], NULL, [], []]
UNION ALL
SELECT [[[NULL, NULL, [NULL]], NULL, [[], [7, 8, 9], [NULL], NULL, []]], [], [NULL]]

statement ok
COPY list_of_lists_of_lists_of_lists TO '__TEST_DIR__/complex_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/complex_list.parquet');
----
[[[[1, 2, 3], [4, 5], [], [6, 7]], [[8, NULL, 10], NULL, []], [], NULL, [[11, 12, 13, 14], [], NULL, [], [], [15], [NULL, NULL, NULL]]]]
NULL
[NULL]
[[], NULL, [], []]
[[[NULL, NULL, [NULL]], NULL, [[], [7, 8, 9], [NULL], NULL, []]], [], [NULL]]
75
external/duckdb/test/sql/copy/parquet/writer/write_list.test
vendored
Normal file
@@ -0,0 +1,75 @@
# name: test/sql/copy/parquet/writer/write_list.test
# description: Parquet write list
# group: [writer]

require parquet

# standard list
statement ok
CREATE TABLE list AS SELECT * FROM (VALUES
    ([1, 2, 3]),
    ([4, 5]),
    ([6, 7]),
    ([8, 9, 10, 11])
) tbl(i);

statement ok
COPY list TO '__TEST_DIR__/test_list.parquet' (FORMAT 'parquet');

query I
SELECT i FROM parquet_scan('__TEST_DIR__/test_list.parquet');
----
[1, 2, 3]
[4, 5]
[6, 7]
[8, 9, 10, 11]

# empty and NULL lists
statement ok
CREATE TABLE null_empty_list AS SELECT * FROM (VALUES
    ([1, 2, 3]),
    ([4, 5]),
    ([6, 7]),
    ([NULL]),
    ([]),
    ([]),
    ([]),
    ([]),
    ([8, NULL, 10, 11]),
    (NULL)
) tbl(i);

statement ok
COPY null_empty_list TO '__TEST_DIR__/test_list.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/test_list.parquet');
----
[1, 2, 3]
[4, 5]
[6, 7]
[NULL]
[]
[]
[]
[]
[8, NULL, 10, 11]
NULL
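
# an empty list and a NULL list are distinct cases in Parquet's repetition/definition
# level encoding; both must come back unchanged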
# empty list
statement ok
COPY (SELECT []::INT[]) TO '__TEST_DIR__/test_empty_list.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/test_empty_list.parquet'
----
[]

# null list
statement ok
COPY (SELECT NULL::INT[]) TO '__TEST_DIR__/test_null_list.parquet' (FORMAT 'parquet');

query I
SELECT * FROM '__TEST_DIR__/test_null_list.parquet'
----
NULL
102
external/duckdb/test/sql/copy/parquet/writer/write_map.test
vendored
Normal file
@@ -0,0 +1,102 @@
# name: test/sql/copy/parquet/writer/write_map.test
# description: Write maps
# group: [writer]

require parquet

# int -> int map
statement ok
CREATE TABLE int_maps(m MAP(INTEGER,INTEGER));

statement ok
INSERT INTO int_maps VALUES
    (MAP([42, 84], [1, 2])),
    (MAP([101, 201, 301], [3, NULL, 5])),
    (MAP([55, 66, 77], [6, 7, NULL]))
;

statement ok
COPY int_maps TO '__TEST_DIR__/int_map.parquet' (FORMAT PARQUET)

query I
SELECT * FROM '__TEST_DIR__/int_map.parquet'
----
{42=1, 84=2}
{101=3, 201=NULL, 301=5}
{55=6, 66=7, 77=NULL}
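
# DuckDB MAPs are written as the Parquet MAP logical type: a repeated key_value group
# with a required key and an optional value, which is why NULL keys are rejected below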
statement error
INSERT INTO int_maps VALUES
    (MAP([NULL], [NULL]))
;
----
<REGEX>:.*Invalid Input Error: Map keys can not be NULL.*

# string_map has not been created yet, so this COPY fails with a catalog error
statement error
COPY string_map TO '__TEST_DIR__/int_maps.parquet' (FORMAT PARQUET)
----
<REGEX>:.*Catalog Error.*does not exist!.*

# string -> string map
statement ok
CREATE TABLE string_map(m MAP(VARCHAR,VARCHAR));

statement ok
INSERT INTO string_map VALUES
    (MAP(['key1', 'key2'], ['value1', 'value2'])),
    (MAP(['best band', 'best boyband', 'richest person'], ['Tenacious D', 'Backstreet Boys', 'Jon Lajoie'])),
    (MAP([], [])),
    (NULL),
    (MAP(['option'], [NULL]))
;

statement ok
COPY string_map TO '__TEST_DIR__/string_map.parquet' (FORMAT PARQUET)

query I
SELECT * FROM '__TEST_DIR__/string_map.parquet'
----
{key1=value1, key2=value2}
{best band=Tenacious D, best boyband=Backstreet Boys, richest person=Jon Lajoie}
{}
NULL
{option=NULL}

statement error
INSERT INTO string_map VALUES
    (MAP([NULL], [NULL]))
;
----
<REGEX>:.*Invalid Input Error: Map keys can not be NULL.*

# list -> list map
statement ok
CREATE TABLE list_map(m MAP(INT[],INT[]));

statement ok
INSERT INTO list_map VALUES
    (MAP([[1, 2, 3], [], [4, 5]], [[6, 7, 8], NULL, [NULL]])),
    (MAP([], [])),
    (MAP([[1]], [NULL])),
    (MAP([[10, 12, 14, 16, 18, 20], []], [[1], [2]]))
;

statement ok
COPY list_map TO '__TEST_DIR__/list_map.parquet' (FORMAT PARQUET)

query I
SELECT * FROM '__TEST_DIR__/list_map.parquet'
----
{[1, 2, 3]=[6, 7, 8], []=NULL, [4, 5]=[NULL]}
{}
{[1]=NULL}
{[10, 12, 14, 16, 18, 20]=[1], []=[2]}

# map keys cannot be NULL
statement error
INSERT INTO list_map VALUES
    (MAP([NULL], [NULL]))
;
----
<REGEX>:.*Invalid Input Error: Map keys can not be NULL.*
35
external/duckdb/test/sql/copy/parquet/writer/write_stats_big_string.test
vendored
Normal file
@@ -0,0 +1,35 @@
# name: test/sql/copy/parquet/writer/write_stats_big_string.test
# description: We avoid writing min/max stats of large strings
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE varchar(v VARCHAR);

statement ok
INSERT INTO varchar VALUES (NULL), ('hello'), (NULL), ('world'), (NULL)

# we write stats when there are only small strings
statement ok
COPY varchar TO '__TEST_DIR__/bigvarchar.parquet'

query IIIIII
SELECT stats_min_value, stats_max_value, stats_min, stats_max, min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/bigvarchar.parquet')
----
hello world hello world true true
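
# oversized strings are truncated to a bounded prefix before being written as stats,
# so the min below is inexact (min_is_exact = false) while the max stays exact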
# we truncate stats of large strings
statement ok
INSERT INTO varchar SELECT repeat('A', 100000) v

statement ok
COPY varchar TO '__TEST_DIR__/bigvarchar.parquet'

query IIIIII
SELECT stats_min_value, stats_max_value, stats_min, stats_max, min_is_exact, max_is_exact FROM parquet_metadata('__TEST_DIR__/bigvarchar.parquet')
----
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA world AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA world false true
226
external/duckdb/test/sql/copy/parquet/writer/write_stats_min_max.test_slow
vendored
Normal file
@@ -0,0 +1,226 @@
# name: test/sql/copy/parquet/writer/write_stats_min_max.test_slow
# description: Write min/max stats to Parquet files
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification;

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;

statement ok
CREATE TABLE boolean_limits AS SELECT (false)::BOOLEAN min, true::BOOLEAN max

statement ok
CREATE TABLE tinyint_limits AS SELECT (-128)::TINYINT min, 127::TINYINT max

statement ok
CREATE TABLE smallint_limits AS SELECT (-32768)::SMALLINT min, 32767::SMALLINT max

statement ok
CREATE TABLE integer_limits AS SELECT (-2147483648)::INTEGER min, 2147483647::INTEGER max

statement ok
CREATE TABLE bigint_limits AS SELECT (-9223372036854775808)::BIGINT min, 9223372036854775807::BIGINT max

statement ok
CREATE TABLE float_limits AS SELECT (-0.5)::FLOAT min, 0.5::FLOAT max

statement ok
CREATE TABLE double_limits AS SELECT (-0.5)::DOUBLE min, 0.5::DOUBLE max

statement ok
CREATE TABLE varchar_limits AS SELECT 'hello world 👤🏠📕' min, 'look at my ducks 🦆🦆🦆' max;

statement ok
CREATE TABLE blob_limits AS SELECT blob '\x00hello\x00world\x00' min, blob '\x00look\x00at\x00my\x00nullbytes\x00' max;

statement ok
CREATE TABLE date_limits AS SELECT date '1900-01-01' min, date '2030-12-31' max;

statement ok
CREATE TABLE time_limits AS SELECT time '00:00:00' min, time '23:59:59' max;

statement ok
CREATE TABLE timestamp_limits AS SELECT timestamp '1900-01-01 00:00:00' min, timestamp '2030-12-31 23:59:59' max;

statement ok
CREATE TABLE timestamp_s_limits AS SELECT '1900-01-01 00:00:00'::timestamp_s min, '2030-12-31 23:59:59'::timestamp_s max;

statement ok
CREATE TABLE timestamp_ms_limits AS SELECT '1900-01-01 00:00:00'::timestamp_ms min, '2030-12-31 23:59:59'::timestamp_ms max;

statement ok
CREATE TABLE timestamp_ns_limits AS SELECT '1900-01-01 00:00:00'::timestamp_ns min, '2030-12-31 23:59:59'::timestamp_ns max;
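
# each loop iteration writes a NULL-only file first (expecting empty stats), then the
# type's extreme values, and checks them against the matching ${type}_limits table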
# min/max/min_value/max_value for signed tables
foreach type date time timestamp timestamp_s timestamp_ms timestamp_ns varchar blob boolean tinyint smallint integer bigint float double

statement ok
CREATE TABLE tbl(i ${type});

# empty stats (all values are NULL)
statement ok
INSERT INTO tbl SELECT NULL

statement ok
COPY tbl TO '__TEST_DIR__/${type}_stats.parquet' (FORMAT PARQUET);

query IIII
SELECT stats_min_value::${type}, stats_max_value::${type}, stats_min::${type}, stats_max::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL NULL NULL NULL

# min/max stats
statement ok
INSERT INTO tbl SELECT min FROM ${type}_limits

statement ok
INSERT INTO tbl SELECT max FROM ${type}_limits

statement ok
COPY tbl TO '__TEST_DIR__/${type}_stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_min_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT min FROM ${type}_limits
----

query I
SELECT stats_max_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT max FROM ${type}_limits
----

query I
SELECT stats_min::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT min FROM ${type}_limits
----

query I
SELECT stats_max::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT max FROM ${type}_limits
----

statement ok
DROP TABLE tbl

endloop

statement ok
CREATE TABLE utinyint_limits AS SELECT (0)::UTINYINT min, 255::UTINYINT max

statement ok
CREATE TABLE usmallint_limits AS SELECT (0)::USMALLINT min, 65535::USMALLINT max

statement ok
CREATE TABLE uinteger_limits AS SELECT 0::UINTEGER min, 4294967295::UINTEGER max

statement ok
CREATE TABLE ubigint_limits AS SELECT 0::UBIGINT min, 18446744073709551615::UBIGINT max

# unsigned types only define min_value/max_value (the legacy min/max fields assume signed ordering)
foreach type utinyint usmallint uinteger ubigint

statement ok
CREATE TABLE tbl(i ${type});

# empty stats (all values are NULL)
statement ok
INSERT INTO tbl SELECT NULL

statement ok
COPY tbl TO '__TEST_DIR__/${type}_stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_min_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_max_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_min::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_max::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

# min/max stats
statement ok
INSERT INTO tbl SELECT min FROM ${type}_limits

statement ok
INSERT INTO tbl SELECT max FROM ${type}_limits

statement ok
COPY tbl TO '__TEST_DIR__/${type}_stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_min_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT min FROM ${type}_limits
----

query I
SELECT stats_max_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet') EXCEPT SELECT max FROM ${type}_limits
----

query I
SELECT stats_min::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_max::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

statement ok
DROP TABLE tbl

endloop

# no stats for these types
statement ok
CREATE TABLE hugeint_limits AS SELECT (-170141183460469231731687303715884105728)::HUGEINT min, 170141183460469231731687303715884105727::HUGEINT max

foreach type hugeint

statement ok
CREATE TABLE tbl(i ${type});

statement ok
INSERT INTO tbl SELECT min FROM ${type}_limits

statement ok
INSERT INTO tbl SELECT max FROM ${type}_limits

statement ok
COPY tbl TO '__TEST_DIR__/${type}_stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_min_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_max_value::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_min::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

query I
SELECT stats_max::${type} FROM parquet_metadata('__TEST_DIR__/${type}_stats.parquet')
----
NULL

statement ok
DROP TABLE tbl

endloop
96
external/duckdb/test/sql/copy/parquet/writer/write_stats_null_count.test
vendored
Normal file
@@ -0,0 +1,96 @@
# name: test/sql/copy/parquet/writer/write_stats_null_count.test
# description: Write null_count stats to Parquet files
# group: [writer]

require parquet

statement ok
PRAGMA enable_verification;

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;

# null count
statement ok
COPY (SELECT 42 i) TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_null_count FROM parquet_metadata('__TEST_DIR__/stats.parquet')
----
0

# we can filter out the IS NULL clause if there are no NULL values
query II
EXPLAIN SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
logical_opt <!REGEX>:.*IS.*NULL.*

query I
SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
0

statement ok
COPY (SELECT NULL i) TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_null_count FROM parquet_metadata('__TEST_DIR__/stats.parquet')
----
1

# we can also filter out the IS NULL clause when everything is NULL
query II
EXPLAIN SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
logical_opt <!REGEX>:.*IS.*NULL.*

query I
SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
1

statement ok
COPY (SELECT * FROM VALUES (42), (NULL) tbl(i)) TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

# we cannot filter out the IS NULL clause when there are mixed NULL/valid values
query II
EXPLAIN SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
logical_opt <REGEX>:.*IS.*NULL.*

query I
SELECT COUNT(*) FROM '__TEST_DIR__/stats.parquet' WHERE i IS NULL
----
1

# list null count is not supported (i.e. we don't write the null count in this case)
statement ok
COPY (SELECT [42, NULL, 43] i) TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_null_count FROM parquet_metadata('__TEST_DIR__/stats.parquet')
----
NULL

statement ok
COPY (SELECT {'a': NULL, 'b': 42} i) TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_null_count FROM parquet_metadata('__TEST_DIR__/stats.parquet')
----
1
0
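
# parquet_metadata() returns one row per column chunk: the struct above has two leaf
# columns, 'a' (all NULL, count 1) and 'b' (no NULLs, count 0)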
# struct null count is propagated to the children
# i.e. if a struct itself is NULL, this counts as NULL for each of its children
statement ok
CREATE TABLE structs AS SELECT {'a': NULL, 'b': 'hello'} i UNION ALL SELECT NULL UNION ALL SELECT {'a': 84, 'b': 'world'};

statement ok
COPY structs TO '__TEST_DIR__/stats.parquet' (FORMAT PARQUET);

query I
SELECT stats_null_count FROM parquet_metadata('__TEST_DIR__/stats.parquet')
----
2
1
139
external/duckdb/test/sql/copy/parquet/writer/write_struct.test
vendored
Normal file
@@ -0,0 +1,139 @@
# name: test/sql/copy/parquet/writer/write_struct.test
# description: Parquet write struct
# group: [writer]

require parquet

# standard struct
statement ok
CREATE TABLE struct AS SELECT * FROM (VALUES
    ({'a': 42, 'b': 84}),
    ({'a': 33, 'b': 32}),
    ({'a': 42, 'b': 27})
) tbl(i);

statement ok
COPY struct TO '__TEST_DIR__/test_struct.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/test_struct.parquet');
----
{'a': 42, 'b': 84}
{'a': 33, 'b': 32}
{'a': 42, 'b': 27}

# struct with nulls
statement ok
CREATE TABLE struct_nulls AS SELECT * FROM (VALUES
    ({'a': 42, 'b': 84}),
    ({'a': NULL, 'b': 32}),
    (NULL),
    ({'a': 42, 'b': NULL})
) tbl(i);

statement ok
COPY struct_nulls TO '__TEST_DIR__/test_struct_nulls.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/test_struct_nulls.parquet');
----
{'a': 42, 'b': 84}
{'a': NULL, 'b': 32}
NULL
{'a': 42, 'b': NULL}
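
# a NULL struct and a struct with NULL fields are encoded with different definition
# levels; the cases below make sure both survive the round trip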
# nested structs
statement ok
CREATE TABLE struct_nested AS SELECT * FROM (VALUES
    ({'a': {'x': 3, 'x1': 22}, 'b': {'y': 27, 'y1': 44}}),
    ({'a': {'x': 9, 'x1': 26}, 'b': {'y': 1, 'y1': 999}}),
    ({'a': {'x': 17, 'x1': 23}, 'b': {'y': 3, 'y1': 9999}})
) tbl(i);

statement ok
COPY struct_nested TO '__TEST_DIR__/struct_nested.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/struct_nested.parquet');
----
{'a': {'x': 3, 'x1': 22}, 'b': {'y': 27, 'y1': 44}}
{'a': {'x': 9, 'x1': 26}, 'b': {'y': 1, 'y1': 999}}
{'a': {'x': 17, 'x1': 23}, 'b': {'y': 3, 'y1': 9999}}

# nested structs with NULLs at every level
statement ok
CREATE TABLE struct_nested_null AS SELECT * FROM (VALUES
    ({'a': {'x': 3, 'x1': 22}, 'b': {'y': NULL, 'y1': 44}}),
    ({'a': {'x': NULL, 'x1': 26}, 'b': {'y': 1, 'y1': NULL}}),
    ({'a': {'x': 17, 'x1': NULL}, 'b': {'y': 3, 'y1': 9999}}),
    (NULL),
    ({'a': NULL, 'b': NULL})
) tbl(i);

statement ok
COPY struct_nested_null TO '__TEST_DIR__/struct_nested_null.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/struct_nested_null.parquet');
----
{'a': {'x': 3, 'x1': 22}, 'b': {'y': NULL, 'y1': 44}}
{'a': {'x': NULL, 'x1': 26}, 'b': {'y': 1, 'y1': NULL}}
{'a': {'x': 17, 'x1': NULL}, 'b': {'y': 3, 'y1': 9999}}
NULL
{'a': NULL, 'b': NULL}

# single struct
statement ok
CREATE TABLE single_struct AS SELECT * FROM (VALUES
    ({'a': 42}),
    ({'a': 33}),
    ({'a': 42})
) tbl(i);

statement ok
COPY single_struct TO '__TEST_DIR__/single_struct.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/single_struct.parquet');
----
{'a': 42}
{'a': 33}
{'a': 42}

# single struct nulls
statement ok
CREATE TABLE single_struct_null AS SELECT * FROM (VALUES
    ({'a': 42}),
    ({'a': NULL}),
    (NULL)
) tbl(i);

statement ok
COPY single_struct_null TO '__TEST_DIR__/single_struct_null.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/single_struct_null.parquet');
----
{'a': 42}
{'a': NULL}
NULL

# nested single struct
statement ok
CREATE TABLE nested_single_struct AS SELECT * FROM (VALUES
    ({'a': {'b': 42}}),
    ({'a': {'b': NULL}}),
    ({'a': NULL}),
    (NULL)
) tbl(i);

statement ok
COPY nested_single_struct TO '__TEST_DIR__/nested_single_struct.parquet' (FORMAT 'parquet');

query I
SELECT * FROM parquet_scan('__TEST_DIR__/nested_single_struct.parquet');
----
{'a': {'b': 42}}
{'a': {'b': NULL}}
{'a': NULL}
NULL
35
external/duckdb/test/sql/copy/parquet/writer/writer_round_trip.test_slow
vendored
Normal file
@@ -0,0 +1,35 @@
# name: test/sql/copy/parquet/writer/writer_round_trip.test_slow
# description: Parquet read and re-write various files
# group: [writer]

require parquet

foreach parquet_file data/parquet-testing/manyrowgroups.parquet data/parquet-testing/map.parquet data/parquet-testing/arrow/int32_decimal.parquet data/parquet-testing/arrow/nonnullable.impala.parquet data/parquet-testing/bug687_nulls.parquet data/parquet-testing/bug1554.parquet data/parquet-testing/apkwan.parquet data/parquet-testing/arrow/nested_lists.snappy.parquet data/parquet-testing/arrow/nulls.snappy.parquet data/parquet-testing/nan-float.parquet data/parquet-testing/manyrowgroups2.parquet data/parquet-testing/struct.parquet data/parquet-testing/arrow/list_columns.parquet data/parquet-testing/timestamp-ms.parquet data/parquet-testing/arrow/alltypes_dictionary.parquet data/parquet-testing/arrow/binary.parquet data/parquet-testing/arrow/nation.dict-malformed.parquet data/parquet-testing/lineitem-top10000.gzip.parquet data/parquet-testing/arrow/nested_maps.snappy.parquet data/parquet-testing/arrow/dict-page-offset-zero.parquet data/parquet-testing/silly-names.parquet data/parquet-testing/zstd.parquet data/parquet-testing/bug1618_struct_strings.parquet data/parquet-testing/arrow/single_nan.parquet data/parquet-testing/arrow/int64_decimal.parquet data/parquet-testing/filter_bug1391.parquet data/parquet-testing/arrow/fixed_length_decimal_legacy.parquet data/parquet-testing/timestamp.parquet data/parquet-testing/arrow/fixed_length_decimal.parquet data/parquet-testing/leftdate3_192_loop_1.parquet data/parquet-testing/blob.parquet data/parquet-testing/bug1588.parquet data/parquet-testing/bug1589.parquet data/parquet-testing/arrow/alltypes_plain.parquet data/parquet-testing/arrow/repeated_no_annotation.parquet data/parquet-testing/data-types.parquet data/parquet-testing/unsigned.parquet data/parquet-testing/pandas-date.parquet data/parquet-testing/date.parquet data/parquet-testing/arrow/nullable.impala.parquet data/parquet-testing/fixed.parquet data/parquet-testing/arrow/alltypes_plain.snappy.parquet data/parquet-testing/decimal/int32_decimal.parquet data/parquet-testing/decimal/pandas_decimal.parquet data/parquet-testing/decimal/decimal_dc.parquet data/parquet-testing/decimal/int64_decimal.parquet data/parquet-testing/decimal/fixed_length_decimal_legacy.parquet data/parquet-testing/decimal/fixed_length_decimal.parquet data/parquet-testing/glob2/t1.parquet data/parquet-testing/cache/cache1.parquet data/parquet-testing/cache/cache2.parquet data/parquet-testing/glob/t2.parquet data/parquet-testing/glob/t1.parquet data/parquet-testing/bug2557.parquet
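
# the comparisons below use EXCEPT, which matches rows as sets, so files whose row
# order is not preserved by the rewrite still verify correctly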
statement ok
CREATE TABLE parquet_read AS SELECT * FROM parquet_scan('${parquet_file}');

statement ok
COPY parquet_read TO '__TEST_DIR__/test_round_trip.parquet'

statement ok
CREATE TABLE parquet_write AS SELECT * FROM parquet_scan('__TEST_DIR__/test_round_trip.parquet');

# verify that the count is the same
query I
SELECT COUNT(*) FROM parquet_read EXCEPT SELECT COUNT(*) FROM parquet_write
----

# verify that the data is the same
query I
SELECT COUNT(*) FROM (SELECT * FROM parquet_read EXCEPT SELECT * FROM parquet_write)
----
0

statement ok
DROP TABLE parquet_read

statement ok
DROP TABLE parquet_write

endloop