should be it
52
external/duckdb/test/parquet/bss_roundtrip.test_slow
vendored
Normal file
@@ -0,0 +1,52 @@
# name: test/parquet/bss_roundtrip.test_slow
# description: Test BYTE_STREAM_SPLIT roundtrip
# group: [parquet]

require parquet

require tpch

statement ok
call dbgen(sf=0.01);

statement ok
create view doubles as
select l_quantity::double l_quantity,
l_extendedprice::double l_extendedprice,
l_discount::double l_discount,
l_tax::double l_tax,
from lineitem

query IIII nosort q0
from doubles
----


statement ok
copy doubles to '__TEST_DIR__/bss.parquet' (PARQUET_VERSION V2);

query IIII nosort q0
from '__TEST_DIR__/bss.parquet';
----


statement ok
create view floats as
select l_quantity::float l_quantity,
l_extendedprice::float l_extendedprice,
l_discount::float l_discount,
l_tax::float l_tax,
from lineitem

query IIII nosort q1
from floats
----


statement ok
copy floats to '__TEST_DIR__/bss.parquet' (PARQUET_VERSION V2);

query IIII nosort q1
from '__TEST_DIR__/bss.parquet';
----
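Note: the roundtrip above checks that the values read back from '__TEST_DIR__/bss.parquet' match the views, but it does not directly assert which encoding the V2 writer picked. A minimal sketch of how one could verify that interactively (assuming, as the test name implies, that the V2 writer chooses BYTE_STREAM_SPLIT for FLOAT/DOUBLE columns):

-- Sketch: list the encodings chosen per column chunk of the file written above.
SELECT path_in_schema, encodings
FROM parquet_metadata('__TEST_DIR__/bss.parquet');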
10
external/duckdb/test/parquet/concatenated_gzip_members.test
vendored
Normal file
@@ -0,0 +1,10 @@
# name: test/parquet/concatenated_gzip_members.test
# description: Test reading Parquet file with concatenated GZIP members
# group: [parquet]

require parquet

query I
from 'data/parquet-testing/concatenated_gzip_members.parquet' offset 512;
----
513
26
external/duckdb/test/parquet/constant_dictionary_vector_parquet.test
vendored
Normal file
@@ -0,0 +1,26 @@
# name: test/parquet/constant_dictionary_vector_parquet.test
# description: Test that we retain constant/dictionary compression for strings when writing to Parquet (small data)
# group: [parquet]

require vector_size 2048

require parquet

# low memory limit to test that we don't blow up intermediates
statement ok
set memory_limit='10mb'

# we should be able to do this without spilling
statement ok
set temp_directory=null

# 1k strings of ~50kb = ~50 MB
# the ColumnDataCollection should keep the constant string compressed
# and the Parquet writer will use dictionary compression, not blowing them up there either
statement ok
copy (select repeat('a', 50_000) s from range(1000)) to '__TEST_DIR__/cdc_constant.parquet'

# the written file has dictionary compression
# when we copy it over to another file we should still be able to avoid blowing it up
statement ok
copy (from '__TEST_DIR__/cdc_constant.parquet') to '__TEST_DIR__/cdc_dictionary.parquet'
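Note: the two COPY statements above only assert that they complete under the 10 MB memory limit. To see that dictionary encoding actually kept the written column small, one could compare the compressed and uncompressed chunk sizes reported by parquet_metadata; a sketch, with column names as exposed by the DuckDB version being tested:

-- Sketch: the dictionary-encoded constant strings should compress to a small fraction of their logical size.
SELECT path_in_schema, encodings, total_compressed_size, total_uncompressed_size
FROM parquet_metadata('__TEST_DIR__/cdc_constant.parquet');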
26
external/duckdb/test/parquet/constant_dictionary_vector_parquet.test_slow
vendored
Normal file
@@ -0,0 +1,26 @@
# name: test/parquet/constant_dictionary_vector_parquet.test_slow
# description: Test that we retain constant/dictionary compression for strings when writing to Parquet (big data)
# group: [parquet]

require vector_size 2048

require parquet

# low memory limit to test that we don't blow up intermediates
statement ok
set memory_limit='100mb'

# we should be able to do this without spilling
statement ok
set temp_directory=null

# 100k strings of ~50kb = ~5 GB
# the ColumnDataCollection should keep the constant string compressed
# and the Parquet writer will use dictionary compression, not blowing them up there either
statement ok
copy (select repeat('a', 50_000) s from range(100_000)) to '__TEST_DIR__/cdc_constant.parquet'

# the written file has dictionary compression
# when we copy it over to another file we should still be able to avoid blowing it up
statement ok
copy (from '__TEST_DIR__/cdc_constant.parquet') to '__TEST_DIR__/cdc_dictionary.parquet'
11
external/duckdb/test/parquet/dbp_small_decimal.test
vendored
Normal file
@@ -0,0 +1,11 @@
# name: test/parquet/dbp_small_decimal.test
# description: Test parquet file with a small decimal column (1,0) in dbp encoding
# group: [parquet]

require parquet

query III
select * from 'data/parquet-testing/dbp_small_decimal.parquet' ;
----
1 10.0 diez
2 20.0 vente
19
external/duckdb/test/parquet/encrypted_parquet.test
vendored
Normal file
@@ -0,0 +1,19 @@
# name: test/parquet/encrypted_parquet.test
# description: Test Parquet reader on data/parquet-testing/encryption
# group: [parquet]

# TODO: re-enable these tests once we support the full Parquet Encryption spec
# for now, parquet crypto tests are in test/sql/copy/parquet/parquet_encryption.test_slow
mode skip

require parquet

statement error
SELECT * FROM parquet_scan('data/parquet-testing/encryption/encrypted_footer.parquet') limit 50;
----
Invalid Input Error: Encrypted Parquet files are not supported for file 'data/parquet-testing/encryption/encrypted_footer.parquet'

statement error
SELECT * FROM parquet_scan('data/parquet-testing/encryption/encrypted_column.parquet') limit 50;
----
Invalid Error: Failed to read Parquet file "data/parquet-testing/encryption/encrypted_column.parquet": Encrypted Parquet files are not supported
134
external/duckdb/test/parquet/generate_parquet_test.py
vendored
Normal file
@@ -0,0 +1,134 @@
import duckdb
import os
import sys

try:
    import pyarrow
    import pyarrow.parquet

    can_run = True
except:
    can_run = False


def generate_header(f):
    f.write(
        '''# name: test/parquet/test_parquet_reader.test
# description: Test Parquet Reader with files on data/parquet-testing
# group: [parquet]

require parquet

statement ok
PRAGMA enable_verification

'''
    )


def get_files():
    files_path = []
    path = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(path, '..', '..')
    os.chdir(path)
    path = os.path.join('data', 'parquet-testing')
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".parquet"):
                files_path.append(os.path.join(root, file))
    return files_path


def get_duckdb_answer(file_path):
    answer = []
    try:
        answer = duckdb.query("SELECT * FROM parquet_scan('" + file_path + "') limit 50").fetchall()
    except Exception as e:
        print(e)
        answer = 'fail'
    return answer


def get_arrow_answer(file_path):
    answer = []
    try:
        arrow = pyarrow.parquet.read_table(file_path)
        duck_rel = duckdb.from_arrow(arrow).limit(50)
        answer = duck_rel.fetchall()
        return answer
    except:
        return 'fail'


def check_result(duckdb_result, arrow_result):
    if arrow_result == 'fail':
        return 'skip'
    if duckdb_result == 'fail':
        return 'fail'
    if duckdb_result != arrow_result:
        return 'fail'
    return 'pass'


def sanitize_string(s):
    return str(s).replace('None', 'NULL').replace("b'", "").replace("'", "")


def result_to_string(arrow_result):
    result = ''
    for row_idx in range(len(arrow_result)):
        for col_idx in range(len(arrow_result[0])):
            value = arrow_result[row_idx][col_idx]
            if isinstance(value, dict):
                items = [f"'{k}': {sanitize_string(v)}" for k, v in value.items()]  # no quotes
                value = "{" + ", ".join(items) + "}"
                print(type(value), value)
            else:
                value = sanitize_string(value)
            result += value + "\t"
        result += "\n"
    result += "\n"
    return result


def generate_parquet_test_body(result, arrow_result, file_path):
    columns = 'I' * len(arrow_result[0])
    test_body = "query " + columns + "\n"
    test_body += "SELECT * FROM parquet_scan('" + file_path + "') limit 50 \n"
    test_body += "----\n"
    test_body += result_to_string(arrow_result)
    return test_body


def generate_test(file_path):
    duckdb_result = get_duckdb_answer(file_path)
    arrow_result = get_arrow_answer(file_path)
    result = check_result(duckdb_result, arrow_result)
    test_body = ""
    if result == 'skip':
        return
    if result == 'fail':
        test_body += "mode skip \n\n"
        test_body += generate_parquet_test_body(result, arrow_result, file_path)
        test_body += "mode unskip \n\n"
    else:
        test_body += generate_parquet_test_body(result, duckdb_result, file_path)
    return test_body


def generate_body(f):
    files_path = get_files()
    for file in files_path:
        print(file)
        test_body = generate_test(file)
        if test_body != None:
            f.write(test_body)


f = open("test_parquet_reader.test", "w")

generate_header(f)
generate_body(f)


f.close()
18
external/duckdb/test/parquet/invalid_parquet.test
vendored
Normal file
@@ -0,0 +1,18 @@
# name: test/parquet/invalid_parquet.test
# description: Test Parquet Reader on data/parquet-testing/invalid.parquet
# group: [parquet]

require parquet

statement error
SELECT * FROM parquet_scan('data/parquet-testing/invalid.parquet') limit 50;
----
Invalid Input Error: Invalid string encoding found in Parquet file: value "TREL\xC3" is not valid UTF8!

statement ok
pragma disable_optimizer

statement error
SELECT * FROM parquet_scan('data/parquet-testing/invalid.parquet') limit 50;
----
Invalid Input Error: Invalid string encoding found in Parquet file: value "TREL\xC3" is not valid UTF8!
33
external/duckdb/test/parquet/parquet_combine.test
vendored
Normal file
@@ -0,0 +1,33 @@
# name: test/parquet/parquet_combine.test
# description: Test Parquet Reader row group combining
# group: [parquet]

require parquet

require vector_size 2048

statement ok
set threads=2;

# before we combined data from threads into multiple row groups,
# this would create 4 row groups, now it should create 3
statement ok
copy (with cte as (from range(2049) union all from range(2049)) from cte) to '__TEST_DIR__/parquet_combine.parquet' (row_group_size 2048);

query I
select count(*) from parquet_metadata('__TEST_DIR__/parquet_combine.parquet')
----
3

# works not just with row_group_size, but also with row_group_size_bytes
statement ok
set preserve_insertion_order=false;

# used to create 4, now it should create 3
statement ok
copy (with cte as (from range(100_000) union all from range(100_000)) from cte) to '__TEST_DIR__/parquet_combine.parquet' (row_group_size_bytes 750_000);

query I
select count(*) from parquet_metadata('__TEST_DIR__/parquet_combine.parquet')
----
3
17
external/duckdb/test/parquet/parquet_fuzzer_issues.test
vendored
Normal file
@@ -0,0 +1,17 @@
# name: test/parquet/parquet_fuzzer_issues.test
# description: Test Parquet fuzzer issues
# group: [parquet]

require parquet

# internal issue 6129
statement error
from 'data/parquet-testing/broken/internal_6129.parquet'
----
invalid number of miniblocks per block

# internal issue 6165
statement error
from 'data/parquet-testing/broken/internal_6165.parquet';
----
row group does not have enough columns
26
external/duckdb/test/parquet/parquet_long_string_stats.test
vendored
Normal file
@@ -0,0 +1,26 @@
# name: test/parquet/parquet_long_string_stats.test
# description: Test internal issue #2289 - Performance of Parquet reader
# group: [parquet]

require httpfs

require parquet

# need to disable this otherwise we just cache everything
statement ok
set enable_external_file_cache=false;

statement ok
set parquet_metadata_cache=true;

# the constant comparison that is pushed down is longer than DuckDB's 8 bytes that are used in StringStatistics
# its prefix is equal to the max up to the last byte
# previously, we would read 5.4MB to figure out that we can prune the entire file
# now, we can prune it based on the metadata
query II
explain analyze
select count(*)
FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/event_baserunning_advance_attempt.parquet'
where game_id > 'WS2197109301';
----
analyzed_plan <REGEX>:.*GET: 1.*
441
external/duckdb/test/parquet/parquet_null_compressed_materialization.test_slow
vendored
Normal file
@@ -0,0 +1,441 @@
|
||||
# name: test/parquet/parquet_null_compressed_materialization.test_slow
|
||||
# description: Test if we can do compressed materialization for all-NULL Parquet columns in a join
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
SET preserve_insertion_order=false;
|
||||
|
||||
# create a huge Parquet file with mostly NULL in it
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT
|
||||
range pk,
|
||||
NULL::VARCHAR c0,
|
||||
NULL::VARCHAR c1,
|
||||
NULL::VARCHAR c2,
|
||||
NULL::VARCHAR c3,
|
||||
NULL::VARCHAR c4,
|
||||
NULL::VARCHAR c5,
|
||||
NULL::VARCHAR c6,
|
||||
NULL::VARCHAR c7,
|
||||
NULL::VARCHAR c8,
|
||||
NULL::VARCHAR c9,
|
||||
NULL::VARCHAR c10,
|
||||
NULL::VARCHAR c11,
|
||||
NULL::VARCHAR c12,
|
||||
NULL::VARCHAR c13,
|
||||
NULL::VARCHAR c14,
|
||||
NULL::VARCHAR c15,
|
||||
NULL::VARCHAR c16,
|
||||
NULL::VARCHAR c17,
|
||||
NULL::VARCHAR c18,
|
||||
NULL::VARCHAR c19,
|
||||
NULL::VARCHAR c20,
|
||||
NULL::VARCHAR c21,
|
||||
NULL::VARCHAR c22,
|
||||
NULL::VARCHAR c23,
|
||||
NULL::VARCHAR c24,
|
||||
NULL::VARCHAR c25,
|
||||
NULL::VARCHAR c26,
|
||||
NULL::VARCHAR c27,
|
||||
NULL::VARCHAR c28,
|
||||
NULL::VARCHAR c29,
|
||||
NULL::VARCHAR c30,
|
||||
NULL::VARCHAR c31,
|
||||
NULL::VARCHAR c32,
|
||||
NULL::VARCHAR c33,
|
||||
NULL::VARCHAR c34,
|
||||
NULL::VARCHAR c35,
|
||||
NULL::VARCHAR c36,
|
||||
NULL::VARCHAR c37,
|
||||
NULL::VARCHAR c38,
|
||||
NULL::VARCHAR c39,
|
||||
NULL::VARCHAR c40,
|
||||
NULL::VARCHAR c41,
|
||||
NULL::VARCHAR c42,
|
||||
NULL::VARCHAR c43,
|
||||
NULL::VARCHAR c44,
|
||||
NULL::VARCHAR c45,
|
||||
NULL::VARCHAR c46,
|
||||
NULL::VARCHAR c47,
|
||||
NULL::VARCHAR c48,
|
||||
NULL::VARCHAR c49,
|
||||
NULL::VARCHAR c50,
|
||||
NULL::VARCHAR c51,
|
||||
NULL::VARCHAR c52,
|
||||
NULL::VARCHAR c53,
|
||||
NULL::VARCHAR c54,
|
||||
NULL::VARCHAR c55,
|
||||
NULL::VARCHAR c56,
|
||||
NULL::VARCHAR c57,
|
||||
NULL::VARCHAR c58,
|
||||
NULL::VARCHAR c59,
|
||||
NULL::VARCHAR c60,
|
||||
NULL::VARCHAR c61,
|
||||
NULL::VARCHAR c62,
|
||||
NULL::VARCHAR c63,
|
||||
NULL::VARCHAR c64,
|
||||
NULL::VARCHAR c65,
|
||||
NULL::VARCHAR c66,
|
||||
NULL::VARCHAR c67,
|
||||
NULL::VARCHAR c68,
|
||||
NULL::VARCHAR c69,
|
||||
NULL::VARCHAR c70,
|
||||
NULL::VARCHAR c71,
|
||||
NULL::VARCHAR c72,
|
||||
NULL::VARCHAR c73,
|
||||
NULL::VARCHAR c74,
|
||||
NULL::VARCHAR c75,
|
||||
NULL::VARCHAR c76,
|
||||
NULL::VARCHAR c77,
|
||||
NULL::VARCHAR c78,
|
||||
NULL::VARCHAR c79,
|
||||
NULL::VARCHAR c80,
|
||||
NULL::VARCHAR c81,
|
||||
NULL::VARCHAR c82,
|
||||
NULL::VARCHAR c83,
|
||||
NULL::VARCHAR c84,
|
||||
NULL::VARCHAR c85,
|
||||
NULL::VARCHAR c86,
|
||||
NULL::VARCHAR c87,
|
||||
NULL::VARCHAR c88,
|
||||
NULL::VARCHAR c89,
|
||||
NULL::VARCHAR c90,
|
||||
NULL::VARCHAR c91,
|
||||
NULL::VARCHAR c92,
|
||||
NULL::VARCHAR c93,
|
||||
NULL::VARCHAR c94,
|
||||
NULL::VARCHAR c95,
|
||||
NULL::VARCHAR c96,
|
||||
NULL::VARCHAR c97,
|
||||
NULL::VARCHAR c98,
|
||||
NULL::VARCHAR c99,
|
||||
NULL::VARCHAR c100,
|
||||
NULL::VARCHAR c101,
|
||||
NULL::VARCHAR c102,
|
||||
NULL::VARCHAR c103,
|
||||
NULL::VARCHAR c104,
|
||||
NULL::VARCHAR c105,
|
||||
NULL::VARCHAR c106,
|
||||
NULL::VARCHAR c107,
|
||||
NULL::VARCHAR c108,
|
||||
NULL::VARCHAR c109,
|
||||
NULL::VARCHAR c110,
|
||||
NULL::VARCHAR c111,
|
||||
NULL::VARCHAR c112,
|
||||
NULL::VARCHAR c113,
|
||||
NULL::VARCHAR c114,
|
||||
NULL::VARCHAR c115,
|
||||
NULL::VARCHAR c116,
|
||||
NULL::VARCHAR c117,
|
||||
NULL::VARCHAR c118,
|
||||
NULL::VARCHAR c119,
|
||||
NULL::VARCHAR c120,
|
||||
NULL::VARCHAR c121,
|
||||
NULL::VARCHAR c122,
|
||||
NULL::VARCHAR c123,
|
||||
NULL::VARCHAR c124,
|
||||
NULL::VARCHAR c125,
|
||||
NULL::VARCHAR c126,
|
||||
NULL::VARCHAR c127,
|
||||
NULL::VARCHAR c128,
|
||||
NULL::VARCHAR c129,
|
||||
NULL::VARCHAR c130,
|
||||
NULL::VARCHAR c131,
|
||||
NULL::VARCHAR c132,
|
||||
NULL::VARCHAR c133,
|
||||
NULL::VARCHAR c134,
|
||||
NULL::VARCHAR c135,
|
||||
NULL::VARCHAR c136,
|
||||
NULL::VARCHAR c137,
|
||||
NULL::VARCHAR c138,
|
||||
NULL::VARCHAR c139,
|
||||
NULL::VARCHAR c140,
|
||||
NULL::VARCHAR c141,
|
||||
NULL::VARCHAR c142,
|
||||
NULL::VARCHAR c143,
|
||||
NULL::VARCHAR c144,
|
||||
NULL::VARCHAR c145,
|
||||
NULL::VARCHAR c146,
|
||||
NULL::VARCHAR c147,
|
||||
NULL::VARCHAR c148,
|
||||
NULL::VARCHAR c149,
|
||||
NULL::VARCHAR c150,
|
||||
NULL::VARCHAR c151,
|
||||
NULL::VARCHAR c152,
|
||||
NULL::VARCHAR c153,
|
||||
NULL::VARCHAR c154,
|
||||
NULL::VARCHAR c155,
|
||||
NULL::VARCHAR c156,
|
||||
NULL::VARCHAR c157,
|
||||
NULL::VARCHAR c158,
|
||||
NULL::VARCHAR c159,
|
||||
NULL::VARCHAR c160,
|
||||
NULL::VARCHAR c161,
|
||||
NULL::VARCHAR c162,
|
||||
NULL::VARCHAR c163,
|
||||
NULL::VARCHAR c164,
|
||||
NULL::VARCHAR c165,
|
||||
NULL::VARCHAR c166,
|
||||
NULL::VARCHAR c167,
|
||||
NULL::VARCHAR c168,
|
||||
NULL::VARCHAR c169,
|
||||
NULL::VARCHAR c170,
|
||||
NULL::VARCHAR c171,
|
||||
NULL::VARCHAR c172,
|
||||
NULL::VARCHAR c173,
|
||||
NULL::VARCHAR c174,
|
||||
NULL::VARCHAR c175,
|
||||
NULL::VARCHAR c176,
|
||||
NULL::VARCHAR c177,
|
||||
NULL::VARCHAR c178,
|
||||
NULL::VARCHAR c179,
|
||||
NULL::VARCHAR c180,
|
||||
NULL::VARCHAR c181,
|
||||
NULL::VARCHAR c182,
|
||||
NULL::VARCHAR c183,
|
||||
NULL::VARCHAR c184,
|
||||
NULL::VARCHAR c185,
|
||||
NULL::VARCHAR c186,
|
||||
NULL::VARCHAR c187,
|
||||
NULL::VARCHAR c188,
|
||||
NULL::VARCHAR c189,
|
||||
NULL::VARCHAR c190,
|
||||
NULL::VARCHAR c191,
|
||||
NULL::VARCHAR c192,
|
||||
NULL::VARCHAR c193,
|
||||
NULL::VARCHAR c194,
|
||||
NULL::VARCHAR c195,
|
||||
NULL::VARCHAR c196,
|
||||
NULL::VARCHAR c197,
|
||||
NULL::VARCHAR c198,
|
||||
NULL::VARCHAR c199,
|
||||
NULL::VARCHAR c200,
|
||||
NULL::VARCHAR c201,
|
||||
NULL::VARCHAR c202,
|
||||
NULL::VARCHAR c203,
|
||||
NULL::VARCHAR c204,
|
||||
NULL::VARCHAR c205,
|
||||
NULL::VARCHAR c206,
|
||||
NULL::VARCHAR c207,
|
||||
NULL::VARCHAR c208,
|
||||
NULL::VARCHAR c209,
|
||||
NULL::VARCHAR c210,
|
||||
NULL::VARCHAR c211,
|
||||
NULL::VARCHAR c212,
|
||||
NULL::VARCHAR c213,
|
||||
NULL::VARCHAR c214,
|
||||
NULL::VARCHAR c215,
|
||||
NULL::VARCHAR c216,
|
||||
NULL::VARCHAR c217,
|
||||
NULL::VARCHAR c218,
|
||||
NULL::VARCHAR c219,
|
||||
NULL::VARCHAR c220,
|
||||
NULL::VARCHAR c221,
|
||||
NULL::VARCHAR c222,
|
||||
NULL::VARCHAR c223,
|
||||
NULL::VARCHAR c224,
|
||||
NULL::VARCHAR c225,
|
||||
NULL::VARCHAR c226,
|
||||
NULL::VARCHAR c227,
|
||||
NULL::VARCHAR c228,
|
||||
NULL::VARCHAR c229,
|
||||
NULL::VARCHAR c230,
|
||||
NULL::VARCHAR c231,
|
||||
NULL::VARCHAR c232,
|
||||
NULL::VARCHAR c233,
|
||||
NULL::VARCHAR c234,
|
||||
NULL::VARCHAR c235,
|
||||
NULL::VARCHAR c236,
|
||||
NULL::VARCHAR c237,
|
||||
NULL::VARCHAR c238,
|
||||
NULL::VARCHAR c239,
|
||||
NULL::VARCHAR c240,
|
||||
NULL::VARCHAR c241,
|
||||
NULL::VARCHAR c242,
|
||||
NULL::VARCHAR c243,
|
||||
NULL::VARCHAR c244,
|
||||
NULL::VARCHAR c245,
|
||||
NULL::VARCHAR c246,
|
||||
NULL::VARCHAR c247,
|
||||
NULL::VARCHAR c248,
|
||||
NULL::VARCHAR c249,
|
||||
NULL::VARCHAR c250,
|
||||
NULL::VARCHAR c251,
|
||||
NULL::VARCHAR c252,
|
||||
NULL::VARCHAR c253,
|
||||
NULL::VARCHAR c254,
|
||||
NULL::VARCHAR c255,
|
||||
NULL::VARCHAR c256,
|
||||
NULL::VARCHAR c257,
|
||||
NULL::VARCHAR c258,
|
||||
NULL::VARCHAR c259,
|
||||
NULL::VARCHAR c260,
|
||||
NULL::VARCHAR c261,
|
||||
NULL::VARCHAR c262,
|
||||
NULL::VARCHAR c263,
|
||||
NULL::VARCHAR c264,
|
||||
NULL::VARCHAR c265,
|
||||
NULL::VARCHAR c266,
|
||||
NULL::VARCHAR c267,
|
||||
NULL::VARCHAR c268,
|
||||
NULL::VARCHAR c269,
|
||||
NULL::VARCHAR c270,
|
||||
NULL::VARCHAR c271,
|
||||
NULL::VARCHAR c272,
|
||||
NULL::VARCHAR c273,
|
||||
NULL::VARCHAR c274,
|
||||
NULL::VARCHAR c275,
|
||||
NULL::VARCHAR c276,
|
||||
NULL::VARCHAR c277,
|
||||
NULL::VARCHAR c278,
|
||||
NULL::VARCHAR c279,
|
||||
NULL::VARCHAR c280,
|
||||
NULL::VARCHAR c281,
|
||||
NULL::VARCHAR c282,
|
||||
NULL::VARCHAR c283,
|
||||
NULL::VARCHAR c284,
|
||||
NULL::VARCHAR c285,
|
||||
NULL::VARCHAR c286,
|
||||
NULL::VARCHAR c287,
|
||||
NULL::VARCHAR c288,
|
||||
NULL::VARCHAR c289,
|
||||
NULL::VARCHAR c290,
|
||||
NULL::VARCHAR c291,
|
||||
NULL::VARCHAR c292,
|
||||
NULL::VARCHAR c293,
|
||||
NULL::VARCHAR c294,
|
||||
NULL::VARCHAR c295,
|
||||
NULL::VARCHAR c296,
|
||||
NULL::VARCHAR c297,
|
||||
NULL::VARCHAR c298,
|
||||
NULL::VARCHAR c299,
|
||||
NULL::VARCHAR c300,
|
||||
NULL::VARCHAR c301,
|
||||
NULL::VARCHAR c302,
|
||||
NULL::VARCHAR c303,
|
||||
NULL::VARCHAR c304,
|
||||
NULL::VARCHAR c305,
|
||||
NULL::VARCHAR c306,
|
||||
NULL::VARCHAR c307,
|
||||
NULL::VARCHAR c308,
|
||||
NULL::VARCHAR c309,
|
||||
NULL::VARCHAR c310,
|
||||
NULL::VARCHAR c311,
|
||||
NULL::VARCHAR c312,
|
||||
NULL::VARCHAR c313,
|
||||
NULL::VARCHAR c314,
|
||||
NULL::VARCHAR c315,
|
||||
NULL::VARCHAR c316,
|
||||
NULL::VARCHAR c317,
|
||||
NULL::VARCHAR c318,
|
||||
NULL::VARCHAR c319,
|
||||
NULL::VARCHAR c320,
|
||||
NULL::VARCHAR c321,
|
||||
NULL::VARCHAR c322,
|
||||
NULL::VARCHAR c323,
|
||||
NULL::VARCHAR c324,
|
||||
NULL::VARCHAR c325,
|
||||
NULL::VARCHAR c326,
|
||||
NULL::VARCHAR c327,
|
||||
NULL::VARCHAR c328,
|
||||
NULL::VARCHAR c329,
|
||||
NULL::VARCHAR c330,
|
||||
NULL::VARCHAR c331,
|
||||
NULL::VARCHAR c332,
|
||||
NULL::VARCHAR c333,
|
||||
NULL::VARCHAR c334,
|
||||
NULL::VARCHAR c335,
|
||||
NULL::VARCHAR c336,
|
||||
NULL::VARCHAR c337,
|
||||
NULL::VARCHAR c338,
|
||||
NULL::VARCHAR c339,
|
||||
NULL::VARCHAR c340,
|
||||
NULL::VARCHAR c341,
|
||||
NULL::VARCHAR c342,
|
||||
NULL::VARCHAR c343,
|
||||
NULL::VARCHAR c344,
|
||||
NULL::VARCHAR c345,
|
||||
NULL::VARCHAR c346,
|
||||
NULL::VARCHAR c347,
|
||||
NULL::VARCHAR c348,
|
||||
NULL::VARCHAR c349,
|
||||
NULL::VARCHAR c350,
|
||||
NULL::VARCHAR c351,
|
||||
NULL::VARCHAR c352,
|
||||
NULL::VARCHAR c353,
|
||||
NULL::VARCHAR c354,
|
||||
NULL::VARCHAR c355,
|
||||
NULL::VARCHAR c356,
|
||||
NULL::VARCHAR c357,
|
||||
NULL::VARCHAR c358,
|
||||
NULL::VARCHAR c359,
|
||||
NULL::VARCHAR c360,
|
||||
NULL::VARCHAR c361,
|
||||
NULL::VARCHAR c362,
|
||||
NULL::VARCHAR c363,
|
||||
NULL::VARCHAR c364,
|
||||
NULL::VARCHAR c365,
|
||||
NULL::VARCHAR c366,
|
||||
NULL::VARCHAR c367,
|
||||
NULL::VARCHAR c368,
|
||||
NULL::VARCHAR c369,
|
||||
NULL::VARCHAR c370,
|
||||
NULL::VARCHAR c371,
|
||||
NULL::VARCHAR c372,
|
||||
NULL::VARCHAR c373,
|
||||
NULL::VARCHAR c374,
|
||||
NULL::VARCHAR c375,
|
||||
NULL::VARCHAR c376,
|
||||
NULL::VARCHAR c377,
|
||||
NULL::VARCHAR c378,
|
||||
NULL::VARCHAR c379,
|
||||
NULL::VARCHAR c380,
|
||||
NULL::VARCHAR c381,
|
||||
NULL::VARCHAR c382,
|
||||
NULL::VARCHAR c383,
|
||||
NULL::VARCHAR c384,
|
||||
NULL::VARCHAR c385,
|
||||
NULL::VARCHAR c386,
|
||||
NULL::VARCHAR c387,
|
||||
NULL::VARCHAR c388,
|
||||
NULL::VARCHAR c389,
|
||||
NULL::VARCHAR c390,
|
||||
NULL::VARCHAR c391,
|
||||
NULL::VARCHAR c392,
|
||||
NULL::VARCHAR c393,
|
||||
NULL::VARCHAR c394,
|
||||
NULL::VARCHAR c395,
|
||||
NULL::VARCHAR c396,
|
||||
NULL::VARCHAR c397,
|
||||
NULL::VARCHAR c398,
|
||||
NULL::VARCHAR c399,
|
||||
FROM
|
||||
range(600_000)
|
||||
) TO '__TEST_DIR__/many_nulls.parquet' (ROW_GROUP_SIZE_BYTES '64mb');
|
||||
|
||||
# set a low memory env
|
||||
statement ok
|
||||
SET threads=4;
|
||||
|
||||
# we are creating a build side of approximately:
|
||||
# uncompressed: 1_200_000 * 400 * 16 = ~8 GB
|
||||
# compressed: 1_200_000 * 400 * 1 = ~0.5 GB
|
||||
# this memory limit tests if the build side compresses well
|
||||
statement ok
|
||||
SET memory_limit='1.5GB';
|
||||
|
||||
statement ok
|
||||
SET temp_directory=NULL
|
||||
|
||||
# join, this should take many GBs of memory, but all the NULLs get compressed to UTINYINT so it should fit
|
||||
# we should see at least 10 compresses in the output (should be 400 but gets truncated)
|
||||
query II
|
||||
EXPLAIN ANALYZE SELECT ANY_VALUE(COLUMNS(*))
|
||||
FROM read_parquet(['__TEST_DIR__/many_nulls.parquet' for _ in range(2)], union_by_name=true) build
|
||||
JOIN read_parquet(['__TEST_DIR__/many_nulls.parquet' for _ in range(3)]) probe
|
||||
USING (pk)
|
||||
----
|
||||
analyzed_plan <REGEX>:(.*internal_compress_string.*){10,}
|
||||
45
external/duckdb/test/parquet/parquet_stats_function.test
vendored
Normal file
@@ -0,0 +1,45 @@
# name: test/parquet/parquet_stats_function.test
# description: Test stats(col) function on Parquet files
# group: [parquet]

require parquet

# we can derive whether all values in a parquet column are NULL
statement ok
copy (select null i) to '__TEST_DIR__/all_null.parquet'

# "Has No Null" is "false", meaning there are no non-NULL values
query I
select stats(i) from '__TEST_DIR__/all_null.parquet'
----
[Min: NULL, Max: NULL][Has Null: true, Has No Null: false]

# create 0-9 with no NULL
statement ok
copy (select range i from range(10)) to '__TEST_DIR__/parquet_stats_function1.parquet'

query I
select stats(i) from read_parquet('__TEST_DIR__/parquet_stats_function1.parquet', union_by_name=true) limit 1
----
[Min: 0, Max: 9][Has Null: false, Has No Null: true]

# create 100-109 with NULL
statement ok
copy (select range i from range(100, 110) union all select null i) to '__TEST_DIR__/parquet_stats_function2.parquet'

query I
select stats(i) from read_parquet('__TEST_DIR__/parquet_stats_function2.parquet', union_by_name=true) limit 1
----
[Min: 100, Max: 109][Has Null: true, Has No Null: true]

# query combined WITHOUT union_by_name (should give back no stats)
query I
select stats(i) from read_parquet('__TEST_DIR__/parquet_stats_function*.parquet', union_by_name=false) limit 1
----
[Min: NULL, Max: NULL][Has Null: true, Has No Null: true]

# now query combined WITH union_by_name (should give back stats)
query I
select stats(i) from read_parquet('__TEST_DIR__/parquet_stats_function*.parquet', union_by_name=true) limit 1
----
[Min: 0, Max: 109][Has Null: true, Has No Null: true]
42
external/duckdb/test/parquet/parquet_version.test
vendored
Normal file
@@ -0,0 +1,42 @@
# name: test/parquet/parquet_version.test
# description: Test Parquet writer parquet_version parameter
# group: [parquet]

require parquet

statement error
copy (select range i from range(20000)) to '__TEST_DIR__/parquet_version.parquet' (parquet_version)
----
Invalid Input Error

statement error
copy (select range i from range(20000)) to '__TEST_DIR__/parquet_version.parquet' (parquet_version v3)
----
Binder Error

# defaults to V1
statement ok
copy (select range i from range(20000)) to '__TEST_DIR__/parquet_version.parquet'

query I
select encodings from parquet_metadata('__TEST_DIR__/parquet_version.parquet')
----
PLAIN

# we do PLAIN if we can't do dictionary for V1
statement ok
copy (select range i from range(20000)) to '__TEST_DIR__/parquet_version.parquet' (parquet_version v1)

query I
select encodings from parquet_metadata('__TEST_DIR__/parquet_version.parquet')
----
PLAIN

# we do DELTA_BINARY_PACKED for V2
statement ok
copy (select range i from range(20000)) to '__TEST_DIR__/parquet_version.parquet' (parquet_version v2)

query I
select encodings from parquet_metadata('__TEST_DIR__/parquet_version.parquet')
----
DELTA_BINARY_PACKED
33
external/duckdb/test/parquet/prefetching.test
vendored
Normal file
@@ -0,0 +1,33 @@
# name: test/parquet/prefetching.test
# description: Test parquet files using the prefetching mechanism
# group: [parquet]

require parquet

# Normally, local files do not use the prefetching mechanism, however this debugging option will force the mechanism
statement ok
set prefetch_all_parquet_files=true;

# With default settings, this query will fail: the incorrectly set index page offsets mess with duckdb's prefetching mechanism
statement error
FROM 'data/parquet-testing/incorrect_index_page_offsets.parquet'
----
IO Error: The parquet file 'data/parquet-testing/incorrect_index_page_offsets.parquet' seems to have incorrectly set page offsets. This interferes with DuckDB's prefetching optimization. DuckDB may still be able to scan this file by manually disabling the prefetching mechanism using: 'SET disable_parquet_prefetching=true'.

# Now we disable prefetching
statement ok
set disable_parquet_prefetching=true;

query IIIIIIIIIII
FROM 'data/parquet-testing/incorrect_index_page_offsets.parquet'
----
0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 0
0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 1
0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 2
0.29 Premium I VS2 62.4 58.0 334 4.2 4.23 2.63 3
0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 4
0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48 5
0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47 6
0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53 7
0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49 8
0.23 Very Good H VS1 59.4 61.0 338 4.0 4.05 2.39 9
34
external/duckdb/test/parquet/test_18470.test
vendored
Normal file
@@ -0,0 +1,34 @@
# name: test/parquet/test_18470.test
# description: Test issue #18470 - Internal Error when querying multiple Parquet files that have mixed data type for same column
# group: [parquet]

require parquet

require notwindows

set seed 0.42

statement ok
COPY (
SELECT
(random() * 65535)::UINT16 as column1,
'text_value_' || row_number() OVER () as column2
FROM generate_series(1, 100)
) TO '__TEST_DIR__/20250101.parquet' (FORMAT PARQUET);

statement ok
COPY (
SELECT
(random() * 65535)::UINT64 as column1,
'text_value_' || row_number() OVER () as column2
FROM generate_series(1, 100)
) TO '__TEST_DIR__/20250102.parquet' (FORMAT PARQUET);

query III
SELECT filename = '__TEST_DIR__/20250101.parquet', *
FROM read_parquet('__TEST_DIR__/2025010*.parquet')
WHERE filename >= '__TEST_DIR__/20250101.parquet'
AND column1 = 72
LIMIT 10;
----
true 72 text_value_66
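Note on the setup above: column1 is written as UINT16 in the first file and UINT64 in the second, which is exactly the mixed-type situation from issue #18470. A quick way to see which type the multi-file reader binds the combined column to is a DESCRIBE over the same glob (an exploratory sketch, not part of the regression test):

-- Sketch: show the schema the multi-file reader settles on for the mixed-type glob above.
DESCRIBE SELECT * FROM read_parquet('__TEST_DIR__/2025010*.parquet');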
62
external/duckdb/test/parquet/test_filename_column.test
vendored
Normal file
@@ -0,0 +1,62 @@
# name: test/parquet/test_filename_column.test
# description: Test MultiFileReader filename column rename
# group: [parquet]

require parquet

# anything non-VARCHAR will be cast to boolean, and interpreted as such
query I
SELECT pq.filename FROM read_parquet('data/parquet-testing/enum.parquet', filename=true) pq LIMIT 1
----
data/parquet-testing/enum.parquet

query I
SELECT pq.filename FROM read_parquet('data/parquet-testing/enum.parquet', filename=1) pq LIMIT 1
----
data/parquet-testing/enum.parquet

# the string TRUE can be a column name
query I
SELECT "TRUE" FROM read_parquet('data/parquet-testing/enum.parquet', filename='TRUE') pq LIMIT 1
----
data/parquet-testing/enum.parquet

# FALSE too
query I
SELECT "FALSE" FROM read_parquet('data/parquet-testing/enum.parquet', filename='FALSE') pq LIMIT 1
----
data/parquet-testing/enum.parquet

# this is the output without an additional filename column
query IIIIIII nosort q0
SELECT * FROM read_parquet('data/parquet-testing/enum.parquet')
----

# this shouldn't somehow add a column with the name false/0/FALSE
query IIIIIII nosort q0
SELECT * FROM read_parquet('data/parquet-testing/enum.parquet', filename=false)
----


query IIIIIII nosort q0
SELECT * FROM read_parquet('data/parquet-testing/enum.parquet', filename=0)
----


# cool names work too
query I
SELECT my_cool_filename FROM read_parquet('data/parquet-testing/enum.parquet', filename='my_cool_filename') LIMIT 1
----
data/parquet-testing/enum.parquet

query I
SELECT my_cool_filename FROM read_parquet('data/parquet-testing/enum.parquet', filename=my_cool_filename) LIMIT 1
----
data/parquet-testing/enum.parquet

query III
select file_name[22:], row_group_id, bloom_filter_excludes from parquet_bloom_probe('data/parquet-testing/multi_bloom_*.parquet', 'a', 1)
----
multi_bloom_a.parquet 0 false
multi_bloom_b.parquet 0 true
multi_bloom_c.parquet 0 true
11
external/duckdb/test/parquet/test_internal_5021.test
vendored
Normal file
@@ -0,0 +1,11 @@
# name: test/parquet/test_internal_5021.test
# description: Internal issue 5021: Assertion failure in DbpEncoder when writing Parquet V2
# group: [parquet]

require parquet

statement ok
CREATE TABLE tbl AS SELECT 'hello world' || i str FROM range(11) t(i);

statement ok
COPY tbl TO '__TEST_DIR__/file.parquet' (PARQUET_VERSION 'V2', DICTIONARY_SIZE_LIMIT 1);
9
external/duckdb/test/parquet/test_legacy_empty_pandas_parquet.test
vendored
Normal file
@@ -0,0 +1,9 @@
# name: test/parquet/test_legacy_empty_pandas_parquet.test
# group: [parquet]

require parquet

# This file includes the unsupported NULL (24) ConvertedType
# Which is not supported by the spec, but written by some ancient versions of Pandas (pre-2020)
statement ok
select * from 'data/parquet-testing/empty.parquet'
1321
external/duckdb/test/parquet/test_parquet_reader.test_slow
vendored
Normal file
File diff suppressed because it is too large
82
external/duckdb/test/parquet/test_parquet_reader_compression.test
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
# name: test/parquet/test_parquet_reader_compression.test
|
||||
# description: Test Parquet Reader with files on data/parquet-testing/compression
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
foreach codec NONE SNAPPY GZIP ZSTD LZ4 BROTLI
|
||||
|
||||
query IIII
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/compression/generated/data_page=1_${codec}.parquet', hive_partitioning=0) limit 50
|
||||
----
|
||||
0 20 {'string': foo, 'int': 22} []
|
||||
1 6 {'string': baz, 'int': 10} NULL
|
||||
2 23 {'string': bar, 'int': NULL} NULL
|
||||
3 9 {'string': baz, 'int': 12} [25, 7, 5, 22, 24, 18, 30, 7, 19, 7, 17, 11, 30, 40, 30]
|
||||
4 6 {'string': foo, 'int': 41} NULL
|
||||
5 23 NULL [5, 22, 17, 7, 9, 37, 28, 37, 26, 30, 38, 40, 2]
|
||||
6 19 {'string': foo, 'int': NULL} [NULL, 25, 21]
|
||||
7 20 {'string': baz, 'int': 10} [35, 32, 11, 26, 27, 4, 1, 13, 31, 2, 32, 38, 16, 0, 29, 23, 32, 7, 17]
|
||||
8 29 {'string': baz, 'int': 35} NULL
|
||||
9 11 NULL [14, 0, NULL, 29, 23, 14, 13, 13, 15, 26, 29, 32, 5, 13, 32, 29, 38]
|
||||
10 25 {'string': baz, 'int': 23} [5, 20, 9, 18, 32, 6, 21, 18, 1, 32, 34, 17, 3, 26, NULL, 1, 16, 9, 41]
|
||||
11 9 NULL []
|
||||
12 17 {'string': bar, 'int': 25} [8, 37, NULL, 34, 1, 5, 9, 40, 1, 28, 27, 14, 28, 0, 14, 33, 1, 26, 18]
|
||||
13 17 {'string': foo, 'int': 20} [38, 7, 40, 18, 26]
|
||||
14 6 NULL [16, 31, 9, 30, 36, 24, 29, 20, 20, 20, 17, 37, 4, 41, 25, 12, 21, 24]
|
||||
15 5 {'string': bar, 'int': NULL} [38, 35, 41, 4, 34, NULL, 37, 12, 21, 31, 16, 13, 20, 36, 22, 19, 35]
|
||||
16 6 {'string': bar, 'int': 25} [3]
|
||||
17 20 {'string': bar, 'int': 35} [6, 11, 25, 14, 38, 19, 9, 21, 12, 41, 36, 31]
|
||||
18 18 {'string': NULL, 'int': 19} [28]
|
||||
19 28 NULL [0, 41, 26, 27, 23, 40]
|
||||
20 21 {'string': bar, 'int': 3} [15, 35, 40, 29, 37, 8, 4, 9, 6, 37, 16, 14, 32, 29, NULL, 18, 1]
|
||||
21 7 {'string': NULL, 'int': 36} [19]
|
||||
22 27 NULL [3, 0, 15, 35, 6, 13, 24, 14, 7, 3, 32]
|
||||
23 28 {'string': NULL, 'int': NULL} [26, 17, 33, 17, 21, 34, 20, 25, 33, 21, 4, 1, 23, 9, 32]
|
||||
24 21 {'string': foo, 'int': 12} [19, 15, 36, 37, 1, 19, 21, 4, 40, NULL, NULL, 19, 4]
|
||||
25 20 {'string': foo, 'int': NULL} NULL
|
||||
26 3 {'string': NULL, 'int': 15} [32, 31, 3, 26, 34, 1, 6, 29, 5, 22, 11, 1, 18]
|
||||
27 2 {'string': foo, 'int': 25} [19]
|
||||
28 7 {'string': foo, 'int': 34} [20, 1, 18, 20, 1, 3, 25, 2, 31, 22, NULL, 40, 23, 32, 40, 10]
|
||||
29 13 {'string': bar, 'int': 8} [40, 32, 9, 2, 2, 40, 7, 0, 32, 31, 11, 14, 4, 14, 40, 20, 29, 17, 41]
|
||||
|
||||
query IIII
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/compression/generated/data_page=2_${codec}.parquet', hive_partitioning=0) limit 50
|
||||
----
|
||||
0 20 {'string': foo, 'int': 22} []
|
||||
1 6 {'string': baz, 'int': 10} NULL
|
||||
2 23 {'string': bar, 'int': NULL} NULL
|
||||
3 9 {'string': baz, 'int': 12} [25, 7, 5, 22, 24, 18, 30, 7, 19, 7, 17, 11, 30, 40, 30]
|
||||
4 6 {'string': foo, 'int': 41} NULL
|
||||
5 23 NULL [5, 22, 17, 7, 9, 37, 28, 37, 26, 30, 38, 40, 2]
|
||||
6 19 {'string': foo, 'int': NULL} [NULL, 25, 21]
|
||||
7 20 {'string': baz, 'int': 10} [35, 32, 11, 26, 27, 4, 1, 13, 31, 2, 32, 38, 16, 0, 29, 23, 32, 7, 17]
|
||||
8 29 {'string': baz, 'int': 35} NULL
|
||||
9 11 NULL [14, 0, NULL, 29, 23, 14, 13, 13, 15, 26, 29, 32, 5, 13, 32, 29, 38]
|
||||
10 25 {'string': baz, 'int': 23} [5, 20, 9, 18, 32, 6, 21, 18, 1, 32, 34, 17, 3, 26, NULL, 1, 16, 9, 41]
|
||||
11 9 NULL []
|
||||
12 17 {'string': bar, 'int': 25} [8, 37, NULL, 34, 1, 5, 9, 40, 1, 28, 27, 14, 28, 0, 14, 33, 1, 26, 18]
|
||||
13 17 {'string': foo, 'int': 20} [38, 7, 40, 18, 26]
|
||||
14 6 NULL [16, 31, 9, 30, 36, 24, 29, 20, 20, 20, 17, 37, 4, 41, 25, 12, 21, 24]
|
||||
15 5 {'string': bar, 'int': NULL} [38, 35, 41, 4, 34, NULL, 37, 12, 21, 31, 16, 13, 20, 36, 22, 19, 35]
|
||||
16 6 {'string': bar, 'int': 25} [3]
|
||||
17 20 {'string': bar, 'int': 35} [6, 11, 25, 14, 38, 19, 9, 21, 12, 41, 36, 31]
|
||||
18 18 {'string': NULL, 'int': 19} [28]
|
||||
19 28 NULL [0, 41, 26, 27, 23, 40]
|
||||
20 21 {'string': bar, 'int': 3} [15, 35, 40, 29, 37, 8, 4, 9, 6, 37, 16, 14, 32, 29, NULL, 18, 1]
|
||||
21 7 {'string': NULL, 'int': 36} [19]
|
||||
22 27 NULL [3, 0, 15, 35, 6, 13, 24, 14, 7, 3, 32]
|
||||
23 28 {'string': NULL, 'int': NULL} [26, 17, 33, 17, 21, 34, 20, 25, 33, 21, 4, 1, 23, 9, 32]
|
||||
24 21 {'string': foo, 'int': 12} [19, 15, 36, 37, 1, 19, 21, 4, 40, NULL, NULL, 19, 4]
|
||||
25 20 {'string': foo, 'int': NULL} NULL
|
||||
26 3 {'string': NULL, 'int': 15} [32, 31, 3, 26, 34, 1, 6, 29, 5, 22, 11, 1, 18]
|
||||
27 2 {'string': foo, 'int': 25} [19]
|
||||
28 7 {'string': foo, 'int': 34} [20, 1, 18, 20, 1, 3, 25, 2, 31, 22, NULL, 40, 23, 32, 40, 10]
|
||||
29 13 {'string': bar, 'int': 8} [40, 32, 9, 2, 2, 40, 7, 0, 32, 31, 11, 14, 4, 14, 40, 20, 29, 17, 41]
|
||||
|
||||
query I
|
||||
SELECT * FROM parquet_scan('data/parquet-testing/compression/empty_datapage_v2.snappy.parquet', hive_partitioning=0) limit 50
|
||||
----
|
||||
NULL
|
||||
|
||||
endloop
|
||||
357
external/duckdb/test/parquet/test_parquet_schema.test
vendored
Normal file
@@ -0,0 +1,357 @@
|
||||
# name: test/parquet/test_parquet_schema.test
|
||||
# description: Parquet reader schema parameter tests
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
COPY (SELECT 42::INTEGER i) TO '__TEST_DIR__/integers.parquet' (FIELD_IDS {i: 0})
|
||||
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map{})
|
||||
----
|
||||
Invalid Input Error: 'schema' expects a STRUCT as the value type of the map
|
||||
|
||||
# can't combine with union_by_name
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
}, union_by_name=true)
|
||||
----
|
||||
Binder Error: Parquet schema cannot be combined with union_by_name=true or hive_partitioning=true
|
||||
|
||||
# can't combine with hive_partitioning
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/*.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
}, hive_partitioning=true)
|
||||
----
|
||||
Binder Error: Parquet schema cannot be combined with union_by_name=true or hive_partitioning=true
|
||||
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT 1 i1, 3 i3, 4 i4, 5 i5 UNION ALL
|
||||
SELECT 2 i1, 3 i3, 4 i4, 5 i5
|
||||
) TO '__TEST_DIR__/partitioned' (FIELD_IDS {i1: 5, i3: 3, i4: 2, i5: 1}, PARTITION_BY i1, FORMAT parquet, WRITE_PARTITION_COLUMNS)
|
||||
|
||||
# auto-detection of hive partitioning is enabled by default,
|
||||
# but automatically disabled when a schema is supplied, so this should succeed
|
||||
query IIII
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/partitioned/*/*.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
})
|
||||
----
|
||||
5 3 2 1
|
||||
5 3 2 2
|
||||
|
||||
# when partition columns are specified in FIELD_IDS, error message should suggest WRITE_PARTITION_COLUMNS option
|
||||
statement error
|
||||
COPY (
|
||||
SELECT 1 i1, 3 i3, 4 i4, 5 i5 UNION ALL
|
||||
SELECT 2 i1, 3 i3, 4 i4, 5 i5
|
||||
) TO '__TEST_DIR__/partitioned2' (FIELD_IDS {i1: 5, i3: 3, i4: 2, i5: 1}, PARTITION_BY i1, FORMAT parquet)
|
||||
----
|
||||
Binder Error: Column name "i1" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this column is a partition column. Available column names:
|
||||
|
||||
# cannot duplicate field_ids
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
0: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
})
|
||||
----
|
||||
Map keys must be unique
|
||||
|
||||
# cannot duplicate column names
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'cool_column', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'cool_column', type: 'UTINYINT', default_value: 43}
|
||||
}) pq
|
||||
----
|
||||
Binder Error: table "pq" has duplicate column name "cool_column"
|
||||
|
||||
# the supplied default value must be castable to the given type for that column
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'cool_column', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'cool_column', type: 'UTINYINT', default_value: 'bla'}
|
||||
}) pq
|
||||
----
|
||||
Binder Error: Unable to cast Parquet schema default_value "bla" to UTINYINT
|
||||
|
||||
query IIIIII
|
||||
DESCRIBE SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
})
|
||||
----
|
||||
renamed_i BIGINT YES NULL NULL NULL
|
||||
new_column UTINYINT YES NULL NULL NULL
|
||||
|
||||
query IIIIII
|
||||
DESCRIBE SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
}, filename=true)
|
||||
----
|
||||
renamed_i BIGINT YES NULL NULL NULL
|
||||
new_column UTINYINT YES NULL NULL NULL
|
||||
filename VARCHAR YES NULL NULL NULL
|
||||
|
||||
# we'll test if filename works on a persistent file otherwise __TEST_DIR__ will be different every time
|
||||
query II
|
||||
SELECT *
|
||||
FROM read_parquet('data/parquet-testing/enum.parquet', schema=map {
|
||||
1: {name: 'cool_column', type: 'VARCHAR', default_value: NULL}
|
||||
}, filename=true)
|
||||
LIMIT 1
|
||||
----
|
||||
1 data/parquet-testing/enum.parquet
|
||||
|
||||
query II
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
})
|
||||
----
|
||||
42 43
|
||||
|
||||
# we just get a cast error when we can't cast to the supplied type
|
||||
statement error
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'DATE', default_value: NULL}
|
||||
})
|
||||
----
|
||||
Conversion Error
|
||||
|
||||
# if we don't supply a field id, we can't refer to it using the schema parameter
|
||||
statement ok
|
||||
COPY (SELECT 42::INTEGER i) TO '__TEST_DIR__/integers.parquet'
|
||||
|
||||
query II
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
0: {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
1: {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
})
|
||||
----
|
||||
NULL 43
|
||||
|
||||
# let's spice it up with more columns
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT 1 i1, 3 i3, 4 i4, 5 i5
|
||||
) TO '__TEST_DIR__/integers.parquet' (FIELD_IDS {i1: 5, i3: 3, i4: 2, i5: 1})
|
||||
|
||||
# this is purposely a bit confusing but we're:
|
||||
# 1. deleting field id 2
|
||||
# 2. creating field id 4
|
||||
# 3. reversing the order of the columns
|
||||
# 4. renaming them (except i3)
|
||||
# 5. upcasting them
|
||||
query IIII
|
||||
SELECT *
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
})
|
||||
----
|
||||
5 3 2 1
|
||||
|
||||
# projection still ok
|
||||
query I
|
||||
SELECT i1
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
})
|
||||
----
|
||||
5
|
||||
|
||||
# we can still select virtual columns as well
|
||||
query III
|
||||
SELECT file_row_number, filename[-16:], i4
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
})
|
||||
----
|
||||
0 integers.parquet 2
|
||||
|
||||
# projection still ok, even with different generated columns
|
||||
query III
|
||||
SELECT file_row_number, filename[-16:], i4
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
}, file_row_number=1, filename=1)
|
||||
----
|
||||
0 integers.parquet 2
|
||||
|
||||
# count(*) still ok
|
||||
query I
|
||||
SELECT count(*)
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
})
|
||||
----
|
||||
1
|
||||
|
||||
# combine with constant column
|
||||
query II
|
||||
SELECT i1, filename[-16:]
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
}, filename=true)
|
||||
----
|
||||
5 integers.parquet
|
||||
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT range % 4 g, range i FROM range(1000)
|
||||
) TO '__TEST_DIR__/integers.parquet' (FIELD_IDS {g: 33, i: 42})
|
||||
|
||||
# let's also do a query with a filter and a downcast
|
||||
query II
|
||||
SELECT my_cool_group, sum(my_cool_value)
|
||||
FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
33: {name: 'my_cool_group', type: 'UINTEGER', default_value: NULL},
|
||||
42: {name: 'my_cool_value', type: 'UINTEGER', default_value: NULL}
|
||||
})
|
||||
WHERE my_cool_group = 2
|
||||
GROUP BY my_cool_group
|
||||
----
|
||||
2 125000
|
||||
|
||||
# also test multi-file reading with different field ids
|
||||
# field id -> value:
|
||||
# 1 -> 5
|
||||
# 2 -> 4 (unused)
|
||||
# 3 -> 3
|
||||
# 4 -> - (missing)
|
||||
# 5 -> 1
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT
|
||||
1 i1,
|
||||
3 i3,
|
||||
4 i4,
|
||||
5 i5
|
||||
) TO '__TEST_DIR__/multifile1.parquet' (FIELD_IDS {
|
||||
i1: 5,
|
||||
i3: 3,
|
||||
i4: 2,
|
||||
i5: 1
|
||||
})
|
||||
|
||||
# field_id -> value:
|
||||
# 1 -> 1
|
||||
# 2 -> 3 (unused)
|
||||
# 3 -> 4
|
||||
# 4 -> 5
|
||||
# 5 -> - (missing)
|
||||
statement ok
|
||||
COPY (
|
||||
SELECT
|
||||
1 j1,
|
||||
3 j3,
|
||||
4 j4,
|
||||
5 j5
|
||||
) TO '__TEST_DIR__/multifile2.parquet' (FIELD_IDS {
|
||||
j1: 1,
|
||||
j3: 2,
|
||||
j4: 3,
|
||||
j5: 4
|
||||
})
|
||||
|
||||
query IIIII
|
||||
SELECT i1, i3, i4, i5, filename[-18:]
|
||||
FROM read_parquet('__TEST_DIR__/multifile*.parquet', schema=map {
|
||||
1: {name: 'i1', type: 'BIGINT', default_value: NULL},
|
||||
3: {name: 'i3', type: 'BIGINT', default_value: NULL},
|
||||
4: {name: 'i4', type: 'BIGINT', default_value: 2},
|
||||
5: {name: 'i5', type: 'BIGINT', default_value: NULL}
|
||||
}, filename=true)
|
||||
ORDER BY filename
|
||||
----
|
||||
5 3 2 1 multifile1.parquet
|
||||
1 4 5 NULL multifile2.parquet
|
||||
|
||||
statement error
|
||||
select * FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
True: {name: 'my_cool_group', type: 'UINTEGER', default_value: NULL},
|
||||
False: {name: 'my_cool_value', type: 'UINTEGER', default_value: NULL}
|
||||
});
|
||||
----
|
||||
Invalid Input Error: 'schema' expects the value type of the map to be either INTEGER or VARCHAR, not BOOLEAN
|
||||
|
||||
query II
|
||||
SELECT alias(COLUMNS(*)) FROM read_parquet('__TEST_DIR__/integers.parquet', schema=map {
|
||||
'i': {name: 'renamed_i', type: 'BIGINT', default_value: NULL},
|
||||
'j': {name: 'new_column', type: 'UTINYINT', default_value: 43}
|
||||
}) limit 1;
|
||||
----
|
||||
renamed_i new_column
|
||||
|
||||
# issue 15504
|
||||
statement ok
|
||||
COPY (select 1 as id, list_value('a', 'b', 'c') as arr, { key: 1, v1: 'a', v2: 'b' } as s) TO '__TEST_DIR__/15504.parquet' (field_ids { 'id': 0, 'arr': 1, 's': 2 });
|
||||
|
||||
query III
|
||||
SELECT * FROM read_parquet('__TEST_DIR__/15504.parquet', schema=map { 0: { name: 'id', type: 'int32', default_value: NULL }, 1: { name: 'arr', type: 'varchar[]', default_value: NULL }, 2: { name: 's', type: 'STRUCT(key INT, v1 TEXT, v2 TEXT)', default_value: NULL } });
|
||||
----
|
||||
1 [a, b, c] {'key': 1, 'v1': a, 'v2': b}
|
||||
|
||||
# issue 16094
|
||||
statement ok
|
||||
copy (
|
||||
select
|
||||
x
|
||||
from generate_series(1,100) as g(x)
|
||||
) to '__TEST_DIR__/16094.parquet'
|
||||
with (
|
||||
field_ids {x: 1}
|
||||
);
|
||||
|
||||
statement ok
|
||||
select
|
||||
x,
|
||||
filename
|
||||
from read_parquet(
|
||||
'__TEST_DIR__/16094.parquet',
|
||||
schema=map {
|
||||
1: {name: 'x', type: 'int', default_value: NULL}
|
||||
},
|
||||
filename=True
|
||||
) where x = 1;
|
||||
54
external/duckdb/test/parquet/timens_parquet.test
vendored
Normal file
@@ -0,0 +1,54 @@
# name: test/parquet/timens_parquet.test
# description: Round trip of TIME_NS data
# group: [parquet]

require parquet

# Insertion
statement ok
CREATE TABLE times(tns TIME_NS)

statement ok
INSERT INTO times VALUES
('00:00:00'),
('00:01:20'),
('10:21:00.0'),
('10:21:00.1'),
('10:21:00.9'),
('16:04:22.01'),
('16:04:22.12'),
('16:04:22.97'),
('20:08:10.001'),
('20:08:10.123'),
('20:08:10.998'),
('03:45:47.0001'),
('03:45:47.1234'),
('03:45:47.9999'),
('02:27:19.00001'),
('02:27:19.12345'),
('02:27:19.99899'),
('09:01:54.000001'),
('09:01:54.123456'),
('09:01:54.999978'),
('23:35:57.0000001'),
('23:35:57.1234567'),
('23:35:57.9999999'),
('13:00:00.00000001'),
('13:00:00.12345678'),
('13:00:00.99999989'),
('23:59:59.000000001'),
('23:59:59.123456789'),
('23:59:59.999999999'),
('24:00:00.000000000'),
(NULL)

query I nosort t0
from times
----

statement ok
copy times to '__TEST_DIR__/time_ns.parquet' (PARQUET_VERSION V2);

query I nosort t0
from '__TEST_DIR__/time_ns.parquet';
----

47
external/duckdb/test/parquet/timetz_parquet.test
vendored
Normal file
@@ -0,0 +1,47 @@
# name: test/parquet/timetz_parquet.test
# description: Test parquet file with time with time zone data
# group: [parquet]

require parquet

query I
select * from 'data/parquet-testing/timetz.parquet' ;
----
14:30:00+00
11:35:00+00
01:59:00+00

query I
select COL_TIME from 'data/parquet-testing/date-with-timezone-int64.parquet' ;
----
12:00:00+00

query II
select pruefbahn_id, arbeits_beginn
from 'data/parquet-testing/timetz-nanos.parquet'
where pruefbahn_id = '58981';
----
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00
58981 07:20:00+00

query I
select col33
from 'data/parquet-testing/negative-timetz.parquet';
----
20:08:21+00
09:01:00+00
13:04:04+00

57
external/duckdb/test/parquet/variant/variant_all_types_shredded.test
vendored
Normal file
@@ -0,0 +1,57 @@
# name: test/parquet/variant/variant_all_types_shredded.test
# group: [variant]

require parquet

require json

statement ok
create macro data() as table (
select COLUMNS([
x for x in (*) if x NOT IN [
'utinyint',
'usmallint',
'uint',
'ubigint',
'hugeint',
'uhugeint',
'bignum',
'timestamp_s',
'timestamp_ms',
'timestamp_tz',
'time_tz',
'interval',
'bit',
'dec_4_1', -- Parquet VARIANT doesn't have int16_t DECIMAL
-- Conversion isn't 1-to-1
'dec_9_4', -- can't roundtrip with json
'dec_18_6', -- can't roundtrip with json
'dec38_10', -- can't roundtrip with json
'blob' -- data is base64-encoded in parquet read
]
])::VARIANT var from test_all_types()
)

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

foreach type bool tinyint smallint int bigint date time timestamp timestamp_ns timestamp_tz float double dec_9_4 dec_18_6 dec38_10 uuid varchar blob small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs

statement ok
SET VARIABLE type_str = (SELECT $$STRUCT("${type}" $$ || typeof("${type}") || ')' from test_all_types() limit 1);

statement ok
COPY (
FROM data()
) TO '__TEST_DIR__/all_types_shredded_${type}.parquet' (
SHREDDING {
'var': getvariable('type_str')
}
)

query I nosort expected_res
select * from '__TEST_DIR__/all_types_shredded_${type}.parquet'
----

endloop

227
external/duckdb/test/parquet/variant/variant_basic.test
vendored
Normal file
@@ -0,0 +1,227 @@
# name: test/parquet/variant/variant_basic.test
# group: [variant]

require parquet

# Array
query II
from 'data/parquet-testing/variant_array_array_string_and_integer.parquet';
----
1 [["string","iceberg",34],[34,null],[],["string","iceberg"],34]

# String
query II
from 'data/parquet-testing/variant_string.parquet';
----
1 "iceberg"

# BOOL TRUE
query II
from 'data/parquet-testing/variant_bool_true.parquet';
----
1 true

# Decimal4
query II
from 'data/parquet-testing/variant_decimal4_positive.parquet';
----
1 "123456.789"

# UUID
query II
from 'data/parquet-testing/variant_uuid.parquet';
----
1 "f24f9b64-81fa-49d1-b74e-8c09a6e31c56"

# Empty array
query II
from 'data/parquet-testing/variant_array_empty.parquet';
----
1 []

query II
from 'data/parquet-testing/variant_int16.parquet';
----
1 -1234

query II
from 'data/parquet-testing/variant_int32.parquet';
----
1 -12345

# Binary
query II
from 'data/parquet-testing/variant_binary.parquet';
----
1 "CgsMDQ=="

# Decimal16
query II
from 'data/parquet-testing/variant_decimal16.parquet';
----
1 "9876543210.123456789"

query II
from 'data/parquet-testing/variant_int64.parquet';
----
1 -9876543210

# TIMESTAMP_NANOS_NTZ
query II
from 'data/parquet-testing/variant_timestamp_nanos_ntz.parquet';
----
1 "1957-11-07 12:33:54.123456789"

# Array of strings (2-dimensional)
query II
from 'data/parquet-testing/variant_array_array_string.parquet';
----
1 [["string","iceberg"],["apple","banana"]]

# TIMESTAMP_MICROS
query II
from 'data/parquet-testing/variant_timestamp_micros.parquet';
----
1 "1957-11-07 12:33:54.123456+00"

# Object {'a': .., 'c': ...}
query II
from 'data/parquet-testing/variant_object_primitives.parquet';
----
1 {"a":123456789,"c":"string"}

query II
from 'data/parquet-testing/variant_timestamp_micros_positive.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

query II
from 'data/parquet-testing/variant_int16_positive.parquet';
----
1 1234

query II
from 'data/parquet-testing/variant_time_ntz.parquet';
----
1 "12:33:54.123456"

query II
from 'data/parquet-testing/variant_decimal16_negative.parquet';
----
1 "-9876543210.123456789"

query II
from 'data/parquet-testing/variant_timestamp_nanos1.parquet';
----
1 "1957-11-07 12:33:54.123457+00"

query II
from 'data/parquet-testing/variant_decimal8_negative.parquet';
----
1 "-123456789.987654321"

query II
from 'data/parquet-testing/variant_timestamp_micros_negative.parquet';
----
1 "1957-11-07 12:33:54.123456"

query II
from 'data/parquet-testing/variant_int8_positive.parquet';
----
1 34

query II
from 'data/parquet-testing/variant_timestamp_nanos2.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

query II
from 'data/parquet-testing/variant_int8_negative.parquet';
----
1 -34

query II
from 'data/parquet-testing/variant_array_string.parquet';
----
1 ["iceberg","string"]

query II
from 'data/parquet-testing/variant_date_negative.parquet';
----
1 "1957-11-07"

query II
from 'data/parquet-testing/variant_int64_positive.parquet';
----
1 9876543210

query II
from 'data/parquet-testing/variant_array_object_string_and_integer.parquet';
----
1 [{"a":123456789,"c":"string"},{"a":123456789,"c":"string"},"iceberg",34]

query II
from 'data/parquet-testing/variant_int32_positive.parquet';
----
1 12345

query II
from 'data/parquet-testing/variant_double_negative.parquet';
----
1 -14.3

query II
from 'data/parquet-testing/variant_object_empty.parquet';
----
1 {}

query II
from 'data/parquet-testing/variant_null.parquet';
----
1 NULL

# -10.11 in the test that it was generated from
query II
from 'data/parquet-testing/variant_float_negative.parquet';
----
1 -10.109999656677246

query II
from 'data/parquet-testing/variant_object_string_and_array.parquet';
----
1 {"a":123456789,"c":["string","iceberg"]}

query II
from 'data/parquet-testing/variant_object_null_and_string.parquet';
----
1 {"a":null,"d":"iceberg"}

query II
from 'data/parquet-testing/variant_date_positive.parquet';
----
1 "2024-11-07"

query II
from 'data/parquet-testing/variant_bool_false.parquet';
----
1 false

query II
from 'data/parquet-testing/variant_array_object_string.parquet';
----
1 [{"a":123456789,"c":"string"},{"a":123456789,"c":"string"}]

query II
from 'data/parquet-testing/variant_decimal4_negative.parquet';
----
1 "-123456.789"

query II
from 'data/parquet-testing/variant_double_positive.parquet';
----
1 14.3

query II
from 'data/parquet-testing/variant_timestamp_micros_ntz_positive.parquet';
----
1 "2024-11-07 12:33:54.123456"

49
external/duckdb/test/parquet/variant/variant_basic_shredded_writing.test
vendored
Normal file
@@ -0,0 +1,49 @@
# name: test/parquet/variant/variant_basic_shredded_writing.test
# group: [variant]

require parquet

require json

statement ok
create macro data() AS TABLE (
FROM (VALUES
({'a': 21::INTEGER, 'b': NULL}::VARIANT),
({'a': 42::INTEGER, 'd': 'test'}::VARIANT),
([]::VARIANT),
(NULL::VARIANT),
([{'b': True, 'c': 'test'}::VARIANT, 'test', 21, {'a': True}, [1::VARIANT, 2, True, 'false']]::VARIANT),
('this is a long string'::VARIANT),
('this is big enough to not be classified as a "short string" by parquet VARIANT'::VARIANT)
) t(a)
)

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
from data() t(a)
) TO '__TEST_DIR__/shredded_struct.parquet' (
shredding {
a: 'STRUCT(a INTEGER, b VARIANT, c BOOLEAN)'
}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_struct.parquet';
----

statement ok
COPY (
select a from data()
) TO '__TEST_DIR__/shredded_list.parquet' (
shredding {
a: 'VARCHAR[]'
}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list.parquet';
----

116
external/duckdb/test/parquet/variant/variant_basic_writing.test
vendored
Normal file
@@ -0,0 +1,116 @@
# name: test/parquet/variant/variant_basic_writing.test
# group: [variant]

require parquet

require json

# STRUCT(a INTEGER, b INTEGER[])
statement ok
COPY (select
{
'a': 42,
'b': [null, 1, 2]
}::VARIANT
from range(10)
) TO '__TEST_DIR__/integer_variant.parquet';

query I
select * from '__TEST_DIR__/integer_variant.parquet';
----
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}

statement ok
COPY (select
'[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]'::JSON::VARIANT
from range(5)
) TO '__TEST_DIR__/list_of_list_variant.parquet'

query I
select * from '__TEST_DIR__/list_of_list_variant.parquet';
----
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]

statement ok
COPY (
with cte as (
FROM (VALUES
({'a': 21, 'b': NULL}::VARIANT),
([]::VARIANT),
(NULL::VARIANT),
([{'b': True, 'c': 'test'}]::VARIANT),
('this is a long string'::VARIANT),
('this is big enough to not be classified as a "short string" by parquet VARIANT'::VARIANT)
) t(a)
)
select a from cte
) TO '__TEST_DIR__/varied_variant.parquet'

query I
select * from '__TEST_DIR__/varied_variant.parquet';
----
{"a":21,"b":null}
[]
NULL
[{"b":true,"c":"test"}]
"this is a long string"
"this is big enough to not be classified as a \"short string\" by parquet VARIANT"

# VARIANT is only supported at the root for now
statement error
COPY (select [123::VARIANT]) TO '__TEST_DIR__/list_of_variant.parquet'
----
Not implemented Error: Unimplemented type for Parquet "VARIANT"

statement ok
create macro data() as table (
select COLUMNS([
x for x in (*) if x NOT IN [
'utinyint',
'usmallint',
'uint',
'ubigint',
'hugeint',
'uhugeint',
'bignum',
'timestamp_s',
'timestamp_ms',
'timestamp_tz',
'time_tz',
'interval',
'bit',
'dec_4_1', -- Parquet VARIANT doesn't have int16_t DECIMAL
-- Conversion isn't 1-to-1
'dec_9_4', -- can't roundtrip with json
'dec_18_6', -- can't roundtrip with json
'dec38_10', -- can't roundtrip with json
'blob' -- data is base64-encoded in parquet read
]
])::VARIANT as "\0" from test_all_types()
)

statement ok
COPY (
from data()
) TO '__TEST_DIR__/variant_test_all_types.parquet';

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

query I nosort expected_res
select * from '__TEST_DIR__/variant_test_all_types.parquet';
----

64
external/duckdb/test/parquet/variant/variant_list_of_struct_partial_shredding.test
vendored
Normal file
@@ -0,0 +1,64 @@
# name: test/parquet/variant/variant_list_of_struct_partial_shredding.test
# group: [variant]

require parquet

require json

statement ok
create macro data() AS TABLE (
FROM (VALUES
(
[
{a:['foo'::VARIANT,42], b:true, c:{a:'nested1'}}::VARIANT, -- element of list in field 'a' is a different type
{a: 42, b: true, c:{a:'nested2'}}, -- field 'a' is a different type
{b: true, c:{a:'nested3'}}, -- field 'a' is missing
{a:[], b:false, c:{a:NULL}},
{a: [], c:{a:'nested4'}} -- field 'b' is missing
]::VARIANT
),
(
[]
),
(
[
{a:NULL, b:NULL, c:{a:'inner'}},
{a:['baz'], b:false, c:{a:NULL}}
]
),
(
NULL
),
(
[
{a:['alpha'], b:true, c:{a:'deep'}}::VARIANT,
{a: [[1,2]::VARIANT, 'hello', {a: 42}]}, -- fields 'b' and 'c' are missing, 'a' element is of a wrong type
{b: false}, -- fields 'a' and 'c' are missing
{a:[], b:NULL, c:{a:'leaf'}}
]
),
(
[
{a:NULL, b:false, c:{a:NULL}},
{a:['x',NULL,'z'], b:true, c:{a:'final'}}
]
)
) t(a)
);

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
select a from data()
) TO '__TEST_DIR__/shredded_list_of_structs.parquet' (
shredding {
a: 'STRUCT(a VARCHAR[], b BOOLEAN, c STRUCT(a VARCHAR))[]'
}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_structs.parquet';
----

59
external/duckdb/test/parquet/variant/variant_list_of_struct_shredding.test
vendored
Normal file
@@ -0,0 +1,59 @@
# name: test/parquet/variant/variant_list_of_struct_shredding.test
# group: [variant]

require parquet

require json

statement ok
create macro data() AS TABLE (
FROM (VALUES
(
[
{a:['foo','bar'], b:true, c:{a:'nested1'}},
{a:[], b:false, c:{a:NULL}}
]::VARIANT
),
(
[]
),
(
[
{a:NULL, b:NULL, c:{a:'inner'}},
{a:['baz'], b:false, c:{a:NULL}}
]
),
(
NULL
),
(
[
{a:['alpha'], b:true, c:{a:'deep'}},
{a:[], b:NULL, c:{a:'leaf'}}
]
),
(
[
{a:NULL, b:false, c:{a:NULL}},
{a:['x',NULL,'z'], b:true, c:{a:'final'}}
]
)
) t(a)
);

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
select a from data()
) TO '__TEST_DIR__/shredded_list_of_structs.parquet' (
shredding {
a: 'STRUCT(a VARCHAR[], b BOOLEAN, c STRUCT(a VARCHAR))[]'
}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_structs.parquet';
----

34
external/duckdb/test/parquet/variant/variant_list_shredding.test
vendored
Normal file
@@ -0,0 +1,34 @@
# name: test/parquet/variant/variant_list_shredding.test
# group: [variant]

require parquet

require json

statement ok
create macro data() AS TABLE (
FROM (VALUES
([['test', NULL, 'this is a long string'],[],['hello'],NULL,[],[1, 2, 3]::VARIANT]::VARIANT),
(NULL::VARIANT),
([]::VARIANT),
([[{'a': 'test'}::VARIANT, [1, 2, 3]]::VARIANT, {'a': 21}, {'b': 42}, [['hello']]]),
([[], NULL, [1::VARIANT, 2, 'test'],['hello', 'world']]::VARIANT)
) t(a)
)

query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
select a from data()
) TO '__TEST_DIR__/shredded_list_of_list_of_string.parquet' (
shredding {
a: 'VARCHAR[][]'
}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_list_of_string.parquet';
----

31
external/duckdb/test/parquet/variant/variant_nanos_tz.test
vendored
Normal file
@@ -0,0 +1,31 @@
# name: test/parquet/variant/variant_nanos_tz.test
# group: [variant]

require parquet

statement ok
set variant_legacy_encoding=true;

# Timestamp NS - negative (with timezone) (shredded)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_tz_negative_no_logical_type.parquet';
----
1 "1957-11-07 12:33:54.123457+00"

# Timestamp NS - positive (with timezone) (shredded)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_tz_positive_no_logical_type.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

# Timestamp NS - positive (with timezone) (unshredded)
query II
from 'data/parquet-testing/variant_timestamp_nanos_tz_positive_no_logical_type.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

# Timestamp NS - negative (with timezone) (unshredded)
query II
from 'data/parquet-testing/variant_timestamp_nanos_tz_negative_no_logical_type.parquet';
----
1 "1957-11-07 12:33:54.123457+00"

44
external/duckdb/test/parquet/variant/variant_nested_with_nulls.test
vendored
Normal file
@@ -0,0 +1,44 @@
# name: test/parquet/variant/variant_nested_with_nulls.test
# group: [variant]

require parquet

query IIIIII
describe from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet')
----
id BIGINT YES NULL NULL NULL
v STRUCT("value" BLOB, metadata BLOB) YES NULL NULL NULL
array_of_variants STRUCT("value" BLOB, metadata BLOB)[] YES NULL NULL NULL
struct_of_variants STRUCT(v STRUCT("value" BLOB, metadata BLOB)) YES NULL NULL NULL
map_of_variants MAP(VARCHAR, STRUCT("value" BLOB, metadata BLOB)) YES NULL NULL NULL
array_of_struct_of_variants STRUCT(v STRUCT("value" BLOB, metadata BLOB))[] YES NULL NULL NULL
struct_of_array_of_variants STRUCT(v STRUCT("value" BLOB, metadata BLOB)[]) YES NULL NULL NULL

statement ok
set variant_legacy_encoding=true;

# Now the variant column gets emitted as JSON
query IIIIII
describe from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet')
----
id BIGINT YES NULL NULL NULL
v JSON YES NULL NULL NULL
array_of_variants JSON[] YES NULL NULL NULL
struct_of_variants STRUCT(v JSON) YES NULL NULL NULL
map_of_variants MAP(VARCHAR, JSON) YES NULL NULL NULL
array_of_struct_of_variants STRUCT(v JSON)[] YES NULL NULL NULL
struct_of_array_of_variants STRUCT(v JSON[]) YES NULL NULL NULL

query IIIIIII
select * from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet') order by id limit 10;
----
0 {"key":0} ['{"key":0}', NULL, '{"key":0}', NULL, '{"key":0}'] {'v': '{"key":0}'} {0='{"key":0}', nullKey=NULL} [{'v': '{"key":0}'}, {'v': NULL}, NULL, {'v': '{"key":0}'}, NULL, {'v': '{"key":0}'}] {'v': [NULL, '{"key":0}']}
0 {"key":0} ['{"key":0}', NULL, '{"key":0}', NULL, '{"key":0}'] {'v': '{"key":0}'} {0='{"key":0}', nullKey=NULL} [{'v': '{"key":0}'}, {'v': NULL}, NULL, {'v': '{"key":0}'}, NULL, {'v': '{"key":0}'}] {'v': [NULL, '{"key":0}']}
1 {"key":1} ['{"key":1}', NULL, '{"key":1}', NULL, '{"key":1}'] {'v': '{"key":1}'} {1='{"key":1}', nullKey=NULL} [{'v': '{"key":1}'}, {'v': NULL}, NULL, {'v': '{"key":1}'}, NULL, {'v': '{"key":1}'}] {'v': [NULL, '{"key":1}']}
1 {"key":1} ['{"key":1}', NULL, '{"key":1}', NULL, '{"key":1}'] {'v': '{"key":1}'} {1='{"key":1}', nullKey=NULL} [{'v': '{"key":1}'}, {'v': NULL}, NULL, {'v': '{"key":1}'}, NULL, {'v': '{"key":1}'}] {'v': [NULL, '{"key":1}']}
2 {"key":2} ['{"key":2}', NULL, '{"key":2}', NULL, '{"key":2}'] {'v': '{"key":2}'} {2='{"key":2}', nullKey=NULL} [{'v': '{"key":2}'}, {'v': NULL}, NULL, {'v': '{"key":2}'}, NULL, {'v': '{"key":2}'}] {'v': [NULL, '{"key":2}']}
3 {"key":3} ['{"key":3}', NULL, '{"key":3}', NULL, '{"key":3}'] {'v': '{"key":3}'} {3='{"key":3}', nullKey=NULL} [{'v': '{"key":3}'}, {'v': NULL}, NULL, {'v': '{"key":3}'}, NULL, {'v': '{"key":3}'}] {'v': [NULL, '{"key":3}']}
4 {"key":4} ['{"key":4}', NULL, '{"key":4}', NULL, '{"key":4}'] {'v': '{"key":4}'} {4='{"key":4}', nullKey=NULL} [{'v': '{"key":4}'}, {'v': NULL}, NULL, {'v': '{"key":4}'}, NULL, {'v': '{"key":4}'}] {'v': [NULL, '{"key":4}']}
5 {"key":5} ['{"key":5}', NULL, '{"key":5}', NULL, '{"key":5}'] {'v': '{"key":5}'} {5='{"key":5}', nullKey=NULL} [{'v': '{"key":5}'}, {'v': NULL}, NULL, {'v': '{"key":5}'}, NULL, {'v': '{"key":5}'}] {'v': [NULL, '{"key":5}']}
6 {"key":6} ['{"key":6}', NULL, '{"key":6}', NULL, '{"key":6}'] {'v': '{"key":6}'} {6='{"key":6}', nullKey=NULL} [{'v': '{"key":6}'}, {'v': NULL}, NULL, {'v': '{"key":6}'}, NULL, {'v': '{"key":6}'}] {'v': [NULL, '{"key":6}']}
7 {"key":7} ['{"key":7}', NULL, '{"key":7}', NULL, '{"key":7}'] {'v': '{"key":7}'} {7='{"key":7}', nullKey=NULL} [{'v': '{"key":7}'}, {'v': NULL}, NULL, {'v': '{"key":7}'}, NULL, {'v': '{"key":7}'}] {'v': [NULL, '{"key":7}']}

189
external/duckdb/test/parquet/variant/variant_partially_shredded.test
vendored
Normal file
@@ -0,0 +1,189 @@
# name: test/parquet/variant/variant_partially_shredded.test
# group: [variant]

require parquet

query II nosort result
from 'data/parquet-testing/variant_partial_shredded0.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded1.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded2.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded3.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded4.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded5.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded6.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded7.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded8.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded9.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded10.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded11.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded12.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded13.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded14.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded15.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded16.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded17.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded18.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded19.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded20.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded21.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded22.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded23.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded24.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded25.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded26.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded27.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded28.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded29.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded30.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded31.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded32.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded33.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded34.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded35.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded36.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded37.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded38.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded39.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded40.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded41.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded42.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded43.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded44.parquet';
----

query II nosort result
from 'data/parquet-testing/variant_partial_shredded45.parquet';
----

39
external/duckdb/test/parquet/variant/variant_roundtrip.test_slow
vendored
Normal file
@@ -0,0 +1,39 @@
# name: test/parquet/variant/variant_roundtrip.test_slow
# group: [variant]

require parquet

require json

foreach parquet_file p2strings.parquet p2.parquet pandas-date.parquet parquet_with_json.parquet spark-store.parquet struct_skip_test.parquet timestamp.parquet candidate.parquet

statement ok
COPY (
SELECT
COLUMNS(*)::VARIANT
FROM read_parquet('data/parquet-testing/${parquet_file}')
) TO '__TEST_DIR__/variant_${parquet_file}' (FORMAT PARQUET);

query I nosort expected_res
SELECT COLUMNS(*)::VARIANT FROM read_parquet('data/parquet-testing/${parquet_file}')

query I nosort expected_res
SELECT COLUMNS(*)::VARIANT FROM read_parquet('__TEST_DIR__/variant_${parquet_file}')

reset label expected_res

endloop

foreach parquet_file 7-set.snappy.arrow2.parquet adam_genotypes.parquet apkwan.parquet arrow_nan.parquet aws_kinesis.parquet aws1.snappy.parquet aws2.parquet bigdecimal.parquet binary_string.parquet blob.parquet boolean_stats.parquet bug13053-2.parquet bug13053.parquet bug14120-dict-nulls-only.parquet bug1554.parquet bug1588.parquet bug1589.parquet bug1618_struct_strings.parquet bug2267.parquet bug2557.parquet bug3734.parquet bug4442.parquet bug4859.parquet bug4903.parquet bug687_nulls.parquet byte_stream_split.parquet CASE_INSENSITIVE.PARQUET complex.parquet corrupt_stats.parquet data-types.parquet date.parquet delta_byte_array.parquet delta_length_byte_array.parquet empty.parquet enum.parquet file_row_number.parquet filter_bug1391.parquet fixed.parquet float16.parquet incorrect_index_page_offsets.parquet issue_6013.parquet issue10279_delta_encoding.parquet issue12621.parquet issue6630_1.parquet issue6630_2.parquet issue6990.parquet issue9417.parquet leftdate3_192_loop_1.parquet lineitem-top10000.gzip.parquet list_sort_segfault.parquet manyrowgroups.parquet manyrowgroups2.parquet map.parquet multi_bloom_a.parquet multi_bloom_b.parquet multi_bloom_c.parquet nan-float.parquet nullbyte_multiple.parquet nullbyte.parquet parquet_go.parquet rle_boolean_encoding.parquet seqs_table.parquet signed_stats.parquet silly-names.parquet simple.parquet sorted.zstd_18_131072_small.parquet spark-ontime.parquet struct.parquet test_unnest_rewriter.parquet timestamp-ms.parquet tz.parquet upsert_bug.parquet userdata1.parquet varchar_stats.parquet zstd.parquet

statement ok
COPY (
SELECT
COLUMNS(*)::VARIANT
FROM read_parquet('data/parquet-testing/${parquet_file}')
) TO '__TEST_DIR__/variant_${parquet_file}' (FORMAT PARQUET);

statement ok
SELECT COLUMNS(*)::VARIANT FROM read_parquet('__TEST_DIR__/variant_${parquet_file}')

endloop

210
external/duckdb/test/parquet/variant/variant_shredded.test
vendored
Normal file
@@ -0,0 +1,210 @@
# name: test/parquet/variant/variant_shredded.test
# group: [variant]

require parquet

# Timestamp NS - positive (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_ntz_positive.parquet';
----
1 "2024-11-07 12:33:54.123456789"

# Float - negative
query II
from 'data/parquet-testing/variant_shredded_float_negative.parquet';
----
1 -10.109999656677246

# Int64 - negative
query II
from 'data/parquet-testing/variant_shredded_int64_negative.parquet';
----
1 -9876543210

# Decimal16 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal16_negative.parquet';
----
1 "-9876543210.123456789"

# UUID
query II
from 'data/parquet-testing/variant_shredded_uuid.parquet';
----
1 "f24f9b64-81fa-49d1-b74e-8c09a6e31c56"

# Decimal4 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal4_negative.parquet';
----
1 "-123456.789"

# Decimal4 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal4_positive.parquet';
----
1 "123456.789"

# Timestamp Micros - negative (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_ntz_negative.parquet';
----
1 "1957-11-07 12:33:54.123456"

# Date - negative
query II
from 'data/parquet-testing/variant_shredded_date_negative.parquet';
----
1 "1957-11-07"

# int8 - positive
query II
from 'data/parquet-testing/variant_shredded_int8_positive.parquet';
----
1 34

# int16 - positive
query II
from 'data/parquet-testing/variant_shredded_int16_positive.parquet';
----
1 1234

# decimal8 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal8_negative.parquet';
----
1 "-123456789.987654321"

# string
query II
from 'data/parquet-testing/variant_shredded_string.parquet';
----
1 "iceberg"

# FIXME: this is actually a Timestamp Nanos - positive (with timezone)
# Timestamp Micros - positive (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_positive.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

# binary
query II
from 'data/parquet-testing/variant_shredded_binary.parquet';
----
1 "CgsMDQ=="

# float - positive
query II
from 'data/parquet-testing/variant_shredded_float_positive.parquet';
----
1 10.109999656677246

# double - positive
query II
from 'data/parquet-testing/variant_shredded_double_positive.parquet';
----
1 14.3

# decimal16 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal16_positive.parquet';
----
1 "9876543210.123456789"

# Timestamp Micros - positive (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_ntz_positive.parquet';
----
1 "2024-11-07 12:33:54.123456"

# int16 - negative
query II
from 'data/parquet-testing/variant_shredded_int16_negative.parquet';
----
1 -1234

# Timestamp Micros - positive (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_positive2.parquet';
----
1 "2024-11-07 12:33:54.123456+00"

# Timestamp Micros - negative (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_negative.parquet';
----
1 "1957-11-07 12:33:54.123456+00"

# decimal8 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal8_positive.parquet';
----
1 "123456789.987654321"

# Timestamp Nanos - negative (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_ntz_negative.parquet';
----
1 "1957-11-07 12:33:54.123456789"

# int32 - positive
query II
from 'data/parquet-testing/variant_shredded_int32_positive.parquet';
----
1 12345

# int32 - negative
query II
from 'data/parquet-testing/variant_shredded_int32_negative.parquet';
----
1 -12345

# FIXME: this is actually a Timestamp Nanos - negative (with timezone)
# Timestamp Micros - negative (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_negative2.parquet';
----
1 "1957-11-07 12:33:54.123457+00"

# int8 - negative
query II
from 'data/parquet-testing/variant_shredded_int8_negative.parquet';
----
1 -34

# Time Micros (no timezone)
query II
from 'data/parquet-testing/variant_shredded_time_micros_ntz.parquet';
----
1 "12:33:54.123456"

# Date - positive
query II
from 'data/parquet-testing/variant_shredded_date_positive.parquet';
----
1 "2024-11-07"

# bool - true
query II
from 'data/parquet-testing/variant_shredded_bool_true.parquet';
----
1 true

# int64 - positive
query II
from 'data/parquet-testing/variant_shredded_int64_positive.parquet';
----
1 9876543210

# double - negative
query II
from 'data/parquet-testing/variant_shredded_double_negative.parquet';
----
1 -14.3

# bool - false
query II
from 'data/parquet-testing/variant_shredded_bool_false.parquet';
----
1 false

40
external/duckdb/test/parquet/variant/variant_shredded_nested.test
vendored
Normal file
@@ -0,0 +1,40 @@
# name: test/parquet/variant/variant_shredded_nested.test
# group: [variant]

require parquet

# Array
query II
from 'data/parquet-testing/variant_shredded_array1.parquet';
----
1 [["string","iceberg"],["apple","banana"]]

# Array
query II
from 'data/parquet-testing/variant_shredded_array2.parquet';
----
1 [{"a":123456789,"c":"string"},{"a":123456789,"c":"string"}]

# Array
query II
from 'data/parquet-testing/variant_shredded_array3.parquet';
----
1 ["iceberg","string"]

# Object
query II
from 'data/parquet-testing/variant_shredded_object1.parquet';
----
1 {"a":123456789,"c":"string"}

# Object
query II
from 'data/parquet-testing/variant_shredded_object2.parquet';
----
1 {"a":null,"d":"iceberg"}

# Object
query II
from 'data/parquet-testing/variant_shredded_object3.parquet';
----
1 {"a":123456789,"c":["string","iceberg"]}

17
external/duckdb/test/parquet/variant/variant_to_parquet_variant.test
vendored
Normal file
@@ -0,0 +1,17 @@
# name: test/parquet/variant/variant_to_parquet_variant.test
# group: [variant]

require parquet

require json

query I
select variant_to_parquet_variant(NULL)
----
{'metadata': \x11\x00\x00, 'value': \x00}

# We don't expose the overload with a shredded type, only internally will we use that
statement error
select variant_to_parquet_variant(NULL, 'STRUCT(a VARCHAR)'::VARCHAR)
----
Binder Error