should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,57 @@
# name: test/parquet/variant/variant_all_types_shredded.test
# description: Write VARIANT-cast test_all_types() columns to Parquet, shredding on one column's type per iteration, and compare what is read back
# group: [variant]

require parquet

require json

# Cast the test_all_types() columns to VARIANT, skipping the columns listed below
# (the inline notes give the reason where one is recorded).
statement ok
create macro data() as table (
	select COLUMNS([
		x for x in (*) if x NOT IN [
			'utinyint',
			'usmallint',
			'uint',
			'ubigint',
			'hugeint',
			'uhugeint',
			'bignum',
			'timestamp_s',
			'timestamp_ms',
			'timestamp_tz',
			'time_tz',
			'interval',
			'bit',
			'dec_4_1', -- Parquet VARIANT doesn't have int16_t DECIMAL
			-- Conversion isn't 1-to-1
			'dec_9_4', -- can't roundtrip with json
			'dec_18_6', -- can't roundtrip with json
			'dec38_10', -- can't roundtrip with json
			'blob' -- data is base64-encoded in parquet read
		]
	])::VARIANT var from test_all_types()
)

# Baseline: data() rendered as JSON, with VARIANT_NULL values mapped to SQL NULL.
# Every later query labelled expected_res has to produce this same result.
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

foreach type bool tinyint smallint int bigint date time timestamp timestamp_ns timestamp_tz float double dec_9_4 dec_18_6 dec38_10 uuid varchar blob small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs

# Build the shredding type for this iteration as the string
# STRUCT("<type>" <typeof of that test_all_types() column>)
statement ok
SET VARIABLE type_str = (SELECT $$STRUCT("${type}" $$ || typeof("${type}") || ')' from test_all_types() limit 1);

statement ok
COPY (
	FROM data()
) TO '__TEST_DIR__/all_types_shredded_${type}.parquet' (
	SHREDDING {
		'var': getvariable('type_str')
	}
)

# The shredded file has to read back identically to the baseline
query I nosort expected_res
select * from '__TEST_DIR__/all_types_shredded_${type}.parquet'
----

endloop

View File

@@ -0,0 +1,227 @@
# name: test/parquet/variant/variant_basic.test
# description: Read the Parquet VARIANT sample files under data/parquet-testing and check the values they decode to
# group: [variant]

require parquet

# Array
query II
from 'data/parquet-testing/variant_array_array_string_and_integer.parquet';
----
1	[["string","iceberg",34],[34,null],[],["string","iceberg"],34]

# String
query II
from 'data/parquet-testing/variant_string.parquet';
----
1	"iceberg"

# BOOL TRUE
query II
from 'data/parquet-testing/variant_bool_true.parquet';
----
1	true

# Decimal4
query II
from 'data/parquet-testing/variant_decimal4_positive.parquet';
----
1	"123456.789"

# UUID
query II
from 'data/parquet-testing/variant_uuid.parquet';
----
1	"f24f9b64-81fa-49d1-b74e-8c09a6e31c56"

# Empty array
query II
from 'data/parquet-testing/variant_array_empty.parquet';
----
1	[]

query II
from 'data/parquet-testing/variant_int16.parquet';
----
1	-1234

query II
from 'data/parquet-testing/variant_int32.parquet';
----
1	-12345

# Binary
query II
from 'data/parquet-testing/variant_binary.parquet';
----
1	"CgsMDQ=="

# Decimal16
query II
from 'data/parquet-testing/variant_decimal16.parquet';
----
1	"9876543210.123456789"

query II
from 'data/parquet-testing/variant_int64.parquet';
----
1	-9876543210

# TIMESTAMP_NANOS_NTZ
query II
from 'data/parquet-testing/variant_timestamp_nanos_ntz.parquet';
----
1	"1957-11-07 12:33:54.123456789"

# Array of strings (2-dimensional)
query II
from 'data/parquet-testing/variant_array_array_string.parquet';
----
1	[["string","iceberg"],["apple","banana"]]

# TIMESTAMP_MICROS
query II
from 'data/parquet-testing/variant_timestamp_micros.parquet';
----
1	"1957-11-07 12:33:54.123456+00"

# Object {'a': .., 'c': ...}
query II
from 'data/parquet-testing/variant_object_primitives.parquet';
----
1	{"a":123456789,"c":"string"}

query II
from 'data/parquet-testing/variant_timestamp_micros_positive.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

query II
from 'data/parquet-testing/variant_int16_positive.parquet';
----
1	1234

query II
from 'data/parquet-testing/variant_time_ntz.parquet';
----
1	"12:33:54.123456"

query II
from 'data/parquet-testing/variant_decimal16_negative.parquet';
----
1	"-9876543210.123456789"

query II
from 'data/parquet-testing/variant_timestamp_nanos1.parquet';
----
1	"1957-11-07 12:33:54.123457+00"

query II
from 'data/parquet-testing/variant_decimal8_negative.parquet';
----
1	"-123456789.987654321"

query II
from 'data/parquet-testing/variant_timestamp_micros_negative.parquet';
----
1	"1957-11-07 12:33:54.123456"

query II
from 'data/parquet-testing/variant_int8_positive.parquet';
----
1	34

query II
from 'data/parquet-testing/variant_timestamp_nanos2.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

query II
from 'data/parquet-testing/variant_int8_negative.parquet';
----
1	-34

query II
from 'data/parquet-testing/variant_array_string.parquet';
----
1	["iceberg","string"]

query II
from 'data/parquet-testing/variant_date_negative.parquet';
----
1	"1957-11-07"

query II
from 'data/parquet-testing/variant_int64_positive.parquet';
----
1	9876543210

query II
from 'data/parquet-testing/variant_array_object_string_and_integer.parquet';
----
1	[{"a":123456789,"c":"string"},{"a":123456789,"c":"string"},"iceberg",34]

query II
from 'data/parquet-testing/variant_int32_positive.parquet';
----
1	12345

query II
from 'data/parquet-testing/variant_double_negative.parquet';
----
1	-14.3

query II
from 'data/parquet-testing/variant_object_empty.parquet';
----
1	{}

query II
from 'data/parquet-testing/variant_null.parquet';
----
1	NULL

# -10.11 in the test that it was generated from
query II
from 'data/parquet-testing/variant_float_negative.parquet';
----
1	-10.109999656677246

query II
from 'data/parquet-testing/variant_object_string_and_array.parquet';
----
1	{"a":123456789,"c":["string","iceberg"]}

query II
from 'data/parquet-testing/variant_object_null_and_string.parquet';
----
1	{"a":null,"d":"iceberg"}

query II
from 'data/parquet-testing/variant_date_positive.parquet';
----
1	"2024-11-07"

query II
from 'data/parquet-testing/variant_bool_false.parquet';
----
1	false

query II
from 'data/parquet-testing/variant_array_object_string.parquet';
----
1	[{"a":123456789,"c":"string"},{"a":123456789,"c":"string"}]

query II
from 'data/parquet-testing/variant_decimal4_negative.parquet';
----
1	"-123456.789"

query II
from 'data/parquet-testing/variant_double_positive.parquet';
----
1	14.3

query II
from 'data/parquet-testing/variant_timestamp_micros_ntz_positive.parquet';
----
1	"2024-11-07 12:33:54.123456"

View File

@@ -0,0 +1,49 @@
# name: test/parquet/variant/variant_basic_shredded_writing.test
# description: Write a mix of VARIANT values to Parquet with a STRUCT and with a LIST shredding type, and compare what is read back
# group: [variant]

require parquet

require json

# One VARIANT column "a" holding objects, an empty list, a NULL, a mixed list and two strings
statement ok
create macro data() AS TABLE (
	FROM (VALUES
		({'a': 21::INTEGER, 'b': NULL}::VARIANT),
		({'a': 42::INTEGER, 'd': 'test'}::VARIANT),
		([]::VARIANT),
		(NULL::VARIANT),
		([{'b': True, 'c': 'test'}::VARIANT, 'test', 21, {'a': True}, [1::VARIANT, 2, True, 'false']]::VARIANT),
		('this is a long string'::VARIANT),
		('this is big enough to not be classified as a "short string" by parquet VARIANT'::VARIANT)
	) t(a)
)

# Baseline: data() rendered as JSON, with VARIANT_NULL values mapped to SQL NULL.
# Every later query labelled expected_res has to produce this same result.
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

# Shred on a struct type
statement ok
COPY (
	from data() t(a)
) TO '__TEST_DIR__/shredded_struct.parquet' (
	shredding {
		a: 'STRUCT(a INTEGER, b VARIANT, c BOOLEAN)'
	}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_struct.parquet';
----

# Shred on a list type
statement ok
COPY (
	select a from data()
) TO '__TEST_DIR__/shredded_list.parquet' (
	shredding {
		a: 'VARCHAR[]'
	}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list.parquet';
----

View File

@@ -0,0 +1,116 @@
# name: test/parquet/variant/variant_basic_writing.test
# description: Write VARIANT columns to Parquet without a shredding option and check what is read back
# group: [variant]

require parquet

require json

# STRUCT(a INTEGER, b INTEGER[])
statement ok
COPY (select
	{
		'a': 42,
		'b': [null, 1, 2]
	}::VARIANT
	from range(10)
) TO '__TEST_DIR__/integer_variant.parquet';

query I
select * from '__TEST_DIR__/integer_variant.parquet';
----
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}
{"a":42,"b":[null,1,2]}

# Nested lists that mix strings, integers, nulls and empty lists
statement ok
COPY (select
	'[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]'::JSON::VARIANT
	from range(5)
) TO '__TEST_DIR__/list_of_list_variant.parquet'

query I
select * from '__TEST_DIR__/list_of_list_variant.parquet';
----
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]
[["string","iceberg",-34],[-34,null],[],["string","iceberg"],-34]

# A different kind of value in every row of the same column
statement ok
COPY (
	with cte as (
		FROM (VALUES
			({'a': 21, 'b': NULL}::VARIANT),
			([]::VARIANT),
			(NULL::VARIANT),
			([{'b': True, 'c': 'test'}]::VARIANT),
			('this is a long string'::VARIANT),
			('this is big enough to not be classified as a "short string" by parquet VARIANT'::VARIANT)
		) t(a)
	)
	select a from cte
) TO '__TEST_DIR__/varied_variant.parquet'

query I
select * from '__TEST_DIR__/varied_variant.parquet';
----
{"a":21,"b":null}
[]
NULL
[{"b":true,"c":"test"}]
"this is a long string"
"this is big enough to not be classified as a \"short string\" by parquet VARIANT"

# VARIANT is only supported at the root for now
statement error
COPY (select [123::VARIANT]) TO '__TEST_DIR__/list_of_variant.parquet'
----
Not implemented Error: Unimplemented type for Parquet "VARIANT"

# Cast the test_all_types() columns to VARIANT, skipping the columns listed below
# (the inline notes give the reason where one is recorded).
statement ok
create macro data() as table (
	select COLUMNS([
		x for x in (*) if x NOT IN [
			'utinyint',
			'usmallint',
			'uint',
			'ubigint',
			'hugeint',
			'uhugeint',
			'bignum',
			'timestamp_s',
			'timestamp_ms',
			'timestamp_tz',
			'time_tz',
			'interval',
			'bit',
			'dec_4_1', -- Parquet VARIANT doesn't have int16_t DECIMAL
			-- Conversion isn't 1-to-1
			'dec_9_4', -- can't roundtrip with json
			'dec_18_6', -- can't roundtrip with json
			'dec38_10', -- can't roundtrip with json
			'blob' -- data is base64-encoded in parquet read
		]
	])::VARIANT as "\0" from test_all_types()
)

statement ok
COPY (
	from data()
) TO '__TEST_DIR__/variant_test_all_types.parquet';

# Both queries share the label expected_res, so the file has to read back
# identically to the JSON rendering of data()
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

query I nosort expected_res
select * from '__TEST_DIR__/variant_test_all_types.parquet';
----

View File

@@ -0,0 +1,64 @@
# name: test/parquet/variant/variant_list_of_struct_partial_shredding.test
# description: Shred lists of structs where some elements do not match the shredding type (missing fields, fields of a different type)
# group: [variant]

require parquet

require json

# The inline notes mark the elements that deviate from the shredding type used below
statement ok
create macro data() AS TABLE (
	FROM (VALUES
		(
			[
				{a:['foo'::VARIANT,42], b:true, c:{a:'nested1'}}::VARIANT, -- element of list in field 'a' is a different type
				{a: 42, b: true, c:{a:'nested2'}}, -- field 'a' is a different type
				{b: true, c:{a:'nested3'}}, -- field 'a' is missing
				{a:[], b:false, c:{a:NULL}},
				{a: [], c:{a:'nested4'}} -- field 'b' is missing
			]::VARIANT
		),
		(
			[]
		),
		(
			[
				{a:NULL, b:NULL, c:{a:'inner'}},
				{a:['baz'], b:false, c:{a:NULL}}
			]
		),
		(
			NULL
		),
		(
			[
				{a:['alpha'], b:true, c:{a:'deep'}}::VARIANT,
				{a: [[1,2]::VARIANT, 'hello', {a: 42}]}, -- fields 'b' and 'c' are missing, 'a' element is of a wrong type
				{b: false}, -- fields 'a' and 'c' are missing
				{a:[], b:NULL, c:{a:'leaf'}}
			]
		),
		(
			[
				{a:NULL, b:false, c:{a:NULL}},
				{a:['x',NULL,'z'], b:true, c:{a:'final'}}
			]
		)
	) t(a)
);

# Baseline: data() rendered as JSON, with VARIANT_NULL values mapped to SQL NULL.
# The read-back query below shares the label expected_res and has to match it.
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
	select a from data()
) TO '__TEST_DIR__/shredded_list_of_structs.parquet' (
	shredding {
		a: 'STRUCT(a VARCHAR[], b BOOLEAN, c STRUCT(a VARCHAR))[]'
	}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_structs.parquet';
----
----

View File

@@ -0,0 +1,59 @@
# name: test/parquet/variant/variant_list_of_struct_shredding.test
# description: Shred lists of structs on a matching list-of-struct type and compare what is read back
# group: [variant]

require parquet

require json

# Rows are lists of {a, b, c} structs, plus one empty list and one NULL
statement ok
create macro data() AS TABLE (
	FROM (VALUES
		(
			[
				{a:['foo','bar'], b:true, c:{a:'nested1'}},
				{a:[], b:false, c:{a:NULL}}
			]::VARIANT
		),
		(
			[]
		),
		(
			[
				{a:NULL, b:NULL, c:{a:'inner'}},
				{a:['baz'], b:false, c:{a:NULL}}
			]
		),
		(
			NULL
		),
		(
			[
				{a:['alpha'], b:true, c:{a:'deep'}},
				{a:[], b:NULL, c:{a:'leaf'}}
			]
		),
		(
			[
				{a:NULL, b:false, c:{a:NULL}},
				{a:['x',NULL,'z'], b:true, c:{a:'final'}}
			]
		)
	) t(a)
);

# Baseline: data() rendered as JSON, with VARIANT_NULL values mapped to SQL NULL.
# The read-back query below shares the label expected_res and has to match it.
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
	select a from data()
) TO '__TEST_DIR__/shredded_list_of_structs.parquet' (
	shredding {
		a: 'STRUCT(a VARCHAR[], b BOOLEAN, c STRUCT(a VARCHAR))[]'
	}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_structs.parquet';
----
----

View File

@@ -0,0 +1,34 @@
# name: test/parquet/variant/variant_list_shredding.test
# description: Shred nested lists on the type VARCHAR[][] and compare what is read back
# group: [variant]

require parquet

require json

# Rows mix lists of strings with empty lists, NULLs, integers and objects
statement ok
create macro data() AS TABLE (
	FROM (VALUES
		([['test', NULL, 'this is a long string'],[],['hello'],NULL,[],[1, 2, 3]::VARIANT]::VARIANT),
		(NULL::VARIANT),
		([]::VARIANT),
		([[{'a': 'test'}::VARIANT, [1, 2, 3]]::VARIANT, {'a': 21}, {'b': 42}, [['hello']]]),
		([[], NULL, [1::VARIANT, 2, 'test'],['hello', 'world']]::VARIANT)
	) t(a)
)

# Baseline: data() rendered as JSON, with VARIANT_NULL values mapped to SQL NULL.
# The read-back query below shares the label expected_res and has to match it.
query I nosort expected_res
select IF(VARIANT_TYPEOF(COLUMNS(*)) == 'VARIANT_NULL', NULL, COLUMNS(*)::JSON) from data();
----

statement ok
COPY (
	select a from data()
) TO '__TEST_DIR__/shredded_list_of_list_of_string.parquet' (
	shredding {
		a: 'VARCHAR[][]'
	}
)

query I nosort expected_res
select * from '__TEST_DIR__/shredded_list_of_list_of_string.parquet';
----
----

View File

@@ -0,0 +1,31 @@
# name: test/parquet/variant/variant_nanos_tz.test
# description: Read nanosecond timestamp-with-timezone VARIANT sample files (shredded and unshredded) with variant_legacy_encoding enabled
# group: [variant]

require parquet

# All reads below run with the legacy encoding setting switched on
statement ok
set variant_legacy_encoding=true;

# Timestamp NS - negative (with timezone) (shredded)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_tz_negative_no_logical_type.parquet';
----
1	"1957-11-07 12:33:54.123457+00"

# Timestamp NS - positive (with timezone) (shredded)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_tz_positive_no_logical_type.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

# Timestamp NS - positive (with timezone) (unshredded)
query II
from 'data/parquet-testing/variant_timestamp_nanos_tz_positive_no_logical_type.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

# Timestamp NS - negative (with timezone) (unshredded)
query II
from 'data/parquet-testing/variant_timestamp_nanos_tz_negative_no_logical_type.parquet';
----
1	"1957-11-07 12:33:54.123457+00"

View File

@@ -0,0 +1,44 @@
# name: test/parquet/variant/variant_nested_with_nulls.test
# description: Read a file with variants nested in lists, structs and maps, before and after enabling variant_legacy_encoding
# group: [variant]

require parquet

# Before the setting is changed the variant columns are described as STRUCT("value" BLOB, metadata BLOB)
query IIIIII
describe from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet')
----
id	BIGINT	YES	NULL	NULL	NULL
v	STRUCT("value" BLOB, metadata BLOB)	YES	NULL	NULL	NULL
array_of_variants	STRUCT("value" BLOB, metadata BLOB)[]	YES	NULL	NULL	NULL
struct_of_variants	STRUCT(v STRUCT("value" BLOB, metadata BLOB))	YES	NULL	NULL	NULL
map_of_variants	MAP(VARCHAR, STRUCT("value" BLOB, metadata BLOB))	YES	NULL	NULL	NULL
array_of_struct_of_variants	STRUCT(v STRUCT("value" BLOB, metadata BLOB))[]	YES	NULL	NULL	NULL
struct_of_array_of_variants	STRUCT(v STRUCT("value" BLOB, metadata BLOB)[])	YES	NULL	NULL	NULL

statement ok
set variant_legacy_encoding=true;

# Now the variant column gets emitted as JSON
query IIIIII
describe from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet')
----
id	BIGINT	YES	NULL	NULL	NULL
v	JSON	YES	NULL	NULL	NULL
array_of_variants	JSON[]	YES	NULL	NULL	NULL
struct_of_variants	STRUCT(v JSON)	YES	NULL	NULL	NULL
map_of_variants	MAP(VARCHAR, JSON)	YES	NULL	NULL	NULL
array_of_struct_of_variants	STRUCT(v JSON)[]	YES	NULL	NULL	NULL
struct_of_array_of_variants	STRUCT(v JSON[])	YES	NULL	NULL	NULL

# First ten rows by id; NULLs show up at every nesting level
query IIIIIII
select * from parquet_scan('data/parquet-testing/variant_unshredded_nested_nulls.parquet') order by id limit 10;
----
0	{"key":0}	['{"key":0}', NULL, '{"key":0}', NULL, '{"key":0}']	{'v': '{"key":0}'}	{0='{"key":0}', nullKey=NULL}	[{'v': '{"key":0}'}, {'v': NULL}, NULL, {'v': '{"key":0}'}, NULL, {'v': '{"key":0}'}]	{'v': [NULL, '{"key":0}']}
0	{"key":0}	['{"key":0}', NULL, '{"key":0}', NULL, '{"key":0}']	{'v': '{"key":0}'}	{0='{"key":0}', nullKey=NULL}	[{'v': '{"key":0}'}, {'v': NULL}, NULL, {'v': '{"key":0}'}, NULL, {'v': '{"key":0}'}]	{'v': [NULL, '{"key":0}']}
1	{"key":1}	['{"key":1}', NULL, '{"key":1}', NULL, '{"key":1}']	{'v': '{"key":1}'}	{1='{"key":1}', nullKey=NULL}	[{'v': '{"key":1}'}, {'v': NULL}, NULL, {'v': '{"key":1}'}, NULL, {'v': '{"key":1}'}]	{'v': [NULL, '{"key":1}']}
1	{"key":1}	['{"key":1}', NULL, '{"key":1}', NULL, '{"key":1}']	{'v': '{"key":1}'}	{1='{"key":1}', nullKey=NULL}	[{'v': '{"key":1}'}, {'v': NULL}, NULL, {'v': '{"key":1}'}, NULL, {'v': '{"key":1}'}]	{'v': [NULL, '{"key":1}']}
2	{"key":2}	['{"key":2}', NULL, '{"key":2}', NULL, '{"key":2}']	{'v': '{"key":2}'}	{2='{"key":2}', nullKey=NULL}	[{'v': '{"key":2}'}, {'v': NULL}, NULL, {'v': '{"key":2}'}, NULL, {'v': '{"key":2}'}]	{'v': [NULL, '{"key":2}']}
3	{"key":3}	['{"key":3}', NULL, '{"key":3}', NULL, '{"key":3}']	{'v': '{"key":3}'}	{3='{"key":3}', nullKey=NULL}	[{'v': '{"key":3}'}, {'v': NULL}, NULL, {'v': '{"key":3}'}, NULL, {'v': '{"key":3}'}]	{'v': [NULL, '{"key":3}']}
4	{"key":4}	['{"key":4}', NULL, '{"key":4}', NULL, '{"key":4}']	{'v': '{"key":4}'}	{4='{"key":4}', nullKey=NULL}	[{'v': '{"key":4}'}, {'v': NULL}, NULL, {'v': '{"key":4}'}, NULL, {'v': '{"key":4}'}]	{'v': [NULL, '{"key":4}']}
5	{"key":5}	['{"key":5}', NULL, '{"key":5}', NULL, '{"key":5}']	{'v': '{"key":5}'}	{5='{"key":5}', nullKey=NULL}	[{'v': '{"key":5}'}, {'v': NULL}, NULL, {'v': '{"key":5}'}, NULL, {'v': '{"key":5}'}]	{'v': [NULL, '{"key":5}']}
6	{"key":6}	['{"key":6}', NULL, '{"key":6}', NULL, '{"key":6}']	{'v': '{"key":6}'}	{6='{"key":6}', nullKey=NULL}	[{'v': '{"key":6}'}, {'v': NULL}, NULL, {'v': '{"key":6}'}, NULL, {'v': '{"key":6}'}]	{'v': [NULL, '{"key":6}']}
7	{"key":7}	['{"key":7}', NULL, '{"key":7}', NULL, '{"key":7}']	{'v': '{"key":7}'}	{7='{"key":7}', nullKey=NULL}	[{'v': '{"key":7}'}, {'v': NULL}, NULL, {'v': '{"key":7}'}, NULL, {'v': '{"key":7}'}]	{'v': [NULL, '{"key":7}']}

View File

@@ -0,0 +1,189 @@
# name: test/parquet/variant/variant_partially_shredded.test
# description: Read the variant_partial_shredded0 .. variant_partial_shredded45 sample files and check they all produce the same result
# group: [variant]

require parquet

# Covers file indices 0 through 45 (the loop's upper bound is exclusive).
# Every iteration uses the label "result", so each file has to produce the
# same result as the first one read.
loop i 0 46

query II nosort result
from 'data/parquet-testing/variant_partial_shredded${i}.parquet';
----

endloop

View File

@@ -0,0 +1,39 @@
# name: test/parquet/variant/variant_roundtrip.test_slow
# description: Cast the columns of existing Parquet test files to VARIANT, write them to Parquet and read them back
# group: [variant]

require parquet

require json

# For these files the data read back is compared with the VARIANT cast of the source file
foreach parquet_file p2strings.parquet p2.parquet pandas-date.parquet parquet_with_json.parquet spark-store.parquet struct_skip_test.parquet timestamp.parquet candidate.parquet

statement ok
COPY (
	SELECT
		COLUMNS(*)::VARIANT
	FROM read_parquet('data/parquet-testing/${parquet_file}')
) TO '__TEST_DIR__/variant_${parquet_file}' (FORMAT PARQUET);

query I nosort expected_res
SELECT COLUMNS(*)::VARIANT FROM read_parquet('data/parquet-testing/${parquet_file}')

query I nosort expected_res
SELECT COLUMNS(*)::VARIANT FROM read_parquet('__TEST_DIR__/variant_${parquet_file}')

# Each file has its own expected result, so clear the label before the next iteration
reset label expected_res

endloop

# For these files only the write and the read-back are required to succeed; results are not compared
foreach parquet_file 7-set.snappy.arrow2.parquet adam_genotypes.parquet apkwan.parquet arrow_nan.parquet aws_kinesis.parquet aws1.snappy.parquet aws2.parquet bigdecimal.parquet binary_string.parquet blob.parquet boolean_stats.parquet bug13053-2.parquet bug13053.parquet bug14120-dict-nulls-only.parquet bug1554.parquet bug1588.parquet bug1589.parquet bug1618_struct_strings.parquet bug2267.parquet bug2557.parquet bug3734.parquet bug4442.parquet bug4859.parquet bug4903.parquet bug687_nulls.parquet byte_stream_split.parquet CASE_INSENSITIVE.PARQUET complex.parquet corrupt_stats.parquet data-types.parquet date.parquet delta_byte_array.parquet delta_length_byte_array.parquet empty.parquet enum.parquet file_row_number.parquet filter_bug1391.parquet fixed.parquet float16.parquet incorrect_index_page_offsets.parquet issue_6013.parquet issue10279_delta_encoding.parquet issue12621.parquet issue6630_1.parquet issue6630_2.parquet issue6990.parquet issue9417.parquet leftdate3_192_loop_1.parquet lineitem-top10000.gzip.parquet list_sort_segfault.parquet manyrowgroups.parquet manyrowgroups2.parquet map.parquet multi_bloom_a.parquet multi_bloom_b.parquet multi_bloom_c.parquet nan-float.parquet nullbyte_multiple.parquet nullbyte.parquet parquet_go.parquet rle_boolean_encoding.parquet seqs_table.parquet signed_stats.parquet silly-names.parquet simple.parquet sorted.zstd_18_131072_small.parquet spark-ontime.parquet struct.parquet test_unnest_rewriter.parquet timestamp-ms.parquet tz.parquet upsert_bug.parquet userdata1.parquet varchar_stats.parquet zstd.parquet

statement ok
COPY (
	SELECT
		COLUMNS(*)::VARIANT
	FROM read_parquet('data/parquet-testing/${parquet_file}')
) TO '__TEST_DIR__/variant_${parquet_file}' (FORMAT PARQUET);

statement ok
SELECT COLUMNS(*)::VARIANT FROM read_parquet('__TEST_DIR__/variant_${parquet_file}')

endloop

View File

@@ -0,0 +1,210 @@
# name: test/parquet/variant/variant_shredded.test
# description: Read the variant_shredded_* primitive sample files under data/parquet-testing and check the values they decode to
# group: [variant]

require parquet

# Timestamp NS - positive (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_ntz_positive.parquet';
----
1	"2024-11-07 12:33:54.123456789"

# Float - negative
query II
from 'data/parquet-testing/variant_shredded_float_negative.parquet';
----
1	-10.109999656677246

# Int64 - negative
query II
from 'data/parquet-testing/variant_shredded_int64_negative.parquet';
----
1	-9876543210

# Decimal16 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal16_negative.parquet';
----
1	"-9876543210.123456789"

# UUID
query II
from 'data/parquet-testing/variant_shredded_uuid.parquet';
----
1	"f24f9b64-81fa-49d1-b74e-8c09a6e31c56"

# Decimal4 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal4_negative.parquet';
----
1	"-123456.789"

# Decimal4 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal4_positive.parquet';
----
1	"123456.789"

# Timestamp Micros - negative (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_ntz_negative.parquet';
----
1	"1957-11-07 12:33:54.123456"

# Date - negative
query II
from 'data/parquet-testing/variant_shredded_date_negative.parquet';
----
1	"1957-11-07"

# int8 - positive
query II
from 'data/parquet-testing/variant_shredded_int8_positive.parquet';
----
1	34

# int16 - positive
query II
from 'data/parquet-testing/variant_shredded_int16_positive.parquet';
----
1	1234

# decimal8 - negative
query II
from 'data/parquet-testing/variant_shredded_decimal8_negative.parquet';
----
1	"-123456789.987654321"

# string
query II
from 'data/parquet-testing/variant_shredded_string.parquet';
----
1	"iceberg"

# FIXME: this is actually a Timestamp Nanos - positive (with timezone)
# Timestamp Micros - positive (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_positive.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

# binary
query II
from 'data/parquet-testing/variant_shredded_binary.parquet';
----
1	"CgsMDQ=="

# float - positive
query II
from 'data/parquet-testing/variant_shredded_float_positive.parquet';
----
1	10.109999656677246

# double - positive
query II
from 'data/parquet-testing/variant_shredded_double_positive.parquet';
----
1	14.3

# decimal16 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal16_positive.parquet';
----
1	"9876543210.123456789"

# Timestamp Micros - positive (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_ntz_positive.parquet';
----
1	"2024-11-07 12:33:54.123456"

# int16 - negative
query II
from 'data/parquet-testing/variant_shredded_int16_negative.parquet';
----
1	-1234

# Timestamp Micros - positive (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_positive2.parquet';
----
1	"2024-11-07 12:33:54.123456+00"

# Timestamp Micros - negative (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_negative.parquet';
----
1	"1957-11-07 12:33:54.123456+00"

# decimal8 - positive
query II
from 'data/parquet-testing/variant_shredded_decimal8_positive.parquet';
----
1	"123456789.987654321"

# Timestamp Nanos - negative (no timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_nanos_ntz_negative.parquet';
----
1	"1957-11-07 12:33:54.123456789"

# int32 - positive
query II
from 'data/parquet-testing/variant_shredded_int32_positive.parquet';
----
1	12345

# int32 - negative
query II
from 'data/parquet-testing/variant_shredded_int32_negative.parquet';
----
1	-12345

# FIXME: this is actually a Timestamp Nanos - negative (with timezone)
# Timestamp Micros - negative (with timezone)
query II
from 'data/parquet-testing/variant_shredded_timestamp_micros_tz_negative2.parquet';
----
1	"1957-11-07 12:33:54.123457+00"

# int8 - negative
query II
from 'data/parquet-testing/variant_shredded_int8_negative.parquet';
----
1	-34

# Time Micros (no timezone)
query II
from 'data/parquet-testing/variant_shredded_time_micros_ntz.parquet';
----
1	"12:33:54.123456"

# Date - positive
query II
from 'data/parquet-testing/variant_shredded_date_positive.parquet';
----
1	"2024-11-07"

# bool - true
query II
from 'data/parquet-testing/variant_shredded_bool_true.parquet';
----
1	true

# int64 - positive
query II
from 'data/parquet-testing/variant_shredded_int64_positive.parquet';
----
1	9876543210

# double - negative
query II
from 'data/parquet-testing/variant_shredded_double_negative.parquet';
----
1	-14.3

# bool - false
query II
from 'data/parquet-testing/variant_shredded_bool_false.parquet';
----
1	false

View File

@@ -0,0 +1,40 @@
# name: test/parquet/variant/variant_shredded_nested.test
# description: Read the variant_shredded_array* and variant_shredded_object* sample files and check the values they decode to
# group: [variant]

require parquet

# Array
query II
from 'data/parquet-testing/variant_shredded_array1.parquet';
----
1	[["string","iceberg"],["apple","banana"]]

# Array
query II
from 'data/parquet-testing/variant_shredded_array2.parquet';
----
1	[{"a":123456789,"c":"string"},{"a":123456789,"c":"string"}]

# Array
query II
from 'data/parquet-testing/variant_shredded_array3.parquet';
----
1	["iceberg","string"]

# Object
query II
from 'data/parquet-testing/variant_shredded_object1.parquet';
----
1	{"a":123456789,"c":"string"}

# Object
query II
from 'data/parquet-testing/variant_shredded_object2.parquet';
----
1	{"a":null,"d":"iceberg"}

# Object
query II
from 'data/parquet-testing/variant_shredded_object3.parquet';
----
1	{"a":123456789,"c":["string","iceberg"]}

View File

@@ -0,0 +1,17 @@
# name: test/parquet/variant/variant_to_parquet_variant.test
# description: Check the output of variant_to_parquet_variant for NULL and that its two-argument form cannot be called from SQL
# group: [variant]

require parquet

require json

# A NULL input yields a struct with 'metadata' and 'value' blobs
query I
select variant_to_parquet_variant(NULL)
----
{'metadata': \x11\x00\x00, 'value': \x00}

# We don't expose the overload with a shredded type, only internally will we use that
statement error
select variant_to_parquet_variant(NULL, 'STRUCT(a VARCHAR)'::VARCHAR)
----
Binder Error