should be it
This commit is contained in:
219
external/duckdb/test/sql/copy/parquet/parquet_stats.test
vendored
Normal file
219
external/duckdb/test/sql/copy/parquet/parquet_stats.test
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
# name: test/sql/copy/parquet/parquet_stats.test
|
||||
# description: Test stats reading in parquet reader
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
# boolean values: both the stats_min/stats_max pair and the
# stats_min_value/stats_max_value pair are expected to report false and true
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/boolean_stats.parquet');
|
||||
----
|
||||
false true false true
|
||||
|
||||
# signed numbers: four metadata rows are expected, matching the four columns
# returned by the select * below; both stat pairs must agree on min and max
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/signed_stats.parquet');
|
||||
----
|
||||
-128 127 -128 127
|
||||
-32768 32767 -32768 32767
|
||||
-2147483648 2147483647 -2147483648 2147483647
|
||||
-9223372036854775808 9223372036854775807 -9223372036854775808 9223372036854775807
|
||||
|
||||
# the file itself holds two rows: the minimum and the maximum of every column
query IIII
|
||||
select * from 'data/parquet-testing/signed_stats.parquet';
|
||||
----
|
||||
-128 -32768 -2147483648 -9223372036854775808
|
||||
127 32767 2147483647 9223372036854775807
|
||||
|
||||
# unsigned numbers: stats_min/stats_max are expected to be NULL for every
# column except the third (0 .. 4294967295), while
# stats_min_value/stats_max_value are populated for all four columns
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/unsigned_stats.parquet');
|
||||
----
|
||||
NULL NULL 0 255
|
||||
NULL NULL 0 65535
|
||||
0 4294967295 0 4294967295
|
||||
NULL NULL 0 18446744073709551615
|
||||
|
||||
# the file itself holds two rows: all zeros, then the maximum of every column
query IIII
|
||||
select * from 'data/parquet-testing/unsigned_stats.parquet';
|
||||
----
|
||||
0 0 0 0
|
||||
255 65535 4294967295 18446744073709551615
|
||||
|
||||
# dates/times/timestamps: six metadata rows are expected (a date, a time with
# UTC offset, and four timestamps); both stat pairs must agree.
# Note the first timestamp column starts at 1990-01-01, the others at 1900-01-01.
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/date_stats.parquet');
|
||||
----
|
||||
1900-01-01 2030-12-31 1900-01-01 2030-12-31
|
||||
00:00:00+00 23:59:59+00 00:00:00+00 23:59:59+00
|
||||
1990-01-01 00:00:00 2030-12-31 23:59:59 1990-01-01 00:00:00 2030-12-31 23:59:59
|
||||
1900-01-01 00:00:00 2030-12-31 23:59:59 1900-01-01 00:00:00 2030-12-31 23:59:59
|
||||
1900-01-01 00:00:00 2030-12-31 23:59:59 1900-01-01 00:00:00 2030-12-31 23:59:59
|
||||
1900-01-01 00:00:00 2030-12-31 23:59:59 1900-01-01 00:00:00 2030-12-31 23:59:59
|
||||
|
||||
# the file itself holds two rows of six columns: the minimum and maximum values
query IIIIII
|
||||
select * from 'data/parquet-testing/date_stats.parquet';
|
||||
----
|
||||
1900-01-01 00:00:00+00 1990-01-01 00:00:00 1900-01-01 00:00:00 1900-01-01 00:00:00 1900-01-01 00:00:00
|
||||
2030-12-31 23:59:59+00 2030-12-31 23:59:59 2030-12-31 23:59:59 2030-12-31 23:59:59 2030-12-31 23:59:59
|
||||
|
||||
# varchar/blob stats: stats_min/stats_max are expected to be NULL for both
# columns; only stats_min_value/stats_max_value are set. The second column's
# values are shown with an embedded \x00 between the two words.
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/varchar_stats.parquet');
|
||||
----
|
||||
NULL NULL hello world world hello
|
||||
NULL NULL hello\x00world world\x00hello
|
||||
|
||||
# should be the same as computing min/max over these columns
|
||||
query IIII
|
||||
select min(str_val), max(str_val), min("hello\x00world"), max("hello\x00world") from 'data/parquet-testing/varchar_stats.parquet';
|
||||
----
|
||||
hello world world hello hello\x00world world\x00hello
|
||||
|
||||
# the file itself holds two rows of two columns
query II
|
||||
select * from 'data/parquet-testing/varchar_stats.parquet';
|
||||
----
|
||||
hello world hello\x00world
|
||||
world hello world\x00hello
|
||||
|
||||
# decimal stats: four decimal columns with an increasing number of digits;
# both stat pairs are expected to agree on min and max for each column
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/decimal_stats.parquet');
|
||||
----
|
||||
-999.9 999.9 -999.9 999.9
|
||||
-999999.999 999999.999 -999999.999 999999.999
|
||||
-9999999999999.99999 9999999999999.99999 -9999999999999.99999 9999999999999.99999
|
||||
-999999999999999999999999999999999.99999 999999999999999999999999999999999.99999 -999999999999999999999999999999999.99999 999999999999999999999999999999999.99999
|
||||
|
||||
# the file itself holds two rows: the minimum and the maximum of every column
query IIII
|
||||
select * from 'data/parquet-testing/decimal_stats.parquet';
|
||||
----
|
||||
-999.9 -999999.999 -9999999999999.99999 -999999999999999999999999999999999.99999
|
||||
999.9 999999.999 9999999999999.99999 999999999999999999999999999999999.99999
|
||||
|
||||
# int32 decimal stats: for this file only stats_min/stats_max are expected to
# be set (1.00 and 24.00); stats_min_value/stats_max_value are expected NULL
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/arrow/int32_decimal.parquet');
|
||||
----
|
||||
1.00 24.00 NULL NULL
|
||||
|
||||
# the file holds the 24 values 1.00 through 24.00, consistent with the stats above
query I
|
||||
SELECT * FROM 'data/parquet-testing/arrow/int32_decimal.parquet'
|
||||
----
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
10.00
|
||||
11.00
|
||||
12.00
|
||||
13.00
|
||||
14.00
|
||||
15.00
|
||||
16.00
|
||||
17.00
|
||||
18.00
|
||||
19.00
|
||||
20.00
|
||||
21.00
|
||||
22.00
|
||||
23.00
|
||||
24.00
|
||||
|
||||
# int64 decimal stats: for this file only stats_min/stats_max are expected to
# be set (1.00 and 24.00); stats_min_value/stats_max_value are expected NULL
|
||||
query IIII
|
||||
select stats_min, stats_max, stats_min_value, stats_max_value from parquet_metadata('data/parquet-testing/arrow/int64_decimal.parquet');
|
||||
----
|
||||
1.00 24.00 NULL NULL
|
||||
|
||||
# the file holds the 24 values 1.00 through 24.00, consistent with the stats above
query I
|
||||
SELECT * FROM 'data/parquet-testing/arrow/int64_decimal.parquet'
|
||||
----
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
10.00
|
||||
11.00
|
||||
12.00
|
||||
13.00
|
||||
14.00
|
||||
15.00
|
||||
16.00
|
||||
17.00
|
||||
18.00
|
||||
19.00
|
||||
20.00
|
||||
21.00
|
||||
22.00
|
||||
23.00
|
||||
24.00
|
||||
|
||||
# data-types stats: twelve metadata rows are expected, matching the twelve
# columns returned by the SELECT * below. Columns that hold a single non-NULL
# value (4.80, 49, 50, the timestamp and the date) report min == max.
|
||||
query IIII
|
||||
SELECT stats_min, stats_max, stats_min_value, stats_max_value FROM parquet_metadata('data/parquet-testing/data-types.parquet')
|
||||
----
|
||||
-127 127 -127 127
|
||||
-32767 32767 -32767 32767
|
||||
-2147483647 2147483647 -2147483647 2147483647
|
||||
-9223372036854775807 9223372036854775807 -9223372036854775807 9223372036854775807
|
||||
-4.6 4.6 -4.6 4.6
|
||||
-4.7 4.7 -4.7 4.7
|
||||
4.80 4.80 4.80 4.80
|
||||
49 49 49 49
|
||||
50 50 50 50
|
||||
false true false true
|
||||
2019-11-26 20:11:42.501 2019-11-26 20:11:42.501 2019-11-26 20:11:42.501 2019-11-26 20:11:42.501
|
||||
2020-01-10 2020-01-10 2020-01-10 2020-01-10
|
||||
|
||||
# the file itself holds five rows; the first and last are entirely NULL
query IIIIIIIIIIII
|
||||
SELECT * FROM 'data/parquet-testing/data-types.parquet'
|
||||
----
|
||||
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
|
||||
42 43 44 45 4.600000 4.700000 4.80 49 50 True 2019-11-26 20:11:42.501 2020-01-10
|
||||
-127 -32767 -2147483647 -9223372036854775807 -4.600000 -4.700000 NULL NULL NULL False NULL NULL
|
||||
127 32767 2147483647 9223372036854775807 NULL NULL NULL NULL NULL NULL NULL NULL
|
||||
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
|
||||
|
||||
# parquet stats for all parquet files: parquet_metadata must run without error
# on every listed file (only success is checked, not the returned values).
# The foreach value list must stay on ONE line; a line break inside it would
# make the remaining paths be parsed as a separate, unknown directive.
|
||||
foreach parquet_file data/parquet-testing/manyrowgroups.parquet data/parquet-testing/map.parquet data/parquet-testing/arrow/int32_decimal.parquet data/parquet-testing/arrow/nonnullable.impala.parquet data/parquet-testing/bug687_nulls.parquet data/parquet-testing/bug1554.parquet data/parquet-testing/apkwan.parquet data/parquet-testing/arrow/nested_lists.snappy.parquet data/parquet-testing/arrow/nulls.snappy.parquet data/parquet-testing/nan-float.parquet data/parquet-testing/manyrowgroups2.parquet data/parquet-testing/struct.parquet data/parquet-testing/arrow/list_columns.parquet data/parquet-testing/timestamp-ms.parquet data/parquet-testing/arrow/alltypes_dictionary.parquet data/parquet-testing/arrow/binary.parquet data/parquet-testing/arrow/nation.dict-malformed.parquet data/parquet-testing/lineitem-top10000.gzip.parquet data/parquet-testing/arrow/nested_maps.snappy.parquet data/parquet-testing/arrow/dict-page-offset-zero.parquet data/parquet-testing/silly-names.parquet data/parquet-testing/zstd.parquet data/parquet-testing/bug1618_struct_strings.parquet data/parquet-testing/arrow/single_nan.parquet data/parquet-testing/arrow/int64_decimal.parquet data/parquet-testing/filter_bug1391.parquet data/parquet-testing/arrow/fixed_length_decimal_legacy.parquet data/parquet-testing/timestamp.parquet data/parquet-testing/arrow/fixed_length_decimal.parquet data/parquet-testing/leftdate3_192_loop_1.parquet data/parquet-testing/blob.parquet data/parquet-testing/bug1588.parquet data/parquet-testing/bug1589.parquet data/parquet-testing/arrow/alltypes_plain.parquet data/parquet-testing/arrow/repeated_no_annotation.parquet data/parquet-testing/data-types.parquet data/parquet-testing/unsigned.parquet data/parquet-testing/pandas-date.parquet data/parquet-testing/date.parquet data/parquet-testing/arrow/nullable.impala.parquet data/parquet-testing/fixed.parquet data/parquet-testing/arrow/alltypes_plain.snappy.parquet data/parquet-testing/decimal/int32_decimal.parquet data/parquet-testing/decimal/pandas_decimal.parquet data/parquet-testing/decimal/decimal_dc.parquet data/parquet-testing/decimal/int64_decimal.parquet data/parquet-testing/decimal/fixed_length_decimal_legacy.parquet data/parquet-testing/decimal/fixed_length_decimal.parquet data/parquet-testing/glob2/t1.parquet data/parquet-testing/cache/cache1.parquet data/parquet-testing/cache/cache2.parquet data/parquet-testing/glob/t2.parquet data/parquet-testing/glob/t1.parquet data/parquet-testing/bug2557.parquet
|
||||
|
||||
statement ok
|
||||
select * from parquet_metadata('${parquet_file}');
|
||||
|
||||
endloop
|
||||
|
||||
# internal issue 2037: an empty string written to parquet must be read back
# as a non-NULL value, and its min/max statistics must be non-NULL as well
|
||||
statement ok
|
||||
copy (select '' i) to '__TEST_DIR__/test.parquet';
|
||||
|
||||
# the empty string round-trips as '' rather than NULL
query I
|
||||
select i is null c0 from '__TEST_DIR__/test.parquet';
|
||||
----
|
||||
false
|
||||
|
||||
# the written min/max stats for the empty string are present (not NULL)
query II
|
||||
select stats_min_value is null c0, stats_max_value is null c1 from parquet_metadata('__TEST_DIR__/test.parquet');
|
||||
----
|
||||
false false
|
||||
|
||||
# row group sizes reported for the file written above
query II
|
||||
select row_group_bytes, row_group_compressed_bytes from parquet_metadata('__TEST_DIR__/test.parquet');
|
||||
----
|
||||
27 29
|
||||
|
||||
# row group sizes for varchar_stats.parquet: two metadata rows, both
# expected to report the same row-group sizes
query II
|
||||
select row_group_bytes, row_group_compressed_bytes from parquet_metadata('data/parquet-testing/varchar_stats.parquet');
|
||||
----
|
||||
200 208
|
||||
200 208
|
||||
Reference in New Issue
Block a user