# name: test/sql/copy/parquet/parquet_metadata.test
# description: Test the parquet_metadata and parquet_schema table functions
# group: [parquet]
|
|
|
|
# Every query below calls parquet_metadata or parquet_schema, so the parquet
# extension is declared as a prerequisite for this test file.
require parquet
|
|
|
|
# Smoke tests: both table functions must run without error on a single file.
statement ok
SELECT *
FROM parquet_metadata('data/parquet-testing/lineitem-top10000.gzip.parquet');

statement ok
SELECT *
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet');
|
|
|
|
# Both table functions must produce at least one row for the file.
query I
SELECT COUNT(*) > 0 AS has_rows
FROM parquet_metadata('data/parquet-testing/lineitem-top10000.gzip.parquet');
----
true

query I
SELECT COUNT(*) > 0 AS has_rows
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet');
----
true
|
|
|
|
# parquet_schema must run without error on the files from the decimal/ fixture directory.
statement ok
SELECT *
FROM parquet_schema('data/parquet-testing/decimal/decimal_dc.parquet');

statement ok
SELECT *
FROM parquet_schema('data/parquet-testing/decimal/int64_decimal.parquet');
|
|
|
|
# with globs: a glob pattern is accepted in place of a single file path
statement ok
SELECT *
FROM parquet_metadata('data/parquet-testing/glob/*.parquet');

statement ok
SELECT *
FROM parquet_schema('data/parquet-testing/glob/*.parquet');
|
|
|
|
# list parameters: a list of file paths is accepted in place of a single path.
# Both functions are exercised, mirroring the glob section above; the same file
# is deliberately listed twice.
statement ok
SELECT *
FROM parquet_metadata([
    'data/parquet-testing/decimal/int64_decimal.parquet',
    'data/parquet-testing/decimal/int64_decimal.parquet'
]);

statement ok
SELECT *
FROM parquet_schema([
    'data/parquet-testing/decimal/int64_decimal.parquet',
    'data/parquet-testing/decimal/int64_decimal.parquet'
]);
|
|
|
|
# Physical parquet type and mapped DuckDB type for every typed schema entry.
# The schema root (spark_schema) has no physical type, so `type IS NOT NULL`
# filters it out. The expected block below is order-sensitive, so the query
# sorts by column_id instead of relying on the scan's incidental output order.
query III
SELECT name, type, duckdb_type
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet')
WHERE type IS NOT NULL
ORDER BY column_id;
----
l_orderkey	INT64	BIGINT
l_partkey	INT64	BIGINT
l_suppkey	INT64	BIGINT
l_linenumber	INT32	INTEGER
l_quantity	INT32	INTEGER
l_extendedprice	DOUBLE	DOUBLE
l_discount	DOUBLE	DOUBLE
l_tax	DOUBLE	DOUBLE
l_returnflag	BYTE_ARRAY	VARCHAR
l_linestatus	BYTE_ARRAY	VARCHAR
l_shipdate	BYTE_ARRAY	VARCHAR
l_commitdate	BYTE_ARRAY	VARCHAR
l_receiptdate	BYTE_ARRAY	VARCHAR
l_shipinstruct	BYTE_ARRAY	VARCHAR
l_shipmode	BYTE_ARRAY	VARCHAR
l_comment	BYTE_ARRAY	VARCHAR
|
|
|
|
# column_id: ids start at 0 for the schema root and increase by one per column
query II
SELECT
    column_id,
    name
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet')
ORDER BY column_id;
----
0	spark_schema
1	l_orderkey
2	l_partkey
3	l_suppkey
4	l_linenumber
5	l_quantity
6	l_extendedprice
7	l_discount
8	l_tax
9	l_returnflag
10	l_linestatus
11	l_shipdate
12	l_commitdate
13	l_receiptdate
14	l_shipinstruct
15	l_shipmode
16	l_comment
|
|
|
|
# Recursive glob: checks the total number of schema rows across all matched
# files, the largest per-file row count, and the number of distinct column_id
# values seen across the files.
query III
WITH schema_rows AS (
    SELECT file_name, column_id
    FROM parquet_schema('data/parquet-testing/glob3/**/*.parquet')
),
per_file AS (
    SELECT file_name, COUNT(*) AS rows_per_file
    FROM schema_rows
    GROUP BY file_name
)
SELECT
    SUM(rows_per_file) AS total_rows,
    MAX(rows_per_file) AS max_rows_per_filename,
    (SELECT COUNT(DISTINCT column_id) FROM schema_rows) AS distinct_column_ids
FROM per_file;
----
9	3	3
|