# name: test/sql/copy/parquet/parquet_metadata.test
# description: Test parquet metadata function
# group: [parquet]

require parquet

# smoke tests: both table functions must run successfully on a single file
statement ok
SELECT * FROM parquet_metadata('data/parquet-testing/lineitem-top10000.gzip.parquet');

statement ok
SELECT * FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet');

# both table functions must return at least one row for that file
query I
SELECT COUNT(*) > 0 FROM parquet_metadata('data/parquet-testing/lineitem-top10000.gzip.parquet');
----
true

query I
SELECT COUNT(*) > 0 FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet');
----
true

# schema of the decimal test files
statement ok
SELECT * FROM parquet_schema('data/parquet-testing/decimal/decimal_dc.parquet');

statement ok
SELECT * FROM parquet_schema('data/parquet-testing/decimal/int64_decimal.parquet');

# with globs
statement ok
SELECT * FROM parquet_metadata('data/parquet-testing/glob/*.parquet');

statement ok
SELECT * FROM parquet_schema('data/parquet-testing/glob/*.parquet');

# list parameters (the same file is deliberately listed twice)
statement ok
SELECT * FROM parquet_schema(['data/parquet-testing/decimal/int64_decimal.parquet', 'data/parquet-testing/decimal/int64_decimal.parquet']);

# physical parquet type and mapped DuckDB type of every column that has a type;
# ordered by column_id so the expected row order does not depend on scan order
query III
SELECT name, type, duckdb_type
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet')
WHERE type IS NOT NULL
ORDER BY column_id;
----
l_orderkey	INT64	BIGINT
l_partkey	INT64	BIGINT
l_suppkey	INT64	BIGINT
l_linenumber	INT32	INTEGER
l_quantity	INT32	INTEGER
l_extendedprice	DOUBLE	DOUBLE
l_discount	DOUBLE	DOUBLE
l_tax	DOUBLE	DOUBLE
l_returnflag	BYTE_ARRAY	VARCHAR
l_linestatus	BYTE_ARRAY	VARCHAR
l_shipdate	BYTE_ARRAY	VARCHAR
l_commitdate	BYTE_ARRAY	VARCHAR
l_receiptdate	BYTE_ARRAY	VARCHAR
l_shipinstruct	BYTE_ARRAY	VARCHAR
l_shipmode	BYTE_ARRAY	VARCHAR
l_comment	BYTE_ARRAY	VARCHAR

# column_id: id 0 is the schema root (spark_schema), the columns follow as 1..16
query II
SELECT column_id, name
FROM parquet_schema('data/parquet-testing/lineitem-top10000.gzip.parquet')
ORDER BY column_id;
----
0	spark_schema
1	l_orderkey
2	l_partkey
3	l_suppkey
4	l_linenumber
5	l_quantity
6	l_extendedprice
7	l_discount
8	l_tax
9	l_returnflag
10	l_linestatus
11	l_shipdate
12	l_commitdate
13	l_receiptdate
14	l_shipinstruct
15	l_shipmode
16	l_comment

# column_id over a recursive glob: 9 schema rows in total, at most 3 rows for any
# single file_name, and only 3 distinct column_id values across all matched files
query III
WITH per_file AS (
    SELECT
        file_name,
        COUNT(*) AS rows_per_file
    FROM parquet_schema('data/parquet-testing/glob3/**/*.parquet')
    GROUP BY file_name
)
SELECT
    SUM(rows_per_file) AS total_rows,
    MAX(rows_per_file) AS max_rows_per_filename,
    (SELECT COUNT(DISTINCT column_id)
     FROM parquet_schema('data/parquet-testing/glob3/**/*.parquet')) AS distinct_column_ids
FROM per_file;
----
9	3	3