should be it
This commit is contained in:
107
external/duckdb/test/sql/copy/parquet/parquet_filename.test
vendored
Normal file
107
external/duckdb/test/sql/copy/parquet/parquet_filename.test
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
# name: test/sql/copy/parquet/parquet_filename.test
|
||||
# description: Test the filename option of the parquet reader
|
||||
# group: [parquet]
|
||||
|
||||
require parquet
|
||||
|
||||
# Simple glob with the filename column enabled (FILENAME=1).
# Backslashes in the reported path are replaced with forward slashes so the expected output also matches on Windows.
|
||||
query III
|
||||
select i, j, replace(filename, '\', '/') from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) order by i;
|
||||
----
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
2 b data/parquet-testing/glob/t2.parquet
|
||||
3 c data/parquet-testing/glob2/t1.parquet
|
||||
|
||||
# Filter on the filename column, via the slash-normalized alias "file": only the row from glob2/t1.parquet is expected.
|
||||
query III
|
||||
select i, j, replace(filename, '\', '/') as file from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where file='data/parquet-testing/glob2/t1.parquet';
|
||||
----
|
||||
3 c data/parquet-testing/glob2/t1.parquet
|
||||
|
||||
# Filter on a regular column (id < 1000) of a file whose rows span multiple vectors.
# count(filename) only counts rows where the generated filename is non-NULL.
|
||||
query I
|
||||
SELECT count(filename) FROM parquet_scan('data/parquet-testing/p2.parquet', FILENAME=1) where id < 1000;
|
||||
----
|
||||
479
|
||||
|
||||
# Filter pushdown on the filename column: the predicate compares the generated filename against the literal 'data'.
# The scanned path starts with 'data', so rows from this file are expected to pass the >= comparison.
|
||||
query I
|
||||
SELECT count(id) FROM parquet_scan('data/parquet-testing/p2.parquet', FILENAME=1) where filename >= 'data';
|
||||
----
|
||||
4979
|
||||
|
||||
# Filter on a non-filename column (i=2) while projecting only the slash-normalized filename.
|
||||
query I
|
||||
select replace(filename, '\', '/') from parquet_scan('data/parquet-testing/glob*/t?.parquet', FILENAME=1) where i=2;
|
||||
----
|
||||
data/parquet-testing/glob/t2.parquet
|
||||
|
||||
# CSV filename name conflict: build a table that already has a column named "filename" and write it to a CSV with a header.
statement ok
|
||||
CREATE TABLE test_csv AS SELECT 1 as id, 'test_csv_content' as filename;
|
||||
|
||||
statement ok
|
||||
COPY test_csv TO '__TEST_DIR__/filename_as_column.csv' WITH HEADER;
|
||||
|
||||
# Reading that CSV back with FILENAME=1 is currently expected to fail (with a binder error, per the original note).
# No specific error message is listed after the separator below.
|
||||
statement error
|
||||
SELECT id, filename FROM read_csv_auto('__TEST_DIR__/filename_as_column.csv', FILENAME=1);
|
||||
----
|
||||
|
||||
# Parquet filename name conflict: write a Parquet file whose own schema already contains a column named "filename".
|
||||
statement ok
|
||||
CREATE TABLE test AS SELECT 1 as id, 'test' as filename;
|
||||
|
||||
statement ok
|
||||
COPY test TO '__TEST_DIR__/filename_as_column.parquet';
|
||||
|
||||
# A file column named "filename" is currently not supported together with the filename option, so this scan is expected to error.
# No specific error message is listed after the separator below.
|
||||
statement error
|
||||
SELECT * FROM parquet_scan('__TEST_DIR__/filename_as_column.parquet', FILENAME=1);
|
||||
----
|
||||
|
||||
# Now also test loading into an existing table whose third column is named "filename".
|
||||
statement ok
|
||||
CREATE TABLE test_copy (i INT, j VARCHAR, filename VARCHAR);
|
||||
|
||||
# First insert: read_parquet with filename=1 plus binary_as_string=1.
statement ok
|
||||
INSERT INTO test_copy FROM read_parquet('data/parquet-testing/glob/t1.parquet', filename=1, binary_as_string=1);
|
||||
|
||||
# The table should now hold the single row of t1.parquet together with its (slash-normalized) source path.
query III
|
||||
SELECT i, j, replace(filename, '\', '/') FROM test_copy
|
||||
----
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
|
||||
# Second insert: same file, this time with only filename=1.
statement ok
|
||||
INSERT INTO test_copy FROM read_parquet('data/parquet-testing/glob/t1.parquet', filename=1);
|
||||
|
||||
# Both inserts produce the same row, so two identical rows are expected.
query III
|
||||
SELECT i, j, replace(filename, '\', '/') FROM test_copy
|
||||
----
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
|
||||
# COPY ... FROM the same file (no filename option given) into the three-column table must fail;
# the expected error text is "column count mismatch".
statement error
|
||||
COPY test_copy FROM 'data/parquet-testing/glob/t1.parquet';
|
||||
----
|
||||
column count mismatch
|
||||
|
||||
# Multiple row groups in the same file: 10000 rows (0..9999) are written with ROW_GROUP_SIZE 1000.
# The regexp_replace strips everything up to the last '/' so only the base file name is compared;
# sum(i) over i > 5000 (5001..9999) is 37492500.
|
||||
statement ok
|
||||
CREATE TABLE test_table_large AS SELECT * FROM range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
COPY test_table_large TO '__TEST_DIR__/test_table_large.parquet' (ROW_GROUP_SIZE 1000);
|
||||
|
||||
query II
|
||||
SELECT sum(i), max(regexp_replace(filename, '^.*/', '')) FROM parquet_scan('__TEST_DIR__/test_table_large.parquet', FILENAME=1) where i>5000;
|
||||
----
|
||||
37492500 test_table_large.parquet
|
||||
|
||||
# Same file listed twice: t1.parquet appears two times in the file list, so its row is expected twice.
# The row from t2.parquet is removed by the LIKE '%t1%' filter on the slash-normalized filename.
|
||||
query III
|
||||
SELECT i, j, replace(filename, '\', '/') as file FROM parquet_scan(['data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t1.parquet', 'data/parquet-testing/glob/t2.parquet'], FILENAME=1) where file like '%t1%'
|
||||
----
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
1 a data/parquet-testing/glob/t1.parquet
|
||||
|
||||
|
||||
Reference in New Issue
Block a user