should be it
68
external/duckdb/test/sql/copy/partitioned/hive_filter_pushdown.test
vendored
Normal file
@@ -0,0 +1,68 @@
# name: test/sql/copy/partitioned/hive_filter_pushdown.test
# description: confirm that hive-specific filter pushdown does not mess up the filters
# group: [partitioned]

require parquet

# Confirm
statement ok
COPY (SELECT i::VARCHAR as a, (i*10)::VARCHAR as b, (i*100)::VARCHAR as c from range(0,10) tbl(i)) TO '__TEST_DIR__/hive_pushdown_bug' (FORMAT PARQUET, PARTITION_BY c);

query II
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1) where a > b;
----
physical_plan <!REGEX>:.*AND.*

query II nosort q1
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1) where a > b;
----

query II nosort q1
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet') where a > b;
----
physical_plan <!REGEX>:.*PARQUET_SCAN.*File Filters:.*

query II
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1) where a::VARCHAR > c::VARCHAR;
----
physical_plan <!REGEX>:.*AND.*

# no file filters here
query II nosort q2
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1) where a::VARCHAR > c::VARCHAR;
----
physical_plan <!REGEX>:.*PARQUET_SCAN.*File Filters:.*

query II nosort q2
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet') where a::VARCHAR > c::VARCHAR;
----
physical_plan <!REGEX>:.*PARQUET_SCAN.*File Filters:.*

# Check that hive specific filters are pushed down into the explain plan regardless of format type
query II
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=1) where c=500;
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(c = 500\).*Scanning Files:.*1\/10.*

query II
explain SELECT * FROM parquet_scan('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=1) where c=500 and b='20';
----
physical_plan <REGEX>:.*PARQUET_SCAN.*File Filters:.*\(c = 500\).*Scanning Files:.*1\/10.*

# File Filters show up in read csv auto for hive partitioned csv files.
statement ok
COPY (SELECT i::VARCHAR as a, (i*10)::VARCHAR as b, (i*100)::VARCHAR as c from range(0,10) tbl(i)) TO '__TEST_DIR__/hive_pushdown_bug_csv' (FORMAT CSV, PARTITION_BY c);

query II
explain SELECT * FROM read_csv_auto('__TEST_DIR__/hive_pushdown_bug_csv/*/*.csv', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=1, names=['a','b','c']) where c=500;
----
physical_plan <REGEX>:.*READ_CSV_AUTO.*File Filters:.*\(c = 500\).*Scanning Files:.*1\/10.*

# same for json partitioned files
#statement ok
#COPY (SELECT i::VARCHAR as a, (i*10)::VARCHAR as b, (i*100)::VARCHAR as c from range(0,10) tbl(i)) TO '__TEST_DIR__/hive_pushdown_bug_csv' (PARTITION_BY c);
#
#query II
#explain SELECT * FROM read_csv_auto('__TEST_DIR__/hive_pushdown_bug/*/*.parquet', HIVE_PARTITIONING=1, names=['a','b','c']) where c=500;
#----
#physical_plan <REGEX>:.*Filters:\s*c=500.*
63
external/duckdb/test/sql/copy/partitioned/hive_partition_append.test
vendored
Normal file
@@ -0,0 +1,63 @@
# name: test/sql/copy/partitioned/hive_partition_append.test
# description: test APPEND mode for hive partitioned write
# group: [partitioned]

require parquet

statement ok
CREATE TABLE sensor_data(ts TIMESTAMP, value INT);

statement ok
INSERT INTO sensor_data VALUES
(TIMESTAMP '2000-01-01 01:02:03', 42),
(TIMESTAMP '2000-02-01 01:02:03', 100),
(TIMESTAMP '2000-03-01 12:11:10', 1000)
;

statement ok
COPY (SELECT YEAR(ts) AS year, MONTH(ts) AS month, * FROM sensor_data)
TO '__TEST_DIR__/partitioned_append' (FORMAT PARQUET, PARTITION_BY (year, month), APPEND);

query III
SELECT year, month, SUM(value) FROM '__TEST_DIR__/partitioned_append/**/*.parquet' GROUP BY ALL ORDER BY ALL
----
2000 1 42
2000 2 100
2000 3 1000

statement ok
DELETE FROM sensor_data;

statement ok
INSERT INTO sensor_data VALUES
(TIMESTAMP '2000-01-01 02:02:03', 62),
(TIMESTAMP '2000-03-01 13:11:10', 50)
;

statement ok
COPY (SELECT YEAR(ts) AS year, MONTH(ts) AS month, * FROM sensor_data)
TO '__TEST_DIR__/partitioned_append' (FORMAT PARQUET, PARTITION_BY (year, month), APPEND, FILENAME_PATTERN 'my_pattern_{uuid}');

query III
SELECT year, month, SUM(value) FROM '__TEST_DIR__/partitioned_append/**/*.parquet' GROUP BY ALL ORDER BY ALL
----
2000 1 104
2000 2 100
2000 3 1050

statement ok
COPY (SELECT YEAR(ts) AS year, MONTH(ts) AS month, * FROM sensor_data)
TO '__TEST_DIR__/partitioned_append' (FORMAT PARQUET, PARTITION_BY (year, month), FILENAME_PATTERN 'my_pattern_{uuid}', APPEND);

query III
SELECT year, month, SUM(value) FROM '__TEST_DIR__/partitioned_append/**/*.parquet' GROUP BY ALL ORDER BY ALL
----
2000 1 166
2000 2 100
2000 3 1100

statement error
COPY (SELECT YEAR(ts) AS year, MONTH(ts) AS month, * FROM sensor_data)
TO '__TEST_DIR__/partitioned_append' (FORMAT PARQUET, PARTITION_BY (year, month), APPEND, FILENAME_PATTERN 'my_pattern_without_uuid');
----
APPEND mode requires a {uuid} label in filename_pattern
11
external/duckdb/test/sql/copy/partitioned/hive_partition_case_insensitive_column.test
vendored
Normal file
@@ -0,0 +1,11 @@
# name: test/sql/copy/partitioned/hive_partition_case_insensitive_column.test
# description: Test when columns in the hive partitioned files differ only in case from the partitions themselves
# group: [partitioned]

require parquet

query II
SELECT * FROM 'data/parquet-testing/hive-partitioning/ci-column-names/**/*.parquet' ORDER BY ALL
----
Hannes 2
Mark 1
25
external/duckdb/test/sql/copy/partitioned/hive_partition_compression.test
vendored
Normal file
@@ -0,0 +1,25 @@
# name: test/sql/copy/partitioned/hive_partition_compression.test
# description: Test we can round-trip partitioned compressed Parquet files
# group: [partitioned]

statement ok
PRAGMA enable_verification

require parquet

statement ok
CREATE TABLE test AS VALUES ('a', 'foo', 1), ('a', 'foo', 2), ('a', 'bar', 1), ('b', 'bar', 1);

statement ok
COPY (FROM test) TO '__TEST_DIR__/hive_partition_compress' (FORMAT parquet, COMPRESSION 'gzip', PARTITION_BY ('col0', 'col1'));

# Specify Compression
query III
FROM read_parquet('__TEST_DIR__/hive_partition_compress/*/*/*.parquet')
ORDER BY ALL
----
1 a bar
1 a foo
1 b bar
2 a foo

15
external/duckdb/test/sql/copy/partitioned/hive_partition_duplicate_name.test
vendored
Normal file
@@ -0,0 +1,15 @@
# name: test/sql/copy/partitioned/hive_partition_duplicate_name.test
# description: Test partitioning names with duplicate keys
# group: [partitioned]

require parquet

# we just use the first partitioning key by default
query III
select *
from parquet_scan('data/parquet-testing/hive-partitioning/duplicate_names/**/*.parquet')
ORDER BY ALL
----
1 value1 1
2 value2 2

74
external/duckdb/test/sql/copy/partitioned/hive_partition_escape.test
vendored
Normal file
@@ -0,0 +1,74 @@
# name: test/sql/copy/partitioned/hive_partition_escape.test
# description: Test escaping during hive partition read/write
# group: [partitioned]

require parquet

statement ok
CREATE SEQUENCE seq

statement ok
CREATE TABLE weird_tbl(id INT DEFAULT nextval('seq'), key VARCHAR)

statement ok
INSERT INTO weird_tbl (key) VALUES
('/'),
('\/\/'),
('==='),
('value with strings'),
('?:&'),
('🦆'),
('==='),
('===');


statement ok
COPY weird_tbl TO '__TEST_DIR__/escaped_partitions' (FORMAT PARQUET, PARTITION_BY(key))

query II
select key, COUNT(*)
from parquet_scan('__TEST_DIR__/escaped_partitions/**/*.parquet')
GROUP BY ALL
ORDER BY ALL
----
/ 1
=== 3
?:& 1
\/\/ 1
value with strings 1
🦆 1


# now with columns with weird characters in the name
statement ok
ALTER TABLE weird_tbl RENAME COLUMN key TO "=/ \\/"

# this column name won't work with automatic HIVE partition due to the equal character
statement ok
COPY weird_tbl TO '__TEST_DIR__/escaped_partitions_names' (FORMAT PARQUET, PARTITION_BY("=/ \\/"))

statement error
select "=/ \\/", COUNT(*)
from parquet_scan('__TEST_DIR__/escaped_partitions_names/**/*.parquet')
GROUP BY ALL
ORDER BY ALL
----
Binder Error: Referenced column "=/ \\/" not found in FROM clause!


# if we write the partition column on files, it can be read
statement ok
COPY weird_tbl TO '__TEST_DIR__/escaped_partitions_names' (FORMAT PARQUET, PARTITION_BY("=/ \\/"), OVERWRITE, WRITE_PARTITION_COLUMNS)

query II
select "=/ \\/", COUNT(*)
from parquet_scan('__TEST_DIR__/escaped_partitions_names/**/*.parquet')
GROUP BY ALL
ORDER BY ALL
----
/ 1
=== 3
?:& 1
\/\/ 1
value with strings 1
🦆 1
78
external/duckdb/test/sql/copy/partitioned/hive_partition_join_pushdown.test
vendored
Normal file
@@ -0,0 +1,78 @@
# name: test/sql/copy/partitioned/hive_partition_join_pushdown.test
# description: Test pruning of hive partitions through join conditions
# group: [partitioned]

require parquet

# partitions
statement ok
CREATE TABLE tbl AS SELECT i//1000 AS partition, i FROM range(10000) t(i)

statement ok
COPY tbl TO '__TEST_DIR__/partition_join_pushdown' (FORMAT parquet, PARTITION_BY (partition))

query II
EXPLAIN ANALYZE SELECT COUNT(*), MIN(partition), MAX(partition), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown/**/*.parquet'
----
analyzed_plan <REGEX>:.*Total Files Read.*10.*

query IIII
SELECT COUNT(*), MIN(partition), MAX(partition), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown/**/*.parquet'
WHERE partition=(SELECT MAX(partition) FROM tbl)
----
1000 9 9 9499500

query II
EXPLAIN ANALYZE SELECT COUNT(*), MIN(partition), MAX(partition), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown/**/*.parquet'
WHERE partition=(SELECT MAX(partition) FROM tbl)
----
analyzed_plan <REGEX>:.*Total Files Read.*1.*

query IIII
SELECT COUNT(*), MIN(partition), MAX(partition), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown/**/*.parquet'
WHERE i>=9980 AND partition=(SELECT MAX(partition) FROM tbl)
----
20 9 9 199790

# multiple filters on the same partition
query IIII
SELECT COUNT(*), MIN(partition), MAX(partition), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown/**/*.parquet'
WHERE partition>5 AND partition=(SELECT MAX(partition) FROM tbl)
----
1000 9 9 9499500

# multiple partitions
statement ok
CREATE TABLE tbl2 AS SELECT (date '2000-01-01' + interval (i//2000) years)::DATE AS part1, i%2 AS part2, i FROM range(10000) t(i)

statement ok
COPY tbl2 TO '__TEST_DIR__/partition_join_pushdown_multi' (FORMAT parquet, PARTITION_BY (part1, part2))

# multiple join filters
query IIIIII
SELECT COUNT(*), MIN(part1), MAX(part1), MIN(part2), MAX(part2), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown_multi/**/*.parquet'
WHERE part1=(SELECT MAX(part1) FROM tbl2) AND part2=(SELECT MAX(part2) FROM tbl2)
----
1000 2004-01-01 2004-01-01 1 1 9000000

# mix of static and join filters
query IIIIII
SELECT COUNT(*), MIN(part2), MAX(part2), MIN(part1), MAX(part1), SUM(i)
FROM '__TEST_DIR__/partition_join_pushdown_multi/**/*.parquet'
WHERE part2=(SELECT MAX(part2) FROM tbl2) AND part1=date '2004-01-01'
----
1000 1 1 2004-01-01 2004-01-01 9000000

# only selecting a single column
query I
SELECT COUNT(*)
FROM '__TEST_DIR__/partition_join_pushdown_multi/**/*.parquet'
WHERE part2=(SELECT MAX(part2) FROM tbl2)
----
5000
44
external/duckdb/test/sql/copy/partitioned/hive_partition_recursive_cte.test
vendored
Normal file
@@ -0,0 +1,44 @@
# name: test/sql/copy/partitioned/hive_partition_recursive_cte.test
# description: Test for hive partitioned read with recursive CTE
# group: [partitioned]

require parquet

#statement ok
#PRAGMA enable_verification

# create a table
statement ok
CREATE TABLE t AS SELECT 2000+i%10 AS year, 1+i%3 AS month, i%4 AS c, i%5 AS d FROM RANGE(0,20) tbl(i);

statement ok
COPY t TO '__TEST_DIR__/partition_rec_cte' (FORMAT PARQUET, PARTITION_BY (year, month));

statement ok
CREATE VIEW partitioned_tbl AS FROM '__TEST_DIR__/partition_rec_cte/**/*.parquet';

loop i 0 2

# this recursive CTE iterates over the years (2000...2009) and counts the number of rows in each of the years
# then at the end we add everything up

query III
WITH RECURSIVE cte AS (
SELECT 0 AS count, 1999 AS selected_year
UNION ALL
SELECT COUNT(*) AS count, MAX(partitioned_tbl.year)
FROM partitioned_tbl, (SELECT MAX(selected_year) AS next_year FROM cte)
WHERE partitioned_tbl.year = (SELECT MAX(selected_year) + 1 FROM cte)
HAVING COUNT(*)>0
)
SELECT SUM(count), MIN(selected_year), MAX(selected_year)
FROM cte
WHERE count>0
----
20 2000 2009

# retry with union by name
statement ok
CREATE OR REPLACE VIEW partitioned_tbl AS FROM read_parquet('__TEST_DIR__/partition_rec_cte/**/*.parquet', union_by_name=True);

endloop
417
external/duckdb/test/sql/copy/partitioned/hive_partitioned_auto_detect.test
vendored
Normal file
@@ -0,0 +1,417 @@
# name: test/sql/copy/partitioned/hive_partitioned_auto_detect.test
# description: basic tests for the hive partition auto detection
# group: [partitioned]

statement ok
PRAGMA enable_verification

# create a table
statement ok
CREATE TABLE t AS SELECT i%2 AS year, i%3 AS month, i%4 AS c, i%5 AS d FROM RANGE(0,20) tbl(i);

# without partition columns written
# test a csv partition by year
statement ok
COPY t TO '__TEST_DIR__/csv_partition_1' (partition_by(year));

query I
select count(*) from glob('__TEST_DIR__/csv_partition_1/**');
----
2

# with HIVE_PARTITIONING=0, directory names won't be read unless they are written in data
query III
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b c

# with HIVE_PARTITIONING, column name from directory name supersedes "names" parameter
query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b c year

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d']) LIMIT 1;
----
a b c year

# test a csv partition by year,month
statement ok
COPY t TO '__TEST_DIR__/csv_partition_2' (partition_by(year,month));

query I
select count(*) from glob('__TEST_DIR__/csv_partition_2/**');
----
6

query II
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b month year

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d']) LIMIT 1;
----
a b month year

# test a single file
query I
select count(*) from glob('__TEST_DIR__/t.csv');
----
0

statement ok
COPY t TO '__TEST_DIR__/bad_file.csv';

query I
select count(*) from glob('__TEST_DIR__/bad_file.csv');
----
1

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b c d

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b c d

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d']) LIMIT 1;
----
a b c d

# add bad file to list: hive partitioning will be false, because scheme doesn't match
query II
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], HIVE_PARTITIONING=0, names=['a','b','c','d']) LIMIT 1;
----
a b

statement error
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], HIVE_PARTITIONING=1, names=['a','b','c','d']) LIMIT 1;
----
Binder Error: Hive partition mismatch

query II
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], names=['a','b','c','d']) LIMIT 1;
----
a b

# same tests with parquet
require parquet

# test a parquet partition by year
statement ok
COPY t TO '__TEST_DIR__/parquet_partition_1' (format parquet, partition_by(year));

query I
select count(*) from glob('__TEST_DIR__/parquet_partition_1/**');
----
2

query III
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**', HIVE_PARTITIONING=0) LIMIT 1;
----
month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**', HIVE_PARTITIONING=1) LIMIT 1;
----
month c d year

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**') LIMIT 1;
----
month c d year

# test a parquet partition by year,month
statement ok
COPY t TO '__TEST_DIR__/parquet_partition_2' (format parquet, partition_by(year,month));

query I
select count(*) from glob('__TEST_DIR__/parquet_partition_2/**');
----
6

query II
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**', HIVE_PARTITIONING=0) LIMIT 1;
----
c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**', HIVE_PARTITIONING=1) LIMIT 1;
----
c d month year

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**') LIMIT 1;
----
c d month year

# test a single file
query I
select count(*) from glob('__TEST_DIR__/t.parquet');
----
0

statement ok
COPY t TO '__TEST_DIR__/t.parquet' (format parquet);

query I
select count(*) from glob('__TEST_DIR__/t.parquet');
----
1

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet', HIVE_PARTITIONING=0) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet', HIVE_PARTITIONING=1) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet') LIMIT 1;
----
year month c d

# add bad file to list: hive partitioning will be false, because scheme doesn't match
query II
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet'], HIVE_PARTITIONING=0) LIMIT 1;
----
c d

statement error
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet'], HIVE_PARTITIONING=1) LIMIT 1;
----
Binder Error: Hive partition mismatch

query II
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet']) LIMIT 1;
----
c d


# with partition columns written
# test a csv partition by year
statement ok
COPY t TO '__TEST_DIR__/csv_partition_1' (partition_by(year), overwrite_or_ignore, write_partition_columns);

query I
select count(*) from glob('__TEST_DIR__/csv_partition_1/**');
----
2

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b c d

query IIIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b c d year

query IIIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_1/**', names=['a','b','c','d']) LIMIT 1;
----
a b c d year

# test a csv partition by year,month
statement ok
COPY t TO '__TEST_DIR__/csv_partition_2' (partition_by(year,month), overwrite_or_ignore, write_partition_columns);

query I
select count(*) from glob('__TEST_DIR__/csv_partition_2/**');
----
6

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b c d

query IIIIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b c d month year

query IIIIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/csv_partition_2/**', names=['a','b','c','d']) LIMIT 1;
----
a b c d month year

# test a single file
query I
select count(*) from glob('__TEST_DIR__/t.csv');
----
0

statement ok
COPY t TO '__TEST_DIR__/bad_file.csv';

query I
select count(*) from glob('__TEST_DIR__/bad_file.csv');
----
1

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d'], HIVE_PARTITIONING=0) LIMIT 1;
----
a b c d

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d'], HIVE_PARTITIONING=1) LIMIT 1;
----
a b c d

query IIII
select alias(columns(*)) from read_csv_auto('__TEST_DIR__/bad_file.csv', names=['a','b','c','d']) LIMIT 1;
----
a b c d

# add bad file to list: hive partitioning will be false, because scheme doesn't match
query IIII
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], HIVE_PARTITIONING=0, names=['a','b','c','d']) LIMIT 1;
----
a b c d

statement error
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], HIVE_PARTITIONING=1, names=['a','b','c','d']) LIMIT 1;
----
Binder Error: Hive partition mismatch

query IIII
select alias(columns(*)) from read_csv_auto(['__TEST_DIR__/csv_partition_2/**', '__TEST_DIR__/bad_file.csv'], names=['a','b','c','d']) LIMIT 1;
----
a b c d



# same tests with parquet
require parquet

# test a parquet partition by year
statement ok
COPY t TO '__TEST_DIR__/parquet_partition_1' (format parquet, partition_by(year), overwrite_or_ignore, write_partition_columns);

query I
select count(*) from glob('__TEST_DIR__/parquet_partition_1/**');
----
2

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**', HIVE_PARTITIONING=0) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**', HIVE_PARTITIONING=1) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_1/**') LIMIT 1;
----
year month c d

# test a parquet partition by year,month
statement ok
COPY t TO '__TEST_DIR__/parquet_partition_2' (format parquet, partition_by(year,month), overwrite_or_ignore, write_partition_columns);

query I
select count(*) from glob('__TEST_DIR__/parquet_partition_2/**');
----
6

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**', HIVE_PARTITIONING=0) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**', HIVE_PARTITIONING=1) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/parquet_partition_2/**') LIMIT 1;
----
year month c d

# test a single file
statement ok
COPY t TO '__TEST_DIR__/t.parquet' (format parquet);

query I
select count(*) from glob('__TEST_DIR__/t.parquet');
----
1

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet', HIVE_PARTITIONING=0) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet', HIVE_PARTITIONING=1) LIMIT 1;
----
year month c d

query IIII
select alias(columns(*)) from read_parquet('__TEST_DIR__/t.parquet') LIMIT 1;
----
year month c d

# add bad file to list: hive partitioning will be false, because scheme doesn't match
query IIII
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet'], HIVE_PARTITIONING=0) LIMIT 1;
----
year month c d

statement error
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet'], HIVE_PARTITIONING=1) LIMIT 1;
----
Binder Error: Hive partition mismatch

query IIII
select alias(columns(*)) from read_parquet(['__TEST_DIR__/parquet_partition_2/**', '__TEST_DIR__/t.parquet']) LIMIT 1;
----
year month c d

query IIII
select i,j,k,x
from read_parquet('data/parquet-testing/hive-partitioning/union_by_name/*/*.parquet', hive_partitioning=0, union_by_name=1)
order by j,x nulls last;
----
42 84 NULL 1
42 84 NULL NULL
NULL 128 33 NULL

query IIII
select i,j,k,x
from read_parquet('data/parquet-testing/hive-partitioning/union_by_name/*/*.parquet', hive_partitioning=1, union_by_name=1)
order by j,x nulls last;
----
42 84 NULL 1
42 84 NULL 1
NULL 128 33 2

query IIII
select i,j,k,x
from read_parquet('data/parquet-testing/hive-partitioning/union_by_name/*/*.parquet', union_by_name=1)
order by j,x nulls last;
----
42 84 NULL 1
42 84 NULL 1
NULL 128 33 2
195
external/duckdb/test/sql/copy/partitioned/hive_partitioned_write.test
vendored
Normal file
@@ -0,0 +1,195 @@
# name: test/sql/copy/partitioned/hive_partitioned_write.test
# description: basic tests for the hive partitioned write
# group: [partitioned]

require parquet

# Simple table that is easy to partition
statement ok
CREATE TABLE test as SELECT i%2 as part_col, (i+1)%5 as value_col, i as value2_col from range(0,10) tbl(i);

statement ok
COPY test TO '__TEST_DIR__/partitioned1' (FORMAT PARQUET, PARTITION_BY (part_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned1/part_col=0/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned1/part_col=1/*.parquet' ORDER BY value2_col;
----
1 2 1
1 4 3
1 1 5
1 3 7
1 0 9

# Want a modified version of the partition_col? (for example to do custom string conversion?) No problem:
statement ok
COPY (SELECT * EXCLUDE (part_col), 'prefix-'::VARCHAR || part_col::VARCHAR as part_col FROM test) TO '__TEST_DIR__/partitioned2' (FORMAT PARQUET, PARTITION_BY (part_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned2/part_col=prefix-0/*.parquet' ORDER BY value2_col;
----
prefix-0 1 0
prefix-0 3 2
prefix-0 0 4
prefix-0 2 6
prefix-0 4 8

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned2/part_col=prefix-1/*.parquet' ORDER BY value2_col;
----
prefix-1 2 1
prefix-1 4 3
prefix-1 1 5
prefix-1 3 7
prefix-1 0 9

# Test partitioning by all
statement ok
COPY test TO '__TEST_DIR__/partitioned3' (FORMAT PARQUET, PARTITION_BY '*', WRITE_PARTITION_COLUMNS);

query I
SELECT min(value2_col) as min_val
FROM parquet_scan('__TEST_DIR__/partitioned3/part_col=*/value_col=*/value2_col=*/*.parquet', FILENAME=1)
GROUP BY filename
ORDER BY min_val
----
0
1
2
3
4
5
6
7
8
9

# single col as param is also fine
statement ok
COPY test TO '__TEST_DIR__/partitioned4' (FORMAT PARQUET, PARTITION_BY part_col);

query III
SELECT part_col, value_col, value2_col FROM parquet_scan('__TEST_DIR__/partitioned4/part_col=*/*.parquet', HIVE_PARTITIONING=1) WHERE part_col=0 ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8


# a file already exists, throw error
statement error
COPY test TO '__TEST_DIR__/partitioned4' (FORMAT PARQUET, PARTITION_BY part_col);
----
Directory

# A trailing slash is also fine!
statement ok
COPY test TO '__TEST_DIR__/partitioned5/' (FORMAT PARQUET, PARTITION_BY part_col);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned5/part_col=0/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Cannot use the USE_TMP_FILE option simultaneously with partitioning
statement error
COPY test TO '__TEST_DIR__/partitioned6' (FORMAT PARQUET, PARTITION_BY part_col, USE_TMP_FILE TRUE);
----
Not implemented Error: Can't combine USE_TMP_FILE and PARTITION_BY for COPY

# Technically it doesn't really matter, as currently our partition_by behaves similarly, but for clarity users should just
# EITHER use partition_by or per_thread_output.
statement error
COPY test TO '__TEST_DIR__/partitioned6' (FORMAT PARQUET, PARTITION_BY part_col, PER_THREAD_OUTPUT TRUE);
----
Not implemented Error: Can't combine PER_THREAD_OUTPUT and PARTITION_BY for COPY

# partitioning csv files is also a thing
statement ok
COPY test TO '__TEST_DIR__/partitioned7' (FORMAT CSV, PARTITION_BY part_col);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned7/part_col=0/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned7/part_col=1/*.csv' ORDER BY value2_col;
----
1 2 1
1 4 3
1 1 5
1 3 7
1 0 9

# Don't care about capitalization
statement ok
COPY test TO '__TEST_DIR__/partitioned8' (FORMAT PARQUET, PARTITION_BY pArt_cOl);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/partitioned8/part_col=0/*.parquet' ORDER BY value2_cOl;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Order matters!
statement ok
COPY test TO '__TEST_DIR__/partitioned9' (FORMAT PARQUET, PARTITION_BY (part_col, value_col));

query I
SELECT min(value2_col) as min_val
FROM parquet_scan('__TEST_DIR__/partitioned9/part_col=*/value_col=*/*.parquet', FILENAME=1)
GROUP BY filename
ORDER BY min_val
----
0
1
2
3
4
5
6
7
8
9

statement ok
COPY test TO '__TEST_DIR__/partitioned10' (FORMAT PARQUET, PARTITION_BY (value_col, part_col));

query I
SELECT min(value2_col) as min_val
FROM parquet_scan('__TEST_DIR__/partitioned10/value_col=*/part_col=*/*.parquet', FILENAME=1)
GROUP BY filename
ORDER BY min_val
----
0
1
2
3
4
5
6
7
8
9
51
external/duckdb/test/sql/copy/partitioned/hive_partitioned_write.test_slow
vendored
Normal file
@@ -0,0 +1,51 @@
# name: test/sql/copy/partitioned/hive_partitioned_write.test_slow
# description: slow test for the hive partitioned write
# group: [partitioned]

require parquet

require tpch

statement ok
pragma memory_limit='100mb'

# around 200MB worth of data, will require the PartitionedColumnData to spill to disk
statement ok
COPY (SELECT i%2::INT32 as part_col, i::INT32 FROM range(0,25000000) tbl(i)) TO '__TEST_DIR__/partitioned_memory_spill' (FORMAT parquet, PARTITION_BY part_col);

statement ok
pragma memory_limit='-1'

statement ok
call dbgen(sf=1);

# Partition by 2 columns
statement ok
COPY lineitem TO '__TEST_DIR__/lineitem_sf1_partitioned' (FORMAT PARQUET, PARTITION_BY (l_linestatus, l_returnflag));

statement ok
DROP TABLE lineitem;

statement ok
CREATE VIEW lineitem as SELECT * FROM parquet_scan('__TEST_DIR__/lineitem_sf1_partitioned/l_linestatus=*/l_returnflag=*/*.parquet', HIVE_PARTITIONING=1);

loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

statement ok
pragma threads=1;
47
external/duckdb/test/sql/copy/partitioned/hive_partitioning_overwrite.test
vendored
Normal file
@@ -0,0 +1,47 @@
# name: test/sql/copy/partitioned/hive_partitioning_overwrite.test
# description: Test OVERWRITE option
# group: [partitioned]

require parquet

# write a partition with value 42
statement ok
COPY (SELECT 42 AS part_col, 43 AS value_col) TO '__TEST_DIR__/overwrite_test' (FORMAT PARQUET, PARTITION_BY (part_col));

# writing to the same directory fails now
statement error
COPY (SELECT 84 AS part_col, 85 AS value_col) TO '__TEST_DIR__/overwrite_test' (FORMAT PARQUET, PARTITION_BY (part_col));
----
Enable OVERWRITE option to overwrite files

# test the overwrite setting
statement ok
COPY (SELECT 84 AS part_col, 85 AS value_col) TO '__TEST_DIR__/overwrite_test' (FORMAT PARQUET, PARTITION_BY (part_col), OVERWRITE 1);

# the old file (with part_col=42) should now be removed
query II
SELECT * FROM '__TEST_DIR__/overwrite_test/**/*.parquet'
----
85 84

# what if the target is already a file?
statement ok
COPY (SELECT 42 AS part_col) TO '__TEST_DIR__/overwrite_test2' (FORMAT PARQUET);

statement error
COPY (SELECT 84 AS part_col, 85 AS value_col) TO '__TEST_DIR__/overwrite_test2' (FORMAT PARQUET, PARTITION_BY (part_col));
----
it exists and is a file

statement ok
COPY (SELECT 84 AS part_col, 85 AS value_col) TO '__TEST_DIR__/overwrite_test2' (FORMAT PARQUET, PARTITION_BY (part_col), OVERWRITE 1);

query II
SELECT * FROM '__TEST_DIR__/overwrite_test2/**/*.parquet'
----
85 84

statement error
COPY (SELECT 84 AS part_col) TO '__TEST_DIR__/overwrite_test' (FORMAT PARQUET, PARTITION_BY (part_col), OVERWRITE 1, OVERWRITE_OR_IGNORE 1);
----
OVERWRITE
17
external/duckdb/test/sql/copy/partitioned/partition_issue_6304.test
vendored
Normal file
@@ -0,0 +1,17 @@
# name: test/sql/copy/partitioned/partition_issue_6304.test
# description: Issue #6304: INTERNAL Error: Comparison on NULL values
# group: [partitioned]

require parquet

statement ok
copy (select NULL as i, NULL as j from range(100000)) to '__TEST_DIR__/issue6304_null' (format parquet, partition_by(i), overwrite_or_ignore);

statement ok
copy (select 1 as i, 2 as j from range(100000)) to '__TEST_DIR__/issue6304_constant' (format parquet, partition_by(i), overwrite_or_ignore);

statement ok
copy (select NULL as i from range(100000)) to '__TEST_DIR__/issue6304_null' (format parquet, partition_by(i), overwrite_or_ignore, write_partition_columns);

statement ok
copy (select 1 as i from range(100000)) to '__TEST_DIR__/issue6304_constant' (format parquet, partition_by(i), overwrite_or_ignore, write_partition_columns);
159
external/duckdb/test/sql/copy/partitioned/partitioned_group_by.test
vendored
Normal file
@@ -0,0 +1,159 @@
# name: test/sql/copy/partitioned/partitioned_group_by.test
# description: Test partitioned aggregates
# group: [partitioned]

require no_vector_verification # query plans are not robust against VERIFY_VECTOR operator

require parquet

statement ok
CREATE TABLE partitioned_tbl AS SELECT i%2 AS partition, i col1, i // 7 col2, (i%3)::VARCHAR col3 FROM range(10000) t(i)

statement ok
COPY partitioned_tbl TO '__TEST_DIR__/partition_group_by' (FORMAT parquet, PARTITION_BY (partition))

statement ok
DROP TABLE partitioned_tbl

statement ok
CREATE VIEW partitioned_tbl AS FROM '__TEST_DIR__/partition_group_by/**/*.parquet'

query II
SELECT partition, SUM(col1)
FROM partitioned_tbl
GROUP BY partition
ORDER BY ALL
----
0 24995000
1 25000000

# make sure the partitioned aggregate is used
# all of these are identical, i.e. this gets folded into a single count aggregate
query II
EXPLAIN SELECT partition, SUM(col1)
FROM partitioned_tbl
GROUP BY partition
ORDER BY ALL
----
physical_plan <REGEX>:.*PARTITIONED_AGGREGATE.*

# distinct aggregate
query II
SELECT partition, COUNT(DISTINCT col2)
FROM partitioned_tbl
GROUP BY partition
ORDER BY ALL
----
0 1429
1 1429

# grouping sets
query II
SELECT partition, SUM(col1)
FROM partitioned_tbl
GROUP BY GROUPING SETS ((), (partition))
ORDER BY ALL
----
0 24995000
1 25000000
NULL 49995000

# filtered aggregate
query II
SELECT partition, SUM(col1) FILTER (col2%7>2)
FROM partitioned_tbl
GROUP BY partition
ORDER BY ALL
----
0 14302848
1 14302848

query II
SELECT SUM(col1), partition
FROM partitioned_tbl
GROUP BY partition
ORDER BY ALL
----
24995000 0
25000000 1

# filter
query II
SELECT partition, SUM(col1)
FROM partitioned_tbl
WHERE col2 > 100
GROUP BY partition
ORDER BY ALL
----
0 24870038
1 24875391

# partition on multiple columns
statement ok
CREATE TABLE partitioned_tbl2 AS SELECT i%2 AS partition1, i%3 AS partition2, i col1, i + 1 col2 FROM range(10000) t(i)

statement ok
COPY partitioned_tbl2 TO '__TEST_DIR__/partition_group_by_multiple' (FORMAT parquet, PARTITION_BY (partition1, partition2))

statement ok
DROP TABLE partitioned_tbl2

statement ok
CREATE VIEW partitioned_tbl2 AS FROM '__TEST_DIR__/partition_group_by_multiple/**/*.parquet'

query III
SELECT partition1, partition2, SUM(col1)
FROM partitioned_tbl2
GROUP BY partition1, partition2
ORDER BY ALL
----
0 0 8331666
0 1 8328334
0 2 8335000
1 0 8336667
1 1 8333333
1 2 8330000

# partition on a subset of the columns
query II
SELECT partition1, SUM(col1)
FROM partitioned_tbl2
GROUP BY partition1
ORDER BY ALL
----
0 24995000
1 25000000

query II
SELECT partition2, SUM(col1)
FROM partitioned_tbl2
GROUP BY partition2
ORDER BY ALL
----
0 16668333
1 16661667
2 16665000

# with a filter
query II
SELECT partition1, SUM(col1)
FROM partitioned_tbl2
WHERE partition2=0
GROUP BY partition1
ORDER BY ALL
----
0 8331666
1 8336667

# grouping sets
query III
SELECT partition1, partition2, SUM(col1)
FROM partitioned_tbl2
GROUP BY GROUPING SETS ((partition1), (partition2))
ORDER BY ALL
----
0 NULL 24995000
1 NULL 25000000
NULL 0 16668333
NULL 1 16661667
NULL 2 16665000
73
external/duckdb/test/sql/copy/partitioned/partitioned_write_tpch.test_slow
vendored
Normal file
@@ -0,0 +1,73 @@
# name: test/sql/copy/partitioned/partitioned_write_tpch.test_slow
# description: TPC-H test for hive partitioned write
# group: [partitioned]

require parquet

require tpch

statement ok
CALL dbgen(sf=1);

# test writing with a very low partition threshold
statement ok
SET partitioned_write_flush_threshold=10000;

# write lineitem partitioned by l_returnflag and l_linestatus
statement ok
COPY lineitem TO '__TEST_DIR__/lineitem_partitioned_parquet' (FORMAT PARQUET, PARTITION_BY (l_returnflag, l_linestatus));

# write to CSV as well
statement ok
COPY lineitem TO '__TEST_DIR__/lineitem_partitioned_csv' (FORMAT CSV, PARTITION_BY (l_returnflag, l_linestatus));

statement ok
DROP TABLE lineitem

statement ok
CREATE VIEW lineitem AS FROM '__TEST_DIR__/lineitem_partitioned_parquet/**/*.parquet'

# now run tpc-h - results should be the same
loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

statement ok
DROP VIEW lineitem

# try the CSV next - but copy it into a regular table
statement ok
CREATE TABLE lineitem AS FROM read_csv('__TEST_DIR__/lineitem_partitioned_csv/**/*.csv')

# now run tpc-h - results should be the same
loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop
280
external/duckdb/test/sql/copy/partitioned/skip_partition_column_writes.test
vendored
Normal file
@@ -0,0 +1,280 @@
# name: test/sql/copy/partitioned/skip_partition_column_writes.test
# description: Skip partition column writes (issue 11931 & 12147)
# group: [partitioned]

require parquet

statement ok
CREATE TABLE test as SELECT i%2 as part_col, (i+1)%5 as value_col, i as value2_col from range(0,10) tbl(i);

# Parquet

# Skip write of the first partition column
statement ok
COPY test TO '__TEST_DIR__/no-part-cols' (FORMAT PARQUET, PARTITION_BY (part_col));

# SELECT query returns all columns, but written files do not have partition columns
query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols/part_col=0/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Skip write of the first partition column with explicit option
statement ok
COPY test TO '__TEST_DIR__/no-part-cols-explicit' (FORMAT PARQUET, PARTITION_BY (part_col), WRITE_PARTITION_COLUMNS false);

# SELECT query returns all columns, but written files do not have partition columns
query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols-explicit/part_col=0/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Skip writes of 2 partition columns
statement ok
COPY test TO '__TEST_DIR__/no-part-cols2' (FORMAT PARQUET, PARTITION_BY (part_col, value_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols2/part_col=0/value_col=*/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Modified version of the partition_col
statement ok
COPY (SELECT * EXCLUDE (part_col), 'prefix-'::VARCHAR || part_col::VARCHAR as part_col FROM test) TO '__TEST_DIR__/no-part-cols3' (FORMAT PARQUET, PARTITION_BY (part_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols3/part_col=prefix-0/*.parquet' ORDER BY value2_col;
----
prefix-0 1 0
prefix-0 3 2
prefix-0 0 4
prefix-0 2 6
prefix-0 4 8

# Partitions of more than 8 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/no-part-cols4' (FORMAT PARQUET, PARTITION_BY (part_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/no-part-cols4/part_col=1/*.parquet' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last column out of 10 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/no-part-cols5' (FORMAT PARQUET, PARTITION_BY (value9_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/no-part-cols5/value9_col=*/*.parquet' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last 2 columns out of 10 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/no-part-cols6' (FORMAT PARQUET, PARTITION_BY (value8_col, value9_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/no-part-cols6/value8_col=*/value9_col=*/*.parquet' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last 3 columns out of 10 columns in a reverse order
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/no-part-cols7' (FORMAT PARQUET, PARTITION_BY (value9_col, value8_col, value7_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/no-part-cols7/value9_col=*/value8_col=*/value7_col=*/*.parquet' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Throw an error when all columns are specified as partitions
statement error
COPY test TO '__TEST_DIR__/no-part-cols8' (FORMAT PARQUET, PARTITION_BY (part_col, value_col, value2_col));
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.

# With explicit WRITE_PARTITION_COLUMNS option, all columns would still be written and still readable.
statement ok
COPY test TO '__TEST_DIR__/no-part-cols8' (FORMAT PARQUET, OVERWRITE, PARTITION_BY (part_col, value_col, value2_col), WRITE_PARTITION_COLUMNS);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols8/part_col=0/value_col=*/value2_col=*/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# '*' also ends up with error
statement error
COPY test TO '__TEST_DIR__/no-part-cols9' (FORMAT PARQUET, PARTITION_BY '*');
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.

# With explicit WRITE_PARTITION_COLUMNS option, all columns would still be written and still readable.
statement ok
COPY test TO '__TEST_DIR__/no-part-cols9' (FORMAT PARQUET, PARTITION_BY '*', OVERWRITE, WRITE_PARTITION_COLUMNS);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/no-part-cols9/part_col=0/value_col=*/value2_col=*/*.parquet' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# WRITE_PARTITION_COLUMNS: false behaves the same as default, so partition by all should result in an error.
statement error
COPY test TO '__TEST_DIR__/no-part-cols9' (FORMAT PARQUET, PARTITION_BY '*', WRITE_PARTITION_COLUMNS false);
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.

# CSV

# Skip write of the first partition column
statement ok
COPY test TO '__TEST_DIR__/csv-no-part-cols' (FORMAT CSV, PARTITION_BY (part_col));

# SELECT query returns all columns, but written files do not have partition columns
query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols/part_col=0/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Skip write of the first partition column with explicit option
statement ok
COPY test TO '__TEST_DIR__/csv-no-part-cols-explicit' (FORMAT CSV, PARTITION_BY (part_col), WRITE_PARTITION_COLUMNS false);

# SELECT query returns all columns, but written files do not have partition columns
query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols-explicit/part_col=0/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Skip writes of 2 partition columns
statement ok
COPY test TO '__TEST_DIR__/csv-no-part-cols2' (FORMAT CSV, PARTITION_BY (part_col, value_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols2/part_col=0/value_col=*/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# Modified version of the partition_col
statement ok
COPY (SELECT * EXCLUDE (part_col), 'prefix-'::VARCHAR || part_col::VARCHAR as part_col FROM test) TO '__TEST_DIR__/csv-no-part-cols3' (FORMAT CSV, PARTITION_BY (part_col));

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols3/part_col=prefix-0/*.csv' ORDER BY value2_col;
----
prefix-0 1 0
prefix-0 3 2
prefix-0 0 4
prefix-0 2 6
prefix-0 4 8

# Partitions of more than 8 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/csv-no-part-cols4' (FORMAT CSV, PARTITION_BY (part_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/csv-no-part-cols4/part_col=1/*.csv' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last column out of 10 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/csv-no-part-cols5' (FORMAT CSV, PARTITION_BY (value9_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/csv-no-part-cols5/value9_col=*/*.csv' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last 2 columns out of 10 columns
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/csv-no-part-cols6' (FORMAT CSV, PARTITION_BY (value8_col, value9_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/csv-no-part-cols6/value8_col=*/value9_col=*/*.csv' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Partition by last 3 columns out of 10 columns in a reverse order
statement ok
COPY (SELECT 1 AS part_col, 2 AS value_col, 3 AS value2_col, 4 AS value3_col, 5 AS value4_col, 6 AS value5_col, 7 AS value6_col, 8 AS value7_col, 9 AS value8_col, 10 AS value9_col) TO '__TEST_DIR__/csv-no-part-cols7' (FORMAT CSV, PARTITION_BY (value9_col, value8_col, value7_col));

query IIIIIIIIII
SELECT part_col, value_col, value2_col, value3_col, value4_col, value5_col, value6_col, value7_col, value8_col, value9_col FROM '__TEST_DIR__/csv-no-part-cols7/value9_col=*/value8_col=*/value7_col=*/*.csv' ORDER BY 1;
----
1 2 3 4 5 6 7 8 9 10

# Throw an error when all columns are specified as partitions
statement error
COPY test TO '__TEST_DIR__/csv-no-part-cols8' (FORMAT CSV, PARTITION_BY (part_col, value_col, value2_col));
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.

# With explicit WRITE_PARTITION_COLUMNS option, all columns would still be written and still readable.
statement ok
COPY test TO '__TEST_DIR__/csv-no-part-cols8' (FORMAT CSV, PARTITION_BY (part_col, value_col, value2_col), OVERWRITE, WRITE_PARTITION_COLUMNS);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols8/part_col=0/value_col=*/value2_col=*/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# '*' also ends up with error
statement error
COPY test TO '__TEST_DIR__/csv-no-part-cols9' (FORMAT CSV, PARTITION_BY '*');
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.

# With explicit WRITE_PARTITION_COLUMNS option, all columns would still be written and still readable.
statement ok
COPY test TO '__TEST_DIR__/csv-no-part-cols9' (FORMAT CSV, PARTITION_BY '*', OVERWRITE, WRITE_PARTITION_COLUMNS);

query III
SELECT part_col, value_col, value2_col FROM '__TEST_DIR__/csv-no-part-cols9/part_col=0/value_col=*/value2_col=*/*.csv' ORDER BY value2_col;
----
0 1 0
0 3 2
0 0 4
0 2 6
0 4 8

# WRITE_PARTITION_COLUMNS: false behaves the same as default, so partition by all should result in an error.
statement error
COPY test TO '__TEST_DIR__/csv-no-part-cols9' (FORMAT CSV, PARTITION_BY '*', WRITE_PARTITION_COLUMNS false);
----
Not implemented Error: No column to write as all columns are specified as partition columns. WRITE_PARTITION_COLUMNS option can be used to write partition columns.