should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/copy_blob.test
# group: [copy]
statement error
COPY (select 'foo') TO '__TEST_DIR__/test.blob' (FORMAT BLOB);
----
Binder Error: "COPY (FORMAT BLOB)" only supports a single BLOB column
statement error
COPY (select 'foo'::BLOB, 10) TO '__TEST_DIR__/test.blob' (FORMAT BLOB);
----
Binder Error: "COPY (FORMAT BLOB)" only supports a single BLOB column
statement ok
COPY (select 'foo'::BLOB) TO '__TEST_DIR__/test.blob' (FORMAT BLOB);
query III
select filename LIKE '%test.blob', content, size from read_blob('__TEST_DIR__/test.blob');
----
true foo 3
statement error
COPY (select 'foo'::BLOB) TO '__TEST_DIR__/test.blob.gz' (FORMAT BLOB, ASDFGH);
----
Binder Error: Unrecognized option for COPY (FORMAT BLOB): "ASDFGH"
# With compression
statement ok
COPY (select 'foo'::BLOB) TO '__TEST_DIR__/test.blob.gz' (FORMAT BLOB);
query II
select filename LIKE '%test.blob.gz', size from read_blob('__TEST_DIR__/test.blob.gz');
----
true 26
# With explicit compression
statement ok
COPY (select 'foo'::BLOB) TO '__TEST_DIR__/test2.blob' (FORMAT BLOB, COMPRESSION 'GZIP');
query II
select filename LIKE '%test2.blob', size from read_blob('__TEST_DIR__/test2.blob');
----
true 26

View File

@@ -0,0 +1,28 @@
# name: test/sql/copy/csv/14512.test
# description: Test for issue #14512
# group: [csv]
statement ok
PRAGMA enable_verification
query II
FROM read_csv('data/csv/14512.csv', strict_mode=TRUE);
----
onions ,
query I
select columns FROM sniff_csv('data/csv/14512.csv')
----
[{'name': ingredients, 'type': VARCHAR}, {'name': item_tax_data, 'type': VARCHAR}]
query IIIIIIIIIIIIIIIIIIIIIIIIII
FROM read_csv('data/csv/14512_og.csv', strict_mode = false, delim = ',', quote = '"', escape = '"');
----
00000579000098 13.99 EA PINE RIDGE CHENIN VOIGNIER 750.0 ML 1 13 NULL 1 NULL NULL NULL NULL NULL NULL DEFAULT BRAND NULL NULL NULL NULL BEER & WINE NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}}
00000609082001 3.99 EA MADELAINE MINI MILK CHOCOLATE TURKEY 1.0 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL MADELEINE NULL NULL NULL NULL CANDY NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}}
00817566020096 9.99 EA COTSWOLD EW 5.3 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL LONG CLAWSON NULL NULL NULL NULL DELI INGREDIENTS: DOUBLE GLOUCESTER CHEESE (PASTEURIZED MILK SALT ENZYMES DAIRY CULTURES ANNATTO EXTRACT AS A COLOR) RECONSTITUTED MINCED ONIONS (2%) DRIED CHIVES. CONTAINS: MILK THIS PRODUCT WAS PRODUCED IN AN ENVIRONMENT THAT ALSO USES PEANUTS TREE NUTS EGGS MILK WHEAT SOY FISH SHELLFISH AND SESAME. NULL 2.0 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.02}}
query I
select columns FROM sniff_csv('data/csv/14512_og.csv', strict_mode = false, delim = ',', quote = '"', escape = '"')
----
[{'name': lookup_code, 'type': VARCHAR}, {'name': price, 'type': DOUBLE}, {'name': cost_unit, 'type': VARCHAR}, {'name': item_name, 'type': VARCHAR}, {'name': size, 'type': DOUBLE}, {'name': size_uom, 'type': VARCHAR}, {'name': available, 'type': BIGINT}, {'name': store_code, 'type': BIGINT}, {'name': private_label_item, 'type': VARCHAR}, {'name': alcoholic, 'type': BIGINT}, {'name': alcohol_by_volume, 'type': VARCHAR}, {'name': alcohol_type, 'type': VARCHAR}, {'name': nutri_info, 'type': VARCHAR}, {'name': allergens, 'type': VARCHAR}, {'name': balance_on_hand, 'type': VARCHAR}, {'name': blackout_times, 'type': VARCHAR}, {'name': brand_name, 'type': VARCHAR}, {'name': ca_prop65_text, 'type': VARCHAR}, {'name': ca_prop65_codes, 'type': VARCHAR}, {'name': configurable_products, 'type': VARCHAR}, {'name': country_of_origin, 'type': VARCHAR}, {'name': department, 'type': VARCHAR}, {'name': ingredients, 'type': VARCHAR}, {'name': item_details, 'type': VARCHAR}, {'name': tax_rate, 'type': DOUBLE}, {'name': item_tax_data, 'type': VARCHAR}]

View File

@@ -0,0 +1,22 @@
# name: test/sql/copy/csv/14874.test
# description: Test for issue #14874
# group: [csv]
mode skip
statement ok
PRAGMA enable_verification
query I
SELECT count(*) FROM read_csv('data/csv/drug_exposure.csv');
----
4113
query IIIIIIIIIIIIIIIIIIIIIII
SELECT * FROM read_csv('data/csv/drug_exposure.csv') ORDER BY ALL limit 5;
----
-9223335764168194396 1532249960797525190 43613338 2166-08-24 2166-08-24 08:00:00 2166-08-24 2166-08-24 23:00:00 NULL 32838 NULL NULL 200.0 NULL NULL 4171047 NULL NULL -8938795529793370194 NULL SW 100ml Bag 2000011398 IV mL
-9212518512714808847 1484542834460282651 19008723 2161-11-16 2161-11-16 08:00:00 2161-11-19 2161-11-19 10:00:00 NULL 32838 NULL NULL 0.1 NULL NULL 4262914 NULL NULL 2567137523204385703 NULL 45802011222 45085123 NU TUBE
-9206751087952985587 -9066461348710750663 35603224 2185-01-20 2185-01-20 19:00:00 2185-01-21 2185-01-21 18:00:00 NULL 32838 NULL NULL 0.6 NULL NULL 4171047 NULL NULL -7972824337100083284 NULL 00409610210 45044941 IV VIAL
-9204709101307434434 -6225647829918357531 35603227 2153-04-04 2153-04-04 11:00:00 2153-04-05 2153-04-05 10:00:00 NULL 32838 NULL NULL 1.0 NULL NULL 4171047 NULL NULL 2405647860386309016 NULL 00409610204 45283861 IV VIAL
-9201491012041940131 -3780452582396805474 36249734 2115-12-29 2115-12-29 05:00:00 2115-12-31 2115-12-31 07:00:00 NULL 32838 NULL NULL 500.0 NULL NULL 4171047 NULL NULL 1384337365755163052 NULL 00338001703 45214979 IV mL

View File

@@ -0,0 +1,58 @@
# name: test/sql/copy/csv/17738.test
# description: Test for issue #17738
# group: [csv]
statement ok
PRAGMA enable_verification
query I
FROM read_csv('data/csv/17738_rn.csv',header=False,skip=3, delim = ';');
----
xyz
lorem ipsum
NULL
NULL
John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123
query I
FROM read_csv('data/csv/17738.csv',header=False,skip=3);
----
xyz
lorem ipsum
NULL
NULL
John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123
query I
FROM read_csv('data/csv/17738.csv',header=False,skip=4);
----
lorem ipsum
NULL
NULL
John,Doe,120 jefferson st.,Riverside, NJ, 08075
Jack,McGinnis,220 hobo Av.,Phila, PA,09119
"John ""Da Man""",Repici,120 Jefferson St.,Riverside, NJ,08075
Stephen,Tyler,"7452 Terrace ""At the Plaza"" road",SomeTown,SD, 91234
,Blankman,,SomeTown, SD, 00298
"Joan ""the bone"", Anne",Jet,"9th, at Terrace plc",Desert City,CO,00123
query IIIIII
FROM read_csv('data/csv/17738.csv',header=False,skip=7);
----
John Doe 120 jefferson st. Riverside NJ 08075
Jack McGinnis 220 hobo Av. Phila PA 09119
John "Da Man" Repici 120 Jefferson St. Riverside NJ 08075
Stephen Tyler 7452 Terrace "At the Plaza" road SomeTown SD 91234
NULL Blankman NULL SomeTown SD 00298
Joan "the bone", Anne Jet 9th, at Terrace plc Desert City CO 00123

View File

@@ -0,0 +1,70 @@
# name: test/sql/copy/csv/17744.test
# description: Test for issue #17744
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
COPY (
SELECT
-- The string must start with a hash to reproduce the issue
'#hash start' AS first_column,
1 AS second_column
UNION ALL
SELECT
-- Quoted value can go anywhere between rows 1 and 2048 just not 1 or 2048. It must be between the hashes
'"my, quoted value"' AS first_column,
1 AS second_column
UNION ALL
-- These rows make the csv 2048 rows long which is required to reproduce
SELECT
'any value' AS first_column,
1 AS second_column
FROM range(0, 2045)
UNION ALL
SELECT
-- This hash value must be somewhere in the string just not at the beginning
'hash not at start #' AS column_value,
1 AS second_column
) TO '__TEST_DIR__/test.csv' (format csv, header 1);
query II
SELECT columns, comment FROM sniff_csv('__TEST_DIR__/test.csv', null_padding = true)
----
[{'name': first_column, 'type': VARCHAR}, {'name': second_column, 'type': BIGINT}] (empty)
query II
SELECT columns, comment FROM sniff_csv('__TEST_DIR__/test.csv', null_padding = true, comment = '#')
----
[{'name': first_column, 'type': VARCHAR}, {'name': second_column, 'type': BIGINT}] #
statement ok
COPY (
SELECT
-- The string must start with a hash to reproduce the issue
'#hash start' AS first_column,
1 AS second_column
UNION ALL
SELECT
-- Quoted value can go anywhere between rows 1 and 2048 just not 1 or 2048. It must be between the hashes
'"my, quoted value"' AS first_column,
1 AS second_column
UNION ALL
-- These rows make the csv 2048 rows long which is required to reproduce
SELECT
'any value' AS first_column,
1 AS second_column
FROM range(0, 2045)
UNION ALL
SELECT
-- This hash value must be somewhere in the string just not at the beginning
'hash not at start #' AS column_value,
1 AS second_column
) TO '__TEST_DIR__/test_2.csv' (format csv, header 1, QUOTE '');
query II
SELECT columns, comment FROM sniff_csv('__TEST_DIR__/test_2.csv')
----
[{'name': first_column, 'type': VARCHAR}, {'name': second_column, 'type': BIGINT}] (empty)

View File

@@ -0,0 +1,12 @@
# name: test/sql/copy/csv/18579.test
# description: Test for issue #18579
# group: [csv]
statement ok
PRAGMA enable_verification
query IIIIII
DESCRIBE FROM read_csv('data/csv/18579/*.csv', union_by_name = true);
----
billingAccountName VARCHAR YES NULL NULL NULL
partnerName VARCHAR YES NULL NULL NULL

View File

@@ -0,0 +1,16 @@
# name: test/sql/copy/csv/7702.test
# description: Test for issue #7702
# group: [csv]
statement ok
PRAGMA enable_verification
query I
SELECT count(*) FROM read_csv_auto( ['data/csv/error/mismatch/half1.csv', 'data/csv/error/mismatch/half2.csv'], ignore_errors=true, sample_size=1);
----
9102
query I
SELECT count(*) FROM read_csv_auto( ['data/csv/error/mismatch/half2.csv', 'data/csv/error/mismatch/half1.csv'], ignore_errors=true, sample_size=1);
----
9102

View File

@@ -0,0 +1,10 @@
# name: test/sql/copy/csv/afl/fuzz_20250211_crash.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
statement ok
PRAGMA enable_verification
statement maybe
FROM read_csv('data/csv/afl/20250211_csv_fuzz_crash/case_53.csv', buffer_size=42);
----

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/afl/fuzz_20250226.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
require json
statement ok
PRAGMA enable_verification
query I
select count(file) from glob('./data/csv/afl/20250226_csv_fuzz_error/*');
----
1
statement maybe
FROM read_csv('data/csv/afl/20250226_csv_fuzz_error/case_1.csv', force_not_null=012%0, columns={'a':'JSON'});
----

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/afl/test_afl_ignore_errors.test
# description: Test AFL CSV Files work with ignore errors set to true
# group: [afl]
statement ok
PRAGMA enable_verification
loop i 1 56
statement maybe
FROM read_csv('data/csv/afl/ignore_errors/${i}.csv', ignore_errors = true)
----
endloop

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/afl/test_afl_no_parameter.test
# description: Test AFL CSV Files work with no extra parameters set
# group: [afl]
statement ok
PRAGMA enable_verification
loop i 1 25
statement maybe
FROM 'data/csv/afl/no_parameter/${i}.csv'
----
endloop

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/afl/test_afl_null_padding.test
# description: Test AFL CSV Files work with null padding set
# group: [afl]
statement ok
PRAGMA enable_verification
loop i 1 46
statement maybe
FROM read_csv('data/csv/afl/null_padding/${i}.csv', null_padding=true)
----
endloop

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/afl/test_afl_skip.test
# description: Test AFL CSV Files work with skip set
# group: [afl]
statement ok
PRAGMA enable_verification
loop i 1 3
statement maybe
FROM read_csv('data/csv/afl/skip/${i}.csv', skip=1)
----
endloop

View File

@@ -0,0 +1,367 @@
# name: test/sql/copy/csv/afl/test_fuzz_3977.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
loop i 0 2
query I
select count(file) from glob('./data/csv/afl/3977/*');
----
88
statement maybe
FROM read_csv('data/csv/afl/3977/case_1.csv', rejects_scan=0, buffer_size=655371, all_varchar=false, rejects_scan=0, buffer_size=42);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_2.csv', names=['a','b','c','d'], store_rejects=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_3.csv', names=['a','b','c','d'], store_rejects=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_4.csv', names=['a','b','c','d'], store_rejects=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_5.csv', auto_detect=false, columns={'a': 'VARCHAR'}, escape='"', header=false, quote='"', strict_mode=true, store_rejects=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_6.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_7.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_8.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_9.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=false);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_10.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_11.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_12.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_13.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_14.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_15.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_16.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_17.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_18.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_19.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_20.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_21.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_22.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_23.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_24.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_25.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_26.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_27.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_28.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_29.csv', auto_detect=false, buffer_size=65536, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_30.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_31.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_32.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_33.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_34.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_35.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_36.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_37.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_38.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_39.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_40.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, comment=';', rejects_table='"', strict_mode=false);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_41.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_42.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_43.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_44.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_45.csv', auto_detect=false, buffer_size=810, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_46.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_47.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_48.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_49.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_50.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_51.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', '|':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_52.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_53.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_54.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAr'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_55.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_56.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_57.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_58.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_59.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','"':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_60.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_61.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_62.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_63.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_64.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_65.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_66.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_67.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_68.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_69.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_70.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_71.csv', auto_detect=false, buffer_size=16711722, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_72.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','F':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_73.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_74.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_75.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_76.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_77.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_78.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_79.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_80.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_81.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_82.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_83.csv', auto_detect=false, parallel=false, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_84.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_85.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_86.csv', auto_detect=false, buffer_size=720938, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_87.csv', auto_detect=false, buffer_size=42, columns={'a2.0-22222222222222222.0222->>':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement maybe
FROM read_csv('data/csv/afl/3977/case_88.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', rejects_table='"', strict_mode=true);
----
statement ok
PRAGMA enable_verification
endloop

View File

@@ -0,0 +1,24 @@
# name: test/sql/copy/csv/afl/test_fuzz_4086.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
require json
loop i 0 2
statement maybe
FROM read_csv('data/csv/afl/4086/case_1.csv', auto_detect=false, columns={'json': 'JSON'}, delim=NULL, buffer_size=42, store_rejects=true, rejects_limit=658694493994253607);
----
statement maybe
FROM read_csv('data/csv/afl/4086/case_2.csv', auto_detect=false, columns={'json': 'JSON'}, delim=NULL, buffer_size=42, store_rejects=true, rejects_limit=658694493994253607);
----
statement maybe
FROM read_csv('data/csv/afl/4086/case_3.csv', auto_detect=false, columns={'json': 'JSON'}, delim='\0', buffer_size=42, store_rejects=true, rejects_limit=658694493994253607);
----
statement ok
PRAGMA enable_verification
endloop

View File

@@ -0,0 +1,10 @@
# name: test/sql/copy/csv/afl/test_fuzz_4172.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
statement ok
PRAGMA enable_verification
statement maybe
FROM read_csv('data/csv/afl/4172/case_4.csv', ignore_errors=true, buffer_size=1, store_rejects=false);
----

View File

@@ -0,0 +1,21 @@
# name: test/sql/copy/csv/afl/test_fuzz_4496.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
require json
loop i 0 2
statement maybe
FROM read_csv('data/csv/afl/4496/crashes/case_0.csv', auto_detect=false, buffer_size=42, columns={'json': 'JSON'}, delim=NULL, rejects_limit=658694493994253607, store_rejects=true);
----
statement maybe
FROM read_csv('data/csv/afl/4496/crashes/case_1.csv', auto_detect=false, buffer_size=42, columns={'json': 'JSON'}, delim=NULL, rejects_limit=658694493994253607, store_rejects=true);
----
statement ok
PRAGMA enable_verification
endloop

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/afl/test_fuzz_4793.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
require json
statement ok
PRAGMA enable_verification
statement maybe
FROM read_csv('data/csv/afl/4793/crashes/case_0.csv', auto_detect=false, columns={'a':'varchar'}, delim='', encoding='latin-1', header=false, quote='');
----
statement maybe
FROM read_csv('data/csv/afl/4793/crashes/case_1.csv', auto_detect=false, buffer_size=42, columns={'a':'INTEGER','b':'INTEGER', 'c':'VARCHAR'}, delim=';', escape='"', quote='"');
----

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/afl/test_fuzz_5194.test
# description: fuzzer generated csv files - should not raise internal exception (by failed assertion).
# group: [afl]
require json
statement ok
PRAGMA enable_verification
statement maybe
FROM read_csv('data/csv/afl/5194/crashes/case_0.csv', auto_detect=false, buffer_size=8, columns={'a': 'integer','b': 'integer','c': 'integer'}, header=true, maximum_line_size=0);
----
statement maybe
FROM read_csv('data/csv/afl/5194/crashes/case_4.csv', buffer_size=30, delim=';', union_by_name=false, header=false, null_padding=true);
----

View File

@@ -0,0 +1,11 @@
# name: test/sql/copy/csv/auto/test_14177.test
# description: Test CSV Sample works for #14177
# group: [auto]
statement ok
PRAGMA enable_verification
query I
select count(*) FROM (FROM read_csv('data/csv/auto/14177.csv', buffer_size=80, ignore_errors = true)) as t
----
5

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_5250.test
# description: Test CSV Sample works for #5250
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
query I
select count(*) from read_csv_auto('data/csv/page_namespacepage_title_sample.csv', SAMPLE_SIZE = -1)
----
3993

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_5378.test
# description: Test read_csv_auto on issue 5378
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
query I
SELECT count(*) FROM read_csv_auto ('data/csv/auto/titlebasicsdebug.tsv', nullstr='\N', sample_size = -1);
----
3002

View File

@@ -0,0 +1,23 @@
# name: test/sql/copy/csv/auto/test_auto_8231.test
# description: Test issue 8231 related to missing headers and null padding
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
create view locations_header_trailing_comma as SELECT * from read_csv_auto('data/csv/locations_row_trailing_comma.csv', null_padding=True)
query IIIII
SELECT * from locations_header_trailing_comma
----
1 name 0 0 value
query IIIIII
describe locations_header_trailing_comma;
----
id BIGINT YES NULL NULL NULL
name VARCHAR YES NULL NULL NULL
lat BIGINT YES NULL NULL NULL
lon BIGINT YES NULL NULL NULL
column4 VARCHAR YES NULL NULL NULL

View File

@@ -0,0 +1,20 @@
# name: test/sql/copy/csv/auto/test_auto_8573.test
# description: Test read_csv_auto on issue 8573
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)
query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv_auto('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/auto/test_auto_8649.test
# description: Test CSV Sample works for #8649
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
# Sample
query I
SELECT * FROM read_csv_auto("data/csv/dim0.csv") ;
----
T
0

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_8860.test
# description: Test read_csv_auto on issue 8860
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
query I
SELECT count(*) FROM read_csv_auto("data/csv/auto/product_codes_HS17_V202301.csv.gz", quote = '"', comment='', delim = ',') ;
----
5384

View File

@@ -0,0 +1,81 @@
# name: test/sql/copy/csv/auto/test_auto_column_type_opt.test
# description: Test read_csv_auto with column_types option
# group: [auto]
statement ok
PRAGMA enable_verification
# Test read_csv wout auto_detect throws
statement error
select * from read_csv('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(a := 'INTEGER'))
----
Columns with names: "a" do not exist in the CSV File
# Test non-struct throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=1)
----
COLUMN_TYPES requires a struct or list as input
# Test empty throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK())
----
Can't pack nothing into a struct
# Test funky type throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(a := 'BLA'))
----
Type with name BLA does not exist!
# Test funky name throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(bla := 'INTEGER'))
----
Columns with names: "bla" do not exist in the CSV File
# Test wrong type throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column3 := 'INTEGER'))
----
This type was either manually set or derived from an existing table. Select a different type to correctly parse this column.
# Test 1st column defined
query I
SELECT typeof(#1) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'DOUBLE')) LIMIT 1
----
DOUBLE
query I
SELECT typeof(#1) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'INTEGER')) LIMIT 1
----
INTEGER
# Test 3rd column defined
query I
SELECT typeof(#3) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK( column2 := 'HUGEINT')) LIMIT 1
----
HUGEINT
# Test 1st and 3rd column defined
query II
SELECT typeof(#1),typeof(#3) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'BIGINT', column2 := 'HUGEINT')) LIMIT 1
----
BIGINT HUGEINT
query IIII
SELECT * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'BIGINT', column2 := 'HUGEINT'))
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9

View File

@@ -0,0 +1,37 @@
# name: test/sql/copy/csv/auto/test_auto_cranlogs.test
# description: Test read_csv_auto from cranlogs gzip
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE cranlogs AS SELECT * FROM read_csv_auto ('data/csv/real/tmp2013-06-15.csv.gz');
query I
SELECT COUNT(*) FROM cranlogs;
----
37459
query TTITTTTTTI
SELECT * FROM cranlogs LIMIT 5;
----
2013-06-15 00:18:11 46338 NA NA NA date 1.2-33 JP 1
2013-06-15 00:18:18 740765 NA NA NA plyr 1.8 JP 2
2013-06-15 00:54:25 1229408 NA NA NA RJSONIO 1.0-3 JP 3
2013-06-15 00:58:50 501915 2.15.3 x86_64 linux-gnu animation 2.2 IN 4
2013-06-15 00:14:52 254933 3.0.1 x86_64 linux-gnu foreign 0.8-54 HK 5
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE cranlogs2 AS SELECT * FROM read_csv_auto ('data/csv/real/tmp2013-06-15.csv.gz');
query IIIIIIIIII
(SELECT * FROM cranlogs EXCEPT SELECT * FROM cranlogs2)
UNION ALL
(SELECT * FROM cranlogs2 EXCEPT SELECT * FROM cranlogs)
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_greek_ncvoter.test
# description: Test read_csv_auto from ncvoter csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, confidential_ind STRING, age STRING, 
ncid STRING, vtd_abbrv STRING, vtd_desc STRING);
query I
COPY ncvoters FROM 'data/csv/real/ncvoter.csv' (FORMAT CSV, AUTO_DETECT TRUE);
----
10
query ITTT
SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;
----
1 ALAMANCE 09S NULL
1 ALAMANCE 09S NULL
1 ALAMANCE 03W NULL
1 ALAMANCE 09S NULL
1 ALAMANCE 1210 NULL
1 ALAMANCE 035 NULL
1 ALAMANCE 124 NULL
1 ALAMANCE 06E NULL
1 ALAMANCE 035 NULL
1 ALAMANCE 064 NULL
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE ncvoters2 AS SELECT * FROM ncvoters LIMIT 0
statement ok
COPY ncvoters2 FROM 'data/csv/real/ncvoter.csv' (FORMAT CSV, AUTO_DETECT TRUE);
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM ncvoters EXCEPT SELECT * FROM ncvoters2)
UNION ALL
(SELECT * FROM ncvoters2 EXCEPT SELECT * FROM ncvoters)
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_greek_utf8.test
# description: Test read_csv_auto from greek-utf8 csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE greek_utf8 AS SELECT i, nfc_normalize(j) j, k FROM read_csv_auto ('data/csv/real/greek_utf8.csv') t(i, j, k)
query I
SELECT COUNT(*) FROM greek_utf8;
----
8
query ITI
SELECT * FROM greek_utf8 ORDER BY 1;
----
1689 00i\047m 2
1690 00i\047v 2
41561 2015 1
45804 21π 1
51981 24hours 1
171067 ardèch 2
182773 afi 1
607808 poverty 1
# can also do this
query ITI
SELECT i, nfc_normalize(j) j, k FROM 'data/csv/real/greek_utf8.csv' t(i, j, k)
----
1689 00i\047m 2
1690 00i\047v 2
41561 2015 1
45804 21π 1
51981 24hours 1
171067 ardèch 2
182773 afi 1
607808 poverty 1

View File

@@ -0,0 +1,25 @@
# name: test/sql/copy/csv/auto/test_auto_imdb.test
# description: Test read_csv_auto from imdb csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE movie_info AS SELECT * FROM read_csv_auto ('data/csv/real/imdb_movie_info_escaped.csv');
query I
SELECT COUNT(*) FROM movie_info;
----
201
statement ok
CREATE TABLE movie_info2 AS SELECT * FROM read_csv_auto ('data/csv/real/imdb_movie_info_escaped.csv');
query IIIII
(FROM movie_info EXCEPT FROM movie_info2)
UNION ALL
(FROM movie_info2 EXCEPT FROM movie_info)
----

View File

@@ -0,0 +1,46 @@
# name: test/sql/copy/csv/auto/test_auto_lineitem.test
# description: Test copy into auto from lineitem csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);
query I
COPY lineitem FROM 'data/csv/real/lineitem_sample.csv' (FORMAT CSV, AUTO_DETECT TRUE);
----
10
query I
SELECT COUNT(*) FROM lineitem;
----
10
query IT
SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;
----
15519 egular courts above the
6731 ly final dependencies: slyly bold
6370 riously. regular, express dep
214 lites. fluffily even de
2403 pending foxes. slyly re
1564 arefully slyly ex
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE lineitem2 AS SELECT * FROM lineitem LIMIT 0
statement ok
COPY lineitem2 FROM 'data/csv/real/lineitem_sample.csv' (FORMAT CSV, AUTO_DETECT TRUE);
query IIIIIIIIIIIIIIII
(SELECT * FROM lineitem EXCEPT SELECT * FROM lineitem2)
UNION ALL
(SELECT * FROM lineitem2 EXCEPT SELECT * FROM lineitem)
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_ontime.test
# description: Test read_csv_auto from on-time dataset
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled DECIMAL(8,2), cancellationcode CHAR(1), diverted DECIMAL(8,2), crselapsedtime DECIMAL(8,2), actualelapsedtime DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup DECIMAL(8,2), carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, div2airportseqid 
INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));
query I
COPY ontime FROM 'data/csv/real/ontime_sample.csv';
----
9
query ITTTT
SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;
----
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE ontime2 AS SELECT * FROM ontime LIMIT 0
statement ok
COPY ontime2 FROM 'data/csv/real/ontime_sample.csv';
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM ontime EXCEPT SELECT * FROM ontime2)
UNION ALL
(SELECT * FROM ontime2 EXCEPT SELECT * FROM ontime)
----

View File

@@ -0,0 +1,39 @@
# name: test/sql/copy/csv/auto/test_auto_voter.test_slow
# description: Test read_csv_auto from voter tsv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE voters AS SELECT * FROM read_csv_auto ('data/csv/real/voter.tsv');
query I
SELECT COUNT(*) FROM voters;
----
5300
query I
SELECT COUNT(*) FROM "data/csv/real/voter.tsv";
----
5300
# read with parallel reader and verify that we get the same result
# FIXME: This should run on windows
require notwindows
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE voters2 AS SELECT * FROM read_csv_auto ('data/csv/real/voter.tsv');
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM voters EXCEPT SELECT * FROM voters2)
UNION ALL
(SELECT * FROM voters2 EXCEPT SELECT * FROM voters)
----

View File

@@ -0,0 +1,31 @@
# name: test/sql/copy/csv/auto/test_auto_web_page.test
# description: Test read_csv_auto from web_page csv
# group: [auto]
statement ok
CREATE TABLE web_page AS SELECT * FROM read_csv_auto ('data/csv/real/web_page.csv');
query I
SELECT COUNT(*) FROM web_page;
----
60
query ITTTIITITTIIII
SELECT * FROM web_page ORDER BY column00 LIMIT 3;
----
1 AAAAAAAABAAAAAAA 1997-09-03 NULL 2450810 2452620 Y 98539 http://www.foo.com welcome 2531 8 3 4
2 AAAAAAAACAAAAAAA 1997-09-03 2000-09-02 2450814 2452580 N NULL http://www.foo.com protected 1564 4 3 1
3 AAAAAAAACAAAAAAA 2000-09-03 NULL 2450814 2452611 N NULL http://www.foo.com feedback 1564 4 3 4
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE web_page2 AS SELECT * FROM read_csv_auto ('data/csv/real/web_page.csv');
query IIIIIIIIIIIIII
(SELECT * FROM web_page EXCEPT SELECT * FROM web_page2)
UNION ALL
(SELECT * FROM web_page2 EXCEPT SELECT * FROM web_page)
----

View File

@@ -0,0 +1,309 @@
# name: test/sql/copy/csv/auto/test_csv_auto.test
# description: Test csv dialect detection
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
query II
FROM read_csv('data/csv/repromarket.csv',
columns={
'email': 'varchar',
'password': 'varchar'
},
all_varchar=true,
delim=':',
header=false,
skip=0,
null_padding=true,
ignore_errors=true,
strict_mode=false
);
----
nemanja.krpovic@gmail.com krlleta
vega@example.combogus NULL
Vega-Inject bogus
mirkofoto@gmail.com mirko
query I
FROM read_csv('data/csv/pipe_delim.csv', columns={'a': 'VARCHAR'}, auto_detect=False)
----
one|two|three|four
1|2|3|4
query I
FROM read_csv('data/csv/nullterm.csv')
----
\0world\0
query I
FROM read_csv('data/csv/nullterm.csv', quote = '"', escape = '"')
----
\0world\0
query I
FROM read_csv('data/csv/single_quote.csv', quote = '"')
----
'Doc'
query I
select columns FROM sniff_csv('data/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true);
----
[{'name': id, 'type': BIGINT}, {'name': name, 'type': VARCHAR}, {'name': age, 'type': BIGINT}, {'name': sex, 'type': VARCHAR}, {'name': state, 'type': VARCHAR}]
query IIIII
FROM read_csv('data/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true,
strict_mode=true)
----
1 James 30 M AL
2 Jill 32 F CO
4 John 34 M AS
5 Matthew 31 M NULL
7 Olivia 36 F OR
8 James 37 M AZ
9 Titus 38 M WY
statement error
select * from read_csv_auto('data/csv/dates.csv', auto_detect=false, delim=',', quote='"', columns={'a': 'VARCHAR'},
strict_mode=true)
----
Expected Number of Columns: 1 Found: 2
query II
select * from read_csv_auto('data/csv/dates.csv')
----
919 304 6161 2008-08-10
query II
select * from read_csv_auto('data/csv/from_df.csv', quote='''')
----
'a,b,c' 45
NULL 234
hello 234
bye 2
# CSV file with RFC-conform dialect
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/rfc_conform.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST1 one space
345 TEST1 trailing_space
567 TEST1 no_space
statement ok
DROP TABLE test;
# CSV file with RFC-conform dialect quote
# read_csv is an alias to read_csv_auto when no extra parameters are supplied
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/rfc_conform_quote.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST2 one space
345 TEST2 trailing_space,
567 TEST2 no"space
statement ok
DROP TABLE test;
# CSV file with RFC-conform dialect quote/leading space of numerics
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/leading_space_numerics.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST3 text1
345 TEST3 text2
567 TEST3 text3
statement ok
DROP TABLE test;
# CSV file with bar delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/pipe_delim.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST4 text1
345 TEST4 text2
567 TEST4 text3
statement ok
DROP TABLE test;
# CSV file with bar delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/pipe_delim_quote.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST5 text1
345 TEST5 text2|
567 TEST5 text3
statement ok
DROP TABLE test;
# CSV file with bar delimiter and double quotes and double escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/quote_escape.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST6 text1
345 TEST6 text"2"text
567 TEST6 text3
statement ok
DROP TABLE test;
# CSV file with bar delimiter and double quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/backslash_escape.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST7 text1
345 TEST7 text"2"
567 TEST7 text3
statement ok
DROP TABLE test;
# CSV file with bar delimiter and single quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_quote_backslash.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST8 text1
345 TEST8 text'2'text
567 TEST8 text3
statement ok
DROP TABLE test;
# CSV file with semicolon delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_delim.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST9 text1
345 TEST9 text2
567 TEST9 text3
statement ok
DROP TABLE test;
# CSV file with semicolon delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_quote.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST10 text1
345 TEST10 text2
567 TEST10 te;xt3
statement ok
DROP TABLE test;
# CSV file with semicolon delimiter, double quotes and RFC escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_escape.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST11 text1
345 TEST11 text2
567 TEST11 te"xt3
statement ok
DROP TABLE test;
# CSV file with tab delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST12 text1
345 TEST12 text2
567 TEST12 text3
statement ok
DROP TABLE test;
# CSV file with tab delimiter and single quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab_single_quote.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST13
text1
345
TEST13
te xt2
567
TEST13
text3
statement ok
DROP TABLE test;
# CSV file with tab delimiter and single quotes without type-hint
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab_single_quote_varchar.csv');
query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST14
text1
345
TEST14
te xt2
567
TEST14
text3
statement ok
DROP TABLE test;
# CSV file with trailing empty lines
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_1254.csv');
query II
SELECT a, b FROM test;
----
1 2
1 2
statement ok
DROP TABLE test;

View File

@@ -0,0 +1,13 @@
# name: test/sql/copy/csv/auto/test_date_format_bug_linux.test
# group: [auto]
statement ok
PRAGMA enable_verification
query I
SELECT * FROM read_csv_auto('data/csv/auto/date_format_bug_linux.csv')
----
8cb123cb8
34fd321
fg5391jn4

View File

@@ -0,0 +1,137 @@
# name: test/sql/copy/csv/auto/test_describe_order.test
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
create view v as select * from read_csv_auto('data/csv/who.csv.gz');
query IIIIII
describe v;
----
country VARCHAR YES NULL NULL NULL
iso2 VARCHAR YES NULL NULL NULL
iso3 VARCHAR YES NULL NULL NULL
year BIGINT YES NULL NULL NULL
new_sp_m014 BIGINT YES NULL NULL NULL
new_sp_m1524 BIGINT YES NULL NULL NULL
new_sp_m2534 BIGINT YES NULL NULL NULL
new_sp_m3544 BIGINT YES NULL NULL NULL
new_sp_m4554 BIGINT YES NULL NULL NULL
new_sp_m5564 BIGINT YES NULL NULL NULL
new_sp_m65 BIGINT YES NULL NULL NULL
new_sp_f014 BIGINT YES NULL NULL NULL
new_sp_f1524 BIGINT YES NULL NULL NULL
new_sp_f2534 BIGINT YES NULL NULL NULL
new_sp_f3544 BIGINT YES NULL NULL NULL
new_sp_f4554 BIGINT YES NULL NULL NULL
new_sp_f5564 BIGINT YES NULL NULL NULL
new_sp_f65 BIGINT YES NULL NULL NULL
new_sn_m014 BIGINT YES NULL NULL NULL
new_sn_m1524 BIGINT YES NULL NULL NULL
new_sn_m2534 BIGINT YES NULL NULL NULL
new_sn_m3544 BIGINT YES NULL NULL NULL
new_sn_m4554 BIGINT YES NULL NULL NULL
new_sn_m5564 BIGINT YES NULL NULL NULL
new_sn_m65 BIGINT YES NULL NULL NULL
new_sn_f014 BIGINT YES NULL NULL NULL
new_sn_f1524 BIGINT YES NULL NULL NULL
new_sn_f2534 BIGINT YES NULL NULL NULL
new_sn_f3544 BIGINT YES NULL NULL NULL
new_sn_f4554 BIGINT YES NULL NULL NULL
new_sn_f5564 BIGINT YES NULL NULL NULL
new_sn_f65 BIGINT YES NULL NULL NULL
new_ep_m014 BIGINT YES NULL NULL NULL
new_ep_m1524 BIGINT YES NULL NULL NULL
new_ep_m2534 BIGINT YES NULL NULL NULL
new_ep_m3544 BIGINT YES NULL NULL NULL
new_ep_m4554 BIGINT YES NULL NULL NULL
new_ep_m5564 BIGINT YES NULL NULL NULL
new_ep_m65 BIGINT YES NULL NULL NULL
new_ep_f014 BIGINT YES NULL NULL NULL
new_ep_f1524 BIGINT YES NULL NULL NULL
new_ep_f2534 BIGINT YES NULL NULL NULL
new_ep_f3544 BIGINT YES NULL NULL NULL
new_ep_f4554 BIGINT YES NULL NULL NULL
new_ep_f5564 BIGINT YES NULL NULL NULL
new_ep_f65 BIGINT YES NULL NULL NULL
newrel_m014 BIGINT YES NULL NULL NULL
newrel_m1524 BIGINT YES NULL NULL NULL
newrel_m2534 BIGINT YES NULL NULL NULL
newrel_m3544 BIGINT YES NULL NULL NULL
newrel_m4554 BIGINT YES NULL NULL NULL
newrel_m5564 BIGINT YES NULL NULL NULL
newrel_m65 BIGINT YES NULL NULL NULL
newrel_f014 BIGINT YES NULL NULL NULL
newrel_f1524 BIGINT YES NULL NULL NULL
newrel_f2534 BIGINT YES NULL NULL NULL
newrel_f3544 BIGINT YES NULL NULL NULL
newrel_f4554 BIGINT YES NULL NULL NULL
newrel_f5564 BIGINT YES NULL NULL NULL
newrel_f65 BIGINT YES NULL NULL NULL
query IIIIII
describe select * from v;
----
country VARCHAR YES NULL NULL NULL
iso2 VARCHAR YES NULL NULL NULL
iso3 VARCHAR YES NULL NULL NULL
year BIGINT YES NULL NULL NULL
new_sp_m014 BIGINT YES NULL NULL NULL
new_sp_m1524 BIGINT YES NULL NULL NULL
new_sp_m2534 BIGINT YES NULL NULL NULL
new_sp_m3544 BIGINT YES NULL NULL NULL
new_sp_m4554 BIGINT YES NULL NULL NULL
new_sp_m5564 BIGINT YES NULL NULL NULL
new_sp_m65 BIGINT YES NULL NULL NULL
new_sp_f014 BIGINT YES NULL NULL NULL
new_sp_f1524 BIGINT YES NULL NULL NULL
new_sp_f2534 BIGINT YES NULL NULL NULL
new_sp_f3544 BIGINT YES NULL NULL NULL
new_sp_f4554 BIGINT YES NULL NULL NULL
new_sp_f5564 BIGINT YES NULL NULL NULL
new_sp_f65 BIGINT YES NULL NULL NULL
new_sn_m014 BIGINT YES NULL NULL NULL
new_sn_m1524 BIGINT YES NULL NULL NULL
new_sn_m2534 BIGINT YES NULL NULL NULL
new_sn_m3544 BIGINT YES NULL NULL NULL
new_sn_m4554 BIGINT YES NULL NULL NULL
new_sn_m5564 BIGINT YES NULL NULL NULL
new_sn_m65 BIGINT YES NULL NULL NULL
new_sn_f014 BIGINT YES NULL NULL NULL
new_sn_f1524 BIGINT YES NULL NULL NULL
new_sn_f2534 BIGINT YES NULL NULL NULL
new_sn_f3544 BIGINT YES NULL NULL NULL
new_sn_f4554 BIGINT YES NULL NULL NULL
new_sn_f5564 BIGINT YES NULL NULL NULL
new_sn_f65 BIGINT YES NULL NULL NULL
new_ep_m014 BIGINT YES NULL NULL NULL
new_ep_m1524 BIGINT YES NULL NULL NULL
new_ep_m2534 BIGINT YES NULL NULL NULL
new_ep_m3544 BIGINT YES NULL NULL NULL
new_ep_m4554 BIGINT YES NULL NULL NULL
new_ep_m5564 BIGINT YES NULL NULL NULL
new_ep_m65 BIGINT YES NULL NULL NULL
new_ep_f014 BIGINT YES NULL NULL NULL
new_ep_f1524 BIGINT YES NULL NULL NULL
new_ep_f2534 BIGINT YES NULL NULL NULL
new_ep_f3544 BIGINT YES NULL NULL NULL
new_ep_f4554 BIGINT YES NULL NULL NULL
new_ep_f5564 BIGINT YES NULL NULL NULL
new_ep_f65 BIGINT YES NULL NULL NULL
newrel_m014 BIGINT YES NULL NULL NULL
newrel_m1524 BIGINT YES NULL NULL NULL
newrel_m2534 BIGINT YES NULL NULL NULL
newrel_m3544 BIGINT YES NULL NULL NULL
newrel_m4554 BIGINT YES NULL NULL NULL
newrel_m5564 BIGINT YES NULL NULL NULL
newrel_m65 BIGINT YES NULL NULL NULL
newrel_f014 BIGINT YES NULL NULL NULL
newrel_f1524 BIGINT YES NULL NULL NULL
newrel_f2534 BIGINT YES NULL NULL NULL
newrel_f3544 BIGINT YES NULL NULL NULL
newrel_f4554 BIGINT YES NULL NULL NULL
newrel_f5564 BIGINT YES NULL NULL NULL
newrel_f65 BIGINT YES NULL NULL NULL

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/auto/test_double_quoted_header.test
# group: [auto]
statement ok
PRAGMA enable_verification
query IIIIII
describe from 'data/csv/double_quoted_header.csv';
----
foo "bar BIGINT YES NULL NULL NULL
name VARCHAR YES NULL NULL NULL
query II
from 'data/csv/double_quoted_header.csv';
----
1 rob
2 sally

View File

@@ -0,0 +1,11 @@
# name: test/sql/copy/csv/auto/test_early_out.test
# group: [auto]
statement ok
PRAGMA enable_verification
statement error
SELECT *
FROM read_csv('data/csv/auto/early_out_error.csv', buffer_size = 8, maximum_line_size = 8, auto_detect = false, columns = {'a': 'integer','b': 'integer','c': 'integer'}, header = true)
----
Error when converting column "b". Could not convert string "\n" to 'INTEGER'

View File

@@ -0,0 +1,43 @@
# name: test/sql/copy/csv/auto/test_fallback_all_varchar.test_slow
# description: Test optional parameters for read csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
# CSV file with irregularity in first column and default sample size
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv');
query TTTT
SELECT typeof(TestDoubleError), typeof(TestDouble), typeof(TestText), typeof(TestInteger) FROM test LIMIT 1
----
VARCHAR DOUBLE VARCHAR BIGINT
statement ok
DROP TABLE test
loop i 1 100
# CSV file with irregularity in first column and small sample size
statement error
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv', SAMPLE_SIZE=1);
----
Column TestDoubleError is being converted as type DOUBLE
endloop
# CSV file with irregularity in first column, small sample size and fallback activated
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv', SAMPLE_SIZE=1, ALL_VARCHAR=1);
query TTTT
SELECT typeof(TestDoubleError), typeof(TestDouble), typeof(TestText), typeof(TestInteger) FROM test LIMIT 1
----
VARCHAR VARCHAR VARCHAR VARCHAR
statement ok
DROP TABLE test

View File

@@ -0,0 +1,112 @@
# name: test/sql/copy/csv/auto/test_header_completion.test
# description: Test csv header completion
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with one missing header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/missing_header_col.csv');
query ITT
SELECT a, column1, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with one duplicate header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_col.csv');
query ITT
SELECT a, b, a_1 FROM test ORDER BY a;
----
123 TEST2 text1
345 TEST2 text2
statement ok
DROP TABLE test;
# CSV file with one duplicate header and collision
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_collision.csv');
query ITTT
SELECT a, b, a_1, a_1_1 FROM test ORDER BY a;
----
123 TEST2 text1 text1
345 TEST2 text2 text2
statement ok
DROP TABLE test;
# CSV file with all column names missing
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/empty_header.csv');
query ITT
SELECT column0, column1, column2 FROM test ORDER BY column0;
----
123 TEST3 text1
345 TEST3 text2
statement ok
DROP TABLE test;
# CSV file with 12 columns and all but one column name missing
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/missing_many_col.csv');
query ITT
SELECT a, column01, column12 FROM test;
----
123 TEST2 value1
345 TEST2 value2
statement ok
DROP TABLE test;
# CSV file with 12 equally called columns
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_columns.csv');
query IIIT
SELECT a, a_8, a_9, column12 FROM test;
----
123 NULL NULL value1
345 NULL NULL value2
statement ok
DROP TABLE test;
# CSV file with 10 equally called columns, one named column12 and column 11 and 12 missing
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_header_mix.csv');
query IIIIIT
SELECT a, a_8, a_9, column12, column11, column12_1 FROM test;
----
123 NULL NULL NULL NULL value1
345 NULL NULL NULL NULL value2
statement ok
DROP TABLE test;
# CSV file with 12 unnamed columns and check for correct naming
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/unnamed_columns.csv');
query ITTIIIIIIIIIT
SELECT column00, column01, column02, column03, column04, column05, column06, column07, column08, column09, column10, column11, column12 FROM test;
----
123 TEST2 text1 NULL NULL NULL NULL NULL NULL NULL NULL NULL value1
345 TEST2 text2 NULL NULL NULL NULL NULL NULL NULL NULL NULL value2
statement ok
DROP TABLE test;

View File

@@ -0,0 +1,176 @@
# name: test/sql/copy/csv/auto/test_header_detection.test
# description: Test csv header detection
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with two lines, none header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/no_header.csv');
query RTT
SELECT column0, column1, column2 FROM test ORDER BY column0;
----
123.000000 TEST1 2000-12-12
345.000000 TEST1 2000-12-13
statement ok
DROP TABLE test;
# CSV file with two lines, one header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_header.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST2 2000-12-13
statement ok
DROP TABLE test;
# CSV file with three lines, one header, one skip row
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/skip_row.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST3 2000-12-13
statement ok
DROP TABLE test;
# CSV file with three lines, one header, two skip rows
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/multiple_skip_row.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST4 2000-12-13
statement ok
DROP TABLE test;
# CSV file with two lines both only strings
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/varchar_multi_line.csv', header = 0);
query TTT
SELECT * FROM test ORDER BY column0;
----
Alice StreetA TEST5
Bob StreetB TEST5
statement ok
DROP TABLE test;
# CSV file with one line, two columns, only strings
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/varchar_single_line.csv', header = 0);
query TT
SELECT column0, column1 FROM test ORDER BY column0;
----
Alice StreetA
statement ok
DROP TABLE test;
# CSV file with one line, two columns - one numeric, one string
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/mixed_single_line.csv');
query IT
SELECT column0, column1 FROM test ORDER BY column0;
----
1 StreetA
statement ok
DROP TABLE test;
# CSV file with one line, one string column
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_value.csv');
query T
SELECT * FROM test;
----
statement ok
DROP TABLE test;
# CSV file with one line, one numeric column
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_numeric.csv');
query I
SELECT * FROM test;
----
1
statement ok
DROP TABLE test;
# CSV with UTF-8 BOM marker that could mess up the header line parsing
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto('data/csv/auto/utf8bom.csv');
query II
SELECT * FROM test;
----
1 Mark
2 Hannes
query I
SELECT id FROM test;
----
1
2
statement ok
DROP TABLE test;
statement ok
CREATE TABLE my_varchars(a VARCHAR, b VARCHAR, c VARCHAR);
statement ok
INSERT INTO my_varchars VALUES ('Hello', 'Beautiful', 'World');
statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_header.csv' (HEADER 1);
statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_no_header.csv' (HEADER 0);
statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_header.csv' ;
statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_no_header.csv' (HEADER 0);
query III
FROM my_varchars ;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
statement ok
COPY my_varchars TO '__TEST_DIR__/big_varchar.csv';
statement ok
COPY my_varchars FROM '__TEST_DIR__/big_varchar.csv';
query III
FROM my_varchars;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World

View File

@@ -0,0 +1,114 @@
# name: test/sql/copy/csv/auto/test_normalize_names.test
# description: Test csv header normalization
# group: [auto]
statement ok
PRAGMA enable_verification
query I
select columns from sniff_csv('data/csv/test_commit_rollback.csv', normalize_names = true)
----
[{'name': _commit, 'type': BIGINT}, {'name': _rollback, 'type': BIGINT}, {'name': _abort, 'type': BIGINT}]
# CSV file with uppercase header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_1.csv', normalize_names=TRUE);
query ITT
SELECT a, b, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with uppercase header and normalize names off
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_1.csv');
query ITT
SELECT A, B, C FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
query I
select columns from sniff_csv('data/csv/auto/normalize_names_2.csv', normalize_names = true)
----
[{'name': _select, 'type': BIGINT}, {'name': _insert, 'type': VARCHAR}, {'name': _join, 'type': VARCHAR}]
# CSV file with keywords in header
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_2.csv', normalize_names=TRUE);
query ITT
SELECT _select, _insert, _join FROM test ORDER BY _select;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with names starting with numerics
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_3.csv', normalize_names=TRUE);
query ITT
SELECT _0_a, _1_b, _9_c FROM test ORDER BY _0_a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with accents and UTF8 characters
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_4.csv', normalize_names=TRUE);
query ITT
SELECT allo, teost, _ FROM test ORDER BY allo;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with accents and UTF8 characters
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_5.csv', normalize_names=TRUE);
query ITT
SELECT a, b, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with superscripts and UTF8 characters
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_6.csv', normalize_names=TRUE);
query ITT
SELECT aax, hello_world, qty_m2 FROM test ORDER BY aax;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
query I
select columns from sniff_csv('data/csv/normalize.csv', normalize_names = true)
----
[{'name': _name, 'type': VARCHAR}, {'name': _text, 'type': VARCHAR}]

View File

@@ -0,0 +1,105 @@
# name: test/sql/copy/csv/auto/test_sample_size.test
# description: Test optional parameters for read csv
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with very sparse column
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE=1);
query IIII
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1
----
BIGINT VARCHAR DATE VARCHAR
statement ok
DROP TABLE test
# CSV file with very sparse column
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE=-1);
query IIII
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1
----
BIGINT DOUBLE DATE VARCHAR
statement ok
DROP TABLE test
# CSV file with very sparse column and sample size 500
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE = -1);
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column and number of samples 50
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv');
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column with sample size 200 and number of samples 20
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE = -1);
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column using copy into
statement ok
CREATE TABLE test (TestInteger integer, TestDouble double, TestDate varchar, TestText varchar);
# CSV file with very sparse column, automatically aligns column types, small sample size
statement ok
COPY test FROM 'data/csv/auto/issue_811.csv' (AUTO_DETECT TRUE);
statement ok
drop table test;
# CSV file with very sparse column using copy into
statement ok
CREATE TABLE test (TestInteger integer, TestDouble double, TestDate varchar, TestText varchar);
# CSV file with very sparse column, automatically aligns column types, small sample size
statement ok
COPY test FROM 'data/csv/auto/issue_811.csv' (SAMPLE_SIZE -1, AUTO_DETECT TRUE);
statement ok
drop table test;

View File

@@ -0,0 +1,36 @@
# name: test/sql/copy/csv/auto/test_sniffer_blob.test
# description: Test reading a blob with the sniffer
# group: [auto]
statement ok
PRAGMA enable_verification
# This is the only way to try to trick the sniffer into checking blobs and it is not valid
statement error
select count(*) from read_csv('data/csv/test/blob.csv',auto_type_candidates=['blob'])
----
Auto Type Candidate of type BLOB is not accepted as a valid input
# All this is cool and should work.
query I
select count(*) from read_csv('data/csv/test/blob.csv',types=['blob'], header = 0)
----
1
query I
select count(*) from read_csv('data/csv/test/blob.csv',columns={'col1': 'BLOB'})
----
1
statement ok
create table t ( a blob)
statement ok
COPY t FROM 'data/csv/test/blob.csv';
query I
select count(*) from read_csv('data/csv/test/blob.csv',columns={'col1': 'BLOB'})
----
1

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_sniffer_empty_start_value.test
# description: Test reading a value with empty spaces at the beginning
# group: [auto]
statement ok
PRAGMA enable_verification
query III
from read_csv('data/csv/empty_space_start_value.csv')
----
1968 86 Greetings
1970 17 Bloody Mama
1970 73 Hi, Mom!
1971 40 Born to Win

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/auto/test_timings_csv.test
# description: Test CSV Sample works for Gabor's timings csv file
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
statement ok
CREATE OR REPLACE TABLE timings(tool string, sf float, day string, batch_type string, q string, parameters string, time float);
query I
COPY timings FROM 'data/csv/timings.csv' (HEADER, DELIMITER '|')
----
1095

View File

@@ -0,0 +1,117 @@
# name: test/sql/copy/csv/auto/test_type_candidates.test
# description: Test Type Candidates for auto_csv
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
create table t (a integer, b double, c varchar)
statement ok
insert into t values (1,1.1,'bla');
statement ok
COPY (SELECT * from t) TO '__TEST_DIR__/csv_file.csv' (FORMAT CSV, DELIMITER '|', HEADER 0);
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv');
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv');
----
BIGINT DOUBLE VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT', 'DOUBLE', 'VARCHAR']);
----
1 1.1 bla
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['MAP']);
----
Value "MAP" can not be converted to a DuckDB Type.
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT', 'DOUBLE', 'VARCHAR']);
----
BIGINT DOUBLE VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['VARCHAR'], header = 0);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['VARCHAR'], header = 0);
----
VARCHAR VARCHAR VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT']);
----
BIGINT VARCHAR VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT','VARCHAR']);
----
BIGINT VARCHAR VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['FLOAT','VARCHAR']);
----
1.0 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['FLOAT','VARCHAR']);
----
FLOAT FLOAT VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
SMALLINT FLOAT VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR','SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR','SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
SMALLINT FLOAT VARCHAR
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['USMALLINT', 'VARCHAR']);
----
Auto Type Candidate of type USMALLINT is not accepted as a valid input
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['bla', 'VARCHAR'])
----
Type with name bla does not exist!
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=[]);
----
auto_type_candidates requires at least one type

View File

@@ -0,0 +1,213 @@
# name: test/sql/copy/csv/auto/test_type_detection.test
# description: Test csv type detection
# group: [auto]
statement ok
PRAGMA enable_verification
# a CSV file with many strings
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/large_mixed_data.csv', SAMPLE_SIZE=-1);
query ITR
SELECT linenr, mixed_string, mixed_double FROM test LIMIT 3;
----
1 1 1.000000
2 2 2.000000
3 3 3.000000
query TTT
SELECT typeof(linenr), typeof(mixed_string), typeof(mixed_double) FROM test LIMIT 1;
----
BIGINT VARCHAR DOUBLE
query ITR
SELECT linenr, mixed_string, mixed_double FROM test WHERE linenr > 27000 LIMIT 3;
----
27001 1 1.000000
27002 2 2.000000
27003 3 3.500000
query I
SELECT count(*) FROM test;
----
27003
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time and date columns with leading/trailing chars
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_trailing.csv');
query ITTTTT
SELECT a, b, t, tf, d, df FROM test ORDER BY a;
----
123 TEST2 12:12:12 12:12:12 2000-01-01 2000-01-01
345 TEST2 14:15:30 14:15:30 2002-02-02 2000-01-01 a
346 TEST2 15:16:17 15:16:17 01 2004-12-13 2000-01-01
query TTTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(tf), typeof(d), typeof(df) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME VARCHAR DATE VARCHAR
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in mm-dd-yyyy (12 hour)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_mm-dd-yyyy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in mm-dd-yy format (12 hour)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_mm-dd-yy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 1990-01-01 1990-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in dd-mm-yyyy format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_dd-mm-yyyy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in dd-mm-yy format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_dd-mm-yy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 1990-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in yyyy.mm.dd format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_yyyy.mm.dd.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in yy.mm.dd format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_yy.mm.dd.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 1990-01-01 1990-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing integer bool value
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/int_bol.csv');
query I
SELECT i FROM test ORDER BY i;
----
1
2
3
4
query TT
SELECT typeof(i), typeof(b) FROM test LIMIT 1;
----
BIGINT BOOLEAN
statement ok
DROP TABLE test;

View File

@@ -0,0 +1,12 @@
# name: test/sql/copy/csv/auto_glob_directory.test
# description: Test auto globbing a directory
# group: [csv]
statement ok
COPY (SELECT i%2 AS grp, i FROM range(1000) t(i)) TO '__TEST_DIR__/partitioned_glob_csv' (FORMAT csv, PARTITION_BY (grp));
query II
SELECT grp, COUNT(*) FROM read_csv('__TEST_DIR__/partitioned_glob_csv') GROUP BY ALL ORDER BY ALL
----
0 500
1 500

View File

@@ -0,0 +1,182 @@
# name: test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow
# description: Test batch CSV write with mixed batches
# group: [batched_write]
require parquet
statement ok
PRAGMA enable_verification
statement ok
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
statement ok
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
statement ok
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
statement ok
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
# create views that read the batches
statement ok
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])
statement ok
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
statement ok
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
# empty table
statement ok
CREATE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
loop i 0 2
query I
COPY v1 TO '__TEST_DIR__/mixed_batches_v1.csv' (HEADER)
----
1000000
query I
CREATE TABLE mixed_batches_v1 AS FROM '__TEST_DIR__/mixed_batches_v1.csv'
----
1000000
foreach table v1 mixed_batches_v1
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
499999500000 0 999999 1000000 1000000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002
endloop
# now do the same, but filter out half of the values
query I
COPY v2 TO '__TEST_DIR__/mixed_batches_v2.csv' (HEADER)
----
500000
query I
CREATE TABLE mixed_batches_v2 AS FROM '__TEST_DIR__/mixed_batches_v2.csv'
----
500000
foreach table v2 mixed_batches_v2
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
247499750000 0 989999 500000 500000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
189998
189999
200000
200001
200002
endloop
# do it again, but this time only filter out SOME small batches
query I
COPY v3 TO '__TEST_DIR__/mixed_batches_v3.csv' (HEADER)
----
700000
query I
CREATE TABLE mixed_batches_v3 AS FROM '__TEST_DIR__/mixed_batches_v3.csv'
----
700000
foreach table v3 mixed_batches_v3
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
348499650000 0 989999 700000 700000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 9999
----
9999
20000
20001
20002
20003
endloop
# now with an empty table
query I
COPY v4 TO '__TEST_DIR__/mixed_batches_v4.csv' (HEADER)
----
0
query I
CREATE TABLE mixed_batches_v4 AS SELECT i::BIGINT as i FROM read_csv_auto('__TEST_DIR__/mixed_batches_v4.csv')
----
0
foreach table v4 mixed_batches_v4
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
NULL NULL NULL 0 0
query I
SELECT * FROM ${table} LIMIT 5
----
endloop
statement ok
DROP TABLE mixed_batches_v1
statement ok
DROP TABLE mixed_batches_v2
statement ok
DROP TABLE mixed_batches_v3
statement ok
DROP TABLE mixed_batches_v4
statement ok
drop view if exists v2;
statement ok
drop view if exists v3;
statement ok
drop view if exists v4;
# create views that read the batches using unions
statement ok
CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet'
statement ok
CREATE OR REPLACE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
statement ok
CREATE OR REPLACE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
statement ok
CREATE OR REPLACE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
endloop

View File

@@ -0,0 +1,89 @@
# name: test/sql/copy/csv/batched_write/batch_csv_write.test_slow
# description: Batched copy to file
# group: [batched_write]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers AS SELECT i, i // 5 AS j FROM range(1000000) t(i) ;
statement ok
COPY integers TO '__TEST_DIR__/batched_integers.csv' (HEADER);
statement ok
CREATE TABLE integers_copied AS FROM '__TEST_DIR__/batched_integers.csv'
query IIIII
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers_copied
----
499999500000 99999500000 1000000 1000000 1000000
query II
SELECT * FROM integers_copied LIMIT 5
----
0 0
1 0
2 0
3 0
4 0
query II
SELECT * FROM integers_copied LIMIT 5 OFFSET 99997
----
99997 19999
99998 19999
99999 19999
100000 20000
100001 20000
query II
SELECT * FROM integers_copied QUALIFY i<=lag(i) over ()
----
# now with filters
statement ok
CREATE VIEW v1 AS SELECT * FROM integers WHERE (i%2=0 AND i<300000) OR (i BETWEEN 500000 AND 700000)
statement ok
COPY v1 TO '__TEST_DIR__/batched_integers_filters.csv' (HEADER);
statement ok
CREATE TABLE integers_filtered AS FROM '__TEST_DIR__/batched_integers_filters.csv'
foreach table v1 integers_filtered
query IIIII
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM ${table}
----
142500450000 28499950000 350001 350001 350001
query II
SELECT * FROM ${table} LIMIT 5
----
0 0
2 0
4 0
6 1
8 1
query II
SELECT * FROM ${table} LIMIT 5 OFFSET 99997
----
199994 39998
199996 39999
199998 39999
200000 40000
200002 40000
query II
SELECT * FROM ${table} LIMIT 5 OFFSET 300000
----
650000 130000
650001 130000
650002 130000
650003 130000
650004 130000
endloop

View File

@@ -0,0 +1,161 @@
# name: test/sql/copy/csv/batched_write/batch_json_mixed_batches.test_slow
# description: Test batch CSV write with mixed batches
# group: [batched_write]
require parquet
require json
statement ok
PRAGMA enable_verification
statement ok
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
statement ok
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
statement ok
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
statement ok
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
# create views that read the batches
statement ok
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])
statement ok
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
statement ok
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
# empty table
statement ok
CREATE VIEW v4 AS FROM v1 WHERE i>998 AND i<1000 AND i%2=0
foreach ARRAY_SETTING TRUE FALSE
query I
COPY v1 TO '__TEST_DIR__/mixed_batches_v1.json' (ARRAY ${ARRAY_SETTING})
----
1000000
query I
CREATE TABLE mixed_batches_v1 AS FROM '__TEST_DIR__/mixed_batches_v1.json'
----
1000000
foreach table v1 mixed_batches_v1
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
499999500000 0 999999 1000000 1000000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002
endloop
# now do the same, but filter out half of the values
query I
COPY v2 TO '__TEST_DIR__/mixed_batches_v2.json' (ARRAY ${ARRAY_SETTING})
----
500000
query I
CREATE TABLE mixed_batches_v2 AS FROM '__TEST_DIR__/mixed_batches_v2.json'
----
500000
foreach table v2 mixed_batches_v2
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
247499750000 0 989999 500000 500000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 99998
----
189998
189999
200000
200001
200002
endloop
# do it again, but this time only filter out SOME small batches
query I
COPY v3 TO '__TEST_DIR__/mixed_batches_v3.json' (ARRAY ${ARRAY_SETTING})
----
700000
query I
CREATE TABLE mixed_batches_v3 AS FROM '__TEST_DIR__/mixed_batches_v3.json'
----
700000
foreach table v3 mixed_batches_v3
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
348499650000 0 989999 700000 700000
query I
SELECT * FROM ${table} LIMIT 5 OFFSET 9999
----
9999
20000
20001
20002
20003
endloop
query I
COPY v4 TO '__TEST_DIR__/mixed_batches_v4.json' (ARRAY ${ARRAY_SETTING})
----
0
query I
CREATE TABLE mixed_batches_v4 AS SELECT i::BIGINT as i FROM '__TEST_DIR__/mixed_batches_v4.json' t(i)
----
0
foreach table v4 mixed_batches_v4
query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM ${table}
----
NULL NULL NULL 0 0
query I
SELECT * FROM ${table} LIMIT 5
----
endloop
statement ok
DROP TABLE mixed_batches_v1
statement ok
DROP TABLE mixed_batches_v2
statement ok
DROP TABLE mixed_batches_v3
statement ok
DROP TABLE mixed_batches_v4
endloop

View File

@@ -0,0 +1,32 @@
# name: test/sql/copy/csv/batched_write/csv_write_memory_limit.test_slow
# description: Verify data is streamed and memory limit is not exceeded in CSV write
# group: [batched_write]
require parquet
require 64bit
statement ok
PRAGMA enable_verification
# 100M rows, 2 BIGINT columns = 1.6GB uncompressed
statement ok
COPY (SELECT i, i // 5 AS j FROM range(100000000) t(i)) TO '__TEST_DIR__/large_integers.parquet'
# set a memory limit of 300MB
statement ok
SET memory_limit='300MB'
# stream from one parquet file to another
query I
COPY '__TEST_DIR__/large_integers.parquet' TO '__TEST_DIR__/large_integers.csv'
----
100000000
# verify that the file is correctly written
statement ok
SET memory_limit='-1'
query II
SELECT * FROM '__TEST_DIR__/large_integers.parquet' EXCEPT FROM '__TEST_DIR__/large_integers.csv'
----

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
# name: test/sql/copy/csv/code_cov/buffer_manager_finalize.test
# description: Test to reach Finalize call in the csv buffer manager for codecov
# group: [code_cov]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1 AS select i, (i+1) as j from range(0,3000) tbl(i)
statement ok
COPY t1 TO '__TEST_DIR__/t1.csv' (FORMAT CSV, DELIMITER '|', HEADER);
query I
select count(*) from '__TEST_DIR__/t1.csv'
----
3000
query I
select count(*) from read_csv('data/csv/empty.csv', columns=STRUCT_PACK(d := 'BIGINT'), header=0, auto_detect = false)
----
0

View File

@@ -0,0 +1,21 @@
# name: test/sql/copy/csv/code_cov/csv_dialect_detection.test
# description: Test to reach missing Dialect Detection code.
# group: [code_cov]
statement ok
PRAGMA enable_verification
query I
SELECT * from read_csv_auto('data/csv/escape.csv', escape=']', header = 0)
----
"bla"
query I
SELECT * from read_csv_auto('data/csv/escape.csv', header = 0)
----
"]"bla]""
statement error
SELECT * from read_csv_auto('data/csv/no_opt.csv', delim = ';')
----
It was not possible to automatically detect the CSV parsing dialect

View File

@@ -0,0 +1,24 @@
# name: test/sql/copy/csv/code_cov/csv_disk_reload.test
# description: Test to reach Disk Reaload call in the csv buffer manager for codecov
# group: [code_cov]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1 AS select i, (i+1) as j from range(0,300000) tbl(i)
statement ok
COPY t1 TO '__TEST_DIR__/t1.csv' (FORMAT CSV, DELIMITER '|', HEADER);
# Let's set a memory limit
statement ok
PRAGMA memory_limit='2M'
statement ok
PRAGMA threads=2
query I
select count(*) from read_csv_auto('__TEST_DIR__/t1.csv',buffer_size = 262144, sample_size=-1)
----
300000

View File

@@ -0,0 +1,23 @@
# name: test/sql/copy/csv/code_cov/csv_exact_buffer_size.test
# description: Test with exact buffer size being the file size
# group: [code_cov]
statement ok
PRAGMA enable_verification
query II
FROM read_csv('data/csv/auto/issue_1254_rn.csv', buffer_size=10)
----
1 2
1 2
query II
FROM read_csv('data/csv/auto/issue_1254_rn.csv', buffer_size=8)
----
1 2
1 2
query I
select count(*) from read_csv_auto('data/csv/small_file.csv', buffer_size = 7)
----
2

View File

@@ -0,0 +1,37 @@
# name: test/sql/copy/csv/code_cov/csv_sniffer_header.test
# description: Tests to enforce codecov in csv header sniffing
# group: [code_cov]
statement ok
PRAGMA enable_verification
query I
SELECT count(*) from read_csv_auto('data/csv/header_left_space.csv')
----
3
statement ok
create table t as select * from read_csv_auto('data/csv/header_normalize.csv', normalize_names=1)
query IIIIII
describe t
----
bla BIGINT YES NULL NULL NULL
bla_1 BIGINT YES NULL NULL NULL
b_la BIGINT YES NULL NULL NULL
_ BIGINT YES NULL NULL NULL
_3b BIGINT YES NULL NULL NULL
query III
FROM read_csv(['data/csv/auto/sample.csv','data/csv/auto/sample.csv','data/csv/auto/sample.csv'])
----
c1 pedro 1992
c2 mark 1992
c3 oogie 2021
c1 pedro 1992
c2 mark 1992
c3 oogie 2021
c1 pedro 1992
c2 mark 1992
c3 oogie 2021

View File

@@ -0,0 +1,209 @@
# name: test/sql/copy/csv/code_cov/csv_state_machine_invalid_utf.test
# description: Tests related to invalid UTF-8 detection
# group: [code_cov]
# Error during sniffing
statement error
from read_csv_auto('data/csv/test/invalid_utf.csv')
----
Invalid unicode (byte sequence mismatch) detected
statement error
from read_csv_auto('data/csv/test/invalid_utf.csv')
----
CSV Error on Line: 1
# Error during parsing
statement error
from read_csv('data/csv/test/invalid_utf.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
Invalid unicode (byte sequence mismatch) detected.
statement error
from read_csv('data/csv/test/invalid_utf.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 1
# Test ignore errors over more complex file
statement error
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test error in the second vector
statement ok
create table t as from read_csv('data/csv/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true)
query I
select count(*) from t
----
3030
statement error
from read_csv('data/csv/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 3001
# Test borked utf-8 within quotes
statement error
from read_csv('data/csv/test/invalid_utf_quoted.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test Invalid Header
statement error
from read_csv('data/csv/test/invalid_utf_header.csv', delim = ',', quote = '"')
----
Invalid unicode (byte sequence mismatch) detected.
statement error
from read_csv('data/csv/test/invalid_utf_header.csv', header=1, delim = ',', quote = '"')
----
Invalid unicode (byte sequence mismatch) detected.
query III
from read_csv('data/csv/test/invalid_utf_header.csv', header=1, delim = ',', quote = '"', ignore_errors = true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test invalid unicode in between a quoted newline
statement error
from read_csv('data/csv/test/invalid_utf_quoted_nl.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_quoted_nl.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test error between buffers
statement error
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', buffer_size = 198)
----
CSV Error on Line: 11
# Test error between buffers (with ignore_errors set)
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true, buffer_size = 198)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# We get a casting error
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'INTEGER[]'} )
----
Invalid unicode (byte sequence mismatch) detected.
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'INTEGER[]'} )
----
CSV Error on Line: 11
# We get a invalid unicode error
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'VARCHAR'} )
----
Invalid unicode (byte sequence mismatch) detected.
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'VARCHAR'} )
----
CSV Error on Line: 11

View File

@@ -0,0 +1,26 @@
# name: test/sql/copy/csv/code_cov/csv_type_detection.test
# description: Tests to enforce codecov in csv type detection sniffing
# group: [code_cov]
statement ok
PRAGMA enable_verification
statement error
from read_csv_auto('data/csv/invalid_utf8.csv', auto_detect = false, columns={'c01': 'VARCHAR'} )
----
Invalid unicode (byte sequence mismatch) detected
query I
select * from read_csv_auto('data/csv/empty.csv')
----
query II
select * from read_csv_auto('data/csv/small_file.csv', sample_size=1)
----
1 2
5 3
query I
select * from read_csv_auto('data/csv/date_format_percentage.csv')
----
336%584%3205

View File

@@ -0,0 +1,16 @@
# name: test/sql/copy/csv/code_cov/csv_type_refinement.test
# description: Tests to enforce codecov in csv type refinement sniffing
# group: [code_cov]
statement ok
PRAGMA enable_verification
query I
select count(*) from read_csv_auto('data/csv/borked_date.csv', header = 0)
----
2070
query I
select count(*) from read_csv_auto('data/csv/big_not_bool.csv', header = 0)
----
2450

View File

@@ -0,0 +1,58 @@
# name: test/sql/copy/csv/column_names.test
# description: Test correct column name output in read_csv functions
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1 AS SELECT * FROM read_csv_auto('data/csv/test/issue2518.csv', header=False, columns={'rsID':'INT', 'CHR': 'VARCHAR', 'POS': 'INT','REFB': 'VARCHAR','ALTB':'VARCHAR'}, auto_detect = false)
query IIIII
SELECT rsID, chr, pos, refb, altb FROM t1
----
4690 1 14673 G A,C,T
5 7 91839110 C T
6 7 91747131 A G
7 7 91779557 T A
8 7 92408329 C T
9 7 92373453 TG T
10 7 92383888 A C,G,T
1090 8 402108 C G,T
11 7 11364201 C T
1184 6 187649 T A,C,G
statement ok
CREATE TABLE t2 AS SELECT * FROM read_csv_auto('data/csv/test/issue2518.csv', header=False, columns={'rsID':'INT', 'CHR': 'VARCHAR', 'POS': 'INT','REFB': 'VARCHAR','ALTB':'VARCHAR'}, AUTO_DETECT=0)
query IIIII
SELECT rsID, chr, pos, refb, altb FROM t2
----
4690 1 14673 G A,C,T
5 7 91839110 C T
6 7 91747131 A G
7 7 91779557 T A
8 7 92408329 C T
9 7 92373453 TG T
10 7 92383888 A C,G,T
1090 8 402108 C G,T
11 7 11364201 C T
1184 6 187649 T A,C,G
statement ok
CREATE TABLE t3 AS SELECT * FROM read_csv_auto('data/csv/test/issue2518.csv', columns={'rsID':'INT', 'CHR': 'VARCHAR', 'POS': 'INT','REFB': 'VARCHAR','ALTB':'VARCHAR'}, auto_detect = false)
query IIIII
SELECT rsID, chr, pos, refb, altb FROM t3
----
4690 1 14673 G A,C,T
5 7 91839110 C T
6 7 91747131 A G
7 7 91779557 T A
8 7 92408329 C T
9 7 92373453 TG T
10 7 92383888 A C,G,T
1090 8 402108 C G,T
11 7 11364201 C T
1184 6 187649 T A,C,G

View File

@@ -0,0 +1,15 @@
# name: test/sql/copy/csv/copy_disable_parallelism.test
# description: Test copy statement
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE test (a INTEGER, b INTEGER, c VARCHAR(10));
query I
COPY test FROM 'data/csv/test/test.csv';
----
5000

View File

@@ -0,0 +1,40 @@
# name: test/sql/copy/csv/copy_expression.test
# description: Test copy with expressions
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
SET VARIABLE copy_target = '__TEST_DIR__/copy_expression_target.csv';
statement ok
COPY (SELECT * FROM range(5) t(i)) TO (getvariable('copy_target')) WITH (HEADER)
statement ok
CREATE TABLE tbl(i INTEGER);
query I
COPY tbl FROM (getvariable('copy_target'));
----
5
query I
SELECT * FROM tbl
----
0
1
2
3
4
statement ok
PREPARE v1 AS COPY (SELECT 'hello world' str) TO $1;
statement ok
EXECUTE v1('__TEST_DIR__/prepared_copy.csv');
query I
FROM '__TEST_DIR__/prepared_copy.csv'
----
hello world

View File

@@ -0,0 +1,67 @@
# name: test/sql/copy/csv/copy_to_overwrite.test
# description: Test copy to overwriting behavior
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
SET threads=1;
# run with and without preservation of insertion order
loop i 0 2
statement ok
COPY (SELECT * FROM range(5) t(i)) TO '__TEST_DIR__/copy_to_overwrite.csv' (HEADER)
query I
SELECT * FROM '__TEST_DIR__/copy_to_overwrite.csv'
----
0
1
2
3
4
statement ok
COPY (SELECT * FROM range(5, 10) t(i)) TO '__TEST_DIR__/copy_to_overwrite.csv' (HEADER)
query I
SELECT * FROM '__TEST_DIR__/copy_to_overwrite.csv'
----
5
6
7
8
9
# gzip
statement ok
COPY (SELECT * FROM range(5) t(i)) TO '__TEST_DIR__/copy_to_overwrite.csv.gz' (HEADER)
query I
SELECT * FROM '__TEST_DIR__/copy_to_overwrite.csv.gz'
----
0
1
2
3
4
statement ok
COPY (SELECT * FROM range(5, 10) t(i)) TO '__TEST_DIR__/copy_to_overwrite.csv.gz' (HEADER)
query I
SELECT * FROM '__TEST_DIR__/copy_to_overwrite.csv.gz'
----
5
6
7
8
9
statement ok
SET preserve_insertion_order=false
endloop

View File

@@ -0,0 +1,22 @@
# name: test/sql/copy/csv/csv_copy_sniffer.test
# description: Test CSV sniffer copy
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE sales (
salesid INTEGER NOT NULL PRIMARY KEY,
listid INTEGER NOT NULL,
sellerid INTEGER NOT NULL,
buyerid INTEGER NOT NULL,
eventid INTEGER NOT NULL,
dateid SMALLINT NOT NULL,
qtysold SMALLINT NOT NULL,
pricepaid DECIMAL (8,2),
commission DECIMAL (8,2),
saletime TIMESTAMP);
statement ok
COPY sales FROM 'data/csv/sales_snippet.csv' (TIMESTAMPFORMAT '%m/%d/%Y %I:%M:%S', IGNORE_ERRORS true);

View File

@@ -0,0 +1,100 @@
# name: test/sql/copy/csv/csv_decimal_separator.test
# description: Support decimal separators
# group: [csv]
statement ok
PRAGMA enable_verification
# period-separated decimal doesn't parse
statement error
CREATE TABLE decimal_separators AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'commas': 'double', 'periods': 'double'}, delim=';', decimal_separator=',')
----
Line: 2
statement ok
CREATE TABLE decimal_separators AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'commas': 'double'}, delim=';', decimal_separator=',')
query II
SELECT commas, periods FROM decimal_separators;
----
1.1 1.1
0.25 0.25
15300.0 1.53e4
15300.0 +1.53e4
-15300.0 -1.53e4
query II
SELECT typeof(commas), typeof(periods) FROM decimal_separators limit 1;
----
DOUBLE VARCHAR
# reading the commas column as decimal fails when decimal separator is set to '.'
statement error
CREATE TABLE decimal_separators2 AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'commas': 'decimal', 'periods': 'decimal'}, delim=';', decimal_separator='.')
----
Line: 2
# reading the commas column as float fails when decimal separator is set to '.'
statement error
CREATE TABLE decimal_separators2 AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'commas': 'float', 'periods': 'decimal'}, delim=';', decimal_separator='.')
----
Line: 2
statement ok
CREATE TABLE decimal_separators2 AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'commas': 'decimal'}, delim=';', decimal_separator=',')
query II
SELECT commas, periods FROM decimal_separators2;
----
1.100 1.1
0.250 0.25
15300.000 1.53e4
15300.000 +1.53e4
-15300.000 -1.53e4
query II
SELECT typeof(commas), typeof(periods) FROM decimal_separators2 limit 1;
----
DECIMAL(18,3) VARCHAR
# no separator specified => commas get read as varchar
statement ok
CREATE TABLE decimal_separators3 AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators.csv', column_types={'periods': 'decimal'}, delim=';')
query II
SELECT commas, periods FROM decimal_separators3;
----
1,1 1.100
0,25 0.250
1,53e4 15300.000
+1,53e4 +15300.000
-1,53e4 -15300.000
# in a comma-delimited file, comma as decimal separator is OK when quoted
statement ok
CREATE TABLE decimal_separators4 AS SELECT * FROM read_csv_auto('data/csv/decimal_separators/decimal_separators_csv.csv', column_types={'commas': 'double'}, quote='"',delim=',',decimal_separator=',')
query II
SELECT commas, periods FROM decimal_separators4;
----
1.2345 1.2345
# auto-detection should read period-separated decimals as varchar
query II
SELECT typeof(commas), typeof(periods) FROM decimal_separators4 limit 1;
----
DOUBLE VARCHAR
# unsupported separator characters result in error
statement error
SELECT * FROM read_csv_auto('data/csv/decimal_separators/invalid_char.csv', column_types={'foo': 'double'}, decimal_separator='ö')
----
Binder Error: Unsupported parameter for DECIMAL_SEPARATOR: should be '.' or ','
# data with mixed separators will fail reading
statement error
SELECT * FROM read_csv_auto('data/csv/decimal_separators/mixed_format_fail.csv', column_types={'foo': 'double'}, decimal_separator=',', skip=0)
----
Line: 4

View File

@@ -0,0 +1,57 @@
# name: test/sql/copy/csv/csv_dtypes.test
# description: Read a CSV with dtypes flags
# group: [csv]
statement ok
PRAGMA enable_verification
query II
select typeof(Year), typeof(Quarter) from 'data/csv/real/ontime_sample.csv' LIMIT 1;
----
BIGINT BIGINT
query II
select typeof(Year), typeof(Quarter) from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes={'Quarter': 'TINYINT'}) LIMIT 1
----
BIGINT TINYINT
# case insensitivity for struct
query II
select typeof(Year), typeof(Quarter) from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes={'quArTeR': 'TINYINT'}) LIMIT 1
----
BIGINT TINYINT
query II
select typeof(Year), typeof(Quarter) from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes=['INT', 'TINYINT']) LIMIT 1
----
INTEGER TINYINT
# mix of struct and list parameters
statement error
select * from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes=['INT'], column_types={'Quarter': 'TINYINT'}) LIMIT 1
----
can only be supplied once
# invalid list type
statement error
select * from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes=[42]) LIMIT 1
----
requires a list of types
# invalid type
statement error
select * from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes=['unknown_type']) LIMIT 1
----
unknown_type
# invalid struct type
statement error
select * from read_csv_auto('data/csv/real/ontime_sample.csv', dtypes={'Quarter': 42}) LIMIT 1
----
requires a type specification as string
# too many sql types provided in list
statement error
select * from read_csv_auto('data/csv/auto/int_bol.csv', dtypes=['varchar', 'varchar', 'varchar']) LIMIT 1
----
3 types were provided, but CSV file only has 2 columns

View File

@@ -0,0 +1,69 @@
# name: test/sql/copy/csv/csv_dtypes_union_by_name.test
# description: Read a CSV with dtypes and UNION BY NAME
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE ubn1(a BIGINT);
statement ok
CREATE TABLE ubn2(a INTEGER, b INTEGER);
statement ok
CREATE TABLE ubn3(a INTEGER, c INTEGER);
statement ok
INSERT INTO ubn1 VALUES (1), (2), (9223372036854775807);
statement ok
INSERT INTO ubn2 VALUES (3,4), (5, 6);
statement ok
INSERT INTO ubn3 VALUES (100,101), (102, 103);
statement ok
COPY ubn1 TO '__TEST_DIR__/ubn1.csv' WITH (DELIMITER ',');
statement ok
COPY ubn2 TO '__TEST_DIR__/ubn2.csv' WITH ( DELIMITER ',');
statement ok
COPY ubn3 TO '__TEST_DIR__/ubn3.csv' WITH (DELIMITER ',');
query III
SELECT typeof(a), typeof(b), typeof(c)
FROM read_csv_auto('__TEST_DIR__/ubn*.csv', UNION_BY_NAME=TRUE, dtypes={'c': TINYINT})
LIMIT 1;
----
BIGINT BIGINT TINYINT
query III
SELECT typeof(a), typeof(b), typeof(c)
FROM read_csv_auto('__TEST_DIR__/ubn*.csv', UNION_BY_NAME=TRUE, dtypes={'c': TINYINT, 'A': DOUBLE})
LIMIT 1;
----
DOUBLE BIGINT TINYINT
# unrecognized in any file
statement error
SELECT typeof(a), typeof(b), typeof(c)
FROM read_csv_auto('__TEST_DIR__/ubn*.csv', UNION_BY_NAME=TRUE, dtypes={'xxx': TINYINT})
LIMIT 1;
----
xxx
statement error
SELECT typeof(a), typeof(b), typeof(c)
FROM read_csv_auto('__TEST_DIR__/ubn*.csv', UNION_BY_NAME=TRUE, dtypes={'c': TINYINT, 'A': DOUBLE, 'C': FLOAT})
LIMIT 1;
----
Duplicate struct entry name
statement error
SELECT typeof(a), typeof(b), typeof(c)
FROM read_csv_auto('__TEST_DIR__/ubn*.csv', UNION_BY_NAME=TRUE, dtypes={'c': TINYINT, 'A': DOUBLE, 'xZX': FLOAT})
LIMIT 1;
----
xZX

View File

@@ -0,0 +1,31 @@
# name: test/sql/copy/csv/csv_duck_fuzz.test
# description: Issues found from DuckFuzz regarding the CSV Reader
# group: [csv]
statement ok
PRAGMA enable_verification
# The important thing here is that we don't crash, some parameters accept null values (i.e., delimiter),
# and others will have different error messages, not making it possible to use a regex to test errors.
foreach parameter sep delim quote new_line escape nullstr columns auto_type_candidates header auto_detect sample_size all_varchar dateformat timestampformat normalize_names compression skip max_line_size maximum_line_size ignore_errors store_rejects rejects_table rejects_scan rejects_limit force_not_null buffer_size decimal_separator parallel null_padding allow_quoted_nulls column_types dtypes types names column_names comment encoding strict_mode
statement maybe
SELECT NULL FROM sniff_csv('data/csv/14512.csv', ${parameter} := NULL)
----
statement maybe
SELECT NULL FROM read_csv('data/csv/14512.csv', ${parameter} := NULL)
----
endloop
statement error
SELECT NULL FROM sniff_csv(NULL)
----
sniff_csv cannot take NULL as a file path parameter
statement error
SELECT NULL FROM read_csv(NULL)
----
read_csv cannot take NULL list as parameter

View File

@@ -0,0 +1,40 @@
# name: test/sql/copy/csv/csv_enum.test
# description: Read a CSV with enum types
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TYPE bla AS ENUM ('Y', 'N');
query I
select * from read_csv_auto('data/csv/response.csv', header = 0)
----
Y
Y
N
Null
query I
FROM read_csv('data/csv/response.csv', columns={'response': 'bla'}, nullstr = 'Null');
----
Y
Y
N
NULL
query I
FROM read_csv_auto('data/csv/response.csv', types={'column0': 'bla'}, nullstr = 'Null', header = 0);
----
Y
Y
N
NULL
statement error
FROM read_csv_auto('data/csv/response.csv', auto_type_candidates=['bla'], nullstr = 'Null');
----
Auto Type Candidate of type ENUM is not accepted as a valid input

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/csv_enum_storage.test
# description: Read a CSV with enum types
# group: [csv]
statement ok
PRAGMA enable_verification
# load the DB from disk
load __TEST_DIR__/test_csv_enum.db
statement ok
CREATE TYPE bla AS ENUM ('Y', 'N');
restart
query I
select * from read_csv_auto('data/csv/response.csv', header = 0)
----
Y
Y
N
Null
query I
FROM read_csv('data/csv/response.csv', columns={'response': 'bla'}, nullstr = 'Null');
----
Y
Y
N
NULL
query I
FROM read_csv_auto('data/csv/response.csv', types={'column0': 'bla'}, nullstr = 'Null', header = 0);
----
Y
Y
N
NULL
statement error
FROM read_csv_auto('data/csv/response.csv', auto_type_candidates=['bla'], nullstr = 'Null');
----
Auto Type Candidate of type ENUM is not accepted as a valid input

View File

@@ -0,0 +1,39 @@
# name: test/sql/copy/csv/csv_error_message.test
# description: Various CSV reader error messages
# group: [csv]
statement ok
PRAGMA enable_verification
# Test columns error
statement error
FROM read_csv('data/csv/15473.csv', delim = ',', columns = {'A' : 'VARCHAR','B' : 'VARCHAR','C' : 'VARCHAR','D' : 'VARCHAR'})
----
Columns are set as: "columns = { 'A' : 'VARCHAR', 'B' : 'VARCHAR', 'C' : 'VARCHAR', 'D' : 'VARCHAR'}", and they contain: 4 columns. It does not match the number of columns found by the sniffer: 3. Verify the columns parameter is correctly set.
statement ok
COPY (SELECT i::VARCHAR i FROM range(103) tbl(i) UNION ALL SELECT 'hello') TO '__TEST_DIR__/int_parse_error.csv' (HEADER, DELIMITER '|')
statement error
SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'})
----
Column at position: 0 Set type: INTEGER Sniffed type: VARCHAR
statement error
SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'}, header=True, auto_detect=false)
----
Line: 105
statement ok
COPY (SELECT i::VARCHAR i FROM range(103) tbl(i) UNION ALL SELECT 'hello') TO '__TEST_DIR__/int_parse_error.csv' (HEADER 0, DELIMITER '|')
statement error
SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'}, header=False, auto_detect=false)
----
Line: 104
statement error
SELECT * FROM read_csv('__TEST_DIR__/int_parse_error.csv', columns={'i': 'INT'}, header=False, auto_detect=false)
----
Original Line: hello

View File

@@ -0,0 +1,47 @@
# name: test/sql/copy/csv/csv_external_access.test
# description: Test that enable_external_access blocks CSV readers
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE date_test(d date);
statement ok
COPY date_test FROM 'data/csv/test/date.csv';
statement ok
SET enable_external_access=false;
statement error
SELECT * FROM read_csv('data/csv/test/date.csv', columns = {'d': 'DATE'});
----
Permission Error
statement error
SELECT * FROM read_csv_auto('data/csv/test/date.csv');
----
Permission Error
statement error
COPY date_test FROM 'data/csv/test/date.csv';
----
Permission Error
statement error
COPY date_test TO '__TEST_DIR__/date.csv'
----
Permission Error
# we also can't just enable external access again
statement error
SET enable_external_access=true;
----
Cannot change enable_external_access setting while database is running
# sniffer also respects external access flag
statement error
FROM sniff_csv('data/csv/test/date.csv');
----
Permission Error

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/csv_glob_fallback.test
# description: Test glob fallback (#4699)
# group: [csv]
statement ok
PRAGMA enable_verification
query III
SELECT * FROM 'data/csv/[avalon]_daily-avg.csv'
----
1 2 3
3 4 5
4 5 6
statement error
SELECT * FROM 'data/csv/[avxalon]_daily-avg.csv'
----
<REGEX>:.*IO Error: No files found.*

View File

@@ -0,0 +1,151 @@
# name: test/sql/copy/csv/csv_hive.test
# description: Test the automatic parsing of the hive partitioning scheme
# group: [csv]
# Covers: exposing partition keys (part/date) as columns via
# HIVE_PARTITIONING=1, mismatching partition layouts raising errors,
# filter pushdown pruning whole files (no FILTER operator remains in the
# plan), and projection pushdown on partition columns.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE test AS SELECT 1 as id, 'value1' as value;
CREATE TABLE test2 AS SELECT 2 as id, 'value2' as value;
# filenames could allow you to parse hive partitions manually using SQL
query III
select id, value, replace(filename, '\', '/') from read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', FILENAME=1) order by id
----
1 value1 data/csv/hive-partitioning/simple/part=a/date=2012-01-01/test.csv
2 value2 data/csv/hive-partitioning/simple/part=b/date=2013-01-01/test.csv
# however this is just a lot nicer
query IIII
select id, value, part, date from read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1) order by id
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01
# As long as the names match, we don't really mind since everything is a string anyway
query IIII
select id, value, part, date from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) order by id
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01
# If the key names don't add up, we throw
statement error
select * from read_csv_auto('data/csv/hive-partitioning/mismatching_names/*/*/test.csv', HIVE_PARTITIONING=1)
----
Hive partition mismatch
# If the key names don't add up, we throw
statement error
select * from read_csv_auto('data/csv/hive-partitioning/mismatching_count/*/*/test.csv', HIVE_PARTITIONING=1)
----
Hive partition mismatch
# Now we do a bunch of filtering on the partitions, to test the file skipping mechanism
query IIII
select id, value, part, date from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) where part='a'
----
1 value1 a 2012-01-01
query IIII
select id, value, part, date from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) where part='b'
----
2 value2 b 2013-01-01
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where part_cast > 0 and part_cast < 5000;
----
1 value1 1000 2012-01-01
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where part_cast > 5000;
----
2 value2 9000 2013-01-01
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where date_cast > CAST('2000-01-01' as DATE) and date_cast < CAST('2012-12-12' as DATE);
----
1 value1 1000 2012-01-01
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where date_cast > CAST('2000-01-01' as DATE) order by date_cast;
----
1 value1 1000 2012-01-01
2 value2 9000 2013-01-01
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where date_cast=CAST('2012-01-01' as DATE) OR part_cast=9000 ORDER BY date_cast;
----
1 value1 1000 2012-01-01
2 value2 9000 2013-01-01
## Filter expressions we can calculate during pushdown using filenames/hive partitions should be pruned
# Filtering out 0/2 files
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == '2012-01-011000') OR (part_cast=9000) ORDER BY date_cast;
----
1 value1 1000 2012-01-01
2 value2 9000 2013-01-01
# There should not be any filter operation remaining since it can be handled completely during pushdown by pruning file list
query II
EXPLAIN select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == '2012-01-011000') OR (part_cast=9000) ORDER BY date_cast;
----
physical_plan <!REGEX>:.*FILTER.*
# Query filtering out first file
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == 'foobar') OR (part_cast=9000) ORDER BY date_cast;
----
2 value2 9000 2013-01-01
# Again, we should not have a filter operator here
query II
explain select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == 'foobar') OR (part_cast=9000) ORDER BY date_cast;
----
physical_plan <!REGEX>:.*FILTER.*
# Query filtering out second file
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == '2012-01-011000') OR (part_cast=1337) ORDER BY date_cast;
----
1 value1 1000 2012-01-01
# Again, we should not have a filter operator here
query II
explain select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == '2012-01-011000') OR (part_cast=1337) ORDER BY date_cast;
----
physical_plan <!REGEX>:.*FILTER.*
# Filtering out both files
query IIII
select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == 'foobar') OR (part_cast=1337) ORDER BY date_cast;
----
# Again, we should not have a filter operator here
query II
EXPLAIN select id, value, CAST(part AS INT) as part_cast, CAST(date AS DATE) as date_cast from read_csv_auto('data/csv/hive-partitioning/types/*/*/test.csv', HIVE_PARTITIONING=1) where (date_cast=CAST('2012-01-01' as DATE) AND concat(date_cast::VARCHAR, part_cast::VARCHAR) == 'foobar') OR (part_cast=1337) ORDER BY date_cast;
----
physical_plan <!REGEX>:.*FILTER.*
# projection pushdown
query I
select value from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) order by 1
----
value1
value2
query I
select part from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) order by 1
----
a
b
# project only some columns from a hive partition
query I
select date from read_csv_auto('data/csv/hive-partitioning/different_order/*/*/test.csv', HIVE_PARTITIONING=1) order by 1
----
2012-01-01
2013-01-01

View File

@@ -0,0 +1,74 @@
# name: test/sql/copy/csv/csv_hive_filename_union.test
# description: Test the automatic parsing of the hive partitioning scheme
# group: [csv]
# Combines HIVE_PARTITIONING with FILENAME and UNION_BY_NAME: partition
# columns, filename columns, and union-by-name schema merging must all
# compose, including files whose column types/contents differ per partition.
statement ok
PRAGMA enable_verification
# projection pushdown
query I
select filename.replace('\', '/').split('/')[-2] from read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1, FILENAME=1) order by 1
----
date=2012-01-01
date=2013-01-01
query III
select part, filename.replace('\', '/').split('/')[-2], value from read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1, FILENAME=1) order by 1
----
a date=2012-01-01 value1
b date=2013-01-01 value2
query III
select part, filename.replace('\', '/').split('/')[-2], value from read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1, FILENAME=1, UNION_BY_NAME=1) order by 1
----
a date=2012-01-01 value1
b date=2013-01-01 value2
query III
select * exclude(filename) from read_csv_auto('data/csv/hive-partitioning/mismatching_types/*/*.csv', HIVE_PARTITIONING=1, FILENAME=1) order by 1
----
99 world 2
xxx 42 1
query III
select * from read_csv_auto('data/csv/hive-partitioning/mismatching_types/*/*.csv', HIVE_PARTITIONING=1, UNION_BY_NAME=1) order by 1
----
99 world 2
xxx 42 1
query IIII
select * exclude(filename), filename.replace('\', '/').split('/')[-2] from read_csv_auto('data/csv/hive-partitioning/mismatching_types/*/*.csv', HIVE_PARTITIONING=1, FILENAME=1, UNION_BY_NAME=1) order by 1
----
99 world 2 part=2
xxx 42 1 part=1
query IIII
select part, filename.replace('\', '/').split('/')[-2], a, b from read_csv_auto('data/csv/hive-partitioning/mismatching_types/*/*.csv', HIVE_PARTITIONING=1, FILENAME=1, UNION_BY_NAME=1) order by 1
----
1 part=1 xxx 42
2 part=2 99 world
query II
select * exclude (filename) from read_csv_auto('data/csv/hive-partitioning/mismatching_types/*/*.csv', HIVE_PARTITIONING=0, FILENAME=1, UNION_BY_NAME=1) order by 1
----
99 world
xxx 42
# This can either throw a cast error or a schema mismatch error depending on what is executed first because of
# parallelism
statement error
select * from read_csv_auto(['data/csv/hive-partitioning/mismatching_contents/part=1/test.csv', 'data/csv/hive-partitioning/mismatching_contents/part=2/test.csv']) order by 1
----
If you are trying to read files with different schemas, try setting union_by_name=True
query III
select a, b, c from read_csv_auto('data/csv/hive-partitioning/mismatching_contents/*/*.csv', UNION_BY_NAME=1) order by 2 NULLS LAST
----
42 world NULL
42 NULL 1992-01-01
query IIII
select a, b, part, c from read_csv_auto('data/csv/hive-partitioning/mismatching_contents/*/*.csv', UNION_BY_NAME=1, HIVE_PARTITIONING=1) order by 2 NULLS LAST
----
42 world 2 NULL
42 NULL 1 1992-01-01

View File

@@ -0,0 +1,61 @@
# name: test/sql/copy/csv/csv_home_directory.test
# description: CSV writer home directory
# group: [csv]
# Points home_directory at the test dir so that '~/...' paths (plain reads,
# COPY FROM, and globs) resolve through the home-directory expansion.
statement ok
PRAGMA enable_verification
statement ok
SET home_directory='__TEST_DIR__'
statement ok
CREATE TABLE integers AS SELECT * FROM range(10)
statement ok
COPY integers TO '__TEST_DIR__/integers.csv' (FORMAT CSV);
query I
SELECT * FROM '~/integers.csv'
----
0
1
2
3
4
5
6
7
8
9
statement ok
CREATE TABLE integers_load(i INTEGER);
statement ok
COPY integers_load FROM '~/integers.csv'
query I
SELECT * FROM integers_load
----
0
1
2
3
4
5
6
7
8
9
# glob from home directory
statement ok
COPY integers TO '__TEST_DIR__/homedir_integers1.csv'
statement ok
COPY integers TO '__TEST_DIR__/homedir_integers2.csv'
query I
SELECT COUNT(*) FROM '~/homedir_integers*.csv'
----
20

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/csv_limit_copy.test
# description: CSV limit copy
# group: [csv]
# COPY of a LIMIT-ed query must write exactly the limited row count.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers AS FROM range(1000000) t(i);
statement ok
COPY (FROM integers LIMIT 30000) TO '__TEST_DIR__/limit_copy.csv'
query I
SELECT COUNT(*) FROM '__TEST_DIR__/limit_copy.csv'
----
30000

View File

@@ -0,0 +1,28 @@
# name: test/sql/copy/csv/csv_line_too_long.test
# description: Test that verifies that the CSV has long lines and properly errors that
# group: [csv]
# Lines beyond the default maximum line size must raise a clear error,
# including the suggested max_line_size remedy; checked with and without
# a header row via the foreach loop.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE T1 (name VARCHAR);
foreach header true false
statement error
COPY T1(name) from 'data/csv/line_too_long.csv.gz' (DELIMITER ',', HEADER ${header} , COMPRESSION gzip, ALLOW_QUOTED_NULLS false);
----
Maximum line size of 2000000 bytes exceeded
statement error
COPY T1(name) from 'data/csv/line_too_long_with_newline.csv.gz' (DELIMITER ',', HEADER ${header} , COMPRESSION gzip, ALLOW_QUOTED_NULLS false);
----
Possible Solution: Change the maximum length size, e.g., max_line_size=2097165
statement error
COPY T1(name) from 'data/csv/multiple_line_too_long.csv.gz' (DELIMITER ',', HEADER ${header} , COMPRESSION gzip, ALLOW_QUOTED_NULLS false);
----
Possible Solution: Change the maximum length size, e.g., max_line_size=2097165
endloop

View File

@@ -0,0 +1,41 @@
# name: test/sql/copy/csv/csv_memory_management.test_slow
# description: Test the CSV Buffer Manager under strict memory limit conditions
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
SET memory_limit='512MB';
statement ok
SET threads=16;
# 100M rows, 2 BIGINT columns = 1.6GB uncompressed
statement ok
COPY (SELECT i, i // 5 AS j FROM range(100000000) t(i)) TO '__TEST_DIR__/integers.csv'
statement ok
CALL enable_logging('FileSystem')
query I
select count(*) from '__TEST_DIR__/integers.csv'
----
100000000
statement ok
COPY (SELECT i, i // 5 AS j FROM range(100000000) t(i)) TO '__TEST_DIR__/integers.csv.gz'
# Tighten the limits further: no temp directory, 80MB memory, one thread —
# the gzip read must still stream within budget.
statement ok
SET temp_directory='';
statement ok
SET memory_limit='80MB';
statement ok
SET threads=1;
query I
select count(*) from '__TEST_DIR__/integers.csv.gz'
----
100000000

View File

@@ -0,0 +1,144 @@
# name: test/sql/copy/csv/csv_names.test
# description: Read a CSV with names flags
# group: [csv]
# Exercises the names/column_names options: partial overrides, validation
# (duplicates, empties, NULL, wrong count/type), interaction with headers,
# the sniffer, and null_padding.
statement ok
PRAGMA enable_verification
# Duplicate names should not be accepted
statement error
from read_csv('data/csv/header_bug.csv', names=['col1', 'col1']) LIMIT 1;
----
read_csv names must have unique values
# Empty Names should not be accepted
statement error
from read_csv('data/csv/header_bug.csv', names=['']) LIMIT 1;
----
read_csv names cannot have empty (or all whitespace) value
statement error
from read_csv('data/csv/header_bug.csv', names=[' ', ' '], header = 0);
----
read_csv names cannot have empty (or all whitespace) value
# no names provided
query IIII
select column00, column01, column02, column03 from 'data/csv/real/lineitem_sample.csv' LIMIT 1;
----
1 15519 785 1
# override the names partially
query IIII
select l_orderkey, l_partkey, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=['l_orderkey', 'l_partkey']) LIMIT 1;
----
1 15519 785 1
# empty list
query IIII
select column00, column01, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=[]) LIMIT 1;
----
1 15519 785 1
# specify all names
query IIII
select l_orderkey, l_partkey, l_commitdate, l_comment from read_csv_auto('data/csv/real/lineitem_sample.csv', column_names=['l_orderkey', 'l_partkey', 'l_suppkey', 'l_linenumber', 'l_quantity', 'l_extendedprice', 'l_discount', 'l_tax', 'l_returnflag', 'l_linestatus', 'l_shipdate', 'l_commitdate', 'l_receiptdate', 'l_shipinstruct', 'l_shipmode', 'l_comment']) LIMIT 1;
----
1 15519 1996-02-12 egular courts above the
# specify too many names
statement error
select l_orderkey, l_partkey, l_commitdate, l_comment from read_csv_auto('data/csv/real/lineitem_sample.csv', names=['l_orderkey', 'l_partkey', 'l_suppkey', 'l_linenumber', 'l_quantity', 'l_extendedprice', 'l_discount', 'l_tax', 'l_returnflag', 'l_linestatus', 'l_shipdate', 'l_commitdate', 'l_receiptdate', 'l_shipinstruct', 'l_shipmode', 'l_comment', 'xx']) LIMIT 1;
----
Error when sniffing file "data/csv/real/lineitem_sample.csv".
# specify names on a file with a header
query II
select yr, Quarter from read_csv_auto('data/csv/real/ontime_sample.csv', names=['yr']) LIMIT 1;
----
1988 1
# NULL
statement error
select column00, column01, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=NULL) LIMIT 1;
----
read_csv names cannot be NULL
# specify the names twice
statement error
select l_orderkey, l_partkey, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=['l_orderkey', 'l_partkey'], column_names=['l_orderkey']) LIMIT 1;
----
read_csv column_names/names can only be supplied once
statement error
select l_orderkey, l_partkey, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=42) LIMIT 1;
----
Failed to cast value: Unimplemented type for cast (INTEGER -> VARCHAR[])
# specify options delim and sep
statement error
select column00 from read_csv_auto('data/csv/real/lineitem_sample.csv', delim='|', sep='|') LIMIT 1;
----
CSV Reader function option delim and sep are aliases, only one can be supplied
# duplicate names
statement error
select l_orderkey, l_partkey, column02, column03 from read_csv_auto('data/csv/real/lineitem_sample.csv', names=['l_orderkey', 'l_orderkey']) LIMIT 1;
----
read_csv names must have unique values. "l_orderkey" is repeated.
query I
select Columns FROM sniff_csv('data/csv/header.csv', names = ['a'])
----
[{'name': a, 'type': VARCHAR}]
query I
FROM read_csv('data/csv/header.csv', names = ['a'])
----
line2
line3
query I
select Columns FROM sniff_csv('data/csv/header.csv', names = ['a'], header = false)
----
[{'name': a, 'type': VARCHAR}]
query I
FROM read_csv('data/csv/header.csv', names = ['a'], header = false)
----
line1
line2
line3
query I
select Columns FROM sniff_csv('data/csv/header_2.csv', names = ['a'])
----
[{'name': a, 'type': VARCHAR}, {'name': line1_2, 'type': VARCHAR}, {'name': line1_3, 'type': VARCHAR}]
query III
FROM read_csv('data/csv/header_2.csv', names = ['a'])
----
line2 line2_2 line2_3
line3 line3_2 line3_3
query I
select Columns FROM sniff_csv('data/csv/header_2.csv', names = ['a'], header=False)
----
[{'name': a, 'type': VARCHAR}, {'name': column1, 'type': VARCHAR}, {'name': column2, 'type': VARCHAR}]
statement error
select Columns FROM sniff_csv('data/csv/header_2.csv', names = ['a','b','c','d'])
----
Error when sniffing file "data/csv/header_2.csv"
query I
select Columns FROM sniff_csv('data/csv/header_2.csv', names = ['a','b','c','d'], null_padding = True)
----
[{'name': a, 'type': VARCHAR}, {'name': b, 'type': VARCHAR}, {'name': c, 'type': VARCHAR}, {'name': d, 'type': VARCHAR}]
query IIII
FROM read_csv('data/csv/header_2.csv', names = ['a','b','c','d'], null_padding = True)
----
line2 line2_2 line2_3 NULL
line3 line3_2 line3_3 NULL

View File

@@ -0,0 +1,32 @@
# name: test/sql/copy/csv/csv_null_byte.test
# description: Read a CSV with a null byte
# group: [csv]
# Embedded \0 bytes must survive both auto-detection and explicit-schema
# reads, in data rows and in header rows.
statement ok
PRAGMA enable_verification
query III
select * from 'data/csv/nullbyte.csv';
----
val1 val\02 val3
query III
select * from read_csv('data/csv/nullbyte.csv', columns={'col1': 'VARCHAR', 'col2': 'VARCHAR', 'col3': 'VARCHAR'}, delim='|');
----
val1 val\02 val3
query II
select * from 'data/csv/nullbyte_header.csv';
----
val1 val2
query II
select * from read_csv('data/csv/nullbyte_header.csv', columns={'col1': 'VARCHAR', 'col2': 'VARCHAR'}, delim='|', header=False);
----
col1 col\02
val1 val2
query II
select * from read_csv('data/csv/nullbyte_header.csv', columns={'col1': 'VARCHAR', 'col2': 'VARCHAR'}, delim='|', header=True);
----
val1 val2

View File

@@ -0,0 +1,51 @@
# name: test/sql/copy/csv/csv_null_padding.test
# description: Test nullpadding setting
# group: [csv]
# null_padding=True pads short rows with NULLs so ragged files can be read
# with the full column count; without it the sniffer falls back to fewer
# columns or needs skip to get past short leading lines.
statement ok
PRAGMA enable_verification
# null padding with a header
query IIII
FROM read_csv_auto('data/csv/nullpadding_header.csv', null_padding=True, comment = '')
----
one two three four
1 a alice NULL
2 b bob NULL
# without null padding we can only read one column
query I
FROM read_csv_auto('data/csv/nullpadding_header.csv', null_padding=False, header = 0)
----
# this file has a bunch of gunk at the top
one,two,three,four
1,a,alice
2,b,bob
query I
FROM read_csv_auto('data/csv/nullpadding_header.csv', null_padding=False, skip=1, header = 0, comment = '')
----
one,two,three,four
1,a,alice
2,b,bob
query III
FROM read_csv_auto('data/csv/nullpadding_header.csv', null_padding=False, skip=2)
----
1 a alice
2 b bob
query I
FROM read_csv_auto('data/csv/blank_line.csv', null_padding=True)
----
1
2
3
4
5
6
7
8
9
NULL
11

View File

@@ -0,0 +1,163 @@
# name: test/sql/copy/csv/csv_nullstr_list.test
# description: Test CSVs with multiple nullstring values
# group: [csv]
# nullstr may be a list of strings; covers quoted nulls, allow_quoted_nulls,
# invalid list values, conflicts with delim/quote/escape, force_not_null,
# projection pushdown, and the COPY statement forms.
statement ok
PRAGMA enable_verification
# Test List
query III
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null']);
----
Pedro 31 1.73
Mark NULL NULL
Thijs 26 NULL
# Test Quoted
query III
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null']);
----
Pedro 31 1.73
Mark NULL NULL
Thijs 26 NULL
# allow_quoted_nulls = false: quoted occurrences of the null strings stay as values
query III
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null'], allow_quoted_nulls = false);
----
Pedro 31 1.73
Mark null (empty)
Thijs 26 none
# Test nullstr = []
statement error
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = []);
----
CSV Reader function option nullstr requires a non-empty list of possible null strings (varchar) as input
# Test nullstr = ['a', NULL]
statement error
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['a', NULL]);
----
CSV Reader function option nullstr does not accept NULL values as a valid nullstr option
# Test nullstr = NULL
statement error
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = NULL);
----
CSV Reader function option nullstr requires a string or a list as input
# Test nullstr = [42]
statement error
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = [42]);
----
CSV Reader function option nullstr requires a non-empty list of possible null strings (varchar) as input
# Test Null Strings equal to delim quote escape
statement error
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['',',','null'], allow_quoted_nulls = false);
----
DELIMITER must not appear in the NULL specification and vice versa
statement error
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='\', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','"','null'], allow_quoted_nulls = false);
----
QUOTE must not appear in the NULL specification and vice versa
statement error
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='\', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','\','null'], allow_quoted_nulls = false);
----
ESCAPE must not appear in the NULL specification and vice versa
# What if we have repeated values in our nullstr list?
query III
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null','','none','null'], allow_quoted_nulls = false);
----
Pedro 31 1.73
Mark null (empty)
Thijs 26 none
# Test with force_not_null
query III
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null'], force_not_null = ['height']);
----
Pedro 31 1.73
Mark NULL (empty)
Thijs 26 (empty)
query III
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null'], force_not_null = ['age','height']);
----
Pedro 31 1.73
Mark (empty) (empty)
Thijs 26 (empty)
# Test Quoted
query III
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null'], force_not_null = ['height']);
----
Pedro 31 1.73
Mark NULL (empty)
Thijs 26 (empty)
query III
FROM read_csv('data/csv/null/multiple_quoted_nulls.csv', auto_detect=true, delim=',', quote='"', escape='"', skip=0, header=true, nullstr = ['','none','null'], force_not_null = ['age','height'], ALL_VARCHAR = 1);
----
Pedro 31 1.73
Mark (empty) (empty)
Thijs 26 (empty)
# Test with projection push-down
query I
select height FROM read_csv('data/csv/null/multiple_nulls.csv', delim=',', quote='"', escape='"', skip=0, header=true, nullstr = ['','none','null']);
----
1.73
NULL
NULL
query I
select age FROM read_csv('data/csv/null/multiple_nulls.csv', delim=',', quote='"', escape='"', skip=0, header=true, nullstr = ['','none','null']);
----
31
NULL
26
# Test force_not_null fails for made-up column
statement error
FROM read_csv('data/csv/null/multiple_nulls.csv', auto_detect=false, delim=',', quote='"', escape='"', skip=0, header=true, columns={'name': 'VARCHAR', 'age': 'VARCHAR', 'height': 'VARCHAR'}, nullstr = ['','none','null'], force_not_null = ['dont_exist']);
----
"force_not_null" expected to find dont_exist, but it was not found in the table
# Let's add a few tests with COPY FROM
statement ok
CREATE TABLE data (a VARCHAR, b VARCHAR, c VARCHAR)
statement ok
COPY data FROM 'data/csv/null/multiple_nulls.csv' (nullstr ['','none','null'], HEADER 1);
statement error
COPY data FROM 'data/csv/null/multiple_nulls.csv' (nullstr NULL, HEADER 1);
----
NULL is not supported
statement error
COPY data FROM 'data/csv/null/multiple_nulls.csv' (nullstr [NULL], HEADER 1);
----
Binder Error: CSV Reader function option nullstr requires a non-empty list of possible null strings (varchar) as input
statement error
COPY data FROM 'data/csv/null/multiple_nulls.csv' (nullstr [42], HEADER 1);
----
Binder Error: CSV Reader function option nullstr requires a non-empty list of possible null strings (varchar) as input
query III
FROM data
----
Pedro 31 1.73
Mark NULL NULL
Thijs 26 NULL
statement error
COPY data TO '__TEST_DIR__/multiple_nulls.csv' (nullstr ['a', 'b']);
----
CSV Writer function option nullstr only accepts one nullstr value.

View File

@@ -0,0 +1,49 @@
# name: test/sql/copy/csv/csv_projection_pushdown.test
# description: CSV reader projection pushdown
# group: [csv]
# Reads single columns (including the virtual filename column) through a
# view over read_csv_auto so projection pushdown paths are exercised.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE tbl(i INT, j VARCHAR, k DATE);
statement ok
INSERT INTO tbl VALUES (42, 'hello world', NULL), (NULL, NULL, DATE '1992-01-01'), (100, 'thisisalongstring', DATE '2000-01-01');
statement ok
COPY tbl TO '__TEST_DIR__/projection_pushdown.csv' (FORMAT CSV);
statement ok
CREATE VIEW v1 AS FROM read_csv_auto('__TEST_DIR__/projection_pushdown.csv', filename=True)
query I
SELECT COUNT(*) FROM v1
----
3
query III
SELECT i, j, k FROM v1 ORDER BY i NULLS LAST
----
42 hello world NULL
100 thisisalongstring 2000-01-01
NULL NULL 1992-01-01
query I
SELECT j FROM v1 ORDER BY j NULLS LAST
----
hello world
thisisalongstring
NULL
query I
SELECT j FROM v1 ORDER BY j NULLS LAST
----
hello world
thisisalongstring
NULL
query I
SELECT filename.replace('\', '/').split('/')[-1] FROM v1 LIMIT 1
----
projection_pushdown.csv

View File

@@ -0,0 +1,15 @@
# name: test/sql/copy/csv/csv_quoted_newline_incorrect.test
# description: Read a CSV with an incorrect quoted newline, is expected that the csv reader manages to skip dirty lines
# group: [csv]
statement ok
PRAGMA enable_verification
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
# CSV reader skips malformed lines
statement ok
from 'data/csv/csv_quoted_newline_odd.csv';

View File

@@ -0,0 +1,20 @@
# name: test/sql/copy/csv/csv_roundtrip_single_null.test
# description: Round-trip a single NULL value
# group: [csv]
# A file containing only a NULL (written without header) must read back as
# a single NULL row, not as an empty file or an empty string.
statement ok
PRAGMA enable_verification
statement ok
COPY (SELECT NULL) TO '__TEST_DIR__/single_null.csv' (HEADER 0)
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
COPY integers FROM '__TEST_DIR__/single_null.csv' (HEADER 0)
query I
FROM integers
----
NULL

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/csv_windows_mixed_separators.test
# description: Test mixed separators on Windows
# group: [csv]
require windows
statement ok
PRAGMA enable_verification
# \ and / are interchangeable on Windows
query I
SELECT * FROM 'data\csv/test/date.csv'
----
2019-06-05
query I
SELECT * FROM glob('data/csv\test/*.csv') t(g) WHERE g LIKE '%date.csv'
----
data\csv\test\date.csv
# also for attach
statement ok
ATTACH '__TEST_DIR__/windows_test.db' AS s1
statement ok
CREATE TABLE s1.tbl AS SELECT * FROM range(10) t(i);
query I
SELECT SUM(i) FROM s1.tbl
----
45
statement ok
DETACH s1
# re-attach the same database using a backslash-separated path
statement ok
ATTACH '__TEST_DIR__\windows_test.db' AS s1
query I
SELECT SUM(i) FROM s1.tbl
----
45
statement ok
DETACH s1

View File

@@ -0,0 +1,35 @@
# name: test/sql/copy/csv/csv_write_gz.test_slow
# description: Test round-trip writing of gzip CSV files
# group: [csv]
# For each sample file: write with COMPRESSION GZIP, read the .gz back,
# and assert set equality (EXCEPT in both directions yields zero rows).
statement ok
PRAGMA enable_verification
foreach csv_name greek_utf8.csv imdb_movie_info_escaped.csv lineitem_sample.csv ncvoter.csv nfc_normalization.csv ontime_sample.csv voter.tsv web_page.csv
statement ok
CREATE TABLE csv_data AS SELECT * FROM 'data/csv/real/${csv_name}';
statement ok
COPY csv_data TO '__TEST_DIR__/${csv_name}.gz' (COMPRESSION GZIP, HEADER 0);
statement ok
CREATE TABLE csv_data_gz AS SELECT * FROM '__TEST_DIR__/${csv_name}.gz';
query I
SELECT COUNT(*) FROM (SELECT * FROM csv_data EXCEPT SELECT * FROM csv_data_gz)
----
0
query I
SELECT COUNT(*) FROM (SELECT * FROM csv_data_gz EXCEPT SELECT * FROM csv_data)
----
0
statement ok
DROP TABLE csv_data;
statement ok
DROP TABLE csv_data_gz;
endloop

View File

@@ -0,0 +1,39 @@
# name: test/sql/copy/csv/csv_write_zstd.test_slow
# description: Test round-trip writing of zstd CSV files
# group: [csv]
# Same round-trip scheme as csv_write_gz.test_slow, but with COMPRESSION
# ZSTD (provided via the parquet extension).
require parquet
require no_extension_autoloading "FIXME: Autoloading on zstd compression (parquet) not yet there"
statement ok
PRAGMA enable_verification
foreach csv_name greek_utf8.csv imdb_movie_info_escaped.csv lineitem_sample.csv ncvoter.csv nfc_normalization.csv ontime_sample.csv voter.tsv web_page.csv
statement ok
CREATE TABLE csv_data AS SELECT * FROM 'data/csv/real/${csv_name}';
statement ok
COPY csv_data TO '__TEST_DIR__/${csv_name}.zst' (COMPRESSION ZSTD, HEADER 0);
statement ok
CREATE TABLE csv_data_zst AS SELECT * FROM '__TEST_DIR__/${csv_name}.zst';
query I
SELECT COUNT(*) FROM (SELECT * FROM csv_data EXCEPT SELECT * FROM csv_data_zst)
----
0
query I
SELECT COUNT(*) FROM (SELECT * FROM csv_data_zst EXCEPT SELECT * FROM csv_data)
----
0
statement ok
DROP TABLE csv_data;
statement ok
DROP TABLE csv_data_zst;
endloop

View File

@@ -0,0 +1,20 @@
# name: test/sql/copy/csv/duck_fuzz/test_internal_4048.test
# description: Test Fuzz Issue 4048
# group: [duck_fuzz]
# Fuzzer-generated queries passing NULL entries inside the names parameter
# must produce a clean binder error rather than crashing.
statement ok
PRAGMA enable_verification
statement error
FROM sniff_csv('data/csv/14512.csv', names := [NULL]);
----
Binder Error: read_csv names parameter cannot have a NULL value
statement ok
create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0;
statement error
SELECT DISTINCT NULL, c3, (c4 <= c1), (c3 BETWEEN c4 AND c2)
FROM sniff_csv('1a616242-1dcd-4914-99d1-16119d9b6e4c', "names" := ['1970-01-01'::DATE, 'infinity'::DATE, '-infinity'::DATE, NULL, '2022-05-12'::DATE], filename := '9be2bc9d-d49f-4564-bfa4-6336b211a874') AS t5(c1, c2, c3, c4) WHERE c1 GROUP BY c3 LIMIT ('c4000757-69ca-400e-b58a-1dac73b85595' IS NULL);
----
Binder Error: read_csv names parameter cannot have a NULL value

View File

@@ -0,0 +1,20 @@
# name: test/sql/copy/csv/empty_first_line.test
# description: Issue #4933: Reading a CSV file with an empty header is problematic
# group: [csv]
statement ok
PRAGMA enable_verification
# Auto-detection must cope with the empty first line and still find two columns.
query II
SELECT * FROM read_csv_auto('data/csv/empty_first_line.csv', delim=' ');
----
a 1
b 2
c 3
# With auto-detect off, one VARCHAR column and skip=1, each remaining line is read verbatim
# (delim='|' does not occur in the data, so lines are not split).
query I
SELECT * FROM read_csv_auto('data/csv/empty_first_line.csv', delim='|', auto_detect=false, columns={'column00': 'VARCHAR'}, skip = 1);
----
a 1
b 2
c 3

View File

@@ -0,0 +1,37 @@
# name: test/sql/copy/csv/empty_string_quote.test
# description: Test empty string quote
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE customer(c_customer_sk INTEGER, c_customer_id VARCHAR, c_current_cdemo_sk INTEGER, c_current_hdemo_sk INTEGER, c_current_addr_sk INTEGER, c_first_shipto_date_sk INTEGER, c_first_sales_date_sk INTEGER, c_salutation VARCHAR, c_first_name VARCHAR, c_last_name VARCHAR, c_preferred_cust_flag VARCHAR, c_birth_day INTEGER, c_birth_month INTEGER, c_birth_year INTEGER, c_birth_country VARCHAR, c_login VARCHAR, c_email_address VARCHAR, c_last_review_date_sk INTEGER);
statement ok
COPY customer FROM 'data/csv/customer.csv' (FORMAT 'csv', quote '"', delimiter ',', header 0);
# With default settings the quoted-empty c_login fields come back as NULL (see column 16).
query IIIIIIIIIIIIIIIIII
SELECT * FROM customer
----
1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208 Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508
2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318
3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313
4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361
5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469
6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430
7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360
8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334
9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454
10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641
statement ok
CREATE TABLE customer_quoted_nulls(c_customer_sk INTEGER, c_customer_id VARCHAR, c_current_cdemo_sk INTEGER, c_current_hdemo_sk INTEGER, c_current_addr_sk INTEGER, c_first_shipto_date_sk INTEGER, c_first_sales_date_sk INTEGER, c_salutation VARCHAR, c_first_name VARCHAR, c_last_name VARCHAR, c_preferred_cust_flag VARCHAR, c_birth_day INTEGER, c_birth_month INTEGER, c_birth_year INTEGER, c_birth_country VARCHAR, c_login VARCHAR, c_email_address VARCHAR, c_last_review_date_sk INTEGER);
statement ok
insert into customer_quoted_nulls select * from read_csv_auto('data/csv/customer.csv', allow_quoted_nulls=False)
# With allow_quoted_nulls=False the quoted-empty fields are kept as empty strings,
# so all 10 c_login values are non-NULL.
query I
SELECT COUNT(c_login) FROM customer_quoted_nulls
----
10

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/except.test
# description: Test Except works when one of the children is a CSV File
# group: [csv]
statement ok
PRAGMA enable_verification
statement ok
COPY (SELECT 1 a, 2 b) TO '__TEST_DIR__/mismatch_types_except.csv';
# The CSV child's sniffed column types must unify with the constant child's
# mismatched types (VARCHAR/INTEGER) without error, and the row (1, 2) survives EXCEPT.
query II
FROM '__TEST_DIR__/mismatch_types_except.csv' EXCEPT select 'bla' as a, 1 as b;
----
1 2

View File

@@ -0,0 +1,33 @@
# name: test/sql/copy/csv/glob/copy_csv_glob.test
# description: Test globbing CSVs
# group: [glob]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE dates(d DATE);
# simple globbing
# COPY ... FROM accepts glob patterns ('?' matches one character, '*' any run).
statement ok
COPY dates FROM 'data/csv/glob/a?/*.csv' (AUTO_DETECT 1);
query I
SELECT * FROM dates ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
2019-08-05
2019-08-15
2019-08-25
# nothing matches the glob
# NOTE: COPY FROM takes a filename literal; a read_csv() table-function call is
# rejected at parse time (syntax error), before the glob is ever evaluated.
statement error
COPY dates FROM read_csv('data/csv/glob/*/a*a.csv', auto_detect=1)
----
syntax error at or near "'data/csv/glob/*/a*a.csv'"

View File

@@ -0,0 +1,287 @@
# name: test/sql/copy/csv/glob/read_csv_glob.test
# description: Test globbing CSVs
# group: [glob]
statement ok
PRAGMA enable_verification
# Sniffed column types must be the same whether the input is a glob or an
# explicit file list, and must not depend on the order of files in the list.
query IIIII
select typeof(#1),typeof(#2),typeof(#3),typeof(#4),typeof(#5) FROM read_csv('data/csv/per_thread/*.csv') limit 1
----
VARCHAR BOOLEAN DOUBLE DOUBLE VARCHAR
query IIIII
select typeof(#1),typeof(#2),typeof(#3),typeof(#4),typeof(#5) FROM read_csv(['data/csv/per_thread/c1.csv', 'data/csv/per_thread/c2.csv']) limit 1
----
VARCHAR BOOLEAN DOUBLE DOUBLE VARCHAR
query IIIII
select typeof(#1),typeof(#2),typeof(#3),typeof(#4),typeof(#5) FROM read_csv(['data/csv/per_thread/c2.csv', 'data/csv/per_thread/c1.csv', 'data/csv/per_thread/c3.csv']) limit 1
----
VARCHAR BOOLEAN DOUBLE DOUBLE VARCHAR
# simple globbing
query I
SELECT * FROM read_csv('data/csv/glob/a?/*.csv') ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
2019-08-05
2019-08-15
2019-08-25
# narrower glob: only files starting with 'a' in the a? directories
query I
SELECT * FROM read_csv('data/csv/glob/a?/a*.csv') ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
# list parameter
query I
SELECT * FROM read_csv(['data/csv/glob/a1/a1.csv', 'data/csv/glob/a2/a2.csv']) ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
query I
SELECT * FROM read_csv_auto(['data/csv/glob/a1/a1.csv', 'data/csv/glob/a2/a2.csv']) ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
# multiple globs
# The same glob listed twice yields every matching row twice (no deduplication).
query I
SELECT * FROM read_csv(['data/csv/glob/a?/a*.csv', 'data/csv/glob/a?/a*.csv']) ORDER BY 1
----
2019-06-05
2019-06-05
2019-06-15
2019-06-15
2019-06-25
2019-06-25
2019-07-05
2019-07-05
2019-07-15
2019-07-15
2019-07-25
2019-07-25
# more asterisks for directories
query I
SELECT * FROM read_csv('data/csv/*/a?/a*.csv') ORDER BY 1
----
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
# filename=1 adds the source file path as an extra column
query II
SELECT a, b LIKE '%a1.csv%' FROM read_csv('data/csv/*/a?/a*.csv', filename=1) t1(a,b) ORDER BY 1
----
2019-06-05 1
2019-06-15 1
2019-06-25 1
2019-07-05 0
2019-07-15 0
2019-07-25 0
# read-csv auto fails here because of a type mismatch: most files contain dates, but one file contains integers
statement error
SELECT * FROM read_csv('data/csv/glob/*/*.csv') ORDER BY 1
----
Schema mismatch between globbed files.
# forcing string parsing works
query I
SELECT * FROM read_csv('data/csv/glob/*/*.csv', columns=STRUCT_PACK(d := 'STRING')) ORDER BY 1
----
1
2
2019-06-05
2019-06-15
2019-06-25
2019-07-05
2019-07-15
2019-07-25
2019-08-05
2019-08-15
2019-08-25
3
# filename column still works when column types are forced
query II
SELECT a, b LIKE '%a_.csv' FROM read_csv('data/csv/glob/*/*.csv', columns=STRUCT_PACK(d := 'STRING'), filename=1) t(a,b) ORDER BY 1
----
1 0
2 0
2019-06-05 1
2019-06-15 1
2019-06-25 1
2019-07-05 1
2019-07-15 1
2019-07-25 1
2019-08-05 0
2019-08-15 0
2019-08-25 0
3 0
# test glob parsing
query I
SELECT COUNT(*) FROM glob('data/csv/glob/*/*.csv')
----
5
# glob() also accepts a list of patterns
query I
SELECT COUNT(*) FROM glob(['data/csv/glob/*/*.csv'])
----
5
# duplicate patterns in the list are not deduplicated
query I
SELECT COUNT(*) FROM glob(['data/csv/glob/*/*.csv', 'data/csv/glob/*/*.csv'])
----
10
# we can also use windows file slashes
query I
SELECT COUNT(*) FROM glob('data\csv\glob\*\*.csv')
----
5
# consecutive slashes are ignored
query I
SELECT COUNT(*) FROM glob('data//csv///glob///*//////*.csv')
----
5
# nothing matches the glob
# read_csv raises an error on an empty match, both for a bare pattern and a list
statement error
SELECT * FROM read_csv('data/csv/glob/*/a*a.csv') ORDER BY 1
----
No files found that match the pattern "data/csv/glob/*/a*a.csv"
statement error
SELECT * FROM read_csv(['data/csv/glob/*/a*a.csv']) ORDER BY 1
----
No files found that match the pattern "data/csv/glob/*/a*a.csv"
statement error
SELECT * FROM read_csv_auto(['data/csv/glob/*/a*a.csv']) ORDER BY 1
----
No files found that match the pattern "data/csv/glob/*/a*a.csv"
# glob() itself returns zero rows on an empty match rather than erroring
query I
SELECT COUNT(*) FROM glob('data/csv/glob/*/a*a.csv')
----
0
query I
select count(*) from glob('/rewoiarwiouw3rajkawrasdf790273489*.csv') limit 10;
----
0
# '~' home-directory expansion must not error either
query I
select count(*) from glob('~/rewoiarwiouw3rajkawrasdf790273489*.py') limit 10;
----
0
# file_search_path changes how relative paths resolve, so skip DB reload checks
require skip_reload
# file_search_path with one path
statement ok
set file_search_path='data/csv/glob';
query I
SELECT COUNT(*) FROM glob('*/*.csv');
----
5
# file_search_path with multiple paths
statement ok
set file_search_path='data/csv/glob/a1,data/csv/glob/a2';
query I
SELECT COUNT(*) FROM glob('*.csv');
----
2
# file_search_path with a non-existent path
statement ok
set file_search_path='data/csv/glob,garbage';
query I
SELECT COUNT(*) FROM glob('*/*.csv');
----
5
# Only file_search_path is searched
query I
SELECT COUNT(*) FROM glob('data/csv/glob/*/*.csv');
----
0
# file_search_path can be cleared
statement ok
set file_search_path='';
query I
SELECT COUNT(*) FROM glob('data/csv/glob/*/*.csv');
----
5
# empty list
# An untyped empty list does not bind to any overload; a typed empty VARCHAR[]
# binds but is rejected because at least one file is required.
statement error
SELECT * FROM read_csv_auto([]) ORDER BY 1
----
No function matches
statement error
SELECT * FROM read_csv_auto([]::VARCHAR[]) ORDER BY 1
----
at least one file
# null list
# NULL is rejected in every form: bare, inside a list, and typed casts.
statement error
SELECT * FROM read_csv_auto(NULL) ORDER BY 1
----
NULL
statement error
SELECT * FROM read_csv_auto([NULL]) ORDER BY 1
----
NULL
statement error
SELECT * FROM read_csv_auto(NULL::VARCHAR) ORDER BY 1
----
NULL
statement error
SELECT * FROM read_csv_auto(NULL::VARCHAR[]) ORDER BY 1
----
NULL
# the schema-mismatch error must also be raised when reading single-threaded
statement ok
SET threads=1;
statement error
FROM read_csv('data/csv/glob/*/*.csv');
----
Schema mismatch between globbed files.

Some files were not shown because too many files have changed in this diff Show More