should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,11 @@
# name: test/sql/copy/csv/auto/test_14177.test
# description: Test CSV Sample works for #14177
# group: [auto]

statement ok
PRAGMA enable_verification

# Regression check for issue #14177: scan the file with an 80-byte buffer and
# ignore_errors enabled; exactly 5 rows must be counted.
query I
select count(*) FROM (FROM read_csv('data/csv/auto/14177.csv', buffer_size=80, ignore_errors = true)) as t
----
5

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_5250.test
# description: Test CSV Sample works for #5250
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# SAMPLE_SIZE = -1 is passed to the sniffer (documented as "use the whole file");
# the read must return all 3993 rows.
query I
select count(*) from read_csv_auto('data/csv/page_namespacepage_title_sample.csv', SAMPLE_SIZE = -1)
----
3993

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_5378.test
# description: Test read_csv_auto on issue 5378
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# TSV file read with '\N' as the NULL marker and sample_size = -1; 3002 rows expected.
query I
SELECT count(*) FROM read_csv_auto ('data/csv/auto/titlebasicsdebug.tsv', nullstr='\N', sample_size = -1);
----
3002

View File

@@ -0,0 +1,23 @@
# name: test/sql/copy/csv/auto/test_auto_8231.test
# description: Test issue 8231 related to missing headers and null padding
# group: [auto]

statement ok
PRAGMA enable_verification

# View over the CSV file, read with null_padding enabled
statement ok
create view locations_header_trailing_comma as SELECT * from read_csv_auto('data/csv/locations_row_trailing_comma.csv', null_padding=True)

# The view exposes five columns and a single data row
query IIIII
SELECT * from locations_header_trailing_comma
----
1 name 0 0 value

# The fifth column has no header name and is reported under the generated name column4
query IIIIII
describe locations_header_trailing_comma;
----
id BIGINT YES NULL NULL NULL
name VARCHAR YES NULL NULL NULL
lat BIGINT YES NULL NULL NULL
lon BIGINT YES NULL NULL NULL
column4 VARCHAR YES NULL NULL NULL

View File

@@ -0,0 +1,20 @@
# name: test/sql/copy/csv/auto/test_auto_8573.test
# description: Test read_csv_auto on issue 8573
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# read_csv: the user-declared DECIMAL(25,3) column type must be reported unchanged
query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)

# read_csv_auto: same expectation with the same options
query II
SELECT typeof(bignumber), typeof(bignumber::DECIMAL(25,3)) FROM read_csv_auto('data/csv/big_number.csv', COLUMNS={'bignumber': 'DECIMAL(25,3)'}, QUOTE='"', DELIM=',');
----
DECIMAL(25,3) DECIMAL(25,3)

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/auto/test_auto_8649.test
# description: Test CSV Sample works for #8649
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# Read the file with default auto-detection; a single column holding the
# values T and 0 is expected.
query I
SELECT * FROM read_csv_auto("data/csv/dim0.csv") ;
----
T
0

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_auto_8860.test
# description: Test read_csv_auto on issue 8860
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# Gzipped file read with explicit quote and delimiter and an empty comment
# option; 5384 rows expected.
query I
SELECT count(*) FROM read_csv_auto("data/csv/auto/product_codes_HS17_V202301.csv.gz", quote = '"', comment='', delim = ',') ;
----
5384

View File

@@ -0,0 +1,81 @@
# name: test/sql/copy/csv/auto/test_auto_column_type_opt.test
# description: Test read_csv_auto with column_types option
# group: [auto]

statement ok
PRAGMA enable_verification

# Test read_csv without auto_detect throws
statement error
select * from read_csv('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(a := 'INTEGER'))
----
Columns with names: "a" do not exist in the CSV File

# Test non-struct throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=1)
----
COLUMN_TYPES requires a struct or list as input

# Test empty throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK())
----
Can't pack nothing into a struct

# Test funky type throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(a := 'BLA'))
----
Type with name BLA does not exist!

# Test funky name throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(bla := 'INTEGER'))
----
Columns with names: "bla" do not exist in the CSV File

# Test wrong type throws
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column3 := 'INTEGER'))
----
This type was either manually set or derived from an existing table. Select a different type to correctly parse this column.

# Test 1st column defined
query I
SELECT typeof(#1) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'DOUBLE')) LIMIT 1
----
DOUBLE

query I
SELECT typeof(#1) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'INTEGER')) LIMIT 1
----
INTEGER

# Test 3rd column defined
query I
SELECT typeof(#3) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK( column2 := 'HUGEINT')) LIMIT 1
----
HUGEINT

# Test 1st and 3rd column defined
query II
SELECT typeof(#1),typeof(#3) from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'BIGINT', column2 := 'HUGEINT')) LIMIT 1
----
BIGINT HUGEINT

# Full read with the 1st and 3rd column types overridden; all nine rows are checked
query IIII
SELECT * from read_csv_auto('data/csv/test/multi_column_string.csv', COLUMN_TYPES=STRUCT_PACK(column0 := 'BIGINT', column2 := 'HUGEINT'))
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9

View File

@@ -0,0 +1,37 @@
# name: test/sql/copy/csv/auto/test_auto_cranlogs.test
# description: Test read_csv_auto from cranlogs gzip
# group: [auto]

statement ok
PRAGMA enable_verification

# First read of the gzipped file, before verify_parallelism is enabled
statement ok
CREATE TABLE cranlogs AS SELECT * FROM read_csv_auto ('data/csv/real/tmp2013-06-15.csv.gz');

query I
SELECT COUNT(*) FROM cranlogs;
----
37459

# Spot-check the first five rows
query TTITTTTTTI
SELECT * FROM cranlogs LIMIT 5;
----
2013-06-15 00:18:11 46338 NA NA NA date 1.2-33 JP 1
2013-06-15 00:18:18 740765 NA NA NA plyr 1.8 JP 2
2013-06-15 00:54:25 1229408 NA NA NA RJSONIO 1.0-3 JP 3
2013-06-15 00:58:50 501915 2.15.3 x86_64 linux-gnu animation 2.2 IN 4
2013-06-15 00:14:52 254933 3.0.1 x86_64 linux-gnu foreign 0.8-54 HK 5

# Read the same file again with verify_parallelism enabled
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE cranlogs2 AS SELECT * FROM read_csv_auto ('data/csv/real/tmp2013-06-15.csv.gz');

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIII
(SELECT * FROM cranlogs EXCEPT SELECT * FROM cranlogs2)
UNION ALL
(SELECT * FROM cranlogs2 EXCEPT SELECT * FROM cranlogs)
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_greek_ncvoter.test
# description: Test read_csv_auto from ncvoter csv
# group: [auto]

statement ok
PRAGMA enable_verification

# Target table with an explicit schema; the file is loaded into it with COPY below
statement ok
CREATE TABLE IF NOT EXISTS ncvoters(county_id INTEGER, county_desc STRING, voter_reg_num STRING,status_cd STRING, voter_status_desc STRING, reason_cd STRING, voter_status_reason_desc STRING, absent_ind STRING, name_prefx_cd STRING,last_name STRING, first_name STRING, midl_name STRING, name_sufx_cd STRING, full_name_rep STRING,full_name_mail STRING, house_num STRING, half_code STRING, street_dir STRING, street_name STRING, street_type_cd STRING, street_sufx_cd STRING, unit_designator STRING, unit_num STRING, res_city_desc STRING,state_cd STRING, zip_code STRING, res_street_address STRING, res_city_state_zip STRING, mail_addr1 STRING, mail_addr2 STRING, mail_addr3 STRING, mail_addr4 STRING, mail_city STRING, mail_state STRING, mail_zipcode STRING, mail_city_state_zip STRING, area_cd STRING, phone_num STRING, full_phone_number STRING, drivers_lic STRING, race_code STRING, race_desc STRING, ethnic_code STRING, ethnic_desc STRING, party_cd STRING, party_desc STRING, sex_code STRING, sex STRING, birth_age STRING, birth_place STRING, registr_dt STRING, precinct_abbrv STRING, precinct_desc STRING,municipality_abbrv STRING, municipality_desc STRING, ward_abbrv STRING, ward_desc STRING, cong_dist_abbrv STRING, cong_dist_desc STRING, super_court_abbrv STRING, super_court_desc STRING, judic_dist_abbrv STRING, judic_dist_desc STRING, nc_senate_abbrv STRING, nc_senate_desc STRING, nc_house_abbrv STRING, nc_house_desc STRING,county_commiss_abbrv STRING, county_commiss_desc STRING, township_abbrv STRING, township_desc STRING,school_dist_abbrv STRING, school_dist_desc STRING, fire_dist_abbrv STRING, fire_dist_desc STRING, water_dist_abbrv STRING, water_dist_desc STRING, sewer_dist_abbrv STRING, sewer_dist_desc STRING, sanit_dist_abbrv STRING, sanit_dist_desc STRING, rescue_dist_abbrv STRING, rescue_dist_desc STRING, munic_dist_abbrv STRING, munic_dist_desc STRING, dist_1_abbrv STRING, dist_1_desc STRING, dist_2_abbrv STRING, dist_2_desc STRING, confidential_ind STRING, age STRING, 
ncid STRING, vtd_abbrv STRING, vtd_desc STRING);

# COPY with AUTO_DETECT reports the number of rows loaded
query I
COPY ncvoters FROM 'data/csv/real/ncvoter.csv' (FORMAT CSV, AUTO_DETECT TRUE);
----
10

# Spot-check columns from the start and the end of the schema; name_prefx_cd is NULL in every row
query ITTT
SELECT county_id, county_desc, vtd_desc, name_prefx_cd FROM ncvoters;
----
1 ALAMANCE 09S NULL
1 ALAMANCE 09S NULL
1 ALAMANCE 03W NULL
1 ALAMANCE 09S NULL
1 ALAMANCE 1210 NULL
1 ALAMANCE 035 NULL
1 ALAMANCE 124 NULL
1 ALAMANCE 06E NULL
1 ALAMANCE 035 NULL
1 ALAMANCE 064 NULL

# Load the file again with verify_parallelism enabled into an empty copy of the table
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE ncvoters2 AS SELECT * FROM ncvoters LIMIT 0

statement ok
COPY ncvoters2 FROM 'data/csv/real/ncvoter.csv' (FORMAT CSV, AUTO_DETECT TRUE);

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM ncvoters EXCEPT SELECT * FROM ncvoters2)
UNION ALL
(SELECT * FROM ncvoters2 EXCEPT SELECT * FROM ncvoters)
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_greek_utf8.test
# description: Test read_csv_auto from greek-utf8 csv
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# Load the file, renaming its three columns to i, j, k and NFC-normalizing the text column
statement ok
CREATE TABLE greek_utf8 AS SELECT i, nfc_normalize(j) j, k FROM read_csv_auto ('data/csv/real/greek_utf8.csv') t(i, j, k)

query I
SELECT COUNT(*) FROM greek_utf8;
----
8

query ITI
SELECT * FROM greek_utf8 ORDER BY 1;
----
1689 00i\047m 2
1690 00i\047v 2
41561 2015 1
45804 21π 1
51981 24hours 1
171067 ardèch 2
182773 afi 1
607808 poverty 1

# can also do this: query the file path directly.
# ORDER BY i makes the row order explicit, so the expected rows (sorted by i,
# identical to the table query above) do not depend on scan order.
query ITI
SELECT i, nfc_normalize(j) j, k FROM 'data/csv/real/greek_utf8.csv' t(i, j, k) ORDER BY i
----
1689 00i\047m 2
1690 00i\047v 2
41561 2015 1
45804 21π 1
51981 24hours 1
171067 ardèch 2
182773 afi 1
607808 poverty 1

View File

@@ -0,0 +1,25 @@
# name: test/sql/copy/csv/auto/test_auto_imdb.test
# description: Test read_csv_auto from imdb csv
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE movie_info AS SELECT * FROM read_csv_auto ('data/csv/real/imdb_movie_info_escaped.csv');

query I
SELECT COUNT(*) FROM movie_info;
----
201

# Second read of the same file for comparison.
# NOTE(review): this file never sets PRAGMA verify_parallelism before the
# second read, so both reads run under the same settings - confirm whether
# that is intentional.
statement ok
CREATE TABLE movie_info2 AS SELECT * FROM read_csv_auto ('data/csv/real/imdb_movie_info_escaped.csv');

# Rows present in only one of the two tables; the expected result is empty
query IIIII
(FROM movie_info EXCEPT FROM movie_info2)
UNION ALL
(FROM movie_info2 EXCEPT FROM movie_info)
----

View File

@@ -0,0 +1,46 @@
# name: test/sql/copy/csv/auto/test_auto_lineitem.test
# description: Test copy into auto from lineitem csv
# group: [auto]

statement ok
PRAGMA enable_verification

# Target table with an explicit schema, all columns NOT NULL
statement ok
CREATE TABLE lineitem(l_orderkey INT NOT NULL, l_partkey INT NOT NULL, l_suppkey INT NOT NULL, l_linenumber INT NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR(1) NOT NULL, l_linestatus VARCHAR(1) NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR(25) NOT NULL, l_shipmode VARCHAR(10) NOT NULL, l_comment VARCHAR(44) NOT NULL);

# COPY with AUTO_DETECT reports the number of rows loaded
query I
COPY lineitem FROM 'data/csv/real/lineitem_sample.csv' (FORMAT CSV, AUTO_DETECT TRUE);
----
10

query I
SELECT COUNT(*) FROM lineitem;
----
10

# Spot-check the rows of order 1 in line-number order
query IT
SELECT l_partkey, l_comment FROM lineitem WHERE l_orderkey=1 ORDER BY l_linenumber;
----
15519 egular courts above the
6731 ly final dependencies: slyly bold
6370 riously. regular, express dep
214 lites. fluffily even de
2403 pending foxes. slyly re
1564 arefully slyly ex

# Load the file again with verify_parallelism enabled into an empty copy of the table
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE lineitem2 AS SELECT * FROM lineitem LIMIT 0

statement ok
COPY lineitem2 FROM 'data/csv/real/lineitem_sample.csv' (FORMAT CSV, AUTO_DETECT TRUE);

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIIIIIIIII
(SELECT * FROM lineitem EXCEPT SELECT * FROM lineitem2)
UNION ALL
(SELECT * FROM lineitem2 EXCEPT SELECT * FROM lineitem)
----
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/copy/csv/auto/test_auto_ontime.test
# description: Test read_csv_auto from on-time dataset
# group: [auto]

statement ok
PRAGMA enable_verification

# Target table with an explicit schema; the file is loaded into it with COPY below
statement ok
CREATE TABLE ontime(year SMALLINT, quarter SMALLINT, month SMALLINT, dayofmonth SMALLINT, dayofweek SMALLINT, flightdate DATE, uniquecarrier CHAR(7), airlineid DECIMAL(8,2), carrier CHAR(2), tailnum VARCHAR(50), flightnum VARCHAR(10), originairportid INTEGER, originairportseqid INTEGER, origincitymarketid INTEGER, origin CHAR(5), origincityname VARCHAR(100), originstate CHAR(2), originstatefips VARCHAR(10), originstatename VARCHAR(100), originwac DECIMAL(8,2), destairportid INTEGER, destairportseqid INTEGER, destcitymarketid INTEGER, dest CHAR(5), destcityname VARCHAR(100), deststate CHAR(2), deststatefips VARCHAR(10), deststatename VARCHAR(100), destwac DECIMAL(8,2), crsdeptime DECIMAL(8,2), deptime DECIMAL(8,2), depdelay DECIMAL(8,2), depdelayminutes DECIMAL(8,2), depdel15 DECIMAL(8,2), departuredelaygroups DECIMAL(8,2), deptimeblk VARCHAR(20), taxiout DECIMAL(8,2), wheelsoff DECIMAL(8,2), wheelson DECIMAL(8,2), taxiin DECIMAL(8,2), crsarrtime DECIMAL(8,2), arrtime DECIMAL(8,2), arrdelay DECIMAL(8,2), arrdelayminutes DECIMAL(8,2), arrdel15 DECIMAL(8,2), arrivaldelaygroups DECIMAL(8,2), arrtimeblk VARCHAR(20), cancelled DECIMAL(8,2), cancellationcode CHAR(1), diverted DECIMAL(8,2), crselapsedtime DECIMAL(8,2), actualelapsedtime DECIMAL(8,2), airtime DECIMAL(8,2), flights DECIMAL(8,2), distance DECIMAL(8,2), distancegroup DECIMAL(8,2), carrierdelay DECIMAL(8,2), weatherdelay DECIMAL(8,2), nasdelay DECIMAL(8,2), securitydelay DECIMAL(8,2), lateaircraftdelay DECIMAL(8,2), firstdeptime VARCHAR(10), totaladdgtime VARCHAR(10), longestaddgtime VARCHAR(10), divairportlandings VARCHAR(10), divreacheddest VARCHAR(10), divactualelapsedtime VARCHAR(10), divarrdelay VARCHAR(10), divdistance VARCHAR(10), div1airport VARCHAR(10), div1aiportid INTEGER, div1airportseqid INTEGER, div1wheelson VARCHAR(10), div1totalgtime VARCHAR(10), div1longestgtime VARCHAR(10), div1wheelsoff VARCHAR(10), div1tailnum VARCHAR(10), div2airport VARCHAR(10), div2airportid INTEGER, div2airportseqid 
INTEGER, div2wheelson VARCHAR(10), div2totalgtime VARCHAR(10), div2longestgtime VARCHAR(10), div2wheelsoff VARCHAR(10), div2tailnum VARCHAR(10), div3airport VARCHAR(10), div3airportid INTEGER, div3airportseqid INTEGER, div3wheelson VARCHAR(10), div3totalgtime VARCHAR(10), div3longestgtime VARCHAR(10), div3wheelsoff VARCHAR(10), div3tailnum VARCHAR(10), div4airport VARCHAR(10), div4airportid INTEGER, div4airportseqid INTEGER, div4wheelson VARCHAR(10), div4totalgtime VARCHAR(10), div4longestgtime VARCHAR(10), div4wheelsoff VARCHAR(10), div4tailnum VARCHAR(10), div5airport VARCHAR(10), div5airportid INTEGER, div5airportseqid INTEGER, div5wheelson VARCHAR(10), div5totalgtime VARCHAR(10), div5longestgtime VARCHAR(10), div5wheelsoff VARCHAR(10), div5tailnum VARCHAR(10));

# COPY without explicit options reports the number of rows loaded
query I
COPY ontime FROM 'data/csv/real/ontime_sample.csv';
----
9

# Spot-check a few columns, including the last div5* group; all nine rows carry the same values
query ITTTT
SELECT year, uniquecarrier, origin, origincityname, div5longestgtime FROM ontime;
----
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL
1988 AA JFK New York, NY NULL

# Load the file again with verify_parallelism enabled into an empty copy of the table
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE ontime2 AS SELECT * FROM ontime LIMIT 0

statement ok
COPY ontime2 FROM 'data/csv/real/ontime_sample.csv';

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM ontime EXCEPT SELECT * FROM ontime2)
UNION ALL
(SELECT * FROM ontime2 EXCEPT SELECT * FROM ontime)
----
----

View File

@@ -0,0 +1,39 @@
# name: test/sql/copy/csv/auto/test_auto_voter.test_slow
# description: Test read_csv_auto from voter tsv
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE voters AS SELECT * FROM read_csv_auto ('data/csv/real/voter.tsv');

query I
SELECT COUNT(*) FROM voters;
----
5300

# The file can also be queried directly through its double-quoted path
query I
SELECT COUNT(*) FROM "data/csv/real/voter.tsv";
----
5300

# FIXME: This should run on windows
require notwindows

# read with parallel reader and verify that we get the same result
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE voters2 AS SELECT * FROM read_csv_auto ('data/csv/real/voter.tsv');

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
(SELECT * FROM voters EXCEPT SELECT * FROM voters2)
UNION ALL
(SELECT * FROM voters2 EXCEPT SELECT * FROM voters)
----

View File

@@ -0,0 +1,31 @@
# name: test/sql/copy/csv/auto/test_auto_web_page.test
# description: Test read_csv_auto from web_page csv
# group: [auto]

# NOTE(review): this file does not start with PRAGMA enable_verification -
# confirm whether the omission is intentional.
statement ok
CREATE TABLE web_page AS SELECT * FROM read_csv_auto ('data/csv/real/web_page.csv');

query I
SELECT COUNT(*) FROM web_page;
----
60

# Spot-check the first three rows by the generated column name column00
query ITTTIITITTIIII
SELECT * FROM web_page ORDER BY column00 LIMIT 3;
----
1 AAAAAAAABAAAAAAA 1997-09-03 NULL 2450810 2452620 Y 98539 http://www.foo.com welcome 2531 8 3 4
2 AAAAAAAACAAAAAAA 1997-09-03 2000-09-02 2450814 2452580 N NULL http://www.foo.com protected 1564 4 3 1
3 AAAAAAAACAAAAAAA 2000-09-03 NULL 2450814 2452611 N NULL http://www.foo.com feedback 1564 4 3 4

# Read the same file again with verify_parallelism enabled
statement ok
PRAGMA verify_parallelism

statement ok
CREATE TABLE web_page2 AS SELECT * FROM read_csv_auto ('data/csv/real/web_page.csv');

# Rows present in only one of the two tables; the expected result is empty
query IIIIIIIIIIIIII
(SELECT * FROM web_page EXCEPT SELECT * FROM web_page2)
UNION ALL
(SELECT * FROM web_page2 EXCEPT SELECT * FROM web_page)
----

View File

@@ -0,0 +1,309 @@
# name: test/sql/copy/csv/auto/test_csv_auto.test
# description: Test csv dialect detection
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# ':'-delimited file read with an explicit two-column schema, null_padding and
# ignore_errors enabled and strict_mode disabled
query II
FROM read_csv('data/csv/repromarket.csv',
columns={
'email': 'varchar',
'password': 'varchar'
},
all_varchar=true,
delim=':',
header=false,
skip=0,
null_padding=true,
ignore_errors=true,
strict_mode=false
);
----
nemanja.krpovic@gmail.com krlleta
vega@example.combogus NULL
Vega-Inject bogus
mirkofoto@gmail.com mirko

# auto_detect disabled with a single VARCHAR column: each line comes back whole, pipes included
query I
FROM read_csv('data/csv/pipe_delim.csv', columns={'a': 'VARCHAR'}, auto_detect=False)
----
one|two|three|four
1|2|3|4

# Value containing \0 characters, with default options
query I
FROM read_csv('data/csv/nullterm.csv')
----
\0world\0

# Same file with explicit quote and escape characters
query I
FROM read_csv('data/csv/nullterm.csv', quote = '"', escape = '"')
----
\0world\0

# With quote set to '"', the single quotes stay part of the value
query I
FROM read_csv('data/csv/single_quote.csv', quote = '"')
----
'Doc'

# Column names and types reported by sniff_csv with ignore_errors enabled
query I
select columns FROM sniff_csv('data/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true);
----
[{'name': id, 'type': BIGINT}, {'name': name, 'type': VARCHAR}, {'name': age, 'type': BIGINT}, {'name': sex, 'type': VARCHAR}, {'name': state, 'type': VARCHAR}]

# Same file read with ignore_errors and strict_mode enabled; ids 3 and 6 are absent from the result
query IIIII
FROM read_csv('data/csv/auto/mock_duckdb_test_data.csv', ignore_errors = true,
strict_mode=true)
----
1 James 30 M AL
2 Jill 32 F CO
4 John 34 M AS
5 Matthew 31 M NULL
7 Olivia 36 F OR
8 James 37 M AZ
9 Titus 38 M WY

# Declaring one column for a file in which two are found is an error in strict mode
statement error
select * from read_csv_auto('data/csv/dates.csv', auto_detect=false, delim=',', quote='"', columns={'a': 'VARCHAR'},
strict_mode=true)
----
Expected Number of Columns: 1 Found: 2

# With auto-detection the same file yields two columns
query II
select * from read_csv_auto('data/csv/dates.csv')
----
919 304 6161 2008-08-10

# Single quote passed as the quote character
query II
select * from read_csv_auto('data/csv/from_df.csv', quote='''')
----
'a,b,c' 45
NULL 234
hello 234
bye 2

# CSV file with RFC-conform dialect
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/rfc_conform.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST1 one space
345 TEST1 trailing_space
567 TEST1 no_space

statement ok
DROP TABLE test;

# CSV file with RFC-conform dialect quote
# read_csv is an alias to read_csv_auto when no extra parameters are supplied
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/rfc_conform_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST2 one space
345 TEST2 trailing_space,
567 TEST2 no"space

statement ok
DROP TABLE test;

# CSV file with RFC-conform dialect quote/leading space of numerics
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/leading_space_numerics.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST3 text1
345 TEST3 text2
567 TEST3 text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/pipe_delim.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST4 text1
345 TEST4 text2
567 TEST4 text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/pipe_delim_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST5 text1
345 TEST5 text2|
567 TEST5 text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes and double escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/quote_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST6 text1
345 TEST6 text"2"text
567 TEST6 text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and double quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/backslash_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST7 text1
345 TEST7 text"2"
567 TEST7 text3

statement ok
DROP TABLE test;

# CSV file with bar delimiter and single quotes and backslash escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_quote_backslash.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST8 text1
345 TEST8 text'2'text
567 TEST8 text3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_delim.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST9 text1
345 TEST9 text2
567 TEST9 text3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter and double quotes
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST10 text1
345 TEST10 text2
567 TEST10 te;xt3

statement ok
DROP TABLE test;

# CSV file with semicolon delimiter, double quotes and RFC escape
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/semicolon_escape.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST11 text1
345 TEST11 text2
567 TEST11 te"xt3

statement ok
DROP TABLE test;

# CSV file with tab delimiter
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123 TEST12 text1
345 TEST12 text2
567 TEST12 text3

statement ok
DROP TABLE test;

# CSV file with tab delimiter and single quotes
# (expected values are listed one per line)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab_single_quote.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST13
text1
345
TEST13
te xt2
567
TEST13
text3

statement ok
DROP TABLE test;

# CSV file with tab delimiter and single quotes without type-hint
# (expected values are listed one per line)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/tab_single_quote_varchar.csv');

query ITT
SELECT * FROM test ORDER BY column0;
----
123
TEST14
text1
345
TEST14
te xt2
567
TEST14
text3

statement ok
DROP TABLE test;

# CSV file with trailing empty lines
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_1254.csv');

query II
SELECT a, b FROM test;
----
1 2
1 2

statement ok
DROP TABLE test;

View File

@@ -0,0 +1,13 @@
# name: test/sql/copy/csv/auto/test_date_format_bug_linux.test
# description: Test read_csv_auto returns the values of date_format_bug_linux.csv unchanged
# group: [auto]

statement ok
PRAGMA enable_verification

# The three values must come back verbatim in a single column.
# NOTE(review): presumably guards against date-format detection misreading
# these strings (inferred from the file name only) - confirm.
query I
SELECT * FROM read_csv_auto('data/csv/auto/date_format_bug_linux.csv')
----
8cb123cb8
34fd321
fg5391jn4

View File

@@ -0,0 +1,137 @@
# name: test/sql/copy/csv/auto/test_describe_order.test
# description: Test that DESCRIBE on a view over read_csv_auto and on a select from that view list the same columns in the same order
# group: [auto]

statement ok
PRAGMA enable_verification

# View over the gzipped CSV file
statement ok
create view v as select * from read_csv_auto('data/csv/who.csv.gz');

# Describe the view itself
query IIIIII
describe v;
----
country VARCHAR YES NULL NULL NULL
iso2 VARCHAR YES NULL NULL NULL
iso3 VARCHAR YES NULL NULL NULL
year BIGINT YES NULL NULL NULL
new_sp_m014 BIGINT YES NULL NULL NULL
new_sp_m1524 BIGINT YES NULL NULL NULL
new_sp_m2534 BIGINT YES NULL NULL NULL
new_sp_m3544 BIGINT YES NULL NULL NULL
new_sp_m4554 BIGINT YES NULL NULL NULL
new_sp_m5564 BIGINT YES NULL NULL NULL
new_sp_m65 BIGINT YES NULL NULL NULL
new_sp_f014 BIGINT YES NULL NULL NULL
new_sp_f1524 BIGINT YES NULL NULL NULL
new_sp_f2534 BIGINT YES NULL NULL NULL
new_sp_f3544 BIGINT YES NULL NULL NULL
new_sp_f4554 BIGINT YES NULL NULL NULL
new_sp_f5564 BIGINT YES NULL NULL NULL
new_sp_f65 BIGINT YES NULL NULL NULL
new_sn_m014 BIGINT YES NULL NULL NULL
new_sn_m1524 BIGINT YES NULL NULL NULL
new_sn_m2534 BIGINT YES NULL NULL NULL
new_sn_m3544 BIGINT YES NULL NULL NULL
new_sn_m4554 BIGINT YES NULL NULL NULL
new_sn_m5564 BIGINT YES NULL NULL NULL
new_sn_m65 BIGINT YES NULL NULL NULL
new_sn_f014 BIGINT YES NULL NULL NULL
new_sn_f1524 BIGINT YES NULL NULL NULL
new_sn_f2534 BIGINT YES NULL NULL NULL
new_sn_f3544 BIGINT YES NULL NULL NULL
new_sn_f4554 BIGINT YES NULL NULL NULL
new_sn_f5564 BIGINT YES NULL NULL NULL
new_sn_f65 BIGINT YES NULL NULL NULL
new_ep_m014 BIGINT YES NULL NULL NULL
new_ep_m1524 BIGINT YES NULL NULL NULL
new_ep_m2534 BIGINT YES NULL NULL NULL
new_ep_m3544 BIGINT YES NULL NULL NULL
new_ep_m4554 BIGINT YES NULL NULL NULL
new_ep_m5564 BIGINT YES NULL NULL NULL
new_ep_m65 BIGINT YES NULL NULL NULL
new_ep_f014 BIGINT YES NULL NULL NULL
new_ep_f1524 BIGINT YES NULL NULL NULL
new_ep_f2534 BIGINT YES NULL NULL NULL
new_ep_f3544 BIGINT YES NULL NULL NULL
new_ep_f4554 BIGINT YES NULL NULL NULL
new_ep_f5564 BIGINT YES NULL NULL NULL
new_ep_f65 BIGINT YES NULL NULL NULL
newrel_m014 BIGINT YES NULL NULL NULL
newrel_m1524 BIGINT YES NULL NULL NULL
newrel_m2534 BIGINT YES NULL NULL NULL
newrel_m3544 BIGINT YES NULL NULL NULL
newrel_m4554 BIGINT YES NULL NULL NULL
newrel_m5564 BIGINT YES NULL NULL NULL
newrel_m65 BIGINT YES NULL NULL NULL
newrel_f014 BIGINT YES NULL NULL NULL
newrel_f1524 BIGINT YES NULL NULL NULL
newrel_f2534 BIGINT YES NULL NULL NULL
newrel_f3544 BIGINT YES NULL NULL NULL
newrel_f4554 BIGINT YES NULL NULL NULL
newrel_f5564 BIGINT YES NULL NULL NULL
newrel_f65 BIGINT YES NULL NULL NULL

# Describe a select over the view; the expected column list is identical to the one above
query IIIIII
describe select * from v;
----
country VARCHAR YES NULL NULL NULL
iso2 VARCHAR YES NULL NULL NULL
iso3 VARCHAR YES NULL NULL NULL
year BIGINT YES NULL NULL NULL
new_sp_m014 BIGINT YES NULL NULL NULL
new_sp_m1524 BIGINT YES NULL NULL NULL
new_sp_m2534 BIGINT YES NULL NULL NULL
new_sp_m3544 BIGINT YES NULL NULL NULL
new_sp_m4554 BIGINT YES NULL NULL NULL
new_sp_m5564 BIGINT YES NULL NULL NULL
new_sp_m65 BIGINT YES NULL NULL NULL
new_sp_f014 BIGINT YES NULL NULL NULL
new_sp_f1524 BIGINT YES NULL NULL NULL
new_sp_f2534 BIGINT YES NULL NULL NULL
new_sp_f3544 BIGINT YES NULL NULL NULL
new_sp_f4554 BIGINT YES NULL NULL NULL
new_sp_f5564 BIGINT YES NULL NULL NULL
new_sp_f65 BIGINT YES NULL NULL NULL
new_sn_m014 BIGINT YES NULL NULL NULL
new_sn_m1524 BIGINT YES NULL NULL NULL
new_sn_m2534 BIGINT YES NULL NULL NULL
new_sn_m3544 BIGINT YES NULL NULL NULL
new_sn_m4554 BIGINT YES NULL NULL NULL
new_sn_m5564 BIGINT YES NULL NULL NULL
new_sn_m65 BIGINT YES NULL NULL NULL
new_sn_f014 BIGINT YES NULL NULL NULL
new_sn_f1524 BIGINT YES NULL NULL NULL
new_sn_f2534 BIGINT YES NULL NULL NULL
new_sn_f3544 BIGINT YES NULL NULL NULL
new_sn_f4554 BIGINT YES NULL NULL NULL
new_sn_f5564 BIGINT YES NULL NULL NULL
new_sn_f65 BIGINT YES NULL NULL NULL
new_ep_m014 BIGINT YES NULL NULL NULL
new_ep_m1524 BIGINT YES NULL NULL NULL
new_ep_m2534 BIGINT YES NULL NULL NULL
new_ep_m3544 BIGINT YES NULL NULL NULL
new_ep_m4554 BIGINT YES NULL NULL NULL
new_ep_m5564 BIGINT YES NULL NULL NULL
new_ep_m65 BIGINT YES NULL NULL NULL
new_ep_f014 BIGINT YES NULL NULL NULL
new_ep_f1524 BIGINT YES NULL NULL NULL
new_ep_f2534 BIGINT YES NULL NULL NULL
new_ep_f3544 BIGINT YES NULL NULL NULL
new_ep_f4554 BIGINT YES NULL NULL NULL
new_ep_f5564 BIGINT YES NULL NULL NULL
new_ep_f65 BIGINT YES NULL NULL NULL
newrel_m014 BIGINT YES NULL NULL NULL
newrel_m1524 BIGINT YES NULL NULL NULL
newrel_m2534 BIGINT YES NULL NULL NULL
newrel_m3544 BIGINT YES NULL NULL NULL
newrel_m4554 BIGINT YES NULL NULL NULL
newrel_m5564 BIGINT YES NULL NULL NULL
newrel_m65 BIGINT YES NULL NULL NULL
newrel_f014 BIGINT YES NULL NULL NULL
newrel_f1524 BIGINT YES NULL NULL NULL
newrel_f2534 BIGINT YES NULL NULL NULL
newrel_f3544 BIGINT YES NULL NULL NULL
newrel_f4554 BIGINT YES NULL NULL NULL
newrel_f5564 BIGINT YES NULL NULL NULL
newrel_f65 BIGINT YES NULL NULL NULL

View File

@@ -0,0 +1,17 @@
# name: test/sql/copy/csv/auto/test_double_quoted_header.test
# description: Test reading a CSV file whose header contains a column name with a double quote
# group: [auto]

statement ok
PRAGMA enable_verification

# The first column name is reported with its embedded double quote
query IIIIII
describe from 'data/csv/double_quoted_header.csv';
----
foo "bar BIGINT YES NULL NULL NULL
name VARCHAR YES NULL NULL NULL

# Both data rows are read
query II
from 'data/csv/double_quoted_header.csv';
----
1 rob
2 sally

View File

@@ -0,0 +1,11 @@
# name: test/sql/copy/csv/auto/test_early_out.test
# description: Test the conversion error reported when reading with an 8-byte buffer and an 8-byte maximum line size
# group: [auto]

statement ok
PRAGMA enable_verification

# auto_detect is off and three INTEGER columns are declared; the read must
# fail with a conversion error on column "b".
statement error
SELECT *
FROM read_csv('data/csv/auto/early_out_error.csv', buffer_size = 8, maximum_line_size = 8, auto_detect = false, columns = {'a': 'integer','b': 'integer','c': 'integer'}, header = true)
----
Error when converting column "b". Could not convert string "\n" to 'INTEGER'

View File

@@ -0,0 +1,43 @@
# name: test/sql/copy/csv/auto/test_fallback_all_varchar.test_slow
# description: Test optional parameters for read csv
# group: [auto]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_parallelism

# CSV file with irregularity in first column and default sample size
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv');

# With the default sample size the irregular first column is detected as VARCHAR
query TTTT
SELECT typeof(TestDoubleError), typeof(TestDouble), typeof(TestText), typeof(TestInteger) FROM test LIMIT 1
----
VARCHAR DOUBLE VARCHAR BIGINT

statement ok
DROP TABLE test

# The failing read below is repeated inside a loop; the same error is expected on every iteration
loop i 1 100

# CSV file with irregularity in first column and small sample size
statement error
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv', SAMPLE_SIZE=1);
----
Column TestDoubleError is being converted as type DOUBLE

endloop

# CSV file with irregularity in first column, small sample size and fallback activated
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_fallback.csv', SAMPLE_SIZE=1, ALL_VARCHAR=1);

# With ALL_VARCHAR every column is VARCHAR
query TTTT
SELECT typeof(TestDoubleError), typeof(TestDouble), typeof(TestText), typeof(TestInteger) FROM test LIMIT 1
----
VARCHAR VARCHAR VARCHAR VARCHAR

statement ok
DROP TABLE test

View File

@@ -0,0 +1,112 @@
# name: test/sql/copy/csv/auto/test_header_completion.test
# description: Test csv header completion (generated names for missing headers, suffixes for duplicate headers)
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with one missing header: the unnamed middle column is addressed as column1
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/missing_header_col.csv');
query ITT
SELECT a, column1, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with one duplicate header: the second "a" is addressed as a_1
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_col.csv');
query ITT
SELECT a, b, a_1 FROM test ORDER BY a;
----
123 TEST2 text1
345 TEST2 text2
statement ok
DROP TABLE test;
# CSV file with one duplicate header and collision: the disambiguated name a_1 is itself
# already taken, so a further suffix is appended (a_1_1)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_collision.csv');
query ITTT
SELECT a, b, a_1, a_1_1 FROM test ORDER BY a;
----
123 TEST2 text1 text1
345 TEST2 text2 text2
statement ok
DROP TABLE test;
# CSV file with all column names missing: columns are addressed as column0, column1, column2
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/empty_header.csv');
query ITT
SELECT column0, column1, column2 FROM test ORDER BY column0;
----
123 TEST3 text1
345 TEST3 text2
statement ok
DROP TABLE test;
# CSV file with 12 columns and all but one column name missing: generated names are
# zero-padded to two digits here (column01, column12)
# NOTE(review): selecting column12 suggests 13 columns if numbering is zero-based - confirm the count above
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/missing_many_col.csv');
query ITT
SELECT a, column01, column12 FROM test;
----
123 TEST2 value1
345 TEST2 value2
statement ok
DROP TABLE test;
# CSV file with 12 identically named columns: duplicates are addressed with numeric suffixes (a_8, a_9)
# NOTE(review): column12 is presumably the generated name of an unnamed trailing column - confirm against the data file
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/duplicate_header_columns.csv');
query IIIT
SELECT a, a_8, a_9, column12 FROM test;
----
123 NULL NULL value1
345 NULL NULL value2
statement ok
DROP TABLE test;
# CSV file with 10 identically named columns, one named column12 and column 11 and 12 missing:
# the name column12 from the header is kept, and the colliding name is addressed as column12_1
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/test_header_mix.csv');
query IIIIIT
SELECT a, a_8, a_9, column12, column11, column12_1 FROM test;
----
123 NULL NULL NULL NULL value1
345 NULL NULL NULL NULL value2
statement ok
DROP TABLE test;
# CSV file with only unnamed columns (13 are selected below, column00 through column12);
# check for correct zero-padded naming
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/unnamed_columns.csv');
query ITTIIIIIIIIIT
SELECT column00, column01, column02, column03, column04, column05, column06, column07, column08, column09, column10, column11, column12 FROM test;
----
123 TEST2 text1 NULL NULL NULL NULL NULL NULL NULL NULL NULL value1
345 TEST2 text2 NULL NULL NULL NULL NULL NULL NULL NULL NULL value2
statement ok
DROP TABLE test;

View File

@@ -0,0 +1,176 @@
# name: test/sql/copy/csv/auto/test_header_detection.test
# description: Test csv header detection
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with two lines, neither of which is a header: both lines come back as data
# and the columns are addressed by generated names
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/no_header.csv');
query RTT
SELECT column0, column1, column2 FROM test ORDER BY column0;
----
123.000000 TEST1 2000-12-12
345.000000 TEST1 2000-12-13
statement ok
DROP TABLE test;
# CSV file with two lines, one header: only one data row remains and the columns
# are addressed by their header names
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_header.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST2 2000-12-13
statement ok
DROP TABLE test;
# CSV file with three lines, one header, one skip row
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/skip_row.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST3 2000-12-13
statement ok
DROP TABLE test;
# CSV file with one header and two skip rows
# NOTE(review): the original comment said "three lines", which does not add up with
# one header, two skip rows and the data row returned below - confirm the line count
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/multiple_skip_row.csv');
query RTT
SELECT number, text, date FROM test ORDER BY number;
----
345.000000 TEST4 2000-12-13
statement ok
DROP TABLE test;
# CSV file with two lines, both containing only strings; header = 0 is passed explicitly
# and both lines are expected back as data
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/varchar_multi_line.csv', header = 0);
query TTT
SELECT * FROM test ORDER BY column0;
----
Alice StreetA TEST5
Bob StreetB TEST5
statement ok
DROP TABLE test;
# CSV file with one line, two columns, only strings; header = 0 is passed explicitly
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/varchar_single_line.csv', header = 0);
query TT
SELECT column0, column1 FROM test ORDER BY column0;
----
Alice StreetA
statement ok
DROP TABLE test;
# CSV file with one line, two columns - one numeric, one string: the line is expected back
# as data under generated column names
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/mixed_single_line.csv');
query IT
SELECT column0, column1 FROM test ORDER BY column0;
----
1 StreetA
statement ok
DROP TABLE test;
# CSV file with one line, one string column: the expected result is empty, i.e. the single
# line does not come back as a data row
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_value.csv');
query T
SELECT * FROM test;
----
statement ok
DROP TABLE test;
# CSV file with one line, one numeric column: the single line is expected back as data
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/single_numeric.csv');
query I
SELECT * FROM test;
----
1
statement ok
DROP TABLE test;
# CSV with UTF-8 BOM marker that could mess up the header line parsing
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto('data/csv/auto/utf8bom.csv');
query II
SELECT * FROM test;
----
1 Mark
2 Hannes
# The first column must be addressable by its plain header name id
query I
SELECT id FROM test;
----
1
2
statement ok
DROP TABLE test;
# Round-trip a table with only VARCHAR columns through COPY, with and without a header
statement ok
CREATE TABLE my_varchars(a VARCHAR, b VARCHAR, c VARCHAR);
statement ok
INSERT INTO my_varchars VALUES ('Hello', 'Beautiful', 'World');
statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_header.csv' (HEADER 1);
statement ok
COPY my_varchars TO '__TEST_DIR__/varchar_no_header.csv' (HEADER 0);
# No HEADER option is given when reading back the file that was written with a header
statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_header.csv' ;
statement ok
COPY my_varchars FROM '__TEST_DIR__/varchar_no_header.csv' (HEADER 0);
# 1 inserted row + 1 row from each of the two files = 3 rows; the header line of the
# first file must not show up as a data row
query III
FROM my_varchars ;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
# Export the three rows and load them back with default options: exactly three rows
# are expected to be appended, giving six in total
statement ok
COPY my_varchars TO '__TEST_DIR__/big_varchar.csv';
statement ok
COPY my_varchars FROM '__TEST_DIR__/big_varchar.csv';
query III
FROM my_varchars;
----
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World
Hello Beautiful World

View File

@@ -0,0 +1,114 @@
# name: test/sql/copy/csv/auto/test_normalize_names.test
# description: Test csv header normalization
# group: [auto]
statement ok
PRAGMA enable_verification
# With normalize_names = true the sniffer is expected to report these names with a leading underscore
query I
select columns from sniff_csv('data/csv/test_commit_rollback.csv', normalize_names = true)
----
[{'name': _commit, 'type': BIGINT}, {'name': _rollback, 'type': BIGINT}, {'name': _abort, 'type': BIGINT}]
# CSV file with uppercase header: with normalize_names=TRUE the columns are addressed as a, b, c
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_1.csv', normalize_names=TRUE);
query ITT
SELECT a, b, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with uppercase header and normalize names off: the columns are addressed as A, B, C
# (the ORDER BY still refers to the first column as a)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_1.csv');
query ITT
SELECT A, B, C FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# Sniffer view of the keyword header file used below: names come back prefixed with an underscore
query I
select columns from sniff_csv('data/csv/auto/normalize_names_2.csv', normalize_names = true)
----
[{'name': _select, 'type': BIGINT}, {'name': _insert, 'type': VARCHAR}, {'name': _join, 'type': VARCHAR}]
# CSV file with keywords in header: the columns are addressed as _select, _insert, _join
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_2.csv', normalize_names=TRUE);
query ITT
SELECT _select, _insert, _join FROM test ORDER BY _select;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with names starting with numerics: the columns are addressed with a leading underscore (_0_a, _1_b, _9_c)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_3.csv', normalize_names=TRUE);
query ITT
SELECT _0_a, _1_b, _9_c FROM test ORDER BY _0_a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with accents and UTF8 characters: the columns are addressed as allo, teost and _
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_4.csv', normalize_names=TRUE);
query ITT
SELECT allo, teost, _ FROM test ORDER BY allo;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with accents and UTF8 characters (second file): the columns are addressed as a, b, c
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_5.csv', normalize_names=TRUE);
query ITT
SELECT a, b, c FROM test ORDER BY a;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# CSV file with superscripts and UTF8 characters: the columns are addressed as aax, hello_world, qty_m2
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/normalize_names_6.csv', normalize_names=TRUE);
query ITT
SELECT aax, hello_world, qty_m2 FROM test ORDER BY aax;
----
123 TEST1 text1
345 TEST1 text2
statement ok
DROP TABLE test;
# Sniffer output for a further file: both names are expected with a leading underscore
query I
select columns from sniff_csv('data/csv/normalize.csv', normalize_names = true)
----
[{'name': _name, 'type': VARCHAR}, {'name': _text, 'type': VARCHAR}]

View File

@@ -0,0 +1,105 @@
# name: test/sql/copy/csv/auto/test_sample_size.test
# description: Test the effect of SAMPLE_SIZE on type detection for a CSV file with a very sparse column
# group: [auto]
statement ok
PRAGMA enable_verification
# CSV file with very sparse column, SAMPLE_SIZE=1: the sparse TestDouble column is
# expected to be typed VARCHAR
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE=1);
query IIII
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1
----
BIGINT VARCHAR DATE VARCHAR
statement ok
DROP TABLE test
# CSV file with very sparse column, SAMPLE_SIZE=-1: TestDouble is expected to be typed DOUBLE
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE=-1);
query IIII
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1
----
BIGINT DOUBLE DATE VARCHAR
statement ok
DROP TABLE test
# CSV file with very sparse column, SAMPLE_SIZE = -1: check the single row with a
# non-NULL TestDouble value as well as the detected types
# (an earlier version of this comment mentioned a sample size of 500; the statement passes -1)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE = -1);
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column and no sample option at all (default sample size):
# TestDouble is still expected to be typed DOUBLE
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv');
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column, SAMPLE_SIZE = -1 again (same statement and expectations
# as two cases above; an earlier version of this comment mentioned sample size 200 / 20 samples)
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/issue_811.csv', SAMPLE_SIZE = -1);
query IRTT
SELECT TestInteger, TestDouble, TestDate, TestText FROM test WHERE TestDouble is not NULL ;
----
5 1.1 2015-05-01 fdf
query TTTT
SELECT typeof(TestInteger), typeof(TestDouble), typeof(TestDate), typeof(TestText) FROM test LIMIT 1;
----
BIGINT DOUBLE DATE VARCHAR
statement ok
drop table test;
# CSV file with very sparse column using copy into a table with predeclared column types
statement ok
CREATE TABLE test (TestInteger integer, TestDouble double, TestDate varchar, TestText varchar);
# COPY with AUTO_DETECT TRUE and no explicit sample size must succeed against the declared types
statement ok
COPY test FROM 'data/csv/auto/issue_811.csv' (AUTO_DETECT TRUE);
statement ok
drop table test;
# CSV file with very sparse column using copy into a table with predeclared column types
statement ok
CREATE TABLE test (TestInteger integer, TestDouble double, TestDate varchar, TestText varchar);
# Same COPY, this time with SAMPLE_SIZE -1 in addition to AUTO_DETECT TRUE
statement ok
COPY test FROM 'data/csv/auto/issue_811.csv' (SAMPLE_SIZE -1, AUTO_DETECT TRUE);
statement ok
drop table test;

View File

@@ -0,0 +1,36 @@
# name: test/sql/copy/csv/auto/test_sniffer_blob.test
# description: Test reading a blob with the sniffer
# group: [auto]
statement ok
PRAGMA enable_verification
# This is the only way to try to trick the sniffer into checking blobs and it is not valid:
# BLOB is rejected as an auto type candidate
statement error
select count(*) from read_csv('data/csv/test/blob.csv',auto_type_candidates=['blob'])
----
Auto Type Candidate of type BLOB is not accepted as a valid input
# All this is cool and should work: the BLOB type is given explicitly instead of being sniffed.
# First via types= with header = 0 ...
query I
select count(*) from read_csv('data/csv/test/blob.csv',types=['blob'], header = 0)
----
1
# ... then via an explicit columns= definition
query I
select count(*) from read_csv('data/csv/test/blob.csv',columns={'col1': 'BLOB'})
----
1
# COPY into a table that already declares a BLOB column must succeed as well
statement ok
create table t ( a blob)
statement ok
COPY t FROM 'data/csv/test/blob.csv';
# NOTE(review): this repeats the read_csv query from above and never reads table t;
# if the intent is to verify the COPY result, querying t may be what was meant - confirm
query I
select count(*) from read_csv('data/csv/test/blob.csv',columns={'col1': 'BLOB'})
----
1

View File

@@ -0,0 +1,14 @@
# name: test/sql/copy/csv/auto/test_sniffer_empty_start_value.test
# description: Test reading a value with empty spaces at the beginning
# group: [auto]
statement ok
PRAGMA enable_verification
# Read the file with default options: four rows of three columns are expected,
# including text values that contain spaces and punctuation
query III
from read_csv('data/csv/empty_space_start_value.csv')
----
1968 86 Greetings
1970 17 Bloody Mama
1970 73 Hi, Mom!
1971 40 Born to Win

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/auto/test_timings_csv.test
# description: Test CSV Sample works for Gabor's timings csv file
# group: [auto]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
# Target table with explicitly declared column types (seven columns)
statement ok
CREATE OR REPLACE TABLE timings(tool string, sf float, day string, batch_type string, q string, parameters string, time float);
# Load the pipe-delimited file with a header line; COPY is run as a query here and the
# single value it returns is expected to be 1095
query I
COPY timings FROM 'data/csv/timings.csv' (HEADER, DELIMITER '|')
----
1095

View File

@@ -0,0 +1,117 @@
# name: test/sql/copy/csv/auto/test_type_candidates.test
# description: Test Type Candidates for auto_csv
# group: [auto]
statement ok
PRAGMA enable_verification
# Write a one-row, pipe-delimited file without a header (integer, double, varchar) to read back below
statement ok
create table t (a integer, b double, c varchar)
statement ok
insert into t values (1,1.1,'bla');
statement ok
COPY (SELECT * from t) TO '__TEST_DIR__/csv_file.csv' (FORMAT CSV, DELIMITER '|', HEADER 0);
# Baseline without auto_type_candidates: values and detected types (BIGINT, DOUBLE, VARCHAR)
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv');
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv');
----
BIGINT DOUBLE VARCHAR
# Passing exactly those three types as candidates gives the same values ...
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT', 'DOUBLE', 'VARCHAR']);
----
1 1.1 bla
# MAP is not accepted as a candidate
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['MAP']);
----
Value "MAP" can not be converted to a DuckDB Type.
# ... and the same detected types as the baseline
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT', 'DOUBLE', 'VARCHAR']);
----
BIGINT DOUBLE VARCHAR
# VARCHAR as the only candidate (header = 0 passed explicitly): every column is VARCHAR
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['VARCHAR'], header = 0);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['VARCHAR'], header = 0);
----
VARCHAR VARCHAR VARCHAR
# BIGINT as the only candidate: the other two columns come back as VARCHAR even though
# VARCHAR was not listed
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT']);
----
BIGINT VARCHAR VARCHAR
# BIGINT and VARCHAR: same result as BIGINT alone; the 1.1 column is VARCHAR because no
# floating-point candidate is offered
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['BIGINT','VARCHAR']);
----
BIGINT VARCHAR VARCHAR
# FLOAT and VARCHAR: the integer column is read as FLOAT as well, so its value prints as 1.0
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['FLOAT','VARCHAR']);
----
1.0 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['FLOAT','VARCHAR']);
----
FLOAT FLOAT VARCHAR
# Several numeric candidates: SMALLINT is picked over BIGINT and FLOAT over DOUBLE
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
SMALLINT FLOAT VARCHAR
# The same candidates listed twice: duplicates in the list do not change values or types
query III
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR','SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
1 1.1 bla
query TTT
SELECT typeof(column0), typeof(column1), typeof(column2) FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR','SMALLINT','BIGINT', 'DOUBLE', 'FLOAT','VARCHAR']);
----
SMALLINT FLOAT VARCHAR
# USMALLINT is rejected as a candidate
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['USMALLINT', 'VARCHAR']);
----
Auto Type Candidate of type USMALLINT is not accepted as a valid input
# An unknown type name is rejected
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=['bla', 'VARCHAR'])
----
Type with name bla does not exist!
# An empty candidate list is rejected
statement error
SELECT * FROM read_csv_auto ('__TEST_DIR__/csv_file.csv', auto_type_candidates=[]);
----
auto_type_candidates requires at least one type

View File

@@ -0,0 +1,213 @@
# name: test/sql/copy/csv/auto/test_type_detection.test
# description: Test csv type detection
# group: [auto]
statement ok
PRAGMA enable_verification
# a CSV file with many strings, read with SAMPLE_SIZE=-1
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/large_mixed_data.csv', SAMPLE_SIZE=-1);
query ITR
SELECT linenr, mixed_string, mixed_double FROM test LIMIT 3;
----
1 1 1.000000
2 2 2.000000
3 3 3.000000
# mixed_string is expected to be VARCHAR and mixed_double DOUBLE although the first rows look like integers
query TTT
SELECT typeof(linenr), typeof(mixed_string), typeof(mixed_double) FROM test LIMIT 1;
----
BIGINT VARCHAR DOUBLE
# rows near the end of the file; the last one carries a fractional mixed_double value (3.5)
query ITR
SELECT linenr, mixed_string, mixed_double FROM test WHERE linenr > 27000 LIMIT 3;
----
27001 1 1.000000
27002 2 2.000000
27003 3 3.500000
query I
SELECT count(*) FROM test;
----
27003
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time and date columns with leading/trailing chars:
# the tf and df columns hold values with extra characters and are expected to stay VARCHAR
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_trailing.csv');
query ITTTTT
SELECT a, b, t, tf, d, df FROM test ORDER BY a;
----
123 TEST2 12:12:12 12:12:12 2000-01-01 2000-01-01
345 TEST2 14:15:30 14:15:30 2002-02-02 2000-01-01 a
346 TEST2 15:16:17 15:16:17 01 2004-12-13 2000-01-01
query TTTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(tf), typeof(d), typeof(df) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME VARCHAR DATE VARCHAR
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in mm-dd-yyyy (12 hour):
# the first timestamp is expected as 00:12:00
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_mm-dd-yyyy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in mm-dd-yy format (12 hour):
# the first row is expected in 1990 with a timestamp of 00:12:00
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_mm-dd-yy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 1990-01-01 1990-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in dd-mm-yyyy format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_dd-mm-yyyy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in dd-mm-yy format
# NOTE(review): in the first expected row d is in 2000 while ts is in 1990; the other format
# variants in this file use the same year for both - confirm against the data file
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_dd-mm-yy.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 1990-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in yyyy.mm.dd format
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_yyyy.mm.dd.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 2000-01-01 2000-01-01 12:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing time, date and timestamp columns in yy.mm.dd format:
# the first row is expected in 1990 with a timestamp of 00:12:00
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/time_date_timestamp_yy.mm.dd.csv');
query ITTTT
SELECT a, b, t, d, ts FROM test ORDER BY a;
----
123 TEST2 12:12:12 1990-01-01 1990-01-01 00:12:00
345 TEST2 14:15:30 2002-02-02 2002-02-02 14:15:00
346 TEST2 15:16:17 2004-12-13 2004-12-13 15:16:00
query TTTTT
SELECT typeof(a), typeof(b), typeof(t), typeof(d), typeof(ts) FROM test LIMIT 1;
----
BIGINT VARCHAR TIME DATE TIMESTAMP
statement ok
DROP TABLE test;
# a CSV file containing integer bool value: column i is expected to be BIGINT and column b BOOLEAN
statement ok
CREATE TABLE test AS SELECT * FROM read_csv_auto ('data/csv/auto/int_bol.csv');
query I
SELECT i FROM test ORDER BY i;
----
1
2
3
4
query TT
SELECT typeof(i), typeof(b) FROM test LIMIT 1;
----
BIGINT BOOLEAN
statement ok
DROP TABLE test;