should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,80 @@
# name: test/sql/copy/csv/parallel/csv_parallel_buffer_size.test
# description: Test parallel read CSV function with small buffer sizes
# group: [parallel]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
query III
SELECT sum(a), sum(b), sum(c) FROM read_csv('data/csv/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30)
----
111111111 51866 3195
query I
SELECT sum(a) FROM read_csv('data/csv/test/multi_column_integer.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30)
----
111111111
query I
SELECT sum(a) FROM read_csv('data/csv/test/multi_column_integer_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER'), auto_detect='true', delim = '|', buffer_size=30)
----
111111111
query IIII
select * from read_csv('data/csv/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=30)
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
query IIII
select * from read_csv('data/csv/test/multi_column_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=27)
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|')
----
111
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=100)
----
111
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn_exc.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|')
----
111
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string_rn_exc.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|', buffer_size=80)
----
111
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='true', delim = '|')
----
111
query I
SELECT sum(a) FROM read_csv('data/csv/test/new_line_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), quote ='"', escape ='"', comment = '', auto_detect='true', delim = '|', buffer_size=100, new_line = '\r\n', strict_mode = false)
----
111

View File

@@ -0,0 +1,130 @@
# name: test/sql/copy/csv/parallel/csv_parallel_clickbench.test_slow
# description: Test parallel read CSV function on Clickbench
# group: [parallel]
mode skip
require httpfs
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE hits
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT,
Referer TEXT,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT,
Params TEXT,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT,
BrowserCountry TEXT,
SocialNetwork TEXT,
SocialAction TEXT,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT,
ParamCurrency TEXT,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT,
OpenstatCampaignID TEXT,
OpenstatAdID TEXT,
OpenstatSourceID TEXT,
UTMSource TEXT,
UTMMedium TEXT,
UTMCampaign TEXT,
UTMContent TEXT,
UTMTerm TEXT,
FromTag TEXT,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL,
PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID)
);
statement ok
insert into hits from read_csv('https://datasets.clickhouse.com/hits_compatible/hits.csv.gz');
#Q 01
query I
SELECT COUNT(*) FROM hits;
----
99997497

View File

@@ -0,0 +1,131 @@
# name: test/sql/copy/csv/parallel/csv_parallel_new_line.test_slow
# description: Test parallel read CSV function with different settings of new lines
# group: [parallel]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
statement ok
PRAGMA enable_verification
loop i 27 100
# Test read_csv auto with \n
query IIII
select * from read_csv_auto('data/csv/test/multi_column_string.csv', buffer_size=${i})
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
# Test read_csv auto with \r
query IIII
select * from read_csv_auto('data/csv/auto/multi_column_string_r.csv', buffer_size=${i})
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
# Test read_csv auto with mix \r and \n
statement error
select * from read_csv_auto('data/csv/auto/multi_column_string_mix_r_n.csv', buffer_size=${i})
----
* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with the CSV standard.
query IIII
select * from read_csv_auto('data/csv/auto/multi_column_string_mix_r_n.csv', buffer_size=${i}, strict_mode = False)
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
# Test read_csv auto with \r\n
query IIII
select * from read_csv_auto('data/csv/test/multi_column_string_rn.csv', buffer_size=${i}, header=False)
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
endloop
# Test read_csv auto with mix \r, \n and \r\n (This must always run single threaded)
statement error
select * from read_csv_auto('data/csv/auto/multi_column_string_mix.csv')
----
Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with the CSV standard.
query IIII
select * from read_csv_auto('data/csv/auto/multi_column_string_mix.csv', strict_mode=false)
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
# The checks below assume that the data files still have the newline style they were written with.
# On Windows the newlines may get rewritten to \r\n, so some checks would pass there when they should fail; hence they only run on non-Windows platforms.
require notwindows
# Test read_csv with user defined variable
query IIII
select * from read_csv('data/csv/test/multi_column_string.csv', COLUMNS=STRUCT_PACK(a := 'INTEGER', b := 'INTEGER', c := 'INTEGER', d := 'VARCHAR'), auto_detect='false', delim = '|', new_line = '\n')
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
query IIII
select * from read_csv_auto('data/csv/test/multi_column_string.csv', new_line = '\n')
----
1 6370 371 p1
10 214 465 p2
100 2403 160 p3
1000 1564 67 p4
10000 10617 138 p5
100000 430 181 p6
1000000 1904 658 p7
10000000 12845 370 p8
100000000 15519 785 p9
statement error
select * from read_csv_auto('data/csv/test/multi_column_string.csv', new_line = 'not_valid')
----

View File

@@ -0,0 +1,55 @@
# name: test/sql/copy/csv/parallel/csv_parallel_null_option.test
# description: Test parallel read CSV function with null option
# group: [parallel]
# Enable verification, multi-threading and detailed profiling; the profiling output is later read back with read_csv
statement ok
PRAGMA enable_verification
statement ok
pragma threads=4
statement ok
PRAGMA enable_profiling
statement ok
PRAGMA profiling_output='__TEST_DIR__/test.json'
statement ok
PRAGMA profiling_mode = detailed
statement ok
CREATE TABLE integers(i INTEGER)
statement ok
INSERT INTO integers VALUES (3)
statement ok
SELECT min (i + i) FROM integers
statement ok
CREATE TABLE exprtest (a INTEGER, b INTEGER)
statement ok
INSERT INTO exprtest VALUES (42, 10), (43, 100), (NULL, 1), (45, -1)
statement ok
SELECT min (a + a ) FROM exprtest
statement ok
SELECT a FROM exprtest WHERE a BETWEEN 43 AND 44
statement ok
SELECT CASE a WHEN 42 THEN 100 WHEN 43 THEN 200 ELSE 300 END FROM exprtest
statement ok
PRAGMA profiling_output='__TEST_DIR__/test_2.json'
# At least one of the lines should contain the word "Optimizer" to verify that we're getting optimizer timings
query T
SELECT COUNT(*) > 0
FROM read_csv('__TEST_DIR__/test.json', columns={'c': 'VARCHAR'}, delim=NULL, header=0, quote=NULL, escape=NULL, auto_detect = false)
WHERE contains(c, 'Optimizer');
----
true

View File

@@ -0,0 +1,177 @@
# name: test/sql/copy/csv/parallel/csv_parallel_tpcds.test_slow
# description: Test parallel read CSV function on TPC-DS
# group: [parallel]
require tpcds
# Set the thread count to 4 for the statements below
statement ok
pragma threads=4
# Generate the TPC-DS tables at scale factor 1; the generated tables carry the '_og' suffix
statement ok
CALL dsdgen(sf=1, suffix='_og');
# For every TPC-DS table: export it to CSV, load the CSV into an empty copy, and compare both tables with EXCEPT
foreach tpcds_tbl call_center catalog_page catalog_returns catalog_sales customer customer_address customer_demographics date_dim household_demographics income_band inventory item promotion reason ship_mode store store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site
statement ok
COPY ${tpcds_tbl}_og TO '__TEST_DIR__/${tpcds_tbl}.csv';
# `limit 0` copies only the schema of the generated table, not its rows
statement ok
create table ${tpcds_tbl} as select * from ${tpcds_tbl}_og limit 0;
# Reload the exported CSV through read_csv_auto
statement ok
insert into ${tpcds_tbl} select * from read_csv_auto('__TEST_DIR__/${tpcds_tbl}.csv', allow_quoted_nulls=False)
# The reloaded table must not contain rows that are absent from the generated table
query I
select count(*) from (SELECT * FROM ${tpcds_tbl} EXCEPT SELECT * FROM ${tpcds_tbl}_og);
----
0
# The generated table must not contain rows that are absent from the reloaded table
query I
select count (*) from (SELECT * FROM ${tpcds_tbl}_og EXCEPT SELECT * FROM ${tpcds_tbl});
----
0
endloop
statement ok
PRAGMA default_null_order='NULLS LAST'
# check table counts
query T
SELECT COUNT(*) FROM call_center
----
6
query T
SELECT COUNT(*) FROM catalog_page
----
11718
query T
SELECT COUNT(*) FROM catalog_returns
----
144067
query T
SELECT COUNT(*) FROM catalog_sales
----
1441548
query T
SELECT COUNT(*) FROM customer
----
100000
query T
SELECT COUNT(*) FROM customer_demographics
----
1920800
query T
SELECT COUNT(*) FROM customer_address
----
50000
query T
SELECT COUNT(*) FROM date_dim
----
73049
query T
SELECT COUNT(*) FROM household_demographics
----
7200
query T
SELECT COUNT(*) FROM inventory
----
11745000
query T
SELECT COUNT(*) FROM income_band
----
20
query T
SELECT COUNT(*) FROM item
----
18000
query T
SELECT COUNT(*) FROM promotion
----
300
query T
SELECT COUNT(*) FROM reason
----
35
query T
SELECT COUNT(*) FROM ship_mode
----
20
query T
SELECT COUNT(*) FROM store
----
12
query T
SELECT COUNT(*) FROM store_returns
----
287867
query T
SELECT COUNT(*) FROM store_sales
----
2880404
query T
SELECT COUNT(*) FROM time_dim
----
86400
query T
SELECT COUNT(*) FROM warehouse
----
5
query T
SELECT COUNT(*) FROM web_page
----
60
query T
SELECT COUNT(*) FROM web_returns
----
71654
query T
SELECT COUNT(*) FROM web_sales
----
719384
query T
SELECT COUNT(*) FROM web_site
----
30
# Run the single-digit TPC-DS queries (1-9) and compare each result with its zero-padded answer file (01.csv .. 09.csv).
# The loop's upper bound is exclusive, so it has to be 10 for query 9 to be executed.
loop i 1 10
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/0${i}.csv
endloop
loop i 10 100
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop

View File

@@ -0,0 +1,52 @@
# name: test/sql/copy/csv/parallel/csv_parallel_tpch.test_slow
# description: Test parallel read CSV function on TPCH
# group: [parallel]
require tpch
# Set the thread count to 4 for the statements below
statement ok
PRAGMA threads=4
# Generate the TPC-H tables at scale factor 1; the generated tables carry the '_og' suffix
statement ok
CALL dbgen(sf=1, suffix='_og');
# For every TPC-H table: export it to a header-less CSV, load that CSV into an empty copy, and compare both tables with EXCEPT
foreach tbl orders customer lineitem nation part partsupp region supplier
statement ok
COPY ${tbl}_og TO '__TEST_DIR__/${tbl}.csv' (HEADER 0);
# LIMIT 0 copies only the schema of the generated table, not its rows
statement ok
CREATE TABLE ${tbl} AS
SELECT *
FROM ${tbl}_og
LIMIT 0;
statement ok
COPY ${tbl} FROM '__TEST_DIR__/${tbl}.csv';
# The reloaded table must not contain rows that are absent from the generated table
query I
SELECT count(*)
FROM (
    SELECT * FROM ${tbl}
    EXCEPT
    SELECT * FROM ${tbl}_og
);
----
0
# The generated table must not contain rows that are absent from the reloaded table
query I
SELECT count(*)
FROM (
    SELECT * FROM ${tbl}_og
    EXCEPT
    SELECT * FROM ${tbl}
);
----
0
endloop
# Run the single-digit TPC-H queries (1-9) and compare each result with its zero-padded answer file (q01.csv .. q09.csv).
# The loop's upper bound is exclusive, so it has to be 10 for query 9 to be executed.
loop i 1 10
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv
endloop
loop i 10 23
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv
endloop

View File

@@ -0,0 +1,29 @@
# name: test/sql/copy/csv/parallel/parallel_csv_hive_partitioning.test
# description: Test parallel CSV reader with hive partitioning
# group: [parallel]
statement ok
PRAGMA enable_verification
# run the queries below with forced parallelism
statement ok
PRAGMA verify_parallelism
# without HIVE_PARTITIONING, the partition directories can still be extracted from the filename column in SQL
query IIII
SELECT
    id,
    value,
    filename.replace('\', '/').split('/')[-2],
    filename.replace('\', '/').split('/')[-3]
FROM read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', FILENAME=1)
ORDER BY id
----
1 value1 date=2012-01-01 part=a
2 value2 date=2013-01-01 part=b
# with HIVE_PARTITIONING, the partition keys are selectable as regular columns
query IIII
SELECT
    id,
    value,
    part,
    date
FROM read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1)
ORDER BY id
----
1 value1 a 2012-01-01
2 value2 b 2013-01-01
# same source, different column order, sorted by the partition column
query III
SELECT
    part,
    value,
    date
FROM read_csv_auto('data/csv/hive-partitioning/simple/*/*/test.csv', HIVE_PARTITIONING=1)
ORDER BY part
----
a value1 2012-01-01
b value2 2013-01-01

View File

@@ -0,0 +1,104 @@
# name: test/sql/copy/csv/parallel/parallel_csv_union_by_name.test
# description: Test parallel CSV reader with union by name
# group: [parallel]
statement ok
PRAGMA enable_verification
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA verify_parallelism
query IIII
SELECT id, value, a, part
FROM read_csv_auto('data/csv/union-by-name/part=[ab]/*',HIVE_PARTITIONING=TRUE ,UNION_BY_NAME=TRUE)
ORDER BY id
----
1 value1 aaa a
2 value2 NULL b
query IIII
SELECT k, c, ts, replace(filename, '\', '/')
FROM read_csv_auto('data/csv/union-by-name/ubn[!1-2].csv',FILENAME=TRUE ,UNION_BY_NAME=TRUE)
ORDER BY c
----
6 3 2003-06-30 12:03:10 data/csv/union-by-name/ubn3.csv
6 5 2003-06-30 12:03:10 data/csv/union-by-name/ubn3.csv
6 6 2003-06-30 12:03:10 data/csv/union-by-name/ubn3.csv
NULL 100 Monday data/csv/union-by-name/ubn4.csv
NULL 200 Sunday data/csv/union-by-name/ubn4.csv
NULL 300 Friday data/csv/union-by-name/ubn4.csv
query IIIII
SELECT a, b, c, ts, k
FROM read_csv_auto('data/csv/union-by-name/ubn*.csv',UNION_BY_NAME=TRUE)
ORDER BY a, c, ts
----
NULL NULL 3 2003-06-30 12:03:10 6
NULL NULL 5 2003-06-30 12:03:10 6
NULL NULL 6 2003-06-30 12:03:10 6
NULL NULL 100 Monday NULL
NULL NULL 200 Sunday NULL
NULL NULL 300 Friday NULL
1 2 3 NULL NULL
3 4 5 NULL NULL
34fd321 91 NULL 2020-12-30 03:25:58.745232+01 NULL
4 5 6 NULL NULL
8cb123cb8 90 NULL 2020-12-30 02:25:58.745232+01 NULL
fg5391jn4 92 NULL 2020-12-30 04:25:58.745232+01 NULL
test 88 NULL 2020-12-30 01:25:58.745232+01 NULL
# NOTE(review): no `mode skip` precedes this point in the file, so this `mode unskip` looks like a no-op - confirm
mode unskip
# Check the column types reported for the UNION_BY_NAME result over all ubn*.csv files
query TTTTT
SELECT typeof(a), typeof(b), typeof(c), typeof(ts), typeof(k)
FROM read_csv_auto('data/csv/union-by-name/ubn*.csv',UNION_BY_NAME=TRUE)
LIMIT 1;
----
VARCHAR BIGINT BIGINT VARCHAR BIGINT
# NOTE(review): this `mode skip` is never undone, so the projection pushdown checks that follow are skipped;
# the unskip/skip pair may be inverted - confirm that disabling those checks is intended
mode skip
# projection pushdown
query II
SELECT c, k
FROM read_csv_auto('data/csv/union-by-name/ubn*.csv',UNION_BY_NAME=TRUE)
ORDER BY c NULLS LAST, k NULLS LAST
----
3 6
3 NULL
5 6
5 NULL
6 6
6 NULL
100 NULL
200 NULL
300 NULL
NULL NULL
NULL NULL
NULL NULL
NULL NULL
# projection pushdown
query I
SELECT ts
FROM read_csv_auto('data/csv/union-by-name/ubn*.csv',UNION_BY_NAME=TRUE)
ORDER BY ts NULLS LAST
----
2003-06-30 12:03:10
2003-06-30 12:03:10
2003-06-30 12:03:10
2020-12-30 01:25:58.745232+01
2020-12-30 02:25:58.745232+01
2020-12-30 03:25:58.745232+01
2020-12-30 04:25:58.745232+01
Friday
Monday
Sunday
NULL
NULL
NULL

View File

@@ -0,0 +1,18 @@
# name: test/sql/copy/csv/parallel/test_5438.test
# description: Test parallel read CSV function on issue #5438
# group: [parallel]
# the JSON column type and the ->> operator below need the json extension
require json
statement ok
PRAGMA enable_verification
# run the query below with forced parallelism
statement ok
PRAGMA verify_parallelism
# read the file as a single JSON-typed column j (empty delimiter, no auto-detection) and extract the 'duck' field
query I
SELECT j->>'duck'
FROM read_csv_auto(
    'data/csv/test/5438.csv',
    delim='',
    columns={'j': 'JSON'},
    auto_detect=false
)
----
1
2
2

View File

@@ -0,0 +1,43 @@
# name: test/sql/copy/csv/parallel/test_5566.test
# description: Test parallel read CSV function on issue #5566
# group: [parallel]
# run every query below with forced parallelism
statement ok
PRAGMA verify_parallelism
statement ok
PRAGMA enable_verification
# single-column file
query I
SELECT *
FROM read_csv_auto('data/csv/auto/test_single_column.csv')
----
1
2
3
4
5
# single-column file, _rn variant
query I
SELECT *
FROM read_csv_auto('data/csv/auto/test_single_column_rn.csv')
----
1
2
3
4
5
# multi-column file: row count per value of foo, largest group first
query II
SELECT
    foo,
    count(1) AS cnt
FROM read_csv_auto('data/csv/auto/test_multiple_columns.csv')
GROUP BY foo
ORDER BY cnt DESC
----
1 102
2 100
3 98
# multi-column file, _rn variant
query II
SELECT
    foo,
    count(1) AS cnt
FROM read_csv_auto('data/csv/auto/test_multiple_columns_rn.csv')
GROUP BY foo
ORDER BY cnt DESC
----
1 102
2 100
3 98

View File

@@ -0,0 +1,51 @@
# name: test/sql/copy/csv/parallel/test_7578.test
# description: Test parallel read CSV function on sample file from #7578
# group: [parallel]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
statement ok
PRAGMA enable_verification
query IIIIIIIIII
select *
from read_csv('data/csv/bug_7578.csv', delim='\t', quote = '`', columns={
'transaction_id': 'VARCHAR',
'team_id': 'INT',
'direction': 'INT',
'amount':'DOUBLE',
'account_id':'INT',
'transaction_date':'DATE',
'recorded_date':'DATE',
'tags.transaction_id':'VARCHAR',
'tags.team_id':'INT',
'tags':'varchar'
}) order by all
----
01GXBE9CMV5R0Q4TEXZ4XFXPQR 58 -1 3.91 41 2022-11-22 2023-04-06 01GXBE9CMV5R0Q4TEXZ4XFXPQR 58 {"_journalize_rule": "four score and seven years ago our fathers brought forth upon this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal. now we are engaged in a great civil war", "transaction_match_id": "1234567", "transaction id": "1234456", "job id": "", "transacting party": "customer", "user id": "1278729", "state": "NY", "event type": "Sale", "product name": "acme", "event description": "", "related transaction id": "11234813", "transaction created at": "2022-11-11T01:12:31", "created on": "2022-11-11T01:12:31", "last modified on": "2022-11-14T09:03:57", "base amount": "48.0000", "tax": "4.120000", "stripe payment id": "ch_1234567889", "stripe payment amount": "54.1100", "stripe payment date": "2022-11-14T09:03:55", "payment fail date": "", "write-off id": "", "write-off date": "", "write-off amount": "", "amount due": "0.0000"}
01GXBE9CMV5R0Q4TEXZ4XFXPQR 58 1 3.91 39 2022-11-22 2023-04-06 01GXBE9CMV5R0Q4TEXZ4XFXPQR 58 {"_journalize_rule": "four score and seven years ago our fathers brought forth upon this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal. now we are engaged in a great civil war", "transaction_match_id": "1234567", "transaction id": "1234456", "job id": "", "transacting party": "customer", "user id": "1278729", "state": "NY", "event type": "Sale", "product name": "acme", "event description": "", "related transaction id": "11234813", "transaction created at": "2022-11-11T01:12:31", "created on": "2022-11-11T01:12:31", "last modified on": "2022-11-14T09:03:57", "base amount": "48.0000", "tax": "4.120000", "stripe payment id": "ch_1234567889", "stripe payment amount": "54.1100", "stripe payment date": "2022-11-14T09:03:55", "payment fail date": "", "write-off id": "", "write-off date": "", "write-off amount": "", "amount due": "0.0000"}
# FIXME: this fails randomly
mode skip
statement ok
pragma threads=2
statement error
select *
from read_csv('data/csv/bug_7578.csv', delim='\t', columns={
'transaction_id': 'VARCHAR',
'team_id': 'INT',
'direction': 'INT',
'amount':'DOUBLE',
'account_id':'INT',
'transaction_date':'DATE',
'recorded_date':'DATE',
'tags.transaction_id':'VARCHAR',
'tags.team_id':'INT',
'tags':'varchar'
})
----

View File

@@ -0,0 +1,12 @@
# name: test/sql/copy/csv/parallel/test_7789.test_slow
# description: Test Read CSV Auto function on sample file from #7789
# group: [parallel]
statement ok
PRAGMA enable_verification
# count the rows of the sample file from #7789, read with SAMPLE_SIZE = -1
query I
SELECT count(*)
FROM read_csv_auto(
    'data/csv/CrashStatistics.csv',
    SAMPLE_SIZE = -1
)
----
4980

View File

@@ -0,0 +1,248 @@
# name: test/sql/copy/csv/parallel/test_multiple_files.test
# description: Test parallel read CSV function on multiple files
# group: [parallel]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
statement ok
PRAGMA enable_verification
query IIII rowsort
select * from read_csv_auto('data/csv/auto/glob/[0-9].csv');
----
0 0 1.0 zero
1 1 1.1 one
10 10 2.0 zero
11 11 2.1 one
12 12 2.2 two
13 13 2.3 three
14 14 2.4 four
15 15 2.5 five
16 16 2.6 six
17 17 2.7 seven
18 18 2.8 eight
19 19 2.9 nine
2 2 1.2 two
20 20 3.0 zero
21 21 3.1 one
22 22 3.2 two
23 23 3.3 three
24 24 3.4 four
25 25 3.5 five
26 26 3.6 six
3 3 1.3 three
4 4 1.4 four
5 5 1.5 five
50000 0 1.0 zero
50001 1 1.1 one
50002 2 1.2 two
50003 3 1.3 three
50004 4 1.4 four
50005 5 1.5 five
50006 6 1.6 six
50007 7 1.7 seven
50008 8 1.8 eight
50009 9 1.9 nine
50010 10 2.0 zero
50011 11 2.1 one
50012 12 2.2 two
50013 13 2.3 three
50014 14 2.4 four
50015 15 2.5 five
50016 16 2.6 six
50017 17 2.7 seven
50018 18 2.8 eight
50019 19 2.9 nine
50020 20 3.0 zero
50021 21 3.1 one
50022 22 3.2 two
50023 23 3.3 three
50024 24 3.4 four
50025 25 3.5 five
50026 26 3.6 six
50027 27 3.7 seven
6 6 1.6 six
7 7 1.7 seven
8 8 1.8 eight
9 9 1.9 nine
query IIII rowsort
select * from read_csv_auto('data/csv/auto/glob/[0-9].csv', buffer_size=100)
----
0 0 1.0 zero
1 1 1.1 one
10 10 2.0 zero
11 11 2.1 one
12 12 2.2 two
13 13 2.3 three
14 14 2.4 four
15 15 2.5 five
16 16 2.6 six
17 17 2.7 seven
18 18 2.8 eight
19 19 2.9 nine
2 2 1.2 two
20 20 3.0 zero
21 21 3.1 one
22 22 3.2 two
23 23 3.3 three
24 24 3.4 four
25 25 3.5 five
26 26 3.6 six
3 3 1.3 three
4 4 1.4 four
5 5 1.5 five
50000 0 1.0 zero
50001 1 1.1 one
50002 2 1.2 two
50003 3 1.3 three
50004 4 1.4 four
50005 5 1.5 five
50006 6 1.6 six
50007 7 1.7 seven
50008 8 1.8 eight
50009 9 1.9 nine
50010 10 2.0 zero
50011 11 2.1 one
50012 12 2.2 two
50013 13 2.3 three
50014 14 2.4 four
50015 15 2.5 five
50016 16 2.6 six
50017 17 2.7 seven
50018 18 2.8 eight
50019 19 2.9 nine
50020 20 3.0 zero
50021 21 3.1 one
50022 22 3.2 two
50023 23 3.3 three
50024 24 3.4 four
50025 25 3.5 five
50026 26 3.6 six
50027 27 3.7 seven
6 6 1.6 six
7 7 1.7 seven
8 8 1.8 eight
9 9 1.9 nine
query IIII rowsort
select * from read_csv('data/csv/auto/glob/[0-9].csv', AUTO_DETECT=true)
----
0 0 1.0 zero
1 1 1.1 one
10 10 2.0 zero
11 11 2.1 one
12 12 2.2 two
13 13 2.3 three
14 14 2.4 four
15 15 2.5 five
16 16 2.6 six
17 17 2.7 seven
18 18 2.8 eight
19 19 2.9 nine
2 2 1.2 two
20 20 3.0 zero
21 21 3.1 one
22 22 3.2 two
23 23 3.3 three
24 24 3.4 four
25 25 3.5 five
26 26 3.6 six
3 3 1.3 three
4 4 1.4 four
5 5 1.5 five
50000 0 1.0 zero
50001 1 1.1 one
50002 2 1.2 two
50003 3 1.3 three
50004 4 1.4 four
50005 5 1.5 five
50006 6 1.6 six
50007 7 1.7 seven
50008 8 1.8 eight
50009 9 1.9 nine
50010 10 2.0 zero
50011 11 2.1 one
50012 12 2.2 two
50013 13 2.3 three
50014 14 2.4 four
50015 15 2.5 five
50016 16 2.6 six
50017 17 2.7 seven
50018 18 2.8 eight
50019 19 2.9 nine
50020 20 3.0 zero
50021 21 3.1 one
50022 22 3.2 two
50023 23 3.3 three
50024 24 3.4 four
50025 25 3.5 five
50026 26 3.6 six
50027 27 3.7 seven
6 6 1.6 six
7 7 1.7 seven
8 8 1.8 eight
9 9 1.9 nine
query IIII rowsort
select * from read_csv('data/csv/auto/glob/[0-9].csv', sample_size=-1, new_line = '\r\n', columns={'row_id':'BIGINT','integer':'INTEGER','float':'DOUBLE', 'text':'VARCHAR'})
----
0 0 1.0 zero
1 1 1.1 one
10 10 2.0 zero
11 11 2.1 one
12 12 2.2 two
13 13 2.3 three
14 14 2.4 four
15 15 2.5 five
16 16 2.6 six
17 17 2.7 seven
18 18 2.8 eight
19 19 2.9 nine
2 2 1.2 two
20 20 3.0 zero
21 21 3.1 one
22 22 3.2 two
23 23 3.3 three
24 24 3.4 four
25 25 3.5 five
26 26 3.6 six
3 3 1.3 three
4 4 1.4 four
5 5 1.5 five
50000 0 1.0 zero
50001 1 1.1 one
50002 2 1.2 two
50003 3 1.3 three
50004 4 1.4 four
50005 5 1.5 five
50006 6 1.6 six
50007 7 1.7 seven
50008 8 1.8 eight
50009 9 1.9 nine
50010 10 2.0 zero
50011 11 2.1 one
50012 12 2.2 two
50013 13 2.3 three
50014 14 2.4 four
50015 15 2.5 five
50016 16 2.6 six
50017 17 2.7 seven
50018 18 2.8 eight
50019 19 2.9 nine
50020 20 3.0 zero
50021 21 3.1 one
50022 22 3.2 two
50023 23 3.3 three
50024 24 3.4 four
50025 25 3.5 five
50026 26 3.6 six
50027 27 3.7 seven
6 6 1.6 six
7 7 1.7 seven
8 8 1.8 eight
9 9 1.9 nine

View File

@@ -0,0 +1,141 @@
# name: test/sql/copy/csv/parallel/test_parallel_csv.test
# description: Test parallel read CSV function on GitHub bugs
# group: [parallel]
# turn on query verification for the statements that follow
statement ok
PRAGMA enable_verification
# Read 14512_og.csv with a small buffer (buffer_size = 473), strict_mode disabled and explicit delim/quote/escape.
# The last column holds JSON text containing quotes.
# NOTE(review): the file name suggests this reproduces issue #14512 - confirm.
query IIIIIIIIIIIIIIIIIIIIIIIIII
FROM read_csv('data/csv/14512_og.csv', buffer_size = 473, strict_mode = false, delim = ',', quote = '"', escape = '"')
----
00000579000098 13.99 EA PINE RIDGE CHENIN VOIGNIER 750.0 ML 1 13 NULL 1 NULL NULL NULL NULL NULL NULL DEFAULT BRAND NULL NULL NULL NULL BEER & WINE NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}}
00000609082001 3.99 EA MADELAINE MINI MILK CHOCOLATE TURKEY 1.0 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL MADELEINE NULL NULL NULL NULL CANDY NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}}
00817566020096 9.99 EA COTSWOLD EW 5.3 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL LONG CLAWSON NULL NULL NULL NULL DELI INGREDIENTS: DOUBLE GLOUCESTER CHEESE (PASTEURIZED MILK SALT ENZYMES DAIRY CULTURES ANNATTO EXTRACT AS A COLOR) RECONSTITUTED MINCED ONIONS (2%) DRIED CHIVES. CONTAINS: MILK THIS PRODUCT WAS PRODUCED IN AN ENVIRONMENT THAT ALSO USES PEANUTS TREE NUTS EGGS MILK WHEAT SOY FISH SHELLFISH AND SESAME. NULL 2.0 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.02}}
# Skip the first line of dirty_line.csv (skip = 1); the remaining two rows are expected as three columns.
# NOTE(review): presumably the skipped line is the "dirty" one - confirm against the data file.
query III
select * from read_csv_auto('data/csv/dirty_line.csv', skip = 1)
----
1.5 a 3
2.5 b 4
# With nullstr set to "null", the text null is expected to come back as NULL in either column.
query II
select * from read_csv_auto('data/csv/null_string.csv', nullstr="null")
----
1 NULL
NULL 2
# We need to add header = false here: with vector_size=2 the sniffer would otherwise think we have a header,
# since the row "1,null" has types INTEGER;VARCHAR at that point.
# Without nullstr the text null is kept as a plain string, and the first line (a, b) is returned as data.
query II
select * from read_csv_auto('data/csv/null_string.csv', header = false)
----
a b
1 null
null 2
query IIIIIIIIII
select * from read_csv_auto('data/csv/aws_locations.csv')
----
IAD Washington District of Columbia United States US 20 38.94449997 -77.45580292 North America United States, Mexico, & Canada
ORD Chicago Illinois United States US 20 41.978611 -87.904722 North America United States, Mexico, & Canada
JFK New York New York United States US 8 40.639801 -73.7789 North America United States, Mexico, & Canada
ATL Atlanta Georgia United States US 17 33.6367 -84.428101 North America United States, Mexico, & Canada
LAX Los Angeles California United States US 15 33.942501 -118.407997 North America United States, Mexico, & Canada
MIA Miami Florida United States US 11 25.79319953918457 -80.29060363769531 North America United States, Mexico, & Canada
DFW Dallas-Fort Worth Texas United States US 18 32.896801 -97.038002 North America United States, Mexico, & Canada
IAH Houston Texas United States US 6 29.984399795532227 -95.34140014648438 North America United States, Mexico, & Canada
SFO San Francisco California United States US 8 37.61899948120117 -122.375 North America United States, Mexico, & Canada
BOS Boston Massachusetts United States US 5 42.36429977 -71.00520325 North America United States, Mexico, & Canada
DEN Denver Colorado United States US 6 39.861698150635 -104.672996521 North America United States, Mexico, & Canada
PDX Portland Oregon United States US 2 45.58869934 -122.5979996 North America United States, Mexico, & Canada
SEA Seattle Washington United States US 6 47.448889 -122.309444 North America United States, Mexico, & Canada
MSP Minneapolis Minnesota United States US 4 44.882 -93.221802 North America United States, Mexico, & Canada
PHX Phoenix Arizona United States US 3 33.43429946899414 -112.01200103759766 North America United States, Mexico, & Canada
PHL Philadelphia Pennsylvania United States US 2 39.87189865112305 -75.24109649658203 North America United States, Mexico, & Canada
SLC Salt Lake City Utah United States US 1 40.78839874267578 -111.97799682617188 North America United States, Mexico, & Canada
BNA Nashville Tennessee United States US 2 36.1245002746582 -86.6781997680664 North America United States, Mexico, & Canada
DTW Detroit Michigan United States US 2 42.212398529052734 -83.35340118408203 North America United States, Mexico, & Canada
TPA Tampa Florida United States US 2 27.975500106811523 -82.533203125 North America United States, Mexico, & Canada
EWR Newark New Jersey United States US 10 40.692501068115234 -74.168701171875 North America United States, Mexico, & Canada
CMH Columbus Ohio United States US 2 39.998001 -82.891899 North America United States, Mexico, & Canada
MCI Kansas City Missouri United States US 2 39.2976 -94.713898 North America United States, Mexico, & Canada
QRO Queretaro NULL North America MX 1 20.6173 -100.185997 undefined null
FRA Frankfurt am Main NULL Germany DE 17 50.033333 8.570556 Europe Europe & Israel
DUS Düsseldorf NULL Germany DE 3 51.289501 6.76678 Europe Europe & Israel
HAM Hamburg NULL Germany DE 6 53.630401611328 9.9882297515869 Europe Europe & Israel
MUC Munich NULL Germany DE 4 48.353802 11.7861 Europe Europe & Israel
TXL Berlin NULL Germany DE 5 52.559722 13.287778 Europe Europe & Israel
CDG Paris NULL France FR 11 49.012798 2.55 Europe Europe & Israel
MRS Marseille NULL France FR 6 43.439271922 5.22142410278 Europe Europe & Israel
MXP Milan NULL Italy IT 9 45.6306 8.72811 Europe Europe & Israel
FCO Rome NULL Italy IT 6 41.8002778 12.2388889 Europe Europe & Israel
PMO Palermo NULL Italy IT 1 38.175999 13.091 Europe Europe & Israel
AMS Amsterdam NULL Netherlands NL 5 52.308601 4.76389 Europe Europe & Israel
MAN Manchester NULL UK GB 5 53.35369873046875 -2.2749500274658203 Europe Europe & Israel
LHR London NULL UK GB 25 51.4775 -0.461389 Europe Europe & Israel
DUB Dublin NULL Ireland IE 2 53.421299 -6.27007 Europe Europe & Israel
VIE Vienna NULL Austria AT 3 48.110298156738 16.569700241089 Europe Europe & Israel
ARN Stockholm NULL Sweden SE 4 59.651901245117 17.918600082397 Europe Europe & Israel
CPH Copenhagen NULL Denmark DK 3 55.617900848389 12.656000137329 Europe Europe & Israel
HEL Helsinki NULL Finland FI 4 60.317199707031 24.963300704956 Europe Europe & Israel
ATH Athens NULL Greece GR 1 37.9364013672 23.9444999695 Europe Europe & Israel
BRU Brussels NULL Belgium BE 1 50.901401519800004 4.48443984985 Europe Europe & Israel
BUD Budapest NULL Hungary HU 1 47.42976 19.261093 Europe Europe & Israel
LIS Lisbon NULL Portugal PT 1 38.7813 -9.13592 Europe Europe & Israel
OSL Oslo NULL Norway NO 2 60.193901062012 11.100399971008 Europe Europe & Israel
OTP Bucharest NULL Romania RO 1 44.5711111 26.085 Europe Europe & Israel
PRG Prague NULL Czech Republic CZ 1 50.1008 14.26 Europe Europe & Israel
SOF Sofia NULL Bulgaria BG 1 42.696693420410156 23.411436080932617 Europe Europe & Israel
WAW Warsaw NULL Poland PL 3 52.165833 20.967222 Europe Europe & Israel
ZAG Zagreb NULL Croatia HR 1 45.7429008484 16.0687999725 Europe Europe & Israel
ZRH Zurich NULL Switzerland CH 2 47.464699 8.54917 Europe Europe & Israel
BCN Barcelona NULL Spain ES 2 41.2971 2.07846 Europe Europe & Israel
MAD Madrid NULL Spain ES 10 40.471926 -3.56264 Europe Europe & Israel
DEL New Delhi NULL India IN 14 28.5665 77.103104 Asia India
MAA Chennai NULL India IN 8 12.990005493164062 80.16929626464844 Asia India
BOM Mumbai NULL India IN 8 19.0886993408 72.8678970337 Asia India
PNQ Pune NULL India IN 4 18.58209991455078 73.9197006225586 Asia India
BLR Bangalore NULL India IN 5 13.1979 77.706299 Asia India
HYD Hyderabad NULL India IN 5 17.231318 78.429855 Asia India
SIN Singapore NULL Singapore SG 7 1.35019 103.994003 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
KIX Osaka NULL Japan JP 5 34.42729949951172 135.24400329589844 Asia Japan
NRT Tokyo NULL Japan JP 22 35.764702 140.386002 Asia Japan
TPE Taoyuan NULL Taiwan TW 3 25.0777 121.233002 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
ICN Seoul NULL Korea KR 8 37.46910095214844 126.45099639892578 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
BKK Bangkok NULL Thailand TH 2 13.689999 100.750114 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
CCU Kolkata NULL India IN 2 22.654699325561523 88.44670104980469 Asia India
CGK Jakarta NULL Indonesia ID 5 -6.1255698204 106.65599823 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
KUL Kuala Lumpur NULL Malaysia MY 2 2.745579957962 101.70999908447 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
MNL Manila NULL Philippines PH 1 14.5086 121.019997 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
HAN Hanoi NULL Vietnam VN 1 21.221200942993164 105.80699920654297 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
SGN Ho Chi Minh City NULL Vietnam VN 1 10.8187999725 106.652000427 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand
SYD Sydney NULL Australia AU 4 -33.94609832763672 151.177001953125 Australia & New Zealand Australia & New Zealand
AKL Auckland NULL New Zealand NZ 2 -37.008098602299995 174.792007446 Australia & New Zealand Australia & New Zealand
MEL Melbourne NULL Australia AU 3 -37.673302 144.843002 Australia & New Zealand Australia & New Zealand
PER Perth NULL Australia AU 1 -31.94029998779297 115.96700286865234 Australia & New Zealand Australia & New Zealand
GRU Sao Paulo NULL Brazil BR 8 -23.435556 -46.473056 South America South America
GIG Rio De Janeiro NULL Brazil BR 5 -22.8099994659 -43.2505569458 South America South America
FOR Fortaleza NULL Brazil BR 4 -3.776279926300049 -38.53260040283203 South America South America
BOG Bogota NULL Colombia CO 3 4.70159 -74.1469 South America South America
EZE Buenos Aires NULL Argentina AR 2 -34.8222 -58.5358 South America South America
SCL Santiago NULL Chile CL 3 -33.393001556396484 -70.78579711914062 South America South America
LIM Lima NULL Peru PE 2 -12.0219 -77.114305 South America South America
TLV Tel Aviv NULL Israel IL 2 32.01139831542969 34.88669967651367 Middle East Europe & Israel
BAH Manama NULL Bahrain BH 2 26.27079963684082 50.63359832763672 Middle East South Africa, Kenya, & Middle East
DXB Dubai NULL UAE AE 1 25.2527999878 55.3643989563 Middle East South Africa, Kenya, & Middle East
FJR Fujairah NULL UAE AE 3 25.112222 56.324167 Middle East South Africa, Kenya, & Middle East
MCT Muscat NULL Oman OM 1 23.593299865722656 58.284400939941406 Middle East South Africa, Kenya, & Middle East
CPT Cape Town NULL South Africa ZA 1 -33.9648017883 18.6016998291 Africa South Africa, Kenya, & Middle East
JNB Johannesburg NULL South Africa ZA 1 -26.1392 28.246 Africa South Africa, Kenya, & Middle East
NBO Nairobi NULL Kenya KE 1 -1.31923997402 36.9277992249 Africa South Africa, Kenya, & Middle East
PVG Shanghai NULL China CN 1 31.143400192260742 121.80500030517578 China China
SZX Shenzhen NULL China CN 1 22.639299392700195 113.81099700927734 China China
ZHY Zhongwei NULL China CN 1 37.572778 105.154444 China China
PEK Beijing NULL China CN 1 40.080101013183594 116.58499908447266 China China
HKG Hong Kong NULL China HK 4 22.308901 113.915001 China China
CMH Columbus Ohio United States US 1 39.998056 -82.891944 North America United States, Mexico, & Canada
HIO Hillsboro Oregon United States US 1 45.540394 -122.949825 North America United States, Mexico, & Canada
TPA Tampa Florida United States US 1 27.979722 -82.534722 North America United States, Mexico, & Canada
PNQ Pune Maharashtra India IN 1 18.582222 73.919722 Asia India
MCT Muscat Muscat Oman OM 1 23.6015386 58.2899376 Middle East South Africa, Kenya, & Middle East

View File

@@ -0,0 +1,147 @@
# name: test/sql/copy/csv/parallel/test_parallel_error_messages.test
# description: Test error messages (reported line numbers) of the parallel CSV reader
# group: [parallel]
# turn on query verification for the statements that follow
statement ok
PRAGMA enable_verification
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
# use four threads (presumably so the small buffer_size reads below are spread over several workers)
statement ok
SET threads=4
foreach batched true false
# ${batched} is supplied by the enclosing foreach (true, false): every check below runs with
# preserve_insertion_order both enabled and disabled
statement ok
SET preserve_insertion_order=${batched}
# sniffing disabled (auto_detect = false): the error is expected to name the offending line
statement error
SELECT * FROM read_csv('data/csv/missing_column.csv', sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True, auto_detect = false)
----
Line: 7
# sniffing left on: the user-set INTEGER type of column 0 is expected to clash with the sniffed VARCHAR type
statement error
SELECT * FROM read_csv('data/csv/wrongtype.csv', sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True)
----
Column at position: 0 Set type: INTEGER Sniffed type: VARCHAR
# same file with sniffing disabled: the error is expected to name the offending line instead
statement error
SELECT * FROM read_csv('data/csv/wrongtype.csv', sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True, auto_detect = false)
----
Line: 8
# same sniffer type mismatch, this time without an explicit buffer_size
statement error
SELECT * FROM read_csv('data/csv/wrongtype.csv', sep=',', columns={'h1': int, 'h2': varchar}, header=True)
----
Column at position: 0 Set type: INTEGER Sniffed type: VARCHAR
# FIXME sporadically a few of these succeed
# everything from here up to the matching "mode unskip" is currently not executed
mode skip
# the first error is on line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes.csv', parallel=false, sep=',', columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# the first error is on line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# same file, parallel=false combined with a small buffer_size: still line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes.csv', parallel=false, sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# the first error is on line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes_rn.csv', parallel=false, sep=',', columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# the first error is on line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes_rn.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# same _rn file, parallel=false combined with a small buffer_size: still line 10002
statement error
SELECT * FROM read_csv('data/csv/error/mixedtypes_rn.csv', parallel=false, sep=',', buffer_size=100, columns={'h1': int, 'h2': varchar}, header=True)
----
line 10002
# quotednewlines.csv read with parallel=true is expected to be rejected
statement error
SELECT * FROM read_csv('data/csv/error/quotednewlines.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': varchar, 'h2': varchar}, header=True)
----
not supported for multithreading
# same rejection expected with a larger buffer and h1 declared as int
statement error
SELECT * FROM read_csv('data/csv/error/quotednewlines.csv', parallel=true, sep=',', buffer_size=200, columns={'h1': int, 'h2': varchar}, header=True)
----
not supported for multithreading
# All-type error testing: repeat the error-line checks with h1 declared as each of the listed types.
# NOTE(review): the date/time/timestamp records below do not reference ${type}, so they are repeated
# unchanged on every iteration of this loop.
foreach type bool int bigint hugeint float double 'decimal(4,1)' 'decimal(8,1)' 'decimal(12,1)' 'decimal(18,1)'
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/csv_error.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': ${type}, 'h2': varchar}, header=True)
----
line 3002
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/csv_error.csv', parallel=false, sep=',', buffer_size=100, columns={'h1': ${type}, 'h2': varchar}, header=True)
----
line 3002
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/date.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': date, 'h2': varchar}, header=True)
----
line 3002
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/date.csv', parallel=false, sep=',', buffer_size=100, columns={'h1': date, 'h2': varchar}, header=True)
----
line 3002
# the first error is on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/time.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': time, 'h2': varchar}, header=True)
----
line 3002
# the first error is on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/time.csv', parallel=false, sep=',', buffer_size=100, columns={'h1': time, 'h2': varchar}, header=True)
----
line 3002
# the first error is on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/timestamp.csv', parallel=true, sep=',', buffer_size=200, columns={'h1': timestamp, 'h2': varchar}, header=True)
----
line 3002
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/timestamp.csv', parallel=false, sep=',', columns={'h1': timestamp, 'h2': varchar}, header=True)
----
line 3002
# the first error is expected on line 3002
statement error
SELECT * FROM read_csv('data/csv/error/timestamp.csv', parallel=true, sep=',', buffer_size=100, columns={'h1': timestamp, 'h2': varchar}, header=True)
----
line 3002
endloop
mode unskip
endloop