should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,22 @@
# name: test/sql/copy/csv/code_cov/buffer_manager_finalize.test
# description: Test to reach Finalize call in the csv buffer manager for codecov
# group: [code_cov]

statement ok
PRAGMA enable_verification

# Build a 3000-row, two-column table (i, i+1) to export.
statement ok
CREATE TABLE t1 AS select i, (i+1) as j from range(0,3000) tbl(i)

# Export it as a pipe-delimited CSV with a header row.
statement ok
COPY t1 TO '__TEST_DIR__/t1.csv' (FORMAT CSV, DELIMITER '|', HEADER);

# Reading the exported file back by path must return every exported row.
query I
select count(*) from '__TEST_DIR__/t1.csv'
----
3000

# data/csv/empty.csv read with an explicit one-column schema and auto-detection
# turned off is expected to produce zero rows.
query I
select count(*) from read_csv('data/csv/empty.csv', columns=STRUCT_PACK(d := 'BIGINT'), header=0, auto_detect = false)
----
0

View File

@@ -0,0 +1,21 @@
# name: test/sql/copy/csv/code_cov/csv_dialect_detection.test
# description: Test to reach missing Dialect Detection code.
# group: [code_cov]

statement ok
PRAGMA enable_verification

# With escape=']' supplied explicitly, the single value is expected to come back as "bla".
query I
SELECT * from read_csv_auto('data/csv/escape.csv', escape=']', header = 0)
----
"bla"

# Without the escape option, the expected value keeps the ']' characters.
query I
SELECT * from read_csv_auto('data/csv/escape.csv', header = 0)
----
"]"bla]""

# Forcing delim=';' on this file is expected to make dialect detection fail.
statement error
SELECT * from read_csv_auto('data/csv/no_opt.csv', delim = ';')
----
It was not possible to automatically detect the CSV parsing dialect

View File

@@ -0,0 +1,24 @@
# name: test/sql/copy/csv/code_cov/csv_disk_reload.test
# description: Test to reach Disk Reload call in the csv buffer manager for codecov
# group: [code_cov]

statement ok
PRAGMA enable_verification

# Build a 300000-row, two-column table (i, i+1) to export.
statement ok
CREATE TABLE t1 AS select i, (i+1) as j from range(0,300000) tbl(i)

# Export it as a pipe-delimited CSV with a header row.
statement ok
COPY t1 TO '__TEST_DIR__/t1.csv' (FORMAT CSV, DELIMITER '|', HEADER);

# Let's set a memory limit
statement ok
PRAGMA memory_limit='2M'

statement ok
PRAGMA threads=2

# Read the file back under the 2M memory limit with a 262144-byte CSV buffer
# and sample_size=-1; every exported row must still be counted.
query I
select count(*) from read_csv_auto('__TEST_DIR__/t1.csv',buffer_size = 262144, sample_size=-1)
----
300000

View File

@@ -0,0 +1,23 @@
# name: test/sql/copy/csv/code_cov/csv_exact_buffer_size.test
# description: Test with exact buffer size being the file size
# group: [code_cov]

statement ok
PRAGMA enable_verification

# The same file is read with two very small buffer sizes (10 and 8 bytes);
# both reads must return the same two rows.
query II
FROM read_csv('data/csv/auto/issue_1254_rn.csv', buffer_size=10)
----
1	2
1	2

query II
FROM read_csv('data/csv/auto/issue_1254_rn.csv', buffer_size=8)
----
1	2
1	2

# A 7-byte buffer on small_file.csv must still yield both of its rows.
query I
select count(*) from read_csv_auto('data/csv/small_file.csv', buffer_size = 7)
----
2

View File

@@ -0,0 +1,37 @@
# name: test/sql/copy/csv/code_cov/csv_sniffer_header.test
# description: Tests to enforce codecov in csv header sniffing
# group: [code_cov]

statement ok
PRAGMA enable_verification

# header_left_space.csv is expected to sniff to three data rows.
query I
SELECT count(*) from read_csv_auto('data/csv/header_left_space.csv')
----
3

# Load a file with normalize_names=1 so the resulting column names can be inspected.
statement ok
create table t as select * from read_csv_auto('data/csv/header_normalize.csv', normalize_names=1)

# Expected column names after normalization; every column is a nullable BIGINT.
query IIIIII
describe t
----
bla	BIGINT	YES	NULL	NULL	NULL
bla_1	BIGINT	YES	NULL	NULL	NULL
b_la	BIGINT	YES	NULL	NULL	NULL
_	BIGINT	YES	NULL	NULL	NULL
_3b	BIGINT	YES	NULL	NULL	NULL

# The same file listed three times: its three rows are expected three times over, in order.
query III
FROM read_csv(['data/csv/auto/sample.csv','data/csv/auto/sample.csv','data/csv/auto/sample.csv'])
----
c1	pedro	1992
c2	mark	1992
c3	oogie	2021
c1	pedro	1992
c2	mark	1992
c3	oogie	2021
c1	pedro	1992
c2	mark	1992
c3	oogie	2021

View File

@@ -0,0 +1,209 @@
# name: test/sql/copy/csv/code_cov/csv_state_machine_invalid_utf.test
# description: Tests related to invalid UTF-8 detection
# group: [code_cov]
# Error during sniffing
statement error
from read_csv_auto('data/csv/test/invalid_utf.csv')
----
Invalid unicode (byte sequence mismatch) detected
statement error
from read_csv_auto('data/csv/test/invalid_utf.csv')
----
CSV Error on Line: 1
# Error during parsing
statement error
from read_csv('data/csv/test/invalid_utf.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
Invalid unicode (byte sequence mismatch) detected.
statement error
from read_csv('data/csv/test/invalid_utf.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 1
# Test ignore errors over more complex file
statement error
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test error in the second vector
statement ok
create table t as from read_csv('data/csv/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true)
query I
select count(*) from t
----
3030
statement error
from read_csv('data/csv/test/invalid_utf_big.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',')
----
CSV Error on Line: 3001
# Test borked utf-8 within quotes
statement error
from read_csv('data/csv/test/invalid_utf_quoted.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test Invalid Header
statement error
from read_csv('data/csv/test/invalid_utf_header.csv', delim = ',', quote = '"')
----
Invalid unicode (byte sequence mismatch) detected.
statement error
from read_csv('data/csv/test/invalid_utf_header.csv', header=1, delim = ',', quote = '"')
----
Invalid unicode (byte sequence mismatch) detected.
query III
from read_csv('data/csv/test/invalid_utf_header.csv', header=1, delim = ',', quote = '"', ignore_errors = true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test invalid unicode in between a quoted newline
statement error
from read_csv('data/csv/test/invalid_utf_quoted_nl.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"')
----
CSV Error on Line: 11
query III
from read_csv('data/csv/test/invalid_utf_quoted_nl.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', ignore_errors=true)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# Test error between buffers
statement error
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', quote = '"', buffer_size = 198)
----
CSV Error on Line: 11
# Test error between buffers (with ignore_errors set)
query III
from read_csv('data/csv/test/invalid_utf_complex.csv',columns = {'col1': 'VARCHAR','col2': 'VARCHAR','col3': 'VARCHAR'}, auto_detect=false, header = 0, delim = ',', ignore_errors=true, buffer_size = 198)
----
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
valid valid valid
# We get a casting error
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'INTEGER[]'} )
----
Invalid unicode (byte sequence mismatch) detected.
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'INTEGER[]'} )
----
CSV Error on Line: 11
# We get a invalid unicode error
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'VARCHAR'} )
----
Invalid unicode (byte sequence mismatch) detected.
statement error
SELECT * FROM read_csv('data/csv/test/invalid_utf_list.csv', header=0, auto_detect=false, quote = '"',columns = {'col1': 'VARCHAR'} )
----
CSV Error on Line: 11

View File

@@ -0,0 +1,26 @@
# name: test/sql/copy/csv/code_cov/csv_type_detection.test
# description: Tests to enforce codecov in csv type detection sniffing
# group: [code_cov]

statement ok
PRAGMA enable_verification

# Auto-detection is off and a single VARCHAR column is supplied; the read is
# still expected to fail with the invalid-unicode error.
statement error
from read_csv_auto('data/csv/invalid_utf8.csv', auto_detect = false, columns={'c01': 'VARCHAR'} )
----
Invalid unicode (byte sequence mismatch) detected

# empty.csv is expected to produce no rows.
query I
select * from read_csv_auto('data/csv/empty.csv')
----

# Sniffing with sample_size=1 must still return both rows.
query II
select * from read_csv_auto('data/csv/small_file.csv', sample_size=1)
----
1	2
5	3

# The '%'-separated value is expected to come back verbatim as one column.
query I
select * from read_csv_auto('data/csv/date_format_percentage.csv')
----
336%584%3205

View File

@@ -0,0 +1,16 @@
# name: test/sql/copy/csv/code_cov/csv_type_refinement.test
# description: Tests to enforce codecov in csv type refinement sniffing
# group: [code_cov]

statement ok
PRAGMA enable_verification

# Both files are read without a header; only the total row count is asserted.
query I
select count(*) from read_csv_auto('data/csv/borked_date.csv', header = 0)
----
2070

query I
select count(*) from read_csv_auto('data/csv/big_not_bool.csv', header = 0)
----
2450