should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,17 @@
# Compile the common test helpers as a unity-build OBJECT library so they are
# compiled once and their objects linked into the main test binary.
add_library_unity(
test_common
OBJECT
test_cast.cpp
test_checksum.cpp
test_file_system.cpp
# NOTE(review): comparable DuckDB targets name this file test_hyperloglog.cpp --
# confirm the name below matches the file actually on disk.
test_hyperlog.cpp
test_numeric_cast.cpp
test_parse_logical_type.cpp
test_utf.cpp
test_storage_fuzz.cpp
test_strftime.cpp
test_string_util.cpp)
# Append this target's objects to ALL_OBJECT_FILES and export the list to the
# enclosing directory's scope so the parent can link them.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:test_common>
PARENT_SCOPE)

View File

@@ -0,0 +1,333 @@
#include "catch.hpp"
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/limits.hpp"
#include "duckdb/common/types.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/vector.hpp"
using namespace duckdb; // NOLINT
using namespace std; // NOLINT
//! Reference ("expected") result of casting SRC to DST. The generic case is a
//! plain numeric conversion; floating-point sources are covered by the
//! specializations in this file, which round to the nearest integer first.
template <class SRC, class DST>
struct ExpectedNumericCast {
	static inline DST Operation(SRC value) {
		// use a named cast instead of the C-style cast for greppability/intent
		return static_cast<DST>(value);
	}
};
//! Doubles are rounded to the nearest integer (per the current rounding mode,
//! round-to-nearest-even by default) before conversion, mirroring the rounding
//! behavior of DuckDB's numeric casts.
template <class DST>
struct ExpectedNumericCast<double, DST> {
	static inline DST Operation(double value) {
		return static_cast<DST>(nearbyint(value));
	}
};
//! Floats are rounded with the float-precision nearbyintf before conversion,
//! mirroring the rounding behavior of DuckDB's numeric casts.
template <class DST>
struct ExpectedNumericCast<float, DST> {
	static inline DST Operation(float value) {
		return static_cast<DST>(nearbyintf(value));
	}
};
// Checks that every value in working_values casts from SRC to DST and matches
// the reference ExpectedNumericCast result, and that every value in
// broken_values is rejected by both the throwing and non-throwing cast paths.
template <class SRC, class DST>
static void TestNumericCast(duckdb::vector<SRC> &working_values, duckdb::vector<SRC> &broken_values) {
	DST result;
	for (auto value : working_values) {
		// NOTE(review): REQUIRE_NOTHROW only asserts that no exception escapes;
		// the result of the == comparison inside it is discarded. The actual
		// value check happens through TryCast below.
		REQUIRE_NOTHROW(Cast::Operation<SRC, DST>(value) == (DST)value);
		REQUIRE(TryCast::Operation<SRC, DST>(value, result));
		// compare against the reference cast (rounds for floating-point sources)
		REQUIRE(result == ExpectedNumericCast<SRC, DST>::Operation(value));
	}
	for (auto value : broken_values) {
		// out-of-range values must throw from Cast and return false from TryCast
		REQUIRE_THROWS(Cast::Operation<SRC, DST>(value));
		REQUIRE(!TryCast::Operation<SRC, DST>(value, result));
	}
}
// Parses each working string into DST and checks it against the corresponding
// expected value; where the textual form is directly comparable, also
// round-trips the result back through ConvertToString. Every broken string
// must be rejected by both the throwing and non-throwing cast paths.
// Note: Trim mutates the entries of working_values in place.
template <class DST>
static void TestStringCast(duckdb::vector<string> &working_values, duckdb::vector<DST> &expected_values,
                           duckdb::vector<string> &broken_values) {
	DST result;
	for (idx_t i = 0; i < working_values.size(); i++) {
		auto &value = working_values[i];
		auto expected_value = expected_values[i];
		REQUIRE_NOTHROW(Cast::Operation<string_t, DST>(string_t(value)) == expected_value);
		REQUIRE(TryCast::Operation<string_t, DST>(string_t(value), result));
		REQUIRE(result == expected_value);
		// normalize surrounding whitespace before the round-trip comparison
		// (assumes no working value trims down to the empty string, since
		// value[0] is read below)
		StringUtil::Trim(value);
		duckdb::vector<string> splits;
		splits = StringUtil::Split(value, 'e');
		if (splits.size() > 1 || value[0] == '+') {
			// skip the round-trip check for scientific notation and explicit
			// '+' signs: their canonical string form differs from the input
			continue;
		}
		// compare only the integer part; a trailing ".xyz" does not appear in
		// the stringified integer result
		splits = StringUtil::Split(value, '.');
		REQUIRE(ConvertToString::Operation<DST>(result) == splits[0]);
	}
	for (auto &value : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, DST>(string_t(value)));
		REQUIRE(!TryCast::Operation<string_t, DST>(string_t(value), result));
	}
}
// Walks through "1eN" / "-1eN" for N in [0, 100): while the magnitude still
// fits in T, parsing must succeed and yield the exact power of ten; once it
// exceeds T's maximum, parsing must fail.
template <class T>
static void TestExponent() {
	T parse_result;
	string str;
	// tracked in double so the running power of ten itself cannot overflow T
	double value = 1;
	T expected_value = 1;
	for (idx_t exponent = 0; exponent < 100; exponent++) {
		if (value < (double)NumericLimits<T>::Maximum()) {
			// expect success
			str = "1e" + to_string(exponent);
			REQUIRE(TryCast::Operation<string_t, T>(string_t(str), parse_result));
			REQUIRE(parse_result == expected_value);
			str = "-1e" + to_string(exponent);
			REQUIRE(TryCast::Operation<string_t, T>(string_t(str), parse_result));
			REQUIRE(parse_result == -expected_value);
			value *= 10;
			// check again because otherwise this overflows
			if (value < (double)NumericLimits<T>::Maximum()) {
				expected_value *= 10;
			}
		} else {
			// expect failure
			str = "1e" + to_string(exponent);
			REQUIRE(!TryCast::Operation<string_t, T>(string_t(str), parse_result));
			str = "-1e" + to_string(exponent);
			REQUIRE(!TryCast::Operation<string_t, T>(string_t(str), parse_result));
		}
	}
}
TEST_CASE("Test casting to boolean", "[cast]") {
	// accepted spellings of booleans, paired index-wise with their values
	duckdb::vector<string> working_values = {"true", "false", "TRUE", "FALSE", "T", "F", "1", "0", "False", "True"};
	duckdb::vector<bool> expected_values = {true, false, true, false, true, false, true, false, false, true};
	// anything that is not a recognized boolean spelling must be rejected
	duckdb::vector<string> broken_values = {"304", "1002", "blabla", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"};
	bool parsed;
	idx_t idx = 0;
	for (auto &input : working_values) {
		auto expected = expected_values[idx++];
		REQUIRE_NOTHROW(Cast::Operation<string_t, bool>(input) == expected);
		REQUIRE(TryCast::Operation<string_t, bool>(input, parsed));
		REQUIRE(parsed == expected);
	}
	for (auto &input : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, bool>(input));
		REQUIRE(!TryCast::Operation<string_t, bool>(input, parsed));
	}
}
TEST_CASE("Test casting to int8_t", "[cast]") {
	// int16_t -> int8_t: anything outside [-128, 127] must fail
	duckdb::vector<int16_t> working_values_int16 = {10, -10, 127, -128};
	duckdb::vector<int16_t> broken_values_int16 = {128, -129, 1000, -1000};
	TestNumericCast<int16_t, int8_t>(working_values_int16, broken_values_int16);
	// int32_t -> int8_t
	duckdb::vector<int32_t> working_values_int32 = {10, -10, 127, -128};
	duckdb::vector<int32_t> broken_values_int32 = {128, -129, 1000000, -1000000};
	TestNumericCast<int32_t, int8_t>(working_values_int32, broken_values_int32);
	// int64_t -> int8_t
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -128};
	duckdb::vector<int64_t> broken_values_int64 = {128, -129, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int8_t>(working_values_int64, broken_values_int64);
	// float -> int8_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10, -10, 127, -128, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {128, -129, 10000000000.0f, -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int8_t>(working_values_float, broken_values_float);
	// double -> int8_t
	duckdb::vector<double> working_values_double = {10, -10, 127, -128, 1.3, -2.7};
	duckdb::vector<double> broken_values_double = {128, -129, 10000000000.0, -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int8_t>(working_values_double, broken_values_double);
	// string -> int8_t: decimals, exponent notation and surrounding whitespace
	// are all accepted
	duckdb::vector<string> working_values_str = {"10", "+10", "-10", "127", "-128", "1.3", "1e2",
	                                             "2e1", "2e0", "20e-1", "1.", " 3", " 3 ", "\t3 \t \n"};
	duckdb::vector<int8_t> expected_values_str = {10, 10, -10, 127, -128, 1, 100, 20, 2, 2, 1, 3, 3, 3};
	// out-of-range values, malformed numbers and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {"128",
	                                            "-129",
	                                            "10000000000000000000000000000000000000000000000000000000000000",
	                                            "aaaa",
	                                            "19A",
	                                            "",
	                                            "1e3",
	                                            "1e",
	                                            "1e-",
	                                            "1e100",
	                                            "1e100000000",
	                                            "10000e-1",
	                                            " 3 2",
	                                            "+"};
	TestStringCast<int8_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int8_t>();
}
TEST_CASE("Test casting to int16_t", "[cast]") {
	// int32_t -> int16_t: anything outside [-32768, 32767] must fail
	duckdb::vector<int32_t> working_values_int32 = {10, -10, 127, -127, 32767, -32768};
	duckdb::vector<int32_t> broken_values_int32 = {32768, -32769, 1000000, -1000000};
	TestNumericCast<int32_t, int16_t>(working_values_int32, broken_values_int32);
	// int64_t -> int16_t
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -127, 32767, -32768};
	duckdb::vector<int64_t> broken_values_int64 = {32768, -32769, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int16_t>(working_values_int64, broken_values_int64);
	// float -> int16_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10.0f, -10.0f, 32767.0f, -32768.0f, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {32768.0f, -32769.0f, 10000000000.0f, -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int16_t>(working_values_float, broken_values_float);
	// double -> int16_t
	duckdb::vector<double> working_values_double = {10, -10, 32767, -32768, 1.3, -2.7};
	duckdb::vector<double> broken_values_double = {32768, -32769, 10000000000.0, -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int16_t>(working_values_double, broken_values_double);
	// string -> int16_t: decimals and exponent notation are accepted
	duckdb::vector<string> working_values_str = {"10", "-10", "32767", "-32768", "1.3",
	                                             "3e4", "250e2", "3e+4", "3e0", "30e-1"};
	duckdb::vector<int16_t> expected_values_str = {10, -10, 32767, -32768, 1, 30000, 25000, 30000, 3, 3};
	// out-of-range values, malformed numbers and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {
	    "32768", "-32769", "10000000000000000000000000000000000000000000000000000000000000",
	    "aaaa", "19A", "",
	    "1.A", "1e", "1e-",
	    "1e100", "1e100000000", "+"};
	TestStringCast<int16_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int16_t>();
}
TEST_CASE("Test casting to int32_t", "[cast]") {
	// int64_t -> int32_t: anything outside [-2147483648, 2147483647] must fail
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -127, 32767, -32768, 2147483647LL, -2147483648LL};
	duckdb::vector<int64_t> broken_values_int64 = {2147483648LL, -2147483649LL, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int32_t>(working_values_int64, broken_values_int64);
	// float -> int32_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10.0f, -10.0f, 2000000000.0f, -2000000000.0f, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {3000000000.0f, -3000000000.0f, 10000000000.0f,
	                                             -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int32_t>(working_values_float, broken_values_float);
	// double -> int32_t
	duckdb::vector<double> working_values_double = {10, -10, 32767.0, -32768.0, 1.3, -2.7, 2147483647.0, -2147483648.0};
	duckdb::vector<double> broken_values_double = {2147483648.0, -2147483649.0, 10000000000.0,
	                                               -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int32_t>(working_values_double, broken_values_double);
	// string -> int32_t
	duckdb::vector<string> working_values_str = {"10", "-10", "2147483647", "-2147483647", "1.3", "-1.3", "1e6"};
	duckdb::vector<int32_t> expected_values_str = {10, -10, 2147483647, -2147483647, 1, -1, 1000000};
	// out-of-range values and malformed numbers must be rejected
	duckdb::vector<string> broken_values_str = {
	    "2147483648", "-2147483649", "10000000000000000000000000000000000000000000000000000000000000",
	    "aaaa", "19A", "",
	    "1.A", "1e1e1e1"};
	TestStringCast<int32_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int32_t>();
}
TEST_CASE("Test casting to int64_t", "[cast]") {
	// float -> int64_t: fractional values round; magnitudes beyond int64 fail
	duckdb::vector<float> working_values_float = {10.0f,
	                                              -10.0f,
	                                              32767.0f,
	                                              -32768.0f,
	                                              1.3f,
	                                              -2.7f,
	                                              2000000000.0f,
	                                              -2000000000.0f,
	                                              4000000000000000000.0f,
	                                              -4000000000000000000.0f};
	duckdb::vector<float> broken_values_float = {20000000000000000000.0f, -20000000000000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int64_t>(working_values_float, broken_values_float);
	// double -> int64_t
	duckdb::vector<double> working_values_double = {
	    10, -10, 32767, -32768, 1.3, -2.7, 2147483647, -2147483648.0, 4611686018427387904.0, -4611686018427387904.0};
	duckdb::vector<double> broken_values_double = {18446744073709551616.0, -18446744073709551617.0, 1e100, -1e100};
	TestNumericCast<double, int64_t>(working_values_double, broken_values_double);
	// string -> int64_t: fractional tails are dropped, exponents are applied
	duckdb::vector<string> working_values_str = {
	    "10", "-10", "9223372036854775807", "-9223372036854775807", "1.3", "-9223372036854775807.1293813", "1e18",
	    "1e+18", "1."};
	duckdb::vector<int64_t> expected_values_str = {10,
	                                               -10,
	                                               9223372036854775807LL,
	                                               -9223372036854775807LL,
	                                               1,
	                                               -9223372036854775807LL,
	                                               1000000000000000000LL,
	                                               1000000000000000000LL,
	                                               1};
	// out-of-range values, malformed exponents and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {"9223372036854775808",
	                                            "-9223372036854775809",
	                                            "10000000000000000000000000000000000000000000000000000000000000",
	                                            "aaaa",
	                                            "19A",
	                                            "",
	                                            "1.A",
	                                            "1.2382398723A",
	                                            "1e++1",
	                                            "1e+1+1",
	                                            "1e+1-1",
	                                            "+"};
	TestStringCast<int64_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int64_t>();
}
// Parses each working string into a floating-point DST and compares it
// approximately (exact equality is unreliable for floats), then verifies the
// value survives a round-trip through ConvertToString. Every broken string
// must be rejected by both the throwing and non-throwing cast paths.
template <class DST>
static void TestStringCastDouble(duckdb::vector<string> &working_values, duckdb::vector<DST> &expected_values,
                                 duckdb::vector<string> &broken_values) {
	DST result;
	for (idx_t i = 0; i < working_values.size(); i++) {
		auto &value = working_values[i];
		auto expected_value = expected_values[i];
		REQUIRE_NOTHROW(Cast::Operation<string_t, DST>(string_t(value)) == expected_value);
		REQUIRE(TryCast::Operation<string_t, DST>(string_t(value), result));
		REQUIRE(ApproxEqual(result, expected_value));
		// round-trip: format the expected value to text and parse it back
		auto to_str_and_back =
		    Cast::Operation<string_t, DST>(string_t(ConvertToString::Operation<DST>(expected_value)));
		REQUIRE(ApproxEqual(to_str_and_back, expected_value));
	}
	for (auto &value : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, DST>(string_t(value)));
		REQUIRE(!TryCast::Operation<string_t, DST>(string_t(value), result));
	}
}
TEST_CASE("Test casting to float", "[cast]") {
	// string -> float: long decimal tails are truncated to float precision
	duckdb::vector<string> working_values = {
	    "1.3", "1.34514", "1e10", "1e-2", "-1e-1", "1.1781237378938173987123987123981723981723981723987123",
	    "1.123456789", "1."};
	duckdb::vector<float> expected_values = {
	    1.3f, 1.34514f, 1e10f, 1e-2f, -1e-1f, 1.1781237378938173987123987123981723981723981723987123f,
	    1.123456789f, 1.0f};
	// malformed numbers (stray characters, double exponents/dots) are rejected
	duckdb::vector<string> broken_values = {
	    "-", "", "aaa",
	    "12aaa", "1e10e10", "1e",
	    "1e-", "1e10a", "1.1781237378938173987123987123981723981723981723934834583490587123w",
	    "1.2.3"};
	TestStringCastDouble<float>(working_values, expected_values, broken_values);
}
TEST_CASE("Test casting to double", "[cast]") {
	// string -> double: signs, exponent notation and surrounding whitespace
	// are accepted
	duckdb::vector<string> working_values = {"1.3",
	                                         "+1.3",
	                                         "1.34514",
	                                         "1e10",
	                                         "1e-2",
	                                         "-1e-1",
	                                         "1.1781237378938173987123987123981723981723981723987123",
	                                         "1.123456789",
	                                         "1.",
	                                         "-1.2",
	                                         "-1.2e1",
	                                         " 1.2 ",
	                                         " 1.2e2 ",
	                                         " \t 1.2e2 \t"};
	duckdb::vector<double> expected_values = {
	    1.3, 1.3, 1.34514, 1e10, 1e-2, -1e-1, 1.1781237378938173987123987123981723981723981723987123,
	    1.123456789, 1.0, -1.2, -12, 1.2, 120, 120};
	// malformed numbers, interior whitespace and a bare sign are rejected
	duckdb::vector<string> broken_values = {
	    "-", "", "aaa",
	    "12aaa", "1e10e10", "1e",
	    "1e-", "1e10a", "1.1781237378938173987123987123981723981723981723934834583490587123w",
	    "1.2.3", "1.222.", "1..",
	    "1 . 2", "1. 2", "1.2 e20",
	    "+"};
	TestStringCastDouble<double>(working_values, expected_values, broken_values);
}

View File

@@ -0,0 +1,411 @@
# name: test/common/test_cast_hugeint.test
# description: Test hugeint casting from various types
# group: [common]
# test float -> hugeint casts
statement ok
CREATE TABLE working_floats(f FLOAT);
CREATE TABLE broken_floats(f FLOAT);
statement ok
INSERT INTO working_floats VALUES (10.0), (-10.0), (32767.0), (-32767.0), (1.3), (-2.7), (2000000000.0), (-2000000000.0), (4000000000000000000.0), (-4000000000000000000.0), (1329227995784915872903807060280344576.0), (-1329227995784915872903807060280344576.0);
statement ok
INSERT INTO broken_floats VALUES (170141183460469231731687303715884105729.0), (-170141183460469231731687303715884105729.0);
query I
SELECT f::HUGEINT::FLOAT FROM working_floats
----
10.0
-10.0
32767.0
-32767.0
1.0
-3.0
2000000000.0
-2000000000.0
4000000000000000000.0
-4000000000000000000.0
1329227995784915872903807060280344576
-1329227995784915872903807060280344576
# test broken casts
# we test one by one to ensure that every single value is broken
loop i 0 2
statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_floats ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*FLOAT.*is out of range for.*INT128.*
endloop
# test double -> hugeint casts
statement ok
CREATE TABLE working_doubles(f DOUBLE);
CREATE TABLE broken_doubles(f DOUBLE);
statement ok
INSERT INTO working_doubles VALUES (10.0), (-10.0), (32767.0), (-32767.0), (1.3), (-2.7), (2000000000.0), (-2000000000.0), (4000000000000000000.0), (-4000000000000000000.0), (1329227995784915872903807060280344576.0), (-1329227995784915872903807060280344576.0);
statement ok
INSERT INTO broken_doubles VALUES (1361129467683753853853498429727072845824.0), (-1361129467683753853853498429727072845824.0), (1.0e100), (-1.0e100);
query I
SELECT f::HUGEINT::DOUBLE FROM working_doubles
----
10.0
-10.0
32767.0
-32767.0
1.0
-3.0
2000000000.0
-2000000000.0
4000000000000000000.0
-4000000000000000000.0
1329227995784915872903807060280344576
-1329227995784915872903807060280344576
# we handle the values one by one here
loop i 0 4
statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_doubles ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*DOUBLE.*is out of range for.*INT128.*
endloop
# test varchar -> hugeint casts
statement ok
CREATE TABLE working_strings(f VARCHAR);
CREATE TABLE broken_strings(f VARCHAR);
statement ok
INSERT INTO working_strings VALUES ('10'), ('-10'), ('-1329227995784915872903807060280344576'), ('170141183460469231731687303715884105727'), ('-170141183460469231731687303715884105728'), ('1.3'), ('-9223372036854775807.1293813'), ('1e18'), ('1e+18'), ('1.'), ('.1'), ('0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'), ('1000e-40'), ('10000e-3');
statement ok
INSERT INTO broken_strings VALUES ('170141183460469231731687303715884105728'), ('-170141183460469231731687303715884105729'), ('10000000000000000000000000000000000000000000000000000000000000'), ('aaaa'), ('19A'), (''), ('1.A'), ('1.2382398723A'), ('1e++1'), ('1e+1+1'), ('1e+1-1'), ('+'), ('.'), ('. '), ('10000000000e37');
# cast the working strings to HUGEINT; fractional parts are rounded away and
# exponents are applied, so e.g. '1.3' -> 1 and '1e18' -> 1000000000000000000
query I
SELECT f::HUGEINT FROM working_strings
----
10
-10
-1329227995784915872903807060280344576
170141183460469231731687303715884105727
-170141183460469231731687303715884105728
1
-9223372036854775807
1000000000000000000
1000000000000000000
1
0
0
0
10

# every broken string must fail; we test one by one so each single value is
# checked. broken_strings holds 15 rows and the upper bound of "loop" is
# exclusive, so iterate i over [0, 15)
loop i 0 15

statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_strings ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*string.*INT128.*

endloop
# cast all powers of ten
statement ok
CREATE TABLE powers_of_ten(p VARCHAR);
statement ok
INSERT INTO powers_of_ten VALUES ('1'), ('10'), ('100'), ('1000'), ('10000'), ('100000'), ('1000000'), ('10000000'), ('100000000'), ('1000000000'), ('10000000000'), ('100000000000'), ('1000000000000'), ('10000000000000'), ('100000000000000'), ('1000000000000000'), ('10000000000000000'), ('100000000000000000'), ('1000000000000000000'), ('10000000000000000000'), ('100000000000000000000'), ('1000000000000000000000'), ('10000000000000000000000'), ('100000000000000000000000'), ('1000000000000000000000000'), ('10000000000000000000000000'), ('100000000000000000000000000'), ('1000000000000000000000000000'), ('10000000000000000000000000000'), ('100000000000000000000000000000'), ('1000000000000000000000000000000'), ('10000000000000000000000000000000'), ('100000000000000000000000000000000'), ('1000000000000000000000000000000000'), ('10000000000000000000000000000000000'), ('100000000000000000000000000000000000'), ('1000000000000000000000000000000000000'), ('10000000000000000000000000000000000000'), ('100000000000000000000000000000000000000'), ('-1'), ('-10'), ('-100'), ('-1000'), ('-10000'), ('-100000'), ('-1000000'), ('-10000000'), ('-100000000'), ('-1000000000'), ('-10000000000'), ('-100000000000'), ('-1000000000000'), ('-10000000000000'), ('-100000000000000'), ('-1000000000000000'), ('-10000000000000000'), ('-100000000000000000'), ('-1000000000000000000'), ('-10000000000000000000'), ('-100000000000000000000'), ('-1000000000000000000000'), ('-10000000000000000000000'), ('-100000000000000000000000'), ('-1000000000000000000000000'), ('-10000000000000000000000000'), ('-100000000000000000000000000'), ('-1000000000000000000000000000'), ('-10000000000000000000000000000'), ('-100000000000000000000000000000'), ('-1000000000000000000000000000000'), ('-10000000000000000000000000000000'), ('-100000000000000000000000000000000'), ('-1000000000000000000000000000000000'), ('-10000000000000000000000000000000000'), ('-100000000000000000000000000000000000'), 
('-1000000000000000000000000000000000000'), ('-10000000000000000000000000000000000000'), ('-100000000000000000000000000000000000000');
query I
SELECT p::HUGEINT FROM powers_of_ten
----
1
10
100
1000
10000
100000
1000000
10000000
100000000
1000000000
10000000000
100000000000
1000000000000
10000000000000
100000000000000
1000000000000000
10000000000000000
100000000000000000
1000000000000000000
10000000000000000000
100000000000000000000
1000000000000000000000
10000000000000000000000
100000000000000000000000
1000000000000000000000000
10000000000000000000000000
100000000000000000000000000
1000000000000000000000000000
10000000000000000000000000000
100000000000000000000000000000
1000000000000000000000000000000
10000000000000000000000000000000
100000000000000000000000000000000
1000000000000000000000000000000000
10000000000000000000000000000000000
100000000000000000000000000000000000
1000000000000000000000000000000000000
10000000000000000000000000000000000000
100000000000000000000000000000000000000
-1
-10
-100
-1000
-10000
-100000
-1000000
-10000000
-100000000
-1000000000
-10000000000
-100000000000
-1000000000000
-10000000000000
-100000000000000
-1000000000000000
-10000000000000000
-100000000000000000
-1000000000000000000
-10000000000000000000
-100000000000000000000
-1000000000000000000000
-10000000000000000000000
-100000000000000000000000
-1000000000000000000000000
-10000000000000000000000000
-100000000000000000000000000
-1000000000000000000000000000
-10000000000000000000000000000
-100000000000000000000000000000
-1000000000000000000000000000000
-10000000000000000000000000000000
-100000000000000000000000000000000
-1000000000000000000000000000000000
-10000000000000000000000000000000000
-100000000000000000000000000000000000
-1000000000000000000000000000000000000
-10000000000000000000000000000000000000
-100000000000000000000000000000000000000
query I
SELECT p::HUGEINT::VARCHAR FROM powers_of_ten
----
1
10
100
1000
10000
100000
1000000
10000000
100000000
1000000000
10000000000
100000000000
1000000000000
10000000000000
100000000000000
1000000000000000
10000000000000000
100000000000000000
1000000000000000000
10000000000000000000
100000000000000000000
1000000000000000000000
10000000000000000000000
100000000000000000000000
1000000000000000000000000
10000000000000000000000000
100000000000000000000000000
1000000000000000000000000000
10000000000000000000000000000
100000000000000000000000000000
1000000000000000000000000000000
10000000000000000000000000000000
100000000000000000000000000000000
1000000000000000000000000000000000
10000000000000000000000000000000000
100000000000000000000000000000000000
1000000000000000000000000000000000000
10000000000000000000000000000000000000
100000000000000000000000000000000000000
-1
-10
-100
-1000
-10000
-100000
-1000000
-10000000
-100000000
-1000000000
-10000000000
-100000000000
-1000000000000
-10000000000000
-100000000000000
-1000000000000000
-10000000000000000
-100000000000000000
-1000000000000000000
-10000000000000000000
-100000000000000000000
-1000000000000000000000
-10000000000000000000000
-100000000000000000000000
-1000000000000000000000000
-10000000000000000000000000
-100000000000000000000000000
-1000000000000000000000000000
-10000000000000000000000000000
-100000000000000000000000000000
-1000000000000000000000000000000
-10000000000000000000000000000000
-100000000000000000000000000000000
-1000000000000000000000000000000000
-10000000000000000000000000000000000
-100000000000000000000000000000000000
-1000000000000000000000000000000000000
-10000000000000000000000000000000000000
-100000000000000000000000000000000000000
# test large constants and correct parsing into either HUGEINT or DOUBLE
query II
SELECT typeof(4832904823908104981209840981240981277), 4832904823908104981209840981240981277
----
HUGEINT 4832904823908104981209840981240981277
query II
SELECT typeof(48329048239081049812098409812409812772), 48329048239081049812098409812409812772
----
HUGEINT 48329048239081049812098409812409812772
query II
SELECT typeof(483290482390810498120984098124098127725), 483290482390810498120984098124098127725
----
DOUBLE 483290482390810498120984098124098127725.0
query II
SELECT typeof(4832904823908104981209840981240981277256), 4832904823908104981209840981240981277256
----
DOUBLE 4832904823908104981209840981240981277256.0
query II
SELECT typeof(48329048239081049812098409812409812772568), 48329048239081049812098409812409812772568
----
DOUBLE 48329048239081049812098409812409812772568.0
query II
SELECT typeof(483290482390810498120984098124098127725683), 483290482390810498120984098124098127725683
----
DOUBLE 483290482390810498120984098124098127725683.0
query I
SELECT 0::HUGEINT::VARCHAR
----
0
# hugeint -> uints
# uint8
query I
select '255'::HUGEINT::UINT8
----
255
statement error
select '-1'::hugeint::uint8
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT8.*
statement error
select '256'::hugeint::uint8
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT8.*
# uint16
query I
select '65535'::HUGEINT::UINT16
----
65535
statement error
select '-1'::hugeint::uint16
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT16.*
statement error
select '65536'::hugeint::uint16
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT16.*
# uint32
query I
select '4294967295'::HUGEINT::UINT32
----
4294967295
statement error
select '-1'::hugeint::uint32
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT32.*
statement error
select '4294967296'::hugeint::uint32
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT32.*
# UINT64
query I
select '18446744073709551615'::HUGEINT::UINT64
----
18446744073709551615
statement error
select '-1'::hugeint::UINT64
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT64.*
statement error
select '18446744073709551616'::hugeint::UINT64
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT64.*
# uint -> hugeint
query I
select '255'::UINT8::HUGEINT
----
255
query I
select '65535'::UINT16::HUGEINT
----
65535
query I
select '4294967295'::UINT32::HUGEINT
----
4294967295
query I
select '18446744073709551615'::UINT64::HUGEINT
----
18446744073709551615

View File

@@ -0,0 +1,168 @@
# name: test/common/test_cast_struct.test
# description: Test casting structs
# group: [common]
statement ok
PRAGMA enable_verification
statement error
SELECT struct_pack(b => 42)::STRUCT(a INT);
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_extract(struct_pack(b => 42)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42)::STRUCT(a STRING), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => 42)::ROW(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42)::ROW(a INT), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => 42::DOUBLE)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42::DOUBLE)::STRUCT(a INT), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => '42'::DOUBLE)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => '42'::DOUBLE)::STRUCT(a INT), 'a');
----
42
statement error
SELECT struct_pack(b => '42'::DOUBLE)::STRUCT(a INT, c STRING)
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_pack(b => 'hello'::STRING)::STRUCT(b INT)
----
Could not convert string 'hello' to INT32
statement error
SELECT struct_pack(a => 'hello'::STRING, b => 'world'::STRING)::STRUCT(a STRING, b INT)
----
Could not convert string 'world' to INT32
statement error
SELECT struct_pack(a => [1, 2, 3])::STRUCT(a INT)
----
Unimplemented type for cast (INTEGER[] -> INTEGER)
statement error
SELECT struct_pack(a => struct_pack(b => 42)::STRUCT(b INT))::STRUCT(a INT)
----
Unimplemented type for cast (STRUCT(b INTEGER) -> INTEGER)
statement error
SELECT struct_pack(b => 'hello'::STRING)::STRUCT(a INT)
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_pack(b => '42'::DOUBLE, c => 'asdf'::STRING)::STRUCT(a1 INT, a2 STRING);
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_pack(a1 => '42'::DOUBLE, a2 => 'asdf'::STRING)::STRUCT(a1 INT, a2 STRING);
----
{'a1': 42, 'a2': asdf}
query I
SELECT ROW(42, 'asdf');
----
(42, asdf)
statement error
SELECT ROW();
----
pack nothing into a struct
query I
SELECT ROW(NULL);
----
(NULL)
query I
SELECT ROW(NULL, NULL);
----
(NULL, NULL)
# MB example
query I
SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))
----
{'a': 1, 'b': 2}
query I
SELECT a::ROW(a INT, b STRING) r FROM (VALUES (ROW(1, 'asdf')), (ROW(4, 'fdsa'))) s(a);
----
{'a': 1, 'b': asdf}
{'a': 4, 'b': fdsa}
statement error
SELECT struct_extract({'a': a}, a) FROM (SELECT a::VARCHAR AS a FROM range(10) tbl(a));
----
<REGEX>:.*Binder Error.*Key name for struct_extract needs to be a constant string.*
statement error
SELECT struct_extract({'a': 42}, 42)
----
<REGEX>:.*Binder Error.*can only be used on unnamed structs.*
query I
SELECT struct_extract_at({'a': 42}, 1)
----
42
statement error
SELECT struct_extract_at({'a': 42}, 0)
----
<REGEX>:.*Binder Error.*out of range.*
statement error
SELECT struct_extract_at({'a': 42}, 42)
----
<REGEX>:.*Binder Error.*out of range.*
# Test string to struct cast within struct casting.
query I
SELECT {a: {b: '{a: 3, b: "Hello World"}'}}::STRUCT(a STRUCT(b STRUCT(a INT, b VARCHAR)));
----
{'a': {'b': {'a': 3, 'b': Hello World}}}
# Test if try_cast continues after encountering error.
query I
SELECT TRY_CAST(struct_pack(a => 4, b => 'Ducky', c => '1964-06-15')
AS STRUCT(a INT, b DOUBLE, c DATE));
----
{'a': 4, 'b': NULL, 'c': 1964-06-15}
query I
SELECT TRY_CAST(struct_pack(a => 4, b => 'Ducky', c => 'Tommorow', d => {a:3.0})
AS STRUCT(a VARCHAR[], b VARCHAR, c DATE, d STRUCT(a INT)));
----
{'a': NULL, 'b': Ducky, 'c': NULL, 'd': {'a': 3}}

View File

@@ -0,0 +1,40 @@
#include "catch.hpp"
#include "duckdb/common/checksum.hpp"
#include <vector>
using namespace duckdb;
using namespace std;
#define NUM_INTS 10
TEST_CASE("Checksum tests", "[checksum]") {
	// fill a buffer with the values 1..NUM_INTS
	int buffer[NUM_INTS];
	for (size_t idx = 0; idx < NUM_INTS; idx++) {
		buffer[idx] = idx + 1;
	}
	// verify that checksumming the same bytes twice gives the same result
	uint64_t first = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	uint64_t second = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(first == second);
	// flipping a single value should change the checksum
	buffer[3] = 1;
	uint64_t changed = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(first != changed);
	// a zero in the input must not zero out the checksum
	buffer[3] = 0;
	uint64_t with_zero = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(with_zero != 0);
	// moving the zero to a different position should alter the checksum
	buffer[3] = 4;
	buffer[4] = 0;
	uint64_t moved_zero = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(with_zero != moved_zero);
	REQUIRE(first != with_zero);
	REQUIRE(first != moved_zero);
}

View File

@@ -0,0 +1,238 @@
#include "catch.hpp"
#include "duckdb/common/file_buffer.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/fstream.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/virtual_file_system.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
// Writes a small marker file at the given path. The path may be a plain native
// path or a file:// URI; URIs are converted back to a native path first.
static void create_dummy_file(string fname) {
	string normalized_string;
	if (StringUtil::StartsWith(fname, "file:///")) {
		// strip the URI scheme: on Windows "file:///X:/..." becomes "X:/...",
		// elsewhere "file:///path" keeps its leading slash ("/path")
#ifdef _WIN32
		normalized_string = fname.substr(8);
#else
		normalized_string = fname.substr(7);
#endif
	} else if (StringUtil::StartsWith(fname, "file://localhost/")) {
		// NOTE(review): "file://localhost/" is 17 characters, yet both branches
		// skip 18 and are identical, which makes the #ifdef redundant and looks
		// off by one relative to the file:/// handling above -- confirm against
		// the inputs this helper actually receives before relying on it.
#ifdef _WIN32
		normalized_string = fname.substr(18);
#else
		normalized_string = fname.substr(18);
#endif
	} else {
		// already a native path
		normalized_string = fname;
	}
	ofstream outfile(normalized_string);
	outfile << "I_AM_A_DUMMY" << endl;
	outfile.close();
}
// Exercises directory/file operations through three spellings of the file URI
// scheme (file:///, file://localhost/, file:) against the local file system.
TEST_CASE("Make sure the file:// protocol works as expected", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	auto dname = fs->JoinPath(fs->GetWorkingDirectory(), TestCreatePath("TEST_DIR"));
	auto dname_converted_slashes = StringUtil::Replace(dname, "\\", "/");
	// handle differences between windows and linux
	if (StringUtil::StartsWith(dname_converted_slashes, "/")) {
		dname_converted_slashes = dname_converted_slashes.substr(1);
	}
	// Path of format file:///bla/bla on 'nix and file:///X:/bla/bla on Windows
	auto dname_triple_slash = fs->JoinPath("file://", dname_converted_slashes);
	// Path of format file://localhost/bla/bla on 'nix and file://localhost/X:/bla/bla on Windows
	auto dname_localhost = fs->JoinPath("file://localhost", dname_converted_slashes);
	// Path of format file:/bla/bla (no authority component)
	auto dname_no_host = fs->JoinPath("file:", dname_converted_slashes);
	string fname = "TEST_FILE";
	string fname2 = "TEST_FILE_TWO";
	// start from a clean directory
	if (fs->DirectoryExists(dname_triple_slash)) {
		fs->RemoveDirectory(dname_triple_slash);
	}
	fs->CreateDirectory(dname_triple_slash);
	REQUIRE(fs->DirectoryExists(dname_triple_slash));
	REQUIRE(!fs->FileExists(dname_triple_slash));
	// we can call this again and nothing happens
	fs->CreateDirectory(dname_triple_slash);
	auto fname_in_dir = fs->JoinPath(dname_triple_slash, fname);
	auto fname_in_dir2 = fs->JoinPath(dname_localhost, fname2);
	auto fname_in_dir3 = fs->JoinPath(dname_no_host, fname2);
	create_dummy_file(fname_in_dir);
	// the file created through the file:/// URI must be visible through it
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->DirectoryExists(fname_in_dir));
	size_t n_files = 0;
	REQUIRE(fs->ListFiles(dname_triple_slash, [&n_files](const string &path, bool) { n_files++; }));
	REQUIRE(n_files == 1);
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
	// globbing through the URI should find exactly the created file
	auto file_listing = fs->Glob(fs->JoinPath(dname_triple_slash, "*"));
	REQUIRE(file_listing[0].path == fname_in_dir);
	// move between two different URI spellings of the same directory
	fs->MoveFile(fname_in_dir, fname_in_dir2);
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(fs->FileExists(fname_in_dir2));
	auto file_listing_after_move = fs->Glob(fs->JoinPath(dname_no_host, "*"));
	REQUIRE(file_listing_after_move[0].path == fname_in_dir3);
	// removing the directory removes its contents as well
	fs->RemoveDirectory(dname_triple_slash);
	REQUIRE(!fs->DirectoryExists(dname_triple_slash));
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
}
TEST_CASE("Make sure file system operators work as advertised", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	auto dname = TestCreatePath("TEST_DIR");
	string fname = "TEST_FILE";
	string fname2 = "TEST_FILE_TWO";
	// start from a clean slate
	if (fs->DirectoryExists(dname)) {
		fs->RemoveDirectory(dname);
	}
	fs->CreateDirectory(dname);
	REQUIRE(fs->DirectoryExists(dname));
	REQUIRE(!fs->FileExists(dname));
	// we can call this again and nothing happens
	fs->CreateDirectory(dname);
	auto fname_in_dir = fs->JoinPath(dname, fname);
	auto fname_in_dir2 = fs->JoinPath(dname, fname2);
	create_dummy_file(fname_in_dir);
	// a file is a file, not a directory
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->DirectoryExists(fname_in_dir));
	// the directory contains exactly the one file we created
	size_t n_files = 0;
	REQUIRE(fs->ListFiles(dname, [&n_files](const string &path, bool) { n_files++; }));
	REQUIRE(n_files == 1);
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
	// moving renames the file: the old name disappears, the new one appears
	fs->MoveFile(fname_in_dir, fname_in_dir2);
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(fs->FileExists(fname_in_dir2));
	// removing the directory also removes its contents
	fs->RemoveDirectory(dname);
	REQUIRE(!fs->DirectoryExists(dname));
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
}
// note: the integer count is chosen as 512 so that we write 512*8=4096 bytes to the file
// this is required for the Direct-IO as on Windows Direct-IO can only write multiples of sector sizes
// sector sizes are typically one of [512/1024/2048/4096] bytes, hence a 4096 bytes write succeeds.
#define INTEGER_COUNT 512
// Write INTEGER_COUNT int64 values to a file, read them back and verify them.
TEST_CASE("Test file operations", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	duckdb::unique_ptr<FileHandle> handle;
	int64_t test_data[INTEGER_COUNT];
	for (int i = 0; i < INTEGER_COUNT; i++) {
		test_data[i] = i;
	}
	auto fname = TestCreatePath("test_file");
	// standard reading/writing test
	// open file for writing
	REQUIRE_NOTHROW(handle = fs->OpenFile(fname, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE));
	// write the INTEGER_COUNT integers (the old comment claimed 10)
	REQUIRE_NOTHROW(handle->Write(QueryContext(), (void *)test_data, sizeof(int64_t) * INTEGER_COUNT, 0));
	// close the file
	handle.reset();
	// wipe the buffer so a successful read cannot be confused with stale data
	for (int i = 0; i < INTEGER_COUNT; i++) {
		test_data[i] = 0;
	}
	// now open the file for reading
	REQUIRE_NOTHROW(handle = fs->OpenFile(fname, FileFlags::FILE_FLAGS_READ));
	// read the integers back
	REQUIRE_NOTHROW(handle->Read(QueryContext(), (void *)test_data, sizeof(int64_t) * INTEGER_COUNT, 0));
	// verify that every value round-tripped (the original only checked the first 10)
	for (int i = 0; i < INTEGER_COUNT; i++) {
		REQUIRE(test_data[i] == i);
	}
	handle.reset();
	fs->RemoveFile(fname);
}
TEST_CASE("absolute paths", "[file_system]") {
	duckdb::LocalFileSystem fs;
#ifndef _WIN32
	// on POSIX only paths starting with '/' are absolute
	REQUIRE(fs.IsPathAbsolute("/home/me"));
	REQUIRE(!fs.IsPathAbsolute("./me"));
	REQUIRE(!fs.IsPathAbsolute("me"));
#else
	// extended-length \\?\ prefixed paths are absolute
	const std::string long_path = "\\\\?\\D:\\very long network\\";
	REQUIRE(fs.IsPathAbsolute(long_path));
	// UNC network shares are absolute
	const std::string network = "\\\\network_drive\\filename.csv";
	REQUIRE(fs.IsPathAbsolute(network));
	// drive-letter paths are absolute with either slash flavor
	REQUIRE(fs.IsPathAbsolute("C:\\folder\\filename.csv"));
	REQUIRE(fs.IsPathAbsolute("C:/folder\\filename.csv"));
	// normalization lower-cases the drive letter and unifies separators to backslashes
	REQUIRE(fs.NormalizeAbsolutePath("C:/folder\\filename.csv") == "c:\\folder\\filename.csv");
	REQUIRE(fs.NormalizeAbsolutePath(network) == network);
	REQUIRE(fs.NormalizeAbsolutePath(long_path) == "\\\\?\\d:\\very long network\\");
#endif
}
// Verify VirtualFileSystem::ExtractSubSystem semantics: unknown names and
// already-extracted or disabled subsystems all yield nullptr.
TEST_CASE("extract subsystem", "[file_system]") {
	duckdb::VirtualFileSystem vfs;
	auto local_filesystem = FileSystem::CreateLocal();
	auto *local_filesystem_ptr = local_filesystem.get();
	vfs.RegisterSubSystem(std::move(local_filesystem));
	// Extracting a non-existent filesystem gets nullptr.
	REQUIRE(vfs.ExtractSubSystem("non-existent") == nullptr);
	// Extracting an existing filesystem transfers ownership back to the caller.
	auto extracted_filesystem = vfs.ExtractSubSystem(local_filesystem_ptr->GetName());
	REQUIRE(extracted_filesystem.get() == local_filesystem_ptr);
	// Re-extraction gets nullptr: the subsystem is no longer registered.
	// (The original test repeated the "non-existent" lookup here, which did not
	// actually exercise re-extraction.)
	REQUIRE(vfs.ExtractSubSystem(extracted_filesystem->GetName()) == nullptr);
	// Register the subsystem again but disable it; disabled subsystems may not be extracted.
	const ::duckdb::string target_fs = extracted_filesystem->GetName();
	const ::duckdb::vector<string> disabled_subfilesystems {target_fs};
	vfs.RegisterSubSystem(std::move(extracted_filesystem));
	vfs.SetDisabledFileSystems(disabled_subfilesystems);
	REQUIRE(vfs.ExtractSubSystem(target_fs) == nullptr);
}
TEST_CASE("re-register subsystem", "[file_system]") {
	duckdb::VirtualFileSystem vfs;
	// Registering a local filesystem for the first time succeeds.
	vfs.RegisterSubSystem(FileSystem::CreateLocal());
	// Registering a second subsystem under the same name must throw.
	REQUIRE_THROWS(vfs.RegisterSubSystem(FileSystem::CreateLocal()));
}

View File

@@ -0,0 +1,98 @@
#include "catch.hpp"
#include "duckdb/common/serializer/binary_deserializer.hpp"
#include "duckdb/common/serializer/binary_serializer.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/common/types/hyperloglog.hpp"
using namespace duckdb;
using namespace std;
TEST_CASE("Test that hyperloglog works", "[hyperloglog]") {
	HyperLogLog log;
	// add a million elements of the same value; the cardinality estimate must stay exactly 1
	int x = 4;
	for (size_t i = 0; i < 1000000; i++) {
		log.InsertElement(Hash(x));
	}
	REQUIRE(log.Count() == 1);
	// now add a million different values
	HyperLogLog log2;
	for (size_t i = 0; i < 1000000; i++) {
		x = i;
		log2.InsertElement(Hash(x));
	}
	// the count is approximate, but should be pretty close to a million (within 5%)
	size_t count = log2.Count();
	REQUIRE(count > 950000LL);
	REQUIRE(count < 1050000LL);
	// now we can merge the HLLs
	log.Merge(log2);
	// the count should be pretty much the same (log only contributed one distinct value)
	count = log.Count();
	REQUIRE(count > 950000LL);
	REQUIRE(count < 1050000LL);
	// now test composability of the merge
	// add everything to one big_hll one
	// add chunks to small_hll ones and then merge them
	// the result should be the same
	HyperLogLog big_hll;
	HyperLogLog small_hll[16];
	for (size_t i = 0; i < 1000000; i++) {
		// note: 3 / 2 is integer division, so this is modulo (i + 1); it simply
		// produces a varied stream of repeating values
		x = ((2 * i) + 3) % (i + 3 / 2);
		big_hll.InsertElement(Hash(x));
		small_hll[i % 16].InsertElement(Hash(x));
	}
	// now merge them into one big_hll HyperLogLog
	for (idx_t i = 1; i < 16; i++) {
		small_hll[0].Merge(small_hll[i]);
	}
	// the result should be identical to the big_hll one
	REQUIRE(small_hll[0].Count() == big_hll.Count());
}
TEST_CASE("Test different hyperloglog version serialization", "[hyperloglog]") {
	Allocator allocator;
	MemoryStream stream(allocator);
	// serialize using the old (v1.0.0-compatible) format
	SerializationOptions options;
	options.serialization_compatibility = SerializationCompatibility::FromString("v1.0.0");
	// Add 100M values to a NEW HyperLogLog
	HyperLogLog original_log;
	for (size_t i = 0; i < 100000000; i++) {
		original_log.InsertElement(Hash(i));
		switch (i + 1) {
		case 1:
		case 10:
		case 100:
		case 1000:
		case 10000:
		case 100000:
		case 1000000:
		case 10000000:
		case 100000000:
			break; // We roundtrip the serialization every order of magnitude
		default:
			continue;
		}
		// Grab the count
		const auto original_count = original_log.Count();
		// Serialize it as an OLD HyperLogLog
		stream.Rewind();
		BinarySerializer::Serialize(original_log, stream, options);
		// Deserialize it, creating a NEW HyperLogLog from the OLD one
		stream.Rewind();
		auto deserialized_log = BinaryDeserializer::Deserialize<HyperLogLog>(stream);
		// Verify that the deserialized count is equal
		const auto deserialized_count = deserialized_log->Count();
		REQUIRE(original_count == deserialized_count);
	}
}

View File

@@ -0,0 +1,98 @@
# name: test/common/test_local_file_urls.test
# group: [common]

# Note: __WORKING_DIRECTORY__ will be replaced with the full path to the working dir of the tests (root of duckdb repo)

# Build the three file-URL spellings of the working directory
# (backslashes converted to forward slashes, leading slash trimmed)
statement ok
SET VARIABLE work_dir_no_host='file:/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_triple_slash='file:///' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_localhost='file://localhost/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

# testing file:/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file:///some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file://localhost/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/normalize.csv');
----
John	ipsum

# Test glob with file:/some/path; the URL prefix must be preserved in the results
query II
SELECT file[:6], parse_filename(file) FROM glob(getvariable('work_dir_no_host') || '/data/*/bad_date_timestamp_mix.csv')
----
file:/	bad_date_timestamp_mix.csv

# Test glob with file:///some/path
query II
SELECT file[:8], parse_filename(file) FROM glob(getvariable('work_dir_triple_slash') || '/data/*/bad_date_timestamp_mix.csv')
----
file:///	bad_date_timestamp_mix.csv

# Test glob with file://localhost/some/path/to/duckdb/repo
query II
SELECT file[:17], parse_filename(file) FROM glob(getvariable('work_dir_localhost') || '/data/*/bad_date_timestamp_mix.csv')
----
file://localhost/	bad_date_timestamp_mix.csv

# Test scanning multiple files using glob with file:/some/path
query III
SELECT id, filename[:6], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:/	test.csv
2	file:/	test.csv

# Test scanning multiple files using glob with file:///some/path
query III
SELECT id, filename[:8], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:///	test.csv
2	file:///	test.csv

# Test scanning multiple files using glob with file://localhost/some/path
query III
SELECT id, filename[:17], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file://localhost/	test.csv
2	file://localhost/	test.csv

require noforcestorage

# Ensure secrets work correctly using the file:// scheme
statement ok
create secret secret_file_url_tripleslash (TYPE HTTP, scope 'file:///');

statement ok
create secret secret_file_url_localhost (TYPE HTTP, scope 'file://localhost/');

statement ok
create secret secret_without_file_path (TYPE HTTP);

query I
SELECT name FROM which_secret(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_tripleslash

query I
SELECT name FROM which_secret(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_localhost

# raw paths now do not match the file:// scoped secrets
query I
SELECT name FROM which_secret('__WORKING_DIRECTORY__/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_without_file_path

View File

@@ -0,0 +1,60 @@
#include "catch.hpp"
#include "duckdb/common/numeric_utils.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
TEST_CASE("Numeric cast checks", "[numeric_cast]") {
#ifdef DUCKDB_CRASH_ON_ASSERT
	// under this build flag a failed cast aborts instead of throwing, so the
	// REQUIRE_THROWS checks below cannot run
	return;
#endif
	// unsigned -> unsigned
	// can not fail upcasting unsigned type
	REQUIRE_NOTHROW(NumericCast<uint16_t, uint8_t>(NumericLimits<uint8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<uint16_t, uint8_t>(NumericLimits<uint8_t>::Minimum()));
	// we can down cast if value fits
	REQUIRE_NOTHROW(NumericCast<uint8_t, uint16_t>(NumericLimits<uint8_t>::Maximum()));
	// but not if it doesn't
	REQUIRE_THROWS(NumericCast<uint8_t, uint16_t>(NumericLimits<uint8_t>::Maximum() + 1));
	// signed -> signed, same as above
	REQUIRE_NOTHROW(NumericCast<int16_t, int8_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<int16_t, int8_t>(NumericLimits<int8_t>::Minimum()));
	REQUIRE_NOTHROW(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Maximum() + 1));
	REQUIRE_THROWS(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Minimum() - 1));
	// unsigned to signed
	REQUIRE_NOTHROW(NumericCast<int8_t, uint8_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<int8_t, uint8_t>(NumericLimits<uint8_t>::Minimum()));
	// uint8 max will not fit in int8
	REQUIRE_THROWS(NumericCast<int8_t, uint8_t>(NumericLimits<uint8_t>::Maximum()));
	// signed to unsigned
	// can cast int8 max to uint8
	REQUIRE_NOTHROW(NumericCast<uint8_t, int8_t>(NumericLimits<int8_t>::Maximum()));
	// cannot cast int8 min to uint8
	REQUIRE_THROWS(NumericCast<uint8_t, int8_t>(NumericLimits<int8_t>::Minimum()));
	// can't cast anything negative to anything unsigned
	REQUIRE_THROWS(NumericCast<uint64_t, int8_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int16_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int32_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int64_t>(-1));
	// can't downcast big number
	REQUIRE_THROWS(NumericCast<int64_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int32_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint32_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int16_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint16_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int8_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint8_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	// TODO this should throw but doesn't
	// REQUIRE_THROWS(NumericCast<uint8_t, hugeint_t>(hugeint_t(-1)));
}

View File

@@ -0,0 +1,81 @@
#include "catch.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/main/config.hpp"
using namespace duckdb;
TEST_CASE("Test parse logical type", "[parse_logical_type]") {
	SECTION("simple types") {
		// primitive type names parse case-insensitively
		REQUIRE(DBConfig::ParseLogicalType("integer") == LogicalType::INTEGER);
		REQUIRE(DBConfig::ParseLogicalType("any") == LogicalType::ANY);
	}
	SECTION("nested types") {
		// list: [] suffix
		REQUIRE(DBConfig::ParseLogicalType("ANY[]") == LogicalType::LIST(LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[]") == LogicalType::LIST(LogicalType::VARCHAR));
		// array: [N] suffix with a fixed size
		REQUIRE(DBConfig::ParseLogicalType("ANY[3]") == LogicalType::ARRAY(LogicalType::ANY, 3));
		REQUIRE(DBConfig::ParseLogicalType("FLOAT[42]") == LogicalType::ARRAY(LogicalType::FLOAT, 42));
		// the maximum supported array size still parses
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[100000]") ==
		        LogicalType::ARRAY(LogicalType::VARCHAR, ArrayType::MAX_ARRAY_SIZE));
		// map: whitespace after the comma is optional
		REQUIRE(DBConfig::ParseLogicalType("MAP(VARCHAR, VARCHAR)") ==
		        LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR));
		REQUIRE(DBConfig::ParseLogicalType("MAP(ANY,ANY)") == LogicalType::MAP(LogicalType::ANY, LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("MAP(INTEGER,ANY)") ==
		        LogicalType::MAP(LogicalType::INTEGER, LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("MAP(ANY, DOUBLE)") ==
		        LogicalType::MAP(LogicalType::ANY, LogicalType::DOUBLE));
		// union: named members, each with its own type
		child_list_t<LogicalType> union_members;
		union_members.emplace_back(make_pair("num", LogicalTypeId::INTEGER));
		union_members.emplace_back(make_pair("v", LogicalTypeId::VARCHAR));
		union_members.emplace_back(make_pair("f", LogicalTypeId::FLOAT));
		REQUIRE(DBConfig::ParseLogicalType("UNION(num INTEGER, v VARCHAR, f FLOAT)") ==
		        LogicalType::UNION(union_members));
		// struct: named fields, each with its own type
		child_list_t<LogicalType> struct_children;
		struct_children.emplace_back(make_pair("year", LogicalTypeId::BIGINT));
		struct_children.emplace_back(make_pair("month", LogicalTypeId::BIGINT));
		struct_children.emplace_back(make_pair("day", LogicalTypeId::BIGINT));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
		        LogicalType::STRUCT(struct_children));
	}
	SECTION("deeper nested types") {
		// list of lists
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[][]") ==
		        LogicalType::LIST(LogicalType::LIST(LogicalType::VARCHAR)));
		// array of lists
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[][3]") ==
		        LogicalType::ARRAY(LogicalType::LIST(LogicalType::VARCHAR), 3));
		// list of structs
		child_list_t<LogicalType> date_struct_children;
		date_struct_children.emplace_back(make_pair("year", LogicalTypeId::BIGINT));
		date_struct_children.emplace_back(make_pair("month", LogicalTypeId::BIGINT));
		date_struct_children.emplace_back(make_pair("day", LogicalTypeId::BIGINT));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(year BIGINT, month BIGINT, day BIGINT)[]") ==
		        LogicalType::LIST(LogicalType::STRUCT(date_struct_children)));
		// map with list as key
		REQUIRE(DBConfig::ParseLogicalType("MAP(VARCHAR[],FLOAT)") ==
		        LogicalType::MAP(LogicalType::LIST(LogicalType::VARCHAR), LogicalType::FLOAT));
		// struct with list, array and map
		child_list_t<LogicalType> mix_struct_children;
		mix_struct_children.emplace_back(make_pair("my_list", LogicalType::LIST(LogicalType::ANY)));
		mix_struct_children.emplace_back(make_pair("my_array", LogicalType::ARRAY(LogicalType::VARCHAR, 2)));
		mix_struct_children.emplace_back(
		    make_pair("my_map", LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR)));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(my_list ANY[], my_array VARCHAR[2], my_map MAP(VARCHAR,VARCHAR))") ==
		        LogicalType::STRUCT(mix_struct_children));
	}
}

View File

@@ -0,0 +1,517 @@
#include "catch.hpp"
#include "duckdb.hpp"
#include "duckdb/common/common.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/virtual_file_system.hpp"
#include "duckdb/main/materialized_query_result.hpp"
#include "test_config.hpp"
#include "test_helpers.hpp"
#include <iostream>
#include <shared_mutex>
using namespace duckdb;
// Logging toggles for the storage-fuzz tests in this file; flip for debugging.
// Not synchronized - set before the test runs.
bool g_enable_verbose_output = false;
bool g_enable_info_output = true;
// Log only when verbose output is enabled. Wrapped in do/while(0) so the macro
// behaves as a single statement (safe inside unbraced if/else).
#define PRINT_VERBOSE(x) \
	do { \
		if (g_enable_verbose_output) \
			std::cout << x << std::endl; \
	} while (0)
// Log informational messages (enabled by default).
#define PRINT_INFO(x) \
	do { \
		if (g_enable_info_output) \
			std::cout << x << std::endl; \
	} while (0)
// Returns true when `str` terminates with `suffix`; an empty suffix always matches.
bool ends_with(const std::string &str, const std::string &suffix) {
	// A suffix longer than the string can never match
	if (suffix.length() > str.length()) {
		return false;
	}
	// Walk both strings backwards; equal tails mean the suffix matches
	return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
// LocalFileSystem wrapper that can throw a single injected IOException at a chosen
// call site (write or fsync) on the database file, simulating a transient disk fault.
// All other operations pass straight through (with verbose logging).
class FaultInjectionFileSystem : public duckdb::LocalFileSystem {
public:
	// Call sites at which a fault may be injected.
	enum FaultInjectionSite {
		WRITE = 0,
		FSYNC = 1,
	};
	// Positional write; throws first if a WRITE fault is pending and this is the .db file.
	void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
		if (is_db_file(handle)) {
			ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
		}
		return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes, location);
	}
	// Fsync; throws first if an FSYNC fault is pending and this is the .db file.
	void FileSync(duckdb::FileHandle &handle) override {
		PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
		if (is_db_file(handle)) {
			ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
		}
		return duckdb::LocalFileSystem::FileSync(handle);
	}
	void RemoveFile(const duckdb::string &filename,
	                duckdb::optional_ptr<duckdb::FileOpener> opener = nullptr) override {
		PRINT_VERBOSE("FS remove " << filename);
		return duckdb::LocalFileSystem::RemoveFile(filename, opener);
	}
	void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
		PRINT_VERBOSE("FS truncate " << handle.GetPath() << " from " << handle.GetFileSize() << " to " << new_size);
		return duckdb::LocalFileSystem::Truncate(handle, new_size);
	}
	// In linux - trim() is equivalent to zeroing out a range (albeit in a much more efficient manner). Let's
	// reproduce this behavior regardless of whether the current environment supports it.
	bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
		PRINT_VERBOSE("FS trim " << handle.GetPath() << " offset=" << offset_bytes << " bytes=" << length_bytes);
		std::string nulls(length_bytes, '\0');
		duckdb::LocalFileSystem::Write(handle, (void *)nulls.data(), length_bytes, offset_bytes);
		return true;
	}
	// Will inject a single occurrence of a fault
	void InjectFault(FaultInjectionSite site) {
		std::lock_guard<std::mutex> l(fault_m_);
		// Make sure this is not called twice - as we will drop a fault
		REQUIRE(faults.insert(site).second);
	}

protected:
	// Throws (and consumes) the pending fault for `site`, if one was injected.
	void ThrowInjectedFaultIfSet(FaultInjectionSite site) {
		std::lock_guard<std::mutex> l(fault_m_);
		auto it = faults.find(site);
		if (it != faults.end()) {
			faults.erase(it);
			PRINT_VERBOSE("Injecting fault");
			throw duckdb::IOException("Injected fault");
		}
	}
	// Handle classification by file-name suffix.
	bool is_wal_file(const duckdb::FileHandle &handle) {
		return ends_with(handle.GetPath(), ".db.wal");
	}
	bool is_db_file(const duckdb::FileHandle &handle) {
		return ends_with(handle.GetPath(), ".db");
	}
	bool is_wal_or_db_file(const duckdb::FileHandle &handle) {
		return is_db_file(handle) || is_wal_file(handle);
	}

private:
	// Guards `faults`; injection can race with filesystem calls from worker threads.
	std::mutex fault_m_;
	// Pending one-shot faults, keyed by injection site.
	std::unordered_set<FaultInjectionSite> faults;
};
// This implementation of duckdb::FileSystem will cache writes to the database file in memory until fsync is called.
// It expects all read ranges to be perfectly aligned with previous writes.
class LazyFlushFileSystem : public FaultInjectionFileSystem {
public:
	~LazyFlushFileSystem() {
		// data that never reached fsync is lost on shutdown; report it for debugging
		if (!unflushed_chunks.empty()) {
			PRINT_INFO("Unflushed chunks on shutdown for db file");
		}
	}
	// Positional write: buffered in memory (keyed by offset) instead of hitting disk.
	void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
		// We only perform positional writes for the db file
		REQUIRE(is_db_file(handle));
		std::unique_lock<std::mutex> l(m_);
		ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
		// Store the data in memory until fsync occurs
		PRINT_VERBOSE("Caching chunk " << location << " bytes " << nr_bytes);
		// TODO: be lazy - don't handle partial overwrites
		REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
		auto it = unflushed_chunks.find(location);
		if (it != unflushed_chunks.end()) {
			// Check that if there is an existing chunk - it's size matches exactly
			REQUIRE(it->second.size() == nr_bytes);
			it->second = std::string((char *)buffer, nr_bytes);
		} else {
			unflushed_chunks.emplace(location, std::string((char *)buffer, nr_bytes));
		}
	}
	// Append-style write: only expected for the WAL, which is written through directly.
	int64_t Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
		// Check appends only occur on the WAL
		REQUIRE(is_wal_file(handle));
		return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes);
	}
	// Positional read: served from the in-memory cache when the exact chunk exists,
	// otherwise falls through to disk.
	void Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		REQUIRE(is_db_file(handle));
		{
			// TODO: shared_lock
			std::unique_lock<std::mutex> l(m_);
			// We don't handle partial overlaps for now.
			REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
			auto it = unflushed_chunks.find(location);
			if (it != unflushed_chunks.end()) {
				PRINT_VERBOSE("FS read cached chunk at offset=" << location << " bytes=" << nr_bytes);
				const auto &data = it->second;
				// Assume block-aligned reads
				REQUIRE(data.size() == nr_bytes);
				memcpy(buffer, data.data(), nr_bytes);
				return;
			}
		}
		PRINT_VERBOSE("FS read disk chunk at offset=" << location << " bytes=" << nr_bytes);
		return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes, location);
	}
	// Streaming read at the current file position; must not bypass cached chunks.
	int64_t Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
		PRINT_VERBOSE("FS read at end of file, bytes=" << nr_bytes);
		REQUIRE(is_wal_or_db_file(handle));
		if (is_db_file(handle)) {
			// Just make sure we don't miss the unflushed chunks
			REQUIRE(unflushed_chunks.empty());
		}
		return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes);
	}
	// Fsync: writes all cached chunks to disk (after a potential injected fault) and syncs.
	void FileSync(duckdb::FileHandle &handle) override {
		PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
		REQUIRE(is_wal_or_db_file(handle));
		if (!is_db_file(handle)) {
			return duckdb::LocalFileSystem::FileSync(handle);
		}
		std::unique_lock<std::mutex> l(m_);
		ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
		for (const auto &location_and_data : unflushed_chunks) {
			auto location = location_and_data.first;
			const auto &data = location_and_data.second;
			PRINT_VERBOSE("Flushing chunk " << location << " size=" << data.size());
			duckdb::LocalFileSystem::Write(handle, (void *)data.data(), data.size(), location);
		}
		unflushed_chunks.clear();
		duckdb::LocalFileSystem::FileSync(handle);
	}
	bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
		REQUIRE(is_db_file(handle));
		std::unique_lock<std::mutex> l(m_);
		// This is just simpler to implement
		REQUIRE(unflushed_chunks.count(offset_bytes) == 0);
		return FaultInjectionFileSystem::Trim(handle, offset_bytes, length_bytes);
	}
	void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
		std::unique_lock<std::mutex> l(m_);
		if (is_db_file(handle)) {
			REQUIRE(unflushed_chunks.empty());
		}
		return duckdb::LocalFileSystem::Truncate(handle, new_size);
	}

private:
	// Lock for modifying unflushed_chunks:
	// 1. Adding to unflushed_chunks on write
	// 2. Flushing unflushed_chunks on fsync
	// 3. Reading from unflushed_chunks
	std::mutex m_;
	// offset -> cached bytes for writes that have not been fsync'ed yet
	std::map<idx_t, std::string> unflushed_chunks;
	// Returns true when [offset, offset+length) overlaps a cached chunk without being
	// an exact offset+size match (partial overlaps are unsupported by this test FS).
	bool partially_overlaps_existing_chunk(const std::map<idx_t, std::string> &chunks, idx_t offset, size_t length) {
		idx_t end = offset + length;
		for (const auto &off_data : chunks) {
			auto off = off_data.first;
			const auto &data = off_data.second;
			idx_t chunk_end = off + data.size();
			// Check for any overlap
			bool overlap = offset < chunk_end && off < end;
			// Exclude full containment and exact match
			bool exact_match = (offset == off && length == data.size());
			if (overlap && !exact_match)
				return true;
		}
		return false;
	}
};
// Asserts that the query result `r` matches the expectation: with an empty
// expected_err_message the result must have succeeded; otherwise it must have
// failed and its error text must contain expected_err_message.
template <class ResultT>
void validate(ResultT &r, std::string expected_err_message = "") {
	const bool expect_error = !expected_err_message.empty();
	// For debugging: surface the actual error when a success was expected
	if (r.HasError() && !expect_error) {
		PRINT_INFO("Unexpected: query failed with " << r.GetError());
	}
	REQUIRE(expect_error == r.HasError());
	if (r.HasError()) {
		REQUIRE(r.GetError().find(expected_err_message) != std::string::npos);
	}
}
// Deletes the database file; passes when the file was removed or did not exist
// (ENOENT), and fails the test on any other removal error.
void cleanup_db_file(const std::string &filename) {
	const int rc = std::remove(filename.c_str());
	const bool gone = (rc == 0) || (errno == ENOENT);
	REQUIRE(gone);
}
TEST_CASE("simple fault injection storage test", "[storage][.]") {
	if (!TestConfiguration::TestRunStorageFuzzer()) {
		SKIP_TEST("storage-fuzzer not enabled");
		return;
	}
	duckdb::DBConfig config;
	// keep a raw pointer to inject faults later; ownership moves into the VFS below
	LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
	config.file_system = duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
	std::string file_path = TestCreatePath("pig.db");
	cleanup_db_file(file_path);
	{
		duckdb::DuckDB db(file_path, &config);
		duckdb::Connection con(db);
		// seed the table with 1000 + 1000000 rows
		validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000)"));
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"));
		auto res = con.Query("SELECT count(*) FROM t");
		validate(*res);
		REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
		// Writes are ok - fsync are not ok
		raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
		// the insert must fail at commit with the injected fault in the error chain
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"),
		         "TransactionContext Error: Failed to commit: Injected fault");
		// Check that the tx was rolled back
		auto res2 = con.Query("SELECT count(*) FROM t");
		validate(*res2);
		REQUIRE(res2->GetValue(0, 0).ToString() == "1001000");
	}
	// reopen the database: the previously committed state must survive the fault
	{
		duckdb::DuckDB db(file_path, &config);
		duckdb::Connection con(db);
		auto res = con.Query("SELECT count(*) FROM t");
		validate(*res);
		REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
	}
}
// Actions that the fuzzed storage test can perform. The numeric values are
// sparse (1 is unused); only the identity of each enumerator matters.
enum ActionType {
	// This action will simply flip the setting true -> false or false -> true
	TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT = 0,
	// insert a small batch (100 rows) into the table
	SMALL_WRITE = 2,
	// insert a large batch (1M rows) into the table
	LARGE_WRITE = 3,
	// large insert with an fsync fault injected, expected to fail and roll back
	LARGE_WRITE_WITH_FAULT = 4,
	// double the values in a random contiguous range
	UPDATE = 5,
	// delete a random contiguous range of values
	DELETE = 6,
	// NOTE(review): handler is outside this excerpt; presumably clears/recreates the table
	RESET_TABLE = 7,
};
TEST_CASE("fuzzed storage test", "[storage][.]") {
if (!TestConfiguration::TestRunStorageFuzzer()) {
SKIP_TEST("storage-fuzzer not enabled");
return;
}
// DuckDB Configurations
duckdb::DBConfig config;
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(true);
config.options.trim_free_blocks = true;
config.options.checkpoint_on_shutdown = false;
std::string file_path = TestCreatePath("pig.db");
cleanup_db_file(file_path);
{
duckdb::DuckDB db(file_path, &config);
duckdb::Connection con(db);
validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
}
std::map<double, ActionType> pct_to_action = {{0.1, ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT},
{0.3, ActionType::LARGE_WRITE},
{0.5, ActionType::SMALL_WRITE},
{0.7, ActionType::UPDATE},
{0.85, ActionType::DELETE},
{1.0, ActionType::LARGE_WRITE_WITH_FAULT}};
// Randomly generated sequence of actions
std::vector<ActionType> actions = {};
int NUM_ACTIONS = 30;
for (int i = 0; i < NUM_ACTIONS; i++) {
double selection = (rand() % 100) / 100.0;
for (const auto &prob_type : pct_to_action) {
auto prob = prob_type.first;
auto type = prob_type.second;
if (selection > prob) {
continue;
}
actions.push_back(type);
break;
}
}
actions.push_back(RESET_TABLE);
for (int i = 0; i < NUM_ACTIONS; i++) {
double selection = (rand() % 100) / 100.0;
for (const auto &prob_type : pct_to_action) {
auto prob = prob_type.first;
auto type = prob_type.second;
if (selection > prob) {
continue;
}
actions.push_back(type);
break;
}
}
uint64_t offset = 0;
bool skip_checkpoint_on_commit = true;
std::string expected_checksum = "";
duckdb::unique_ptr<QueryResult> previous_result;
for (const auto &action : actions) {
// Note: the injected file system has to be reset each time. DuckDB construction seems to be std::move'ing them
/*
LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
config.file_system =
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
*/
FaultInjectionFileSystem *raw_fs = new FaultInjectionFileSystem();
config.file_system =
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<FaultInjectionFileSystem>(raw_fs));
duckdb::DuckDB db(file_path, &config);
duckdb::Connection con(db);
// Compute a checksum
if (!expected_checksum.empty()) {
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
validate(*checksum);
auto computed_checksum = checksum->GetValue(0, 0).ToString();
PRINT_INFO("Verifying checksum computed=" << computed_checksum << ", actual=" << expected_checksum);
if (computed_checksum != expected_checksum) {
auto result = con.Query("SELECT * FROM t ORDER BY ALL");
string error;
ColumnDataCollection::ResultEquals(previous_result->Cast<MaterializedQueryResult>().Collection(),
result->Cast<MaterializedQueryResult>().Collection(), error);
Printer::PrintF("Checksum failure\nResult comparison:\n%s", error);
REQUIRE(computed_checksum == expected_checksum);
}
}
previous_result = con.Query("SELECT * FROM t ORDER BY ALL");
switch (action) {
case ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT:
skip_checkpoint_on_commit = !skip_checkpoint_on_commit;
PRINT_INFO("Setting skip commit=" << skip_checkpoint_on_commit);
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(skip_checkpoint_on_commit);
break;
case ActionType::SMALL_WRITE: {
std::string small_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 100) + ")";
PRINT_INFO("RUN: " << small_insert);
validate(*con.Query(small_insert));
offset += 100;
break;
}
case ActionType::LARGE_WRITE: {
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 1000 * 1000) + ")";
PRINT_INFO("RUN: " << large_insert);
validate(*con.Query(large_insert));
offset += 1000 * 1000;
break;
}
case ActionType::UPDATE: {
if (offset != 0) {
uint64_t begin = rand() % offset;
uint64_t length = rand() % (offset - begin);
std::string update_query = "UPDATE t SET i = i * 2 WHERE i > " + std::to_string(begin) + " and i <" +
std::to_string(begin + length);
PRINT_INFO("RUN: " << update_query);
validate(*con.Query(update_query));
}
break;
}
case ActionType::DELETE: {
if (offset != 0) {
uint64_t begin = rand() % offset;
uint64_t length = rand() % (offset - begin);
std::string delete_query =
"DELETE FROM t WHERE i > " + std::to_string(begin) + " and i <" + std::to_string(begin + length);
PRINT_INFO("RUN: " << delete_query);
validate(*con.Query(delete_query));
break;
}
}
case ActionType::LARGE_WRITE_WITH_FAULT: {
raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 1000 * 1000) + ")";
PRINT_INFO("RUN with fault: " << large_insert);
validate(*con.Query(large_insert), "Injected fault");
break;
}
case ActionType::RESET_TABLE: {
std::string replace_query = "CREATE OR REPLACE TABLE t(i INTEGER)";
PRINT_INFO("RUN with fault: " << replace_query);
validate(*con.Query(replace_query));
break;
}
}
// Compute a checksum (unless we injected a fault - which will invalidate the database)
if (action != ActionType::LARGE_WRITE_WITH_FAULT) {
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
validate(*checksum);
expected_checksum = checksum->GetValue(0, 0).ToString();
PRINT_INFO("Computed new checksum: " << expected_checksum);
} else {
PRINT_INFO("Keeping old checksum due to faults: " << expected_checksum);
}
}
}

View File

@@ -0,0 +1,14 @@
#include "catch.hpp"
#include "duckdb/function/scalar/strftime_format.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/types/date.hpp"
#include <vector>
using namespace duckdb;
using namespace std;
// Sanity check for StrfTimeFormat::Format: midnight of 1992-01-01 rendered
// with a compact %Y%m%d pattern must yield "19920101".
TEST_CASE("Test that strftime format works", "[strftime]") {
	// NOTE(review): the local was previously named `string`, which shadows
	// std::string (pulled into scope by `using namespace std;`) — renamed.
	auto formatted = StrfTimeFormat::Format(Timestamp::FromDatetime(Date::FromDate(1992, 1, 1), dtime_t(0)), "%Y%m%d");
	REQUIRE(formatted == "19920101");
}

View File

@@ -0,0 +1,499 @@
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/types/value.hpp"
#include "duckdb/common/to_string.hpp"
#include "duckdb/common/exception/parser_exception.hpp"
#include "catch.hpp"
#include "duckdb/common/vector.hpp"
#include <string>
#include <cstring>
using namespace duckdb;
// Verify that the platform's strcmp() orders "HXR" < "NUT" < "ZZZ", and that
// duckdb::Value string comparison operators agree with that ordering.
TEST_CASE("Test strcmp() to ensure platform sanity", "[comparison]") {
	// Raw strcmp ordering checks, one per string pair.
	REQUIRE(strcmp("ZZZ", "ZZZ") == 0);
	REQUIRE(strcmp("ZZZ", "HXR") > 0);
	REQUIRE(strcmp("ZZZ", "NUT") > 0);
	REQUIRE(strcmp("HXR", "ZZZ") < 0);
	REQUIRE(strcmp("HXR", "HXR") == 0);
	REQUIRE(strcmp("HXR", "NUT") < 0);
	REQUIRE(strcmp("NUT", "ZZZ") < 0);
	REQUIRE(strcmp("NUT", "HXR") > 0);
	REQUIRE(strcmp("NUT", "NUT") == 0);

	Value zzz("ZZZ");
	Value hxr("HXR");
	Value nut("NUT");

	// Each helper asserts the exact operator outcomes (>, >=, <=, ==, !=)
	// that must hold for a pair in the given relation.
	auto check_equal = [](const Value &lhs, const Value &rhs) {
		REQUIRE_FALSE(lhs > rhs);
		REQUIRE(lhs >= rhs);
		REQUIRE(lhs <= rhs);
		REQUIRE(lhs == rhs);
		REQUIRE_FALSE(lhs != rhs);
	};
	auto check_greater = [](const Value &lhs, const Value &rhs) {
		REQUIRE(lhs > rhs);
		REQUIRE(lhs >= rhs);
		REQUIRE_FALSE(lhs <= rhs);
		REQUIRE_FALSE(lhs == rhs);
		REQUIRE(lhs != rhs);
	};
	auto check_less = [](const Value &lhs, const Value &rhs) {
		REQUIRE_FALSE(lhs > rhs);
		REQUIRE_FALSE(lhs >= rhs);
		REQUIRE(lhs <= rhs);
		REQUIRE_FALSE(lhs == rhs);
		REQUIRE(lhs != rhs);
	};

	// All nine ordered pairs, mirroring the strcmp checks above.
	check_equal(zzz, zzz);
	check_greater(zzz, hxr);
	check_greater(zzz, nut);
	check_less(hxr, zzz);
	check_equal(hxr, hxr);
	check_less(hxr, nut);
	check_less(nut, zzz);
	check_greater(nut, hxr);
	check_equal(nut, nut);
}
// StringUtil::Join must concatenate items with the separator, with no
// leading/trailing separator, for three, one and zero items — for both the
// plain string overload and the formatter-callback overload.
TEST_CASE("Test join vector items", "[string_util]") {
	// Shared formatter for the integer overload of StringUtil::Join.
	const auto format_int = [](const int &item) {
		return to_string(item);
	};
	SECTION("Three string items") {
		duckdb::vector<std::string> parts {"abc", "def", "ghi"};
		REQUIRE(StringUtil::Join(parts, ",") == "abc,def,ghi");
	}
	SECTION("One string item") {
		duckdb::vector<std::string> parts {"abc"};
		REQUIRE(StringUtil::Join(parts, ",") == "abc");
	}
	SECTION("No string items") {
		duckdb::vector<std::string> parts;
		REQUIRE(StringUtil::Join(parts, ",") == "");
	}
	SECTION("Three int items") {
		duckdb::vector<int> numbers {1, 2, 3};
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "1, 2, 3");
	}
	SECTION("One int item") {
		duckdb::vector<int> numbers {1};
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "1");
	}
	SECTION("No int items") {
		duckdb::vector<int> numbers;
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "");
	}
}
// StringUtil::SplitWithParentheses splits on a separator (default ',') but
// never inside a (possibly nested) bracket pair, so composite type strings
// such as STRUCT(...) stay intact. Separator and bracket characters are
// configurable; mismatched brackets must throw.
TEST_CASE("Test SplitWithParentheses", "[string_util]") {
SECTION("Standard split") {
// With no brackets present it behaves like a plain split; empty input yields no items.
REQUIRE(StringUtil::SplitWithParentheses("") == duckdb::vector<string> {});
REQUIRE(StringUtil::SplitWithParentheses("x") == duckdb::vector<string> {"x"});
REQUIRE(StringUtil::SplitWithParentheses("hello") == duckdb::vector<string> {"hello"});
REQUIRE(StringUtil::SplitWithParentheses("hello,world") == duckdb::vector<string> {"hello", "world"});
}
SECTION("Single item with parentheses") {
// Separators inside parentheses do not split the item.
REQUIRE(StringUtil::SplitWithParentheses("STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
duckdb::vector<string> {"STRUCT(year BIGINT, month BIGINT, day BIGINT)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple)") == duckdb::vector<string> {"(apple)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple, pear)") == duckdb::vector<string> {"(apple, pear)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple, pear) banana") ==
duckdb::vector<string> {"(apple, pear) banana"});
REQUIRE(StringUtil::SplitWithParentheses("banana (apple, pear)") ==
duckdb::vector<string> {"banana (apple, pear)"});
REQUIRE(StringUtil::SplitWithParentheses("banana (apple, pear) banana") ==
duckdb::vector<string> {"banana (apple, pear) banana"});
}
SECTION("Multiple items with parentheses") {
// Splitting resumes once the bracket depth returns to zero.
REQUIRE(StringUtil::SplitWithParentheses("map::MAP(ANY,ANY),key::ANY") ==
duckdb::vector<string> {"map::MAP(ANY,ANY)", "key::ANY"});
REQUIRE(StringUtil::SplitWithParentheses("extra,STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
duckdb::vector<string> {"extra", "STRUCT(year BIGINT, month BIGINT, day BIGINT)"});
REQUIRE(StringUtil::SplitWithParentheses("extra,STRUCT(year BIGINT, month BIGINT, day BIGINT),extra") ==
duckdb::vector<string> {"extra", "STRUCT(year BIGINT, month BIGINT, day BIGINT)", "extra"});
REQUIRE(StringUtil::SplitWithParentheses("aa(bb)cc,dd(ee)ff") ==
duckdb::vector<string> {"aa(bb)cc", "dd(ee)ff"});
REQUIRE(StringUtil::SplitWithParentheses("aa(bb cc,dd),ee(f,,f)gg") ==
duckdb::vector<string> {"aa(bb cc,dd)", "ee(f,,f)gg"});
}
SECTION("Leading and trailing separators") {
// A leading separator produces a leading empty item; a single trailing
// separator after a non-empty item does not (see "aa," vs ",aa").
REQUIRE(StringUtil::SplitWithParentheses(",") == duckdb::vector<string> {""});
REQUIRE(StringUtil::SplitWithParentheses(",,") == duckdb::vector<string> {"", ""});
REQUIRE(StringUtil::SplitWithParentheses("aa,") == duckdb::vector<string> {"aa"});
REQUIRE(StringUtil::SplitWithParentheses(",aa") == duckdb::vector<string> {"", "aa"});
REQUIRE(StringUtil::SplitWithParentheses(",(aa,),") == duckdb::vector<string> {"", "(aa,)"});
}
SECTION("Leading and trailing spaces") {
// Whitespace is preserved verbatim — no trimming is performed.
REQUIRE(StringUtil::SplitWithParentheses(" ") == duckdb::vector<string> {" "});
REQUIRE(StringUtil::SplitWithParentheses("  ") == duckdb::vector<string> {"  "});
REQUIRE(StringUtil::SplitWithParentheses(" , ") == duckdb::vector<string> {" ", " "});
REQUIRE(StringUtil::SplitWithParentheses("aa, bb") == duckdb::vector<string> {"aa", " bb"});
REQUIRE(StringUtil::SplitWithParentheses(" aa,(bb, cc) ") == duckdb::vector<string> {" aa", "(bb, cc) "});
}
SECTION("Nested parentheses") {
// Bracket depth is tracked, so nested pairs stay within one item.
REQUIRE(StringUtil::SplitWithParentheses("STRUCT(aa BIGINT, bb STRUCT(cc BIGINT, dd BIGINT, BIGINT))") ==
duckdb::vector<string> {"STRUCT(aa BIGINT, bb STRUCT(cc BIGINT, dd BIGINT, BIGINT))"});
REQUIRE(StringUtil::SplitWithParentheses("(((aa)))") == duckdb::vector<string> {"(((aa)))"});
REQUIRE(StringUtil::SplitWithParentheses("((aa, bb))") == duckdb::vector<string> {"((aa, bb))"});
REQUIRE(StringUtil::SplitWithParentheses("aa,(bb,(cc,dd)),ee") ==
duckdb::vector<string> {"aa", "(bb,(cc,dd))", "ee"});
}
SECTION("other parentheses") {
// Custom bracket pair: '[' and ']'; a lone ')' is then just a character.
REQUIRE(StringUtil::SplitWithParentheses(" aa,[bb, cc] )", ',', '[', ']') ==
duckdb::vector<string> {" aa", "[bb, cc] )"});
}
SECTION("other separators") {
// Custom separator '|': ',' is then an ordinary character.
REQUIRE(StringUtil::SplitWithParentheses(" aa|(bb| cc),dd", '|') ==
duckdb::vector<string> {" aa", "(bb| cc),dd"});
}
SECTION("incongruent parentheses") {
// Unbalanced or mismatched brackets must be rejected.
REQUIRE_THROWS(StringUtil::SplitWithParentheses("("));
REQUIRE_THROWS(StringUtil::SplitWithParentheses(")"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("aa(bb"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("aa)bb"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("(aa)bb)"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("(aa(bb)"));
}
}
// StringUtil::SplitWithQuote splits on ',' while honoring double-quoted
// items: quotes may contain separators, quoted items are not trimmed, and
// malformed quoting (unclosed, unexpected, or back-to-back quoted items
// without a separator) must raise a ParserException.
TEST_CASE("Test split quoted strings", "[string_util]") {
SECTION("Empty string") {
REQUIRE(StringUtil::SplitWithQuote("") == duckdb::vector<string> {});
}
SECTION("Empty string with space") {
// Unquoted whitespace-only input yields no items.
REQUIRE(StringUtil::SplitWithQuote("   ") == duckdb::vector<string> {});
}
SECTION("One item") {
REQUIRE(StringUtil::SplitWithQuote("x") == duckdb::vector<string> {"x"});
}
SECTION("One item with space") {
// Unquoted items are trimmed.
REQUIRE(StringUtil::SplitWithQuote("  x  ") == duckdb::vector<string> {"x"});
}
SECTION("One item with quote") {
REQUIRE(StringUtil::SplitWithQuote("\"x\"") == duckdb::vector<string> {"x"});
}
SECTION("One empty item with quote") {
// "" is a present-but-empty item, unlike fully empty input above.
REQUIRE(StringUtil::SplitWithQuote("\"\"") == duckdb::vector<string> {""});
}
SECTION("One empty item, followed by non-empty one - Or vise versa") {
REQUIRE(StringUtil::SplitWithQuote("\"\",hello") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote(",\"hello\"") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote(",hello") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote("\"\",\"hello\"") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote("\"hello\",") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("hello,\"\"") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("hello,") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("\"hello\",\"\"") == duckdb::vector<string> {"hello", ""});
}
SECTION("One quoted item with spaces") {
// Whitespace inside quotes is preserved; whitespace outside is trimmed.
REQUIRE(StringUtil::SplitWithQuote("  \" x y \"  ") == duckdb::vector<string> {" x y "});
}
SECTION("One quoted item with a delimiter") {
// A separator inside quotes does not split.
REQUIRE(StringUtil::SplitWithQuote("\"x,y\"") == duckdb::vector<string> {"x,y"});
}
SECTION("Three items") {
REQUIRE(StringUtil::SplitWithQuote("x,y,z") == duckdb::vector<string> {"x", "y", "z"});
}
SECTION("Three items, with and without quote") {
REQUIRE(StringUtil::SplitWithQuote("x,\"y\",z") == duckdb::vector<string> {"x", "y", "z"});
}
SECTION("Even more items, with and without quote") {
REQUIRE(StringUtil::SplitWithQuote("a,b,c,d,e,f,g") ==
duckdb::vector<string> {"a", "b", "c", "d", "e", "f", "g"});
}
SECTION("Three empty items") {
REQUIRE(StringUtil::SplitWithQuote(",,") == duckdb::vector<string> {"", "", ""});
}
SECTION("Three empty quoted items") {
REQUIRE(StringUtil::SplitWithQuote("\"\",\"\",\"\"") == duckdb::vector<string> {"", "", ""});
}
SECTION("Unclosed quote") {
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x"), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x "), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\","), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x,"), ParserException);
}
SECTION("Unexpected quote") {
// A quote opening in the middle of an unquoted item is invalid.
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("abc\"def"), ParserException);
}
SECTION("Missing delimiter") {
// Two items must be separated by the delimiter, not just by quotes/space.
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x\"\"y\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x\" \"y\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("x y"), ParserException);
}
}
// StringUtil path helpers: GetFileName, GetFileExtension, GetFileStem and
// GetFilePath. All of them accept both '/' and '\\' as separators and
// tolerate trailing separators and trailing "." components.
TEST_CASE("Test path utilities", "[string_util]") {
SECTION("File name") {
// Last path component; trailing separators and "." components are ignored.
REQUIRE("bin" == StringUtil::GetFileName("/usr/bin/"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("tmp/foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("tmp\\foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("/tmp/foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("\\tmp\\foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/."));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/./"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/.//"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\."));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\.\\"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\.\\\\"));
// ".." is returned as-is; the root has no file name.
REQUIRE(".." == StringUtil::GetFileName(".."));
REQUIRE("" == StringUtil::GetFileName("/"));
}
SECTION("File extension") {
// Text after the last dot; dotfiles and extension-less names yield "".
REQUIRE("cpp" == StringUtil::GetFileExtension("test.cpp"));
REQUIRE("gz" == StringUtil::GetFileExtension("test.cpp.gz"));
REQUIRE("" == StringUtil::GetFileExtension("test"));
REQUIRE("" == StringUtil::GetFileExtension(".gitignore"));
}
SECTION("File stem (base name)") {
// File name with the last extension removed; dotfiles keep their name.
REQUIRE("test" == StringUtil::GetFileStem("test.cpp"));
REQUIRE("test.cpp" == StringUtil::GetFileStem("test.cpp.gz"));
REQUIRE("test" == StringUtil::GetFileStem("test"));
REQUIRE(".gitignore" == StringUtil::GetFileStem(".gitignore"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/."));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/./"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/.//"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\."));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\.\\"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\.\\\\"));
REQUIRE(".." == StringUtil::GetFileStem(".."));
REQUIRE("" == StringUtil::GetFileStem("/"));
REQUIRE("test" == StringUtil::GetFileStem("tmp/test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("tmp\\test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("/tmp/test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("\\tmp\\test.txt"));
}
SECTION("File path") {
// Directory part without the trailing separator; repeated and trailing
// separators are collapsed/ignored.
REQUIRE("/usr/local/bin" == StringUtil::GetFilePath("/usr/local/bin/test.cpp"));
REQUIRE("\\usr\\local\\bin" == StringUtil::GetFilePath("\\usr\\local\\bin\\test.cpp"));
REQUIRE("tmp" == StringUtil::GetFilePath("tmp/test.txt"));
REQUIRE("tmp" == StringUtil::GetFilePath("tmp\\test.txt"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp/test.txt"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\test.txt"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp/test.txt/"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\test.txt\\"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp//test.txt"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\\\test.txt"));
}
}
// Smoke test for StringUtil::ParseJSONMap: feed it structurally complex
// documents and require that parsing completes without throwing. The parsed
// contents themselves are not inspected here.
// NOTE(review): previously the results were stored in a variable that was
// never read (and immediately overwritten); REQUIRE_NOTHROW now makes the
// "must parse cleanly" intent explicit and removes the dead local.
TEST_CASE("Test JSON Parsing", "[string_util]") {
	// Real-world PROJJSON CRS document: deeply nested objects and arrays.
	REQUIRE_NOTHROW(StringUtil::ParseJSONMap(R"JSON_LITERAL(
{
"crs": {
"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json",
"type": "GeographicCRS",
"name": "WGS 84",
"datum_ensemble": {
"name": "World Geodetic System 1984 ensemble",
"members": [
{
"name": "World Geodetic System 1984 (Transit)",
"id": {
"authority": "EPSG",
"code": 1166
}
},
{
"name": "World Geodetic System 1984 (G730)",
"id": {
"authority": "EPSG",
"code": 1152
}
},
{
"name": "World Geodetic System 1984 (G873)",
"id": {
"authority": "EPSG",
"code": 1153
}
},
{
"name": "World Geodetic System 1984 (G1150)",
"id": {
"authority": "EPSG",
"code": 1154
}
},
{
"name": "World Geodetic System 1984 (G1674)",
"id": {
"authority": "EPSG",
"code": 1155
}
},
{
"name": "World Geodetic System 1984 (G1762)",
"id": {
"authority": "EPSG",
"code": 1156
}
},
{
"name": "World Geodetic System 1984 (G2139)",
"id": {
"authority": "EPSG",
"code": 1309
}
},
{
"name": "World Geodetic System 1984 (G2296)",
"id": {
"authority": "EPSG",
"code": 1383
}
}
],
"ellipsoid": {
"name": "WGS 84",
"semi_major_axis": 6378137,
"inverse_flattening": 298.257223563
},
"accuracy": "2.0",
"id": {
"authority": "EPSG",
"code": 6326
}
},
"coordinate_system": {
"subtype": "ellipsoidal",
"axis": [
{
"name": "Geodetic latitude",
"abbreviation": "Lat",
"direction": "north",
"unit": "degree"
},
{
"name": "Geodetic longitude",
"abbreviation": "Lon",
"direction": "east",
"unit": "degree"
}
]
},
"scope": "Horizontal component of 3D system.",
"area": "World.",
"bbox": {
"south_latitude": -90,
"west_longitude": -180,
"north_latitude": 90,
"east_longitude": 180
},
"id": {
"authority": "EPSG",
"code": 4326
}
},
"crs_type": "projjson"
} )JSON_LITERAL"));
	// Scalar type coverage: ints, negatives, reals, null, arrays, nested
	// objects, empty arrays and both booleans.
	REQUIRE_NOTHROW(StringUtil::ParseJSONMap(R"JSON_LITERAL(
{
"int": 42,
"signed_int": -42,
"real": 1.5,
"null_val": null,
"arr": [1, 2, 3],
"obj": {
"str_val": "val"
},
"empty_arr": [],
"bool_t": true,
"bool_f": false
}
)JSON_LITERAL"));
}

View File

@@ -0,0 +1,33 @@
#include "catch.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/main/appender.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
// Assert that `str` is accepted as valid UTF-8 by the vector: storing it
// must not throw, and reading it back must round-trip to the same value.
static void test_valid_str(Vector &a, const char *str) {
	Value expected(str);
	REQUIRE_NOTHROW(a.SetValue(0, expected));
	REQUIRE(a.GetValue(0) == expected);
}
// UTF-8 validation when storing strings in a VARCHAR vector: well-formed
// 1- to 4-byte sequences must round-trip, malformed byte sequences must throw.
TEST_CASE("UTF8 error checking", "[utf8]") {
Vector a(LogicalType::VARCHAR);
// Valid sequences of every encoded length.
test_valid_str(a, "a"); // 1-byte ASCII
test_valid_str(a, "\xc3\xb1"); // 2-byte sequence (U+00F1)
test_valid_str(a, "\xE2\x82\xA1"); // 3-byte sequence (U+20A1)
test_valid_str(a, "\xF0\x9F\xA6\x86"); // 4-byte sequence — a duck!
test_valid_str(a, "\xf0\x90\x8c\xbc"); // 4-byte sequence (U+1033C)
// Malformed sequences must be rejected.
REQUIRE_THROWS(a.SetValue(0, Value("\xc3\x28"))); // 2-byte lead, invalid continuation byte
REQUIRE_THROWS(a.SetValue(0, Value("\xa0\xa1"))); // stray continuation bytes, no lead byte
REQUIRE_THROWS(a.SetValue(0, Value("\xe2\x28\xa1"))); // 3-byte lead, bad 2nd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xe2\x82\x28"))); // 3-byte lead, bad 3rd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x28\x8c\xbc"))); // 4-byte lead, bad 2nd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x90\x28\xbc"))); // 4-byte lead, bad 3rd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x28\x8c\x28"))); // 4-byte lead, multiple bad bytes
REQUIRE_THROWS(a.SetValue(0, Value("\xf8\xa1\xa1\xa1\xa1"))); // 5-byte form: disallowed by RFC 3629
REQUIRE_THROWS(a.SetValue(0, Value("\xfc\xa1\xa1\xa1\xa1\xa1"))); // 6-byte form: disallowed by RFC 3629
}