should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,17 @@
# Compile the common test helpers as a unity-build OBJECT library so they are
# compiled once and their objects linked into the main test binary.
add_library_unity(
test_common
OBJECT
test_cast.cpp
test_checksum.cpp
test_file_system.cpp
# NOTE(review): comparable DuckDB targets name this file test_hyperloglog.cpp --
# confirm the name below matches the file actually on disk.
test_hyperlog.cpp
test_numeric_cast.cpp
test_parse_logical_type.cpp
test_utf.cpp
test_storage_fuzz.cpp
test_strftime.cpp
test_string_util.cpp)
# Append this target's objects to ALL_OBJECT_FILES and export the list to the
# enclosing directory's scope so the parent can link them.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:test_common>
PARENT_SCOPE)

View File

@@ -0,0 +1,333 @@
#include "catch.hpp"
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/limits.hpp"
#include "duckdb/common/types.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/vector.hpp"
using namespace duckdb; // NOLINT
using namespace std; // NOLINT
//! Reference ("expected") result of casting SRC to DST. The generic case is a
//! plain numeric conversion; floating-point sources are covered by the
//! specializations in this file, which round to the nearest integer first.
template <class SRC, class DST>
struct ExpectedNumericCast {
	static inline DST Operation(SRC value) {
		// use a named cast instead of the C-style cast for greppability/intent
		return static_cast<DST>(value);
	}
};
//! Doubles are rounded to the nearest integer (per the current rounding mode,
//! round-to-nearest-even by default) before conversion, mirroring the rounding
//! behavior of DuckDB's numeric casts.
template <class DST>
struct ExpectedNumericCast<double, DST> {
	static inline DST Operation(double value) {
		return static_cast<DST>(nearbyint(value));
	}
};
//! Floats are rounded with the float-precision nearbyintf before conversion,
//! mirroring the rounding behavior of DuckDB's numeric casts.
template <class DST>
struct ExpectedNumericCast<float, DST> {
	static inline DST Operation(float value) {
		return static_cast<DST>(nearbyintf(value));
	}
};
// Checks that every value in working_values casts from SRC to DST and matches
// the reference ExpectedNumericCast result, and that every value in
// broken_values is rejected by both the throwing and non-throwing cast paths.
template <class SRC, class DST>
static void TestNumericCast(duckdb::vector<SRC> &working_values, duckdb::vector<SRC> &broken_values) {
	DST result;
	for (auto value : working_values) {
		// NOTE(review): REQUIRE_NOTHROW only asserts that no exception escapes;
		// the result of the == comparison inside it is discarded. The actual
		// value check happens through TryCast below.
		REQUIRE_NOTHROW(Cast::Operation<SRC, DST>(value) == (DST)value);
		REQUIRE(TryCast::Operation<SRC, DST>(value, result));
		// compare against the reference cast (rounds for floating-point sources)
		REQUIRE(result == ExpectedNumericCast<SRC, DST>::Operation(value));
	}
	for (auto value : broken_values) {
		// out-of-range values must throw from Cast and return false from TryCast
		REQUIRE_THROWS(Cast::Operation<SRC, DST>(value));
		REQUIRE(!TryCast::Operation<SRC, DST>(value, result));
	}
}
// Parses each working string into DST and checks it against the corresponding
// expected value; where the textual form is directly comparable, also
// round-trips the result back through ConvertToString. Every broken string
// must be rejected by both the throwing and non-throwing cast paths.
// Note: Trim mutates the entries of working_values in place.
template <class DST>
static void TestStringCast(duckdb::vector<string> &working_values, duckdb::vector<DST> &expected_values,
                           duckdb::vector<string> &broken_values) {
	DST result;
	for (idx_t i = 0; i < working_values.size(); i++) {
		auto &value = working_values[i];
		auto expected_value = expected_values[i];
		REQUIRE_NOTHROW(Cast::Operation<string_t, DST>(string_t(value)) == expected_value);
		REQUIRE(TryCast::Operation<string_t, DST>(string_t(value), result));
		REQUIRE(result == expected_value);
		// normalize surrounding whitespace before the round-trip comparison
		// (assumes no working value trims down to the empty string, since
		// value[0] is read below)
		StringUtil::Trim(value);
		duckdb::vector<string> splits;
		splits = StringUtil::Split(value, 'e');
		if (splits.size() > 1 || value[0] == '+') {
			// skip the round-trip check for scientific notation and explicit
			// '+' signs: their canonical string form differs from the input
			continue;
		}
		// compare only the integer part; a trailing ".xyz" does not appear in
		// the stringified integer result
		splits = StringUtil::Split(value, '.');
		REQUIRE(ConvertToString::Operation<DST>(result) == splits[0]);
	}
	for (auto &value : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, DST>(string_t(value)));
		REQUIRE(!TryCast::Operation<string_t, DST>(string_t(value), result));
	}
}
// Walks through "1eN" / "-1eN" for N in [0, 100): while the magnitude still
// fits in T, parsing must succeed and yield the exact power of ten; once it
// exceeds T's maximum, parsing must fail.
template <class T>
static void TestExponent() {
	T parse_result;
	string str;
	// tracked in double so the running power of ten itself cannot overflow T
	double value = 1;
	T expected_value = 1;
	for (idx_t exponent = 0; exponent < 100; exponent++) {
		if (value < (double)NumericLimits<T>::Maximum()) {
			// expect success
			str = "1e" + to_string(exponent);
			REQUIRE(TryCast::Operation<string_t, T>(string_t(str), parse_result));
			REQUIRE(parse_result == expected_value);
			str = "-1e" + to_string(exponent);
			REQUIRE(TryCast::Operation<string_t, T>(string_t(str), parse_result));
			REQUIRE(parse_result == -expected_value);
			value *= 10;
			// check again because otherwise this overflows
			if (value < (double)NumericLimits<T>::Maximum()) {
				expected_value *= 10;
			}
		} else {
			// expect failure
			str = "1e" + to_string(exponent);
			REQUIRE(!TryCast::Operation<string_t, T>(string_t(str), parse_result));
			str = "-1e" + to_string(exponent);
			REQUIRE(!TryCast::Operation<string_t, T>(string_t(str), parse_result));
		}
	}
}
TEST_CASE("Test casting to boolean", "[cast]") {
	// accepted spellings of booleans, paired index-wise with their values
	duckdb::vector<string> working_values = {"true", "false", "TRUE", "FALSE", "T", "F", "1", "0", "False", "True"};
	duckdb::vector<bool> expected_values = {true, false, true, false, true, false, true, false, false, true};
	// anything that is not a recognized boolean spelling must be rejected
	duckdb::vector<string> broken_values = {"304", "1002", "blabla", "", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"};
	bool parsed;
	idx_t idx = 0;
	for (auto &input : working_values) {
		auto expected = expected_values[idx++];
		REQUIRE_NOTHROW(Cast::Operation<string_t, bool>(input) == expected);
		REQUIRE(TryCast::Operation<string_t, bool>(input, parsed));
		REQUIRE(parsed == expected);
	}
	for (auto &input : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, bool>(input));
		REQUIRE(!TryCast::Operation<string_t, bool>(input, parsed));
	}
}
TEST_CASE("Test casting to int8_t", "[cast]") {
	// int16_t -> int8_t: anything outside [-128, 127] must fail
	duckdb::vector<int16_t> working_values_int16 = {10, -10, 127, -128};
	duckdb::vector<int16_t> broken_values_int16 = {128, -129, 1000, -1000};
	TestNumericCast<int16_t, int8_t>(working_values_int16, broken_values_int16);
	// int32_t -> int8_t
	duckdb::vector<int32_t> working_values_int32 = {10, -10, 127, -128};
	duckdb::vector<int32_t> broken_values_int32 = {128, -129, 1000000, -1000000};
	TestNumericCast<int32_t, int8_t>(working_values_int32, broken_values_int32);
	// int64_t -> int8_t
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -128};
	duckdb::vector<int64_t> broken_values_int64 = {128, -129, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int8_t>(working_values_int64, broken_values_int64);
	// float -> int8_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10, -10, 127, -128, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {128, -129, 10000000000.0f, -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int8_t>(working_values_float, broken_values_float);
	// double -> int8_t
	duckdb::vector<double> working_values_double = {10, -10, 127, -128, 1.3, -2.7};
	duckdb::vector<double> broken_values_double = {128, -129, 10000000000.0, -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int8_t>(working_values_double, broken_values_double);
	// string -> int8_t: decimals, exponent notation and surrounding whitespace
	// are all accepted
	duckdb::vector<string> working_values_str = {"10", "+10", "-10", "127", "-128", "1.3", "1e2",
	                                             "2e1", "2e0", "20e-1", "1.", " 3", " 3 ", "\t3 \t \n"};
	duckdb::vector<int8_t> expected_values_str = {10, 10, -10, 127, -128, 1, 100, 20, 2, 2, 1, 3, 3, 3};
	// out-of-range values, malformed numbers and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {"128",
	                                            "-129",
	                                            "10000000000000000000000000000000000000000000000000000000000000",
	                                            "aaaa",
	                                            "19A",
	                                            "",
	                                            "1e3",
	                                            "1e",
	                                            "1e-",
	                                            "1e100",
	                                            "1e100000000",
	                                            "10000e-1",
	                                            " 3 2",
	                                            "+"};
	TestStringCast<int8_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int8_t>();
}
TEST_CASE("Test casting to int16_t", "[cast]") {
	// int32_t -> int16_t: anything outside [-32768, 32767] must fail
	duckdb::vector<int32_t> working_values_int32 = {10, -10, 127, -127, 32767, -32768};
	duckdb::vector<int32_t> broken_values_int32 = {32768, -32769, 1000000, -1000000};
	TestNumericCast<int32_t, int16_t>(working_values_int32, broken_values_int32);
	// int64_t -> int16_t
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -127, 32767, -32768};
	duckdb::vector<int64_t> broken_values_int64 = {32768, -32769, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int16_t>(working_values_int64, broken_values_int64);
	// float -> int16_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10.0f, -10.0f, 32767.0f, -32768.0f, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {32768.0f, -32769.0f, 10000000000.0f, -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int16_t>(working_values_float, broken_values_float);
	// double -> int16_t
	duckdb::vector<double> working_values_double = {10, -10, 32767, -32768, 1.3, -2.7};
	duckdb::vector<double> broken_values_double = {32768, -32769, 10000000000.0, -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int16_t>(working_values_double, broken_values_double);
	// string -> int16_t: decimals and exponent notation are accepted
	duckdb::vector<string> working_values_str = {"10", "-10", "32767", "-32768", "1.3",
	                                             "3e4", "250e2", "3e+4", "3e0", "30e-1"};
	duckdb::vector<int16_t> expected_values_str = {10, -10, 32767, -32768, 1, 30000, 25000, 30000, 3, 3};
	// out-of-range values, malformed numbers and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {
	    "32768", "-32769", "10000000000000000000000000000000000000000000000000000000000000",
	    "aaaa", "19A", "",
	    "1.A", "1e", "1e-",
	    "1e100", "1e100000000", "+"};
	TestStringCast<int16_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int16_t>();
}
TEST_CASE("Test casting to int32_t", "[cast]") {
	// int64_t -> int32_t: anything outside [-2147483648, 2147483647] must fail
	duckdb::vector<int64_t> working_values_int64 = {10, -10, 127, -127, 32767, -32768, 2147483647LL, -2147483648LL};
	duckdb::vector<int64_t> broken_values_int64 = {2147483648LL, -2147483649LL, 10000000000LL, -10000000000LL};
	TestNumericCast<int64_t, int32_t>(working_values_int64, broken_values_int64);
	// float -> int32_t: fractional values round to the nearest integer
	duckdb::vector<float> working_values_float = {10.0f, -10.0f, 2000000000.0f, -2000000000.0f, 1.3f, -2.7f};
	duckdb::vector<float> broken_values_float = {3000000000.0f, -3000000000.0f, 10000000000.0f,
	                                             -10000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int32_t>(working_values_float, broken_values_float);
	// double -> int32_t
	duckdb::vector<double> working_values_double = {10, -10, 32767.0, -32768.0, 1.3, -2.7, 2147483647.0, -2147483648.0};
	duckdb::vector<double> broken_values_double = {2147483648.0, -2147483649.0, 10000000000.0,
	                                               -10000000000.0, 1e100, -1e100};
	TestNumericCast<double, int32_t>(working_values_double, broken_values_double);
	// string -> int32_t
	duckdb::vector<string> working_values_str = {"10", "-10", "2147483647", "-2147483647", "1.3", "-1.3", "1e6"};
	duckdb::vector<int32_t> expected_values_str = {10, -10, 2147483647, -2147483647, 1, -1, 1000000};
	// out-of-range values and malformed numbers must be rejected
	duckdb::vector<string> broken_values_str = {
	    "2147483648", "-2147483649", "10000000000000000000000000000000000000000000000000000000000000",
	    "aaaa", "19A", "",
	    "1.A", "1e1e1e1"};
	TestStringCast<int32_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int32_t>();
}
TEST_CASE("Test casting to int64_t", "[cast]") {
	// float -> int64_t: fractional values round; magnitudes beyond int64 fail
	duckdb::vector<float> working_values_float = {10.0f,
	                                              -10.0f,
	                                              32767.0f,
	                                              -32768.0f,
	                                              1.3f,
	                                              -2.7f,
	                                              2000000000.0f,
	                                              -2000000000.0f,
	                                              4000000000000000000.0f,
	                                              -4000000000000000000.0f};
	duckdb::vector<float> broken_values_float = {20000000000000000000.0f, -20000000000000000000.0f, 1e30f, -1e30f};
	TestNumericCast<float, int64_t>(working_values_float, broken_values_float);
	// double -> int64_t
	duckdb::vector<double> working_values_double = {
	    10, -10, 32767, -32768, 1.3, -2.7, 2147483647, -2147483648.0, 4611686018427387904.0, -4611686018427387904.0};
	duckdb::vector<double> broken_values_double = {18446744073709551616.0, -18446744073709551617.0, 1e100, -1e100};
	TestNumericCast<double, int64_t>(working_values_double, broken_values_double);
	// string -> int64_t: fractional tails are dropped, exponents are applied
	duckdb::vector<string> working_values_str = {
	    "10", "-10", "9223372036854775807", "-9223372036854775807", "1.3", "-9223372036854775807.1293813", "1e18",
	    "1e+18", "1."};
	duckdb::vector<int64_t> expected_values_str = {10,
	                                               -10,
	                                               9223372036854775807LL,
	                                               -9223372036854775807LL,
	                                               1,
	                                               -9223372036854775807LL,
	                                               1000000000000000000LL,
	                                               1000000000000000000LL,
	                                               1};
	// out-of-range values, malformed exponents and a bare sign must be rejected
	duckdb::vector<string> broken_values_str = {"9223372036854775808",
	                                            "-9223372036854775809",
	                                            "10000000000000000000000000000000000000000000000000000000000000",
	                                            "aaaa",
	                                            "19A",
	                                            "",
	                                            "1.A",
	                                            "1.2382398723A",
	                                            "1e++1",
	                                            "1e+1+1",
	                                            "1e+1-1",
	                                            "+"};
	TestStringCast<int64_t>(working_values_str, expected_values_str, broken_values_str);
	// exhaustively check powers of ten in exponent notation
	TestExponent<int64_t>();
}
// Parses each working string into a floating-point DST and compares it
// approximately (exact equality is unreliable for floats), then verifies the
// value survives a round-trip through ConvertToString. Every broken string
// must be rejected by both the throwing and non-throwing cast paths.
template <class DST>
static void TestStringCastDouble(duckdb::vector<string> &working_values, duckdb::vector<DST> &expected_values,
                                 duckdb::vector<string> &broken_values) {
	DST result;
	for (idx_t i = 0; i < working_values.size(); i++) {
		auto &value = working_values[i];
		auto expected_value = expected_values[i];
		REQUIRE_NOTHROW(Cast::Operation<string_t, DST>(string_t(value)) == expected_value);
		REQUIRE(TryCast::Operation<string_t, DST>(string_t(value), result));
		REQUIRE(ApproxEqual(result, expected_value));
		// round-trip: format the expected value to text and parse it back
		auto to_str_and_back =
		    Cast::Operation<string_t, DST>(string_t(ConvertToString::Operation<DST>(expected_value)));
		REQUIRE(ApproxEqual(to_str_and_back, expected_value));
	}
	for (auto &value : broken_values) {
		REQUIRE_THROWS(Cast::Operation<string_t, DST>(string_t(value)));
		REQUIRE(!TryCast::Operation<string_t, DST>(string_t(value), result));
	}
}
TEST_CASE("Test casting to float", "[cast]") {
	// string -> float: long decimal tails are truncated to float precision
	duckdb::vector<string> working_values = {
	    "1.3", "1.34514", "1e10", "1e-2", "-1e-1", "1.1781237378938173987123987123981723981723981723987123",
	    "1.123456789", "1."};
	duckdb::vector<float> expected_values = {
	    1.3f, 1.34514f, 1e10f, 1e-2f, -1e-1f, 1.1781237378938173987123987123981723981723981723987123f,
	    1.123456789f, 1.0f};
	// malformed numbers (stray characters, double exponents/dots) are rejected
	duckdb::vector<string> broken_values = {
	    "-", "", "aaa",
	    "12aaa", "1e10e10", "1e",
	    "1e-", "1e10a", "1.1781237378938173987123987123981723981723981723934834583490587123w",
	    "1.2.3"};
	TestStringCastDouble<float>(working_values, expected_values, broken_values);
}
TEST_CASE("Test casting to double", "[cast]") {
	// string -> double: signs, exponent notation and surrounding whitespace
	// are accepted
	duckdb::vector<string> working_values = {"1.3",
	                                         "+1.3",
	                                         "1.34514",
	                                         "1e10",
	                                         "1e-2",
	                                         "-1e-1",
	                                         "1.1781237378938173987123987123981723981723981723987123",
	                                         "1.123456789",
	                                         "1.",
	                                         "-1.2",
	                                         "-1.2e1",
	                                         " 1.2 ",
	                                         " 1.2e2 ",
	                                         " \t 1.2e2 \t"};
	duckdb::vector<double> expected_values = {
	    1.3, 1.3, 1.34514, 1e10, 1e-2, -1e-1, 1.1781237378938173987123987123981723981723981723987123,
	    1.123456789, 1.0, -1.2, -12, 1.2, 120, 120};
	// malformed numbers, interior whitespace and a bare sign are rejected
	duckdb::vector<string> broken_values = {
	    "-", "", "aaa",
	    "12aaa", "1e10e10", "1e",
	    "1e-", "1e10a", "1.1781237378938173987123987123981723981723981723934834583490587123w",
	    "1.2.3", "1.222.", "1..",
	    "1 . 2", "1. 2", "1.2 e20",
	    "+"};
	TestStringCastDouble<double>(working_values, expected_values, broken_values);
}

View File

@@ -0,0 +1,411 @@
# name: test/common/test_cast_hugeint.test
# description: Test hugeint casting from various types
# group: [common]
# test float -> hugeint casts
statement ok
CREATE TABLE working_floats(f FLOAT);
CREATE TABLE broken_floats(f FLOAT);
statement ok
INSERT INTO working_floats VALUES (10.0), (-10.0), (32767.0), (-32767.0), (1.3), (-2.7), (2000000000.0), (-2000000000.0), (4000000000000000000.0), (-4000000000000000000.0), (1329227995784915872903807060280344576.0), (-1329227995784915872903807060280344576.0);
statement ok
INSERT INTO broken_floats VALUES (170141183460469231731687303715884105729.0), (-170141183460469231731687303715884105729.0);
query I
SELECT f::HUGEINT::FLOAT FROM working_floats
----
10.0
-10.0
32767.0
-32767.0
1.0
-3.0
2000000000.0
-2000000000.0
4000000000000000000.0
-4000000000000000000.0
1329227995784915872903807060280344576
-1329227995784915872903807060280344576
# test broken casts
# we test one by one to ensure that every single value is broken
loop i 0 2
statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_floats ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*FLOAT.*is out of range for.*INT128.*
endloop
# test double -> hugeint casts
statement ok
CREATE TABLE working_doubles(f DOUBLE);
CREATE TABLE broken_doubles(f DOUBLE);
statement ok
INSERT INTO working_doubles VALUES (10.0), (-10.0), (32767.0), (-32767.0), (1.3), (-2.7), (2000000000.0), (-2000000000.0), (4000000000000000000.0), (-4000000000000000000.0), (1329227995784915872903807060280344576.0), (-1329227995784915872903807060280344576.0);
statement ok
INSERT INTO broken_doubles VALUES (1361129467683753853853498429727072845824.0), (-1361129467683753853853498429727072845824.0), (1.0e100), (-1.0e100);
query I
SELECT f::HUGEINT::DOUBLE FROM working_doubles
----
10.0
-10.0
32767.0
-32767.0
1.0
-3.0
2000000000.0
-2000000000.0
4000000000000000000.0
-4000000000000000000.0
1329227995784915872903807060280344576
-1329227995784915872903807060280344576
# we handle the values one by one here
loop i 0 4
statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_doubles ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*DOUBLE.*is out of range for.*INT128.*
endloop
# test varchar -> hugeint casts
statement ok
CREATE TABLE working_strings(f VARCHAR);
CREATE TABLE broken_strings(f VARCHAR);
statement ok
INSERT INTO working_strings VALUES ('10'), ('-10'), ('-1329227995784915872903807060280344576'), ('170141183460469231731687303715884105727'), ('-170141183460469231731687303715884105728'), ('1.3'), ('-9223372036854775807.1293813'), ('1e18'), ('1e+18'), ('1.'), ('.1'), ('0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'), ('1000e-40'), ('10000e-3');
statement ok
INSERT INTO broken_strings VALUES ('170141183460469231731687303715884105728'), ('-170141183460469231731687303715884105729'), ('10000000000000000000000000000000000000000000000000000000000000'), ('aaaa'), ('19A'), (''), ('1.A'), ('1.2382398723A'), ('1e++1'), ('1e+1+1'), ('1e+1-1'), ('+'), ('.'), ('. '), ('10000000000e37');
# cast the working strings to HUGEINT; fractional parts are rounded away and
# exponents are applied, so e.g. '1.3' -> 1 and '1e18' -> 1000000000000000000
query I
SELECT f::HUGEINT FROM working_strings
----
10
-10
-1329227995784915872903807060280344576
170141183460469231731687303715884105727
-170141183460469231731687303715884105728
1
-9223372036854775807
1000000000000000000
1000000000000000000
1
0
0
0
10

# every broken string must fail; we test one by one so each single value is
# checked. broken_strings holds 15 rows and the upper bound of "loop" is
# exclusive, so iterate i over [0, 15)
loop i 0 15

statement error
SELECT f::HUGEINT FROM (SELECT f FROM broken_strings ORDER BY f LIMIT 1 OFFSET ${i}) t1
----
<REGEX>:Conversion Error.*string.*INT128.*

endloop
# cast all powers of ten
statement ok
CREATE TABLE powers_of_ten(p VARCHAR);
statement ok
INSERT INTO powers_of_ten VALUES ('1'), ('10'), ('100'), ('1000'), ('10000'), ('100000'), ('1000000'), ('10000000'), ('100000000'), ('1000000000'), ('10000000000'), ('100000000000'), ('1000000000000'), ('10000000000000'), ('100000000000000'), ('1000000000000000'), ('10000000000000000'), ('100000000000000000'), ('1000000000000000000'), ('10000000000000000000'), ('100000000000000000000'), ('1000000000000000000000'), ('10000000000000000000000'), ('100000000000000000000000'), ('1000000000000000000000000'), ('10000000000000000000000000'), ('100000000000000000000000000'), ('1000000000000000000000000000'), ('10000000000000000000000000000'), ('100000000000000000000000000000'), ('1000000000000000000000000000000'), ('10000000000000000000000000000000'), ('100000000000000000000000000000000'), ('1000000000000000000000000000000000'), ('10000000000000000000000000000000000'), ('100000000000000000000000000000000000'), ('1000000000000000000000000000000000000'), ('10000000000000000000000000000000000000'), ('100000000000000000000000000000000000000'), ('-1'), ('-10'), ('-100'), ('-1000'), ('-10000'), ('-100000'), ('-1000000'), ('-10000000'), ('-100000000'), ('-1000000000'), ('-10000000000'), ('-100000000000'), ('-1000000000000'), ('-10000000000000'), ('-100000000000000'), ('-1000000000000000'), ('-10000000000000000'), ('-100000000000000000'), ('-1000000000000000000'), ('-10000000000000000000'), ('-100000000000000000000'), ('-1000000000000000000000'), ('-10000000000000000000000'), ('-100000000000000000000000'), ('-1000000000000000000000000'), ('-10000000000000000000000000'), ('-100000000000000000000000000'), ('-1000000000000000000000000000'), ('-10000000000000000000000000000'), ('-100000000000000000000000000000'), ('-1000000000000000000000000000000'), ('-10000000000000000000000000000000'), ('-100000000000000000000000000000000'), ('-1000000000000000000000000000000000'), ('-10000000000000000000000000000000000'), ('-100000000000000000000000000000000000'), 
('-1000000000000000000000000000000000000'), ('-10000000000000000000000000000000000000'), ('-100000000000000000000000000000000000000');
query I
SELECT p::HUGEINT FROM powers_of_ten
----
1
10
100
1000
10000
100000
1000000
10000000
100000000
1000000000
10000000000
100000000000
1000000000000
10000000000000
100000000000000
1000000000000000
10000000000000000
100000000000000000
1000000000000000000
10000000000000000000
100000000000000000000
1000000000000000000000
10000000000000000000000
100000000000000000000000
1000000000000000000000000
10000000000000000000000000
100000000000000000000000000
1000000000000000000000000000
10000000000000000000000000000
100000000000000000000000000000
1000000000000000000000000000000
10000000000000000000000000000000
100000000000000000000000000000000
1000000000000000000000000000000000
10000000000000000000000000000000000
100000000000000000000000000000000000
1000000000000000000000000000000000000
10000000000000000000000000000000000000
100000000000000000000000000000000000000
-1
-10
-100
-1000
-10000
-100000
-1000000
-10000000
-100000000
-1000000000
-10000000000
-100000000000
-1000000000000
-10000000000000
-100000000000000
-1000000000000000
-10000000000000000
-100000000000000000
-1000000000000000000
-10000000000000000000
-100000000000000000000
-1000000000000000000000
-10000000000000000000000
-100000000000000000000000
-1000000000000000000000000
-10000000000000000000000000
-100000000000000000000000000
-1000000000000000000000000000
-10000000000000000000000000000
-100000000000000000000000000000
-1000000000000000000000000000000
-10000000000000000000000000000000
-100000000000000000000000000000000
-1000000000000000000000000000000000
-10000000000000000000000000000000000
-100000000000000000000000000000000000
-1000000000000000000000000000000000000
-10000000000000000000000000000000000000
-100000000000000000000000000000000000000
query I
SELECT p::HUGEINT::VARCHAR FROM powers_of_ten
----
1
10
100
1000
10000
100000
1000000
10000000
100000000
1000000000
10000000000
100000000000
1000000000000
10000000000000
100000000000000
1000000000000000
10000000000000000
100000000000000000
1000000000000000000
10000000000000000000
100000000000000000000
1000000000000000000000
10000000000000000000000
100000000000000000000000
1000000000000000000000000
10000000000000000000000000
100000000000000000000000000
1000000000000000000000000000
10000000000000000000000000000
100000000000000000000000000000
1000000000000000000000000000000
10000000000000000000000000000000
100000000000000000000000000000000
1000000000000000000000000000000000
10000000000000000000000000000000000
100000000000000000000000000000000000
1000000000000000000000000000000000000
10000000000000000000000000000000000000
100000000000000000000000000000000000000
-1
-10
-100
-1000
-10000
-100000
-1000000
-10000000
-100000000
-1000000000
-10000000000
-100000000000
-1000000000000
-10000000000000
-100000000000000
-1000000000000000
-10000000000000000
-100000000000000000
-1000000000000000000
-10000000000000000000
-100000000000000000000
-1000000000000000000000
-10000000000000000000000
-100000000000000000000000
-1000000000000000000000000
-10000000000000000000000000
-100000000000000000000000000
-1000000000000000000000000000
-10000000000000000000000000000
-100000000000000000000000000000
-1000000000000000000000000000000
-10000000000000000000000000000000
-100000000000000000000000000000000
-1000000000000000000000000000000000
-10000000000000000000000000000000000
-100000000000000000000000000000000000
-1000000000000000000000000000000000000
-10000000000000000000000000000000000000
-100000000000000000000000000000000000000
# test large constants and correct parsing into either HUGEINT or DOUBLE
query II
SELECT typeof(4832904823908104981209840981240981277), 4832904823908104981209840981240981277
----
HUGEINT 4832904823908104981209840981240981277
query II
SELECT typeof(48329048239081049812098409812409812772), 48329048239081049812098409812409812772
----
HUGEINT 48329048239081049812098409812409812772
query II
SELECT typeof(483290482390810498120984098124098127725), 483290482390810498120984098124098127725
----
DOUBLE 483290482390810498120984098124098127725.0
query II
SELECT typeof(4832904823908104981209840981240981277256), 4832904823908104981209840981240981277256
----
DOUBLE 4832904823908104981209840981240981277256.0
query II
SELECT typeof(48329048239081049812098409812409812772568), 48329048239081049812098409812409812772568
----
DOUBLE 48329048239081049812098409812409812772568.0
query II
SELECT typeof(483290482390810498120984098124098127725683), 483290482390810498120984098124098127725683
----
DOUBLE 483290482390810498120984098124098127725683.0
query I
SELECT 0::HUGEINT::VARCHAR
----
0
# hugeint -> uints
# uint8
query I
select '255'::HUGEINT::UINT8
----
255
statement error
select '-1'::hugeint::uint8
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT8.*
statement error
select '256'::hugeint::uint8
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT8.*
# uint16
query I
select '65535'::HUGEINT::UINT16
----
65535
statement error
select '-1'::hugeint::uint16
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT16.*
statement error
select '65536'::hugeint::uint16
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT16.*
# uint32
query I
select '4294967295'::HUGEINT::UINT32
----
4294967295
statement error
select '-1'::hugeint::uint32
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT32.*
statement error
select '4294967296'::hugeint::uint32
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT32.*
# UINT64
query I
select '18446744073709551615'::HUGEINT::UINT64
----
18446744073709551615
statement error
select '-1'::hugeint::UINT64
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT64.*
statement error
select '18446744073709551616'::hugeint::UINT64
----
<REGEX>:Conversion Error.*INT128.*is out of range for.*UINT64.*
# uint -> hugeint
query I
select '255'::UINT8::HUGEINT
----
255
query I
select '65535'::UINT16::HUGEINT
----
65535
query I
select '4294967295'::UINT32::HUGEINT
----
4294967295
query I
select '18446744073709551615'::UINT64::HUGEINT
----
18446744073709551615

View File

@@ -0,0 +1,168 @@
# name: test/common/test_cast_struct.test
# description: Test casting structs
# group: [common]
statement ok
PRAGMA enable_verification
statement error
SELECT struct_pack(b => 42)::STRUCT(a INT);
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_extract(struct_pack(b => 42)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42)::STRUCT(a STRING), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => 42)::ROW(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42)::ROW(a INT), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => 42::DOUBLE)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => 42::DOUBLE)::STRUCT(a INT), 'a');
----
42
statement error
SELECT struct_extract(struct_pack(b => '42'::DOUBLE)::STRUCT(a INT), 'a');
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_extract(struct_pack(a => '42'::DOUBLE)::STRUCT(a INT), 'a');
----
42
statement error
SELECT struct_pack(b => '42'::DOUBLE)::STRUCT(a INT, c STRING)
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_pack(b => 'hello'::STRING)::STRUCT(b INT)
----
Could not convert string 'hello' to INT32
statement error
SELECT struct_pack(a => 'hello'::STRING, b => 'world'::STRING)::STRUCT(a STRING, b INT)
----
Could not convert string 'world' to INT32
statement error
SELECT struct_pack(a => [1, 2, 3])::STRUCT(a INT)
----
Unimplemented type for cast (INTEGER[] -> INTEGER)
statement error
SELECT struct_pack(a => struct_pack(b => 42)::STRUCT(b INT))::STRUCT(a INT)
----
Unimplemented type for cast (STRUCT(b INTEGER) -> INTEGER)
statement error
SELECT struct_pack(b => 'hello'::STRING)::STRUCT(a INT)
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
statement error
SELECT struct_pack(b => '42'::DOUBLE, c => 'asdf'::STRING)::STRUCT(a1 INT, a2 STRING);
----
<REGEX>:Binder Error.*STRUCT to STRUCT cast must have at least one matching member.*
query I
SELECT struct_pack(a1 => '42'::DOUBLE, a2 => 'asdf'::STRING)::STRUCT(a1 INT, a2 STRING);
----
{'a1': 42, 'a2': asdf}
query I
SELECT ROW(42, 'asdf');
----
(42, asdf)
statement error
SELECT ROW();
----
pack nothing into a struct
query I
SELECT ROW(NULL);
----
(NULL)
query I
SELECT ROW(NULL, NULL);
----
(NULL, NULL)
# MB example
query I
SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))
----
{'a': 1, 'b': 2}
query I
SELECT a::ROW(a INT, b STRING) r FROM (VALUES (ROW(1, 'asdf')), (ROW(4, 'fdsa'))) s(a);
----
{'a': 1, 'b': asdf}
{'a': 4, 'b': fdsa}
statement error
SELECT struct_extract({'a': a}, a) FROM (SELECT a::VARCHAR AS a FROM range(10) tbl(a));
----
<REGEX>:.*Binder Error.*Key name for struct_extract needs to be a constant string.*
statement error
SELECT struct_extract({'a': 42}, 42)
----
<REGEX>:.*Binder Error.*can only be used on unnamed structs.*
query I
SELECT struct_extract_at({'a': 42}, 1)
----
42
statement error
SELECT struct_extract_at({'a': 42}, 0)
----
<REGEX>:.*Binder Error.*out of range.*
statement error
SELECT struct_extract_at({'a': 42}, 42)
----
<REGEX>:.*Binder Error.*out of range.*
# Test string to struct cast within struct casting.
query I
SELECT {a: {b: '{a: 3, b: "Hello World"}'}}::STRUCT(a STRUCT(b STRUCT(a INT, b VARCHAR)));
----
{'a': {'b': {'a': 3, 'b': Hello World}}}
# Test if try_cast continues after encountering error.
query I
SELECT TRY_CAST(struct_pack(a => 4, b => 'Ducky', c => '1964-06-15')
AS STRUCT(a INT, b DOUBLE, c DATE));
----
{'a': 4, 'b': NULL, 'c': 1964-06-15}
query I
SELECT TRY_CAST(struct_pack(a => 4, b => 'Ducky', c => 'Tommorow', d => {a:3.0})
AS STRUCT(a VARCHAR[], b VARCHAR, c DATE, d STRUCT(a INT)));
----
{'a': NULL, 'b': Ducky, 'c': NULL, 'd': {'a': 3}}

View File

@@ -0,0 +1,40 @@
#include "catch.hpp"
#include "duckdb/common/checksum.hpp"
#include <vector>
using namespace duckdb;
using namespace std;
#define NUM_INTS 10
TEST_CASE("Checksum tests", "[checksum]") {
	// fill a buffer with the values 1..NUM_INTS
	int buffer[NUM_INTS];
	for (size_t idx = 0; idx < NUM_INTS; idx++) {
		buffer[idx] = idx + 1;
	}
	// verify that checksumming the same bytes twice gives the same result
	uint64_t first = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	uint64_t second = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(first == second);
	// flipping a single value should change the checksum
	buffer[3] = 1;
	uint64_t changed = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(first != changed);
	// a zero in the input must not zero out the checksum
	buffer[3] = 0;
	uint64_t with_zero = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(with_zero != 0);
	// moving the zero to a different position should alter the checksum
	buffer[3] = 4;
	buffer[4] = 0;
	uint64_t moved_zero = Checksum((uint8_t *)buffer, sizeof(int) * NUM_INTS);
	REQUIRE(with_zero != moved_zero);
	REQUIRE(first != with_zero);
	REQUIRE(first != moved_zero);
}

View File

@@ -0,0 +1,238 @@
#include "catch.hpp"
#include "duckdb/common/file_buffer.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/fstream.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/virtual_file_system.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
// Writes a small marker file at the given path. The path may be a plain native
// path or a file:// URI; URIs are converted back to a native path first.
static void create_dummy_file(string fname) {
	string normalized_string;
	if (StringUtil::StartsWith(fname, "file:///")) {
		// strip the URI scheme: on Windows "file:///X:/..." becomes "X:/...",
		// elsewhere "file:///path" keeps its leading slash ("/path")
#ifdef _WIN32
		normalized_string = fname.substr(8);
#else
		normalized_string = fname.substr(7);
#endif
	} else if (StringUtil::StartsWith(fname, "file://localhost/")) {
		// NOTE(review): "file://localhost/" is 17 characters, yet both branches
		// skip 18 and are identical, which makes the #ifdef redundant and looks
		// off by one relative to the file:/// handling above -- confirm against
		// the inputs this helper actually receives before relying on it.
#ifdef _WIN32
		normalized_string = fname.substr(18);
#else
		normalized_string = fname.substr(18);
#endif
	} else {
		// already a native path
		normalized_string = fname;
	}
	ofstream outfile(normalized_string);
	outfile << "I_AM_A_DUMMY" << endl;
	outfile.close();
}
// Exercises directory/file operations through three spellings of the file URI
// scheme (file:///, file://localhost/, file:) against the local file system.
TEST_CASE("Make sure the file:// protocol works as expected", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	auto dname = fs->JoinPath(fs->GetWorkingDirectory(), TestCreatePath("TEST_DIR"));
	auto dname_converted_slashes = StringUtil::Replace(dname, "\\", "/");
	// handle differences between windows and linux
	if (StringUtil::StartsWith(dname_converted_slashes, "/")) {
		dname_converted_slashes = dname_converted_slashes.substr(1);
	}
	// Path of format file:///bla/bla on 'nix and file:///X:/bla/bla on Windows
	auto dname_triple_slash = fs->JoinPath("file://", dname_converted_slashes);
	// Path of format file://localhost/bla/bla on 'nix and file://localhost/X:/bla/bla on Windows
	auto dname_localhost = fs->JoinPath("file://localhost", dname_converted_slashes);
	// Path of format file:/bla/bla (no authority component)
	auto dname_no_host = fs->JoinPath("file:", dname_converted_slashes);
	string fname = "TEST_FILE";
	string fname2 = "TEST_FILE_TWO";
	// start from a clean directory
	if (fs->DirectoryExists(dname_triple_slash)) {
		fs->RemoveDirectory(dname_triple_slash);
	}
	fs->CreateDirectory(dname_triple_slash);
	REQUIRE(fs->DirectoryExists(dname_triple_slash));
	REQUIRE(!fs->FileExists(dname_triple_slash));
	// we can call this again and nothing happens
	fs->CreateDirectory(dname_triple_slash);
	auto fname_in_dir = fs->JoinPath(dname_triple_slash, fname);
	auto fname_in_dir2 = fs->JoinPath(dname_localhost, fname2);
	auto fname_in_dir3 = fs->JoinPath(dname_no_host, fname2);
	create_dummy_file(fname_in_dir);
	// the file created through the file:/// URI must be visible through it
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->DirectoryExists(fname_in_dir));
	size_t n_files = 0;
	REQUIRE(fs->ListFiles(dname_triple_slash, [&n_files](const string &path, bool) { n_files++; }));
	REQUIRE(n_files == 1);
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
	// globbing through the URI should find exactly the created file
	auto file_listing = fs->Glob(fs->JoinPath(dname_triple_slash, "*"));
	REQUIRE(file_listing[0].path == fname_in_dir);
	// move between two different URI spellings of the same directory
	fs->MoveFile(fname_in_dir, fname_in_dir2);
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(fs->FileExists(fname_in_dir2));
	auto file_listing_after_move = fs->Glob(fs->JoinPath(dname_no_host, "*"));
	REQUIRE(file_listing_after_move[0].path == fname_in_dir3);
	// removing the directory removes its contents as well
	fs->RemoveDirectory(dname_triple_slash);
	REQUIRE(!fs->DirectoryExists(dname_triple_slash));
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
}
TEST_CASE("Make sure file system operators work as advertised", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	auto dname = TestCreatePath("TEST_DIR");
	string fname = "TEST_FILE";
	string fname2 = "TEST_FILE_TWO";
	// start from a clean slate
	if (fs->DirectoryExists(dname)) {
		fs->RemoveDirectory(dname);
	}
	fs->CreateDirectory(dname);
	REQUIRE(fs->DirectoryExists(dname));
	REQUIRE(!fs->FileExists(dname));
	// we can call this again and nothing happens
	fs->CreateDirectory(dname);
	auto fname_in_dir = fs->JoinPath(dname, fname);
	auto fname_in_dir2 = fs->JoinPath(dname, fname2);
	create_dummy_file(fname_in_dir);
	// a file is a file, not a directory
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->DirectoryExists(fname_in_dir));
	// the directory contains exactly the one file we created
	size_t n_files = 0;
	REQUIRE(fs->ListFiles(dname, [&n_files](const string &path, bool) { n_files++; }));
	REQUIRE(n_files == 1);
	REQUIRE(fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
	// moving renames the file: the old name disappears, the new one appears
	fs->MoveFile(fname_in_dir, fname_in_dir2);
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(fs->FileExists(fname_in_dir2));
	// removing the directory also removes its contents
	fs->RemoveDirectory(dname);
	REQUIRE(!fs->DirectoryExists(dname));
	REQUIRE(!fs->FileExists(fname_in_dir));
	REQUIRE(!fs->FileExists(fname_in_dir2));
}
// note: the integer count is chosen as 512 so that we write 512*8=4096 bytes to the file
// this is required for the Direct-IO as on Windows Direct-IO can only write multiples of sector sizes
// sector sizes are typically one of [512/1024/2048/4096] bytes, hence a 4096 bytes write succeeds.
#define INTEGER_COUNT 512
// Write INTEGER_COUNT int64 values to a file, read them back and verify them.
TEST_CASE("Test file operations", "[file_system]") {
	duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
	duckdb::unique_ptr<FileHandle> handle;
	int64_t test_data[INTEGER_COUNT];
	for (int i = 0; i < INTEGER_COUNT; i++) {
		test_data[i] = i;
	}
	auto fname = TestCreatePath("test_file");
	// standard reading/writing test
	// open file for writing
	REQUIRE_NOTHROW(handle = fs->OpenFile(fname, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE));
	// write the INTEGER_COUNT integers (the old comment claimed 10)
	REQUIRE_NOTHROW(handle->Write(QueryContext(), (void *)test_data, sizeof(int64_t) * INTEGER_COUNT, 0));
	// close the file
	handle.reset();
	// wipe the buffer so a successful read cannot be confused with stale data
	for (int i = 0; i < INTEGER_COUNT; i++) {
		test_data[i] = 0;
	}
	// now open the file for reading
	REQUIRE_NOTHROW(handle = fs->OpenFile(fname, FileFlags::FILE_FLAGS_READ));
	// read the integers back
	REQUIRE_NOTHROW(handle->Read(QueryContext(), (void *)test_data, sizeof(int64_t) * INTEGER_COUNT, 0));
	// verify that every value round-tripped (the original only checked the first 10)
	for (int i = 0; i < INTEGER_COUNT; i++) {
		REQUIRE(test_data[i] == i);
	}
	handle.reset();
	fs->RemoveFile(fname);
}
TEST_CASE("absolute paths", "[file_system]") {
	duckdb::LocalFileSystem fs;
#ifndef _WIN32
	// on POSIX only paths starting with '/' are absolute
	REQUIRE(fs.IsPathAbsolute("/home/me"));
	REQUIRE(!fs.IsPathAbsolute("./me"));
	REQUIRE(!fs.IsPathAbsolute("me"));
#else
	// extended-length \\?\ prefixed paths are absolute
	const std::string long_path = "\\\\?\\D:\\very long network\\";
	REQUIRE(fs.IsPathAbsolute(long_path));
	// UNC network shares are absolute
	const std::string network = "\\\\network_drive\\filename.csv";
	REQUIRE(fs.IsPathAbsolute(network));
	// drive-letter paths are absolute with either slash flavor
	REQUIRE(fs.IsPathAbsolute("C:\\folder\\filename.csv"));
	REQUIRE(fs.IsPathAbsolute("C:/folder\\filename.csv"));
	// normalization lower-cases the drive letter and unifies separators to backslashes
	REQUIRE(fs.NormalizeAbsolutePath("C:/folder\\filename.csv") == "c:\\folder\\filename.csv");
	REQUIRE(fs.NormalizeAbsolutePath(network) == network);
	REQUIRE(fs.NormalizeAbsolutePath(long_path) == "\\\\?\\d:\\very long network\\");
#endif
}
// Verify VirtualFileSystem::ExtractSubSystem semantics: unknown names and
// already-extracted or disabled subsystems all yield nullptr.
TEST_CASE("extract subsystem", "[file_system]") {
	duckdb::VirtualFileSystem vfs;
	auto local_filesystem = FileSystem::CreateLocal();
	auto *local_filesystem_ptr = local_filesystem.get();
	vfs.RegisterSubSystem(std::move(local_filesystem));
	// Extracting a non-existent filesystem gets nullptr.
	REQUIRE(vfs.ExtractSubSystem("non-existent") == nullptr);
	// Extracting an existing filesystem transfers ownership back to the caller.
	auto extracted_filesystem = vfs.ExtractSubSystem(local_filesystem_ptr->GetName());
	REQUIRE(extracted_filesystem.get() == local_filesystem_ptr);
	// Re-extraction gets nullptr: the subsystem is no longer registered.
	// (The original test repeated the "non-existent" lookup here, which did not
	// actually exercise re-extraction.)
	REQUIRE(vfs.ExtractSubSystem(extracted_filesystem->GetName()) == nullptr);
	// Register the subsystem again but disable it; disabled subsystems may not be extracted.
	const ::duckdb::string target_fs = extracted_filesystem->GetName();
	const ::duckdb::vector<string> disabled_subfilesystems {target_fs};
	vfs.RegisterSubSystem(std::move(extracted_filesystem));
	vfs.SetDisabledFileSystems(disabled_subfilesystems);
	REQUIRE(vfs.ExtractSubSystem(target_fs) == nullptr);
}
TEST_CASE("re-register subsystem", "[file_system]") {
	duckdb::VirtualFileSystem vfs;
	// Registering a local filesystem for the first time succeeds.
	vfs.RegisterSubSystem(FileSystem::CreateLocal());
	// Registering a second subsystem under the same name must throw.
	REQUIRE_THROWS(vfs.RegisterSubSystem(FileSystem::CreateLocal()));
}

View File

@@ -0,0 +1,98 @@
#include "catch.hpp"
#include "duckdb/common/serializer/binary_deserializer.hpp"
#include "duckdb/common/serializer/binary_serializer.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/common/types/hyperloglog.hpp"
using namespace duckdb;
using namespace std;
TEST_CASE("Test that hyperloglog works", "[hyperloglog]") {
	HyperLogLog log;
	// add a million elements of the same value; the cardinality estimate must stay exactly 1
	int x = 4;
	for (size_t i = 0; i < 1000000; i++) {
		log.InsertElement(Hash(x));
	}
	REQUIRE(log.Count() == 1);
	// now add a million different values
	HyperLogLog log2;
	for (size_t i = 0; i < 1000000; i++) {
		x = i;
		log2.InsertElement(Hash(x));
	}
	// the count is approximate, but should be pretty close to a million (within 5%)
	size_t count = log2.Count();
	REQUIRE(count > 950000LL);
	REQUIRE(count < 1050000LL);
	// now we can merge the HLLs
	log.Merge(log2);
	// the count should be pretty much the same (log only contributed one distinct value)
	count = log.Count();
	REQUIRE(count > 950000LL);
	REQUIRE(count < 1050000LL);
	// now test composability of the merge
	// add everything to one big_hll one
	// add chunks to small_hll ones and then merge them
	// the result should be the same
	HyperLogLog big_hll;
	HyperLogLog small_hll[16];
	for (size_t i = 0; i < 1000000; i++) {
		// note: 3 / 2 is integer division, so this is modulo (i + 1); it simply
		// produces a varied stream of repeating values
		x = ((2 * i) + 3) % (i + 3 / 2);
		big_hll.InsertElement(Hash(x));
		small_hll[i % 16].InsertElement(Hash(x));
	}
	// now merge them into one big_hll HyperLogLog
	for (idx_t i = 1; i < 16; i++) {
		small_hll[0].Merge(small_hll[i]);
	}
	// the result should be identical to the big_hll one
	REQUIRE(small_hll[0].Count() == big_hll.Count());
}
TEST_CASE("Test different hyperloglog version serialization", "[hyperloglog]") {
	Allocator allocator;
	MemoryStream stream(allocator);
	// serialize using the old (v1.0.0-compatible) format
	SerializationOptions options;
	options.serialization_compatibility = SerializationCompatibility::FromString("v1.0.0");
	// Add 100M values to a NEW HyperLogLog
	HyperLogLog original_log;
	for (size_t i = 0; i < 100000000; i++) {
		original_log.InsertElement(Hash(i));
		switch (i + 1) {
		case 1:
		case 10:
		case 100:
		case 1000:
		case 10000:
		case 100000:
		case 1000000:
		case 10000000:
		case 100000000:
			break; // We roundtrip the serialization every order of magnitude
		default:
			continue;
		}
		// Grab the count
		const auto original_count = original_log.Count();
		// Serialize it as an OLD HyperLogLog
		stream.Rewind();
		BinarySerializer::Serialize(original_log, stream, options);
		// Deserialize it, creating a NEW HyperLogLog from the OLD one
		stream.Rewind();
		auto deserialized_log = BinaryDeserializer::Deserialize<HyperLogLog>(stream);
		// Verify that the deserialized count is equal
		const auto deserialized_count = deserialized_log->Count();
		REQUIRE(original_count == deserialized_count);
	}
}

View File

@@ -0,0 +1,98 @@
# name: test/common/test_local_file_urls.test
# group: [common]

# Note: __WORKING_DIRECTORY__ will be replaced with the full path to the working dir of the tests (root of duckdb repo)

# Build the three file-URL spellings of the working directory
# (backslashes converted to forward slashes, leading slash trimmed)
statement ok
SET VARIABLE work_dir_no_host='file:/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_triple_slash='file:///' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_localhost='file://localhost/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

# testing file:/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file:///some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file://localhost/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/normalize.csv');
----
John	ipsum

# Test glob with file:/some/path; the URL prefix must be preserved in the results
query II
SELECT file[:6], parse_filename(file) FROM glob(getvariable('work_dir_no_host') || '/data/*/bad_date_timestamp_mix.csv')
----
file:/	bad_date_timestamp_mix.csv

# Test glob with file:///some/path
query II
SELECT file[:8], parse_filename(file) FROM glob(getvariable('work_dir_triple_slash') || '/data/*/bad_date_timestamp_mix.csv')
----
file:///	bad_date_timestamp_mix.csv

# Test glob with file://localhost/some/path/to/duckdb/repo
query II
SELECT file[:17], parse_filename(file) FROM glob(getvariable('work_dir_localhost') || '/data/*/bad_date_timestamp_mix.csv')
----
file://localhost/	bad_date_timestamp_mix.csv

# Test scanning multiple files using glob with file:/some/path
query III
SELECT id, filename[:6], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:/	test.csv
2	file:/	test.csv

# Test scanning multiple files using glob with file:///some/path
query III
SELECT id, filename[:8], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:///	test.csv
2	file:///	test.csv

# Test scanning multiple files using glob with file://localhost/some/path
query III
SELECT id, filename[:17], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file://localhost/	test.csv
2	file://localhost/	test.csv

require noforcestorage

# Ensure secrets work correctly using the file:// scheme
statement ok
create secret secret_file_url_tripleslash (TYPE HTTP, scope 'file:///');

statement ok
create secret secret_file_url_localhost (TYPE HTTP, scope 'file://localhost/');

statement ok
create secret secret_without_file_path (TYPE HTTP);

query I
SELECT name FROM which_secret(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_tripleslash

query I
SELECT name FROM which_secret(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_localhost

# raw paths now do not match the file:// scoped secrets
query I
SELECT name FROM which_secret('__WORKING_DIRECTORY__/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_without_file_path

View File

@@ -0,0 +1,60 @@
#include "catch.hpp"
#include "duckdb/common/numeric_utils.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
TEST_CASE("Numeric cast checks", "[numeric_cast]") {
#ifdef DUCKDB_CRASH_ON_ASSERT
	// under this build flag a failed cast aborts instead of throwing, so the
	// REQUIRE_THROWS checks below cannot run
	return;
#endif
	// unsigned -> unsigned
	// can not fail upcasting unsigned type
	REQUIRE_NOTHROW(NumericCast<uint16_t, uint8_t>(NumericLimits<uint8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<uint16_t, uint8_t>(NumericLimits<uint8_t>::Minimum()));
	// we can down cast if value fits
	REQUIRE_NOTHROW(NumericCast<uint8_t, uint16_t>(NumericLimits<uint8_t>::Maximum()));
	// but not if it doesn't
	REQUIRE_THROWS(NumericCast<uint8_t, uint16_t>(NumericLimits<uint8_t>::Maximum() + 1));
	// signed -> signed, same as above
	REQUIRE_NOTHROW(NumericCast<int16_t, int8_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<int16_t, int8_t>(NumericLimits<int8_t>::Minimum()));
	REQUIRE_NOTHROW(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Maximum() + 1));
	REQUIRE_THROWS(NumericCast<int8_t, int16_t>(NumericLimits<int8_t>::Minimum() - 1));
	// unsigned to signed
	REQUIRE_NOTHROW(NumericCast<int8_t, uint8_t>(NumericLimits<int8_t>::Maximum()));
	REQUIRE_NOTHROW(NumericCast<int8_t, uint8_t>(NumericLimits<uint8_t>::Minimum()));
	// uint8 max will not fit in int8
	REQUIRE_THROWS(NumericCast<int8_t, uint8_t>(NumericLimits<uint8_t>::Maximum()));
	// signed to unsigned
	// can cast int8 max to uint8
	REQUIRE_NOTHROW(NumericCast<uint8_t, int8_t>(NumericLimits<int8_t>::Maximum()));
	// cannot cast int8 min to uint8
	REQUIRE_THROWS(NumericCast<uint8_t, int8_t>(NumericLimits<int8_t>::Minimum()));
	// can't cast anything negative to anything unsigned
	REQUIRE_THROWS(NumericCast<uint64_t, int8_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int16_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int32_t>(-1));
	REQUIRE_THROWS(NumericCast<uint64_t, int64_t>(-1));
	// can't downcast big number
	REQUIRE_THROWS(NumericCast<int64_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int32_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint32_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int16_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint16_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<int8_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	REQUIRE_THROWS(NumericCast<uint8_t, uint64_t>(NumericLimits<uint64_t>::Maximum()));
	// TODO this should throw but doesn't
	// REQUIRE_THROWS(NumericCast<uint8_t, hugeint_t>(hugeint_t(-1)));
}

View File

@@ -0,0 +1,81 @@
#include "catch.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/main/config.hpp"
using namespace duckdb;
TEST_CASE("Test parse logical type", "[parse_logical_type]") {
	SECTION("simple types") {
		// primitive type names parse case-insensitively
		REQUIRE(DBConfig::ParseLogicalType("integer") == LogicalType::INTEGER);
		REQUIRE(DBConfig::ParseLogicalType("any") == LogicalType::ANY);
	}
	SECTION("nested types") {
		// list: [] suffix
		REQUIRE(DBConfig::ParseLogicalType("ANY[]") == LogicalType::LIST(LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[]") == LogicalType::LIST(LogicalType::VARCHAR));
		// array: [N] suffix with a fixed size
		REQUIRE(DBConfig::ParseLogicalType("ANY[3]") == LogicalType::ARRAY(LogicalType::ANY, 3));
		REQUIRE(DBConfig::ParseLogicalType("FLOAT[42]") == LogicalType::ARRAY(LogicalType::FLOAT, 42));
		// the maximum supported array size still parses
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[100000]") ==
		        LogicalType::ARRAY(LogicalType::VARCHAR, ArrayType::MAX_ARRAY_SIZE));
		// map: whitespace after the comma is optional
		REQUIRE(DBConfig::ParseLogicalType("MAP(VARCHAR, VARCHAR)") ==
		        LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR));
		REQUIRE(DBConfig::ParseLogicalType("MAP(ANY,ANY)") == LogicalType::MAP(LogicalType::ANY, LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("MAP(INTEGER,ANY)") ==
		        LogicalType::MAP(LogicalType::INTEGER, LogicalType::ANY));
		REQUIRE(DBConfig::ParseLogicalType("MAP(ANY, DOUBLE)") ==
		        LogicalType::MAP(LogicalType::ANY, LogicalType::DOUBLE));
		// union: named members, each with its own type
		child_list_t<LogicalType> union_members;
		union_members.emplace_back(make_pair("num", LogicalTypeId::INTEGER));
		union_members.emplace_back(make_pair("v", LogicalTypeId::VARCHAR));
		union_members.emplace_back(make_pair("f", LogicalTypeId::FLOAT));
		REQUIRE(DBConfig::ParseLogicalType("UNION(num INTEGER, v VARCHAR, f FLOAT)") ==
		        LogicalType::UNION(union_members));
		// struct: named fields, each with its own type
		child_list_t<LogicalType> struct_children;
		struct_children.emplace_back(make_pair("year", LogicalTypeId::BIGINT));
		struct_children.emplace_back(make_pair("month", LogicalTypeId::BIGINT));
		struct_children.emplace_back(make_pair("day", LogicalTypeId::BIGINT));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
		        LogicalType::STRUCT(struct_children));
	}
	SECTION("deeper nested types") {
		// list of lists
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[][]") ==
		        LogicalType::LIST(LogicalType::LIST(LogicalType::VARCHAR)));
		// array of lists
		REQUIRE(DBConfig::ParseLogicalType("VARCHAR[][3]") ==
		        LogicalType::ARRAY(LogicalType::LIST(LogicalType::VARCHAR), 3));
		// list of structs
		child_list_t<LogicalType> date_struct_children;
		date_struct_children.emplace_back(make_pair("year", LogicalTypeId::BIGINT));
		date_struct_children.emplace_back(make_pair("month", LogicalTypeId::BIGINT));
		date_struct_children.emplace_back(make_pair("day", LogicalTypeId::BIGINT));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(year BIGINT, month BIGINT, day BIGINT)[]") ==
		        LogicalType::LIST(LogicalType::STRUCT(date_struct_children)));
		// map with list as key
		REQUIRE(DBConfig::ParseLogicalType("MAP(VARCHAR[],FLOAT)") ==
		        LogicalType::MAP(LogicalType::LIST(LogicalType::VARCHAR), LogicalType::FLOAT));
		// struct with list, array and map
		child_list_t<LogicalType> mix_struct_children;
		mix_struct_children.emplace_back(make_pair("my_list", LogicalType::LIST(LogicalType::ANY)));
		mix_struct_children.emplace_back(make_pair("my_array", LogicalType::ARRAY(LogicalType::VARCHAR, 2)));
		mix_struct_children.emplace_back(
		    make_pair("my_map", LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR)));
		REQUIRE(DBConfig::ParseLogicalType("STRUCT(my_list ANY[], my_array VARCHAR[2], my_map MAP(VARCHAR,VARCHAR))") ==
		        LogicalType::STRUCT(mix_struct_children));
	}
}

View File

@@ -0,0 +1,517 @@
#include "catch.hpp"
#include "duckdb.hpp"
#include "duckdb/common/common.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/virtual_file_system.hpp"
#include "duckdb/main/materialized_query_result.hpp"
#include "test_config.hpp"
#include "test_helpers.hpp"
#include <iostream>
#include <shared_mutex>
using namespace duckdb;
// Logging toggles for the storage-fuzz tests in this file; flip for debugging.
// Not synchronized - set before the test runs.
bool g_enable_verbose_output = false;
bool g_enable_info_output = true;
// Log only when verbose output is enabled. Wrapped in do/while(0) so the macro
// behaves as a single statement (safe inside unbraced if/else).
#define PRINT_VERBOSE(x) \
	do { \
		if (g_enable_verbose_output) \
			std::cout << x << std::endl; \
	} while (0)
// Log informational messages (enabled by default).
#define PRINT_INFO(x) \
	do { \
		if (g_enable_info_output) \
			std::cout << x << std::endl; \
	} while (0)
// Returns true when `str` terminates with `suffix`; an empty suffix always matches.
bool ends_with(const std::string &str, const std::string &suffix) {
	// A suffix longer than the string can never match
	if (suffix.length() > str.length()) {
		return false;
	}
	// Walk both strings backwards; equal tails mean the suffix matches
	return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
// LocalFileSystem wrapper that can throw a single injected IOException at a chosen
// call site (write or fsync) on the database file, simulating a transient disk fault.
// All other operations pass straight through (with verbose logging).
class FaultInjectionFileSystem : public duckdb::LocalFileSystem {
public:
	// Call sites at which a fault may be injected.
	enum FaultInjectionSite {
		WRITE = 0,
		FSYNC = 1,
	};
	// Positional write; throws first if a WRITE fault is pending and this is the .db file.
	void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
		if (is_db_file(handle)) {
			ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
		}
		return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes, location);
	}
	// Fsync; throws first if an FSYNC fault is pending and this is the .db file.
	void FileSync(duckdb::FileHandle &handle) override {
		PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
		if (is_db_file(handle)) {
			ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
		}
		return duckdb::LocalFileSystem::FileSync(handle);
	}
	void RemoveFile(const duckdb::string &filename,
	                duckdb::optional_ptr<duckdb::FileOpener> opener = nullptr) override {
		PRINT_VERBOSE("FS remove " << filename);
		return duckdb::LocalFileSystem::RemoveFile(filename, opener);
	}
	void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
		PRINT_VERBOSE("FS truncate " << handle.GetPath() << " from " << handle.GetFileSize() << " to " << new_size);
		return duckdb::LocalFileSystem::Truncate(handle, new_size);
	}
	// In linux - trim() is equivalent to zeroing out a range (albeit in a much more efficient manner). Let's
	// reproduce this behavior regardless of whether the current environment supports it.
	bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
		PRINT_VERBOSE("FS trim " << handle.GetPath() << " offset=" << offset_bytes << " bytes=" << length_bytes);
		std::string nulls(length_bytes, '\0');
		duckdb::LocalFileSystem::Write(handle, (void *)nulls.data(), length_bytes, offset_bytes);
		return true;
	}
	// Will inject a single occurrence of a fault
	void InjectFault(FaultInjectionSite site) {
		std::lock_guard<std::mutex> l(fault_m_);
		// Make sure this is not called twice - as we will drop a fault
		REQUIRE(faults.insert(site).second);
	}

protected:
	// Throws (and consumes) the pending fault for `site`, if one was injected.
	void ThrowInjectedFaultIfSet(FaultInjectionSite site) {
		std::lock_guard<std::mutex> l(fault_m_);
		auto it = faults.find(site);
		if (it != faults.end()) {
			faults.erase(it);
			PRINT_VERBOSE("Injecting fault");
			throw duckdb::IOException("Injected fault");
		}
	}
	// Handle classification by file-name suffix.
	bool is_wal_file(const duckdb::FileHandle &handle) {
		return ends_with(handle.GetPath(), ".db.wal");
	}
	bool is_db_file(const duckdb::FileHandle &handle) {
		return ends_with(handle.GetPath(), ".db");
	}
	bool is_wal_or_db_file(const duckdb::FileHandle &handle) {
		return is_db_file(handle) || is_wal_file(handle);
	}

private:
	// Guards `faults`; injection can race with filesystem calls from worker threads.
	std::mutex fault_m_;
	// Pending one-shot faults, keyed by injection site.
	std::unordered_set<FaultInjectionSite> faults;
};
// This implementation of duckdb::FileSystem will cache writes to the database file in memory until fsync is called.
// It expects all read ranges to be perfectly aligned with previous writes.
class LazyFlushFileSystem : public FaultInjectionFileSystem {
public:
	~LazyFlushFileSystem() {
		// data that never reached fsync is lost on shutdown; report it for debugging
		if (!unflushed_chunks.empty()) {
			PRINT_INFO("Unflushed chunks on shutdown for db file");
		}
	}
	// Positional write: buffered in memory (keyed by offset) instead of hitting disk.
	void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
		// We only perform positional writes for the db file
		REQUIRE(is_db_file(handle));
		std::unique_lock<std::mutex> l(m_);
		ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
		// Store the data in memory until fsync occurs
		PRINT_VERBOSE("Caching chunk " << location << " bytes " << nr_bytes);
		// TODO: be lazy - don't handle partial overwrites
		REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
		auto it = unflushed_chunks.find(location);
		if (it != unflushed_chunks.end()) {
			// Check that if there is an existing chunk - it's size matches exactly
			REQUIRE(it->second.size() == nr_bytes);
			it->second = std::string((char *)buffer, nr_bytes);
		} else {
			unflushed_chunks.emplace(location, std::string((char *)buffer, nr_bytes));
		}
	}
	// Append-style write: only expected for the WAL, which is written through directly.
	int64_t Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
		// Check appends only occur on the WAL
		REQUIRE(is_wal_file(handle));
		return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes);
	}
	// Positional read: served from the in-memory cache when the exact chunk exists,
	// otherwise falls through to disk.
	void Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
		REQUIRE(is_db_file(handle));
		{
			// TODO: shared_lock
			std::unique_lock<std::mutex> l(m_);
			// We don't handle partial overlaps for now.
			REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
			auto it = unflushed_chunks.find(location);
			if (it != unflushed_chunks.end()) {
				PRINT_VERBOSE("FS read cached chunk at offset=" << location << " bytes=" << nr_bytes);
				const auto &data = it->second;
				// Assume block-aligned reads
				REQUIRE(data.size() == nr_bytes);
				memcpy(buffer, data.data(), nr_bytes);
				return;
			}
		}
		PRINT_VERBOSE("FS read disk chunk at offset=" << location << " bytes=" << nr_bytes);
		return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes, location);
	}
	// Streaming read at the current file position; must not bypass cached chunks.
	int64_t Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
		PRINT_VERBOSE("FS read at end of file, bytes=" << nr_bytes);
		REQUIRE(is_wal_or_db_file(handle));
		if (is_db_file(handle)) {
			// Just make sure we don't miss the unflushed chunks
			REQUIRE(unflushed_chunks.empty());
		}
		return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes);
	}
	// Fsync: writes all cached chunks to disk (after a potential injected fault) and syncs.
	void FileSync(duckdb::FileHandle &handle) override {
		PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
		REQUIRE(is_wal_or_db_file(handle));
		if (!is_db_file(handle)) {
			return duckdb::LocalFileSystem::FileSync(handle);
		}
		std::unique_lock<std::mutex> l(m_);
		ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
		for (const auto &location_and_data : unflushed_chunks) {
			auto location = location_and_data.first;
			const auto &data = location_and_data.second;
			PRINT_VERBOSE("Flushing chunk " << location << " size=" << data.size());
			duckdb::LocalFileSystem::Write(handle, (void *)data.data(), data.size(), location);
		}
		unflushed_chunks.clear();
		duckdb::LocalFileSystem::FileSync(handle);
	}
	bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
		REQUIRE(is_db_file(handle));
		std::unique_lock<std::mutex> l(m_);
		// This is just simpler to implement
		REQUIRE(unflushed_chunks.count(offset_bytes) == 0);
		return FaultInjectionFileSystem::Trim(handle, offset_bytes, length_bytes);
	}
	void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
		std::unique_lock<std::mutex> l(m_);
		if (is_db_file(handle)) {
			REQUIRE(unflushed_chunks.empty());
		}
		return duckdb::LocalFileSystem::Truncate(handle, new_size);
	}

private:
	// Lock for modifying unflushed_chunks:
	// 1. Adding to unflushed_chunks on write
	// 2. Flushing unflushed_chunks on fsync
	// 3. Reading from unflushed_chunks
	std::mutex m_;
	// offset -> cached bytes for writes that have not been fsync'ed yet
	std::map<idx_t, std::string> unflushed_chunks;
	// Returns true when [offset, offset+length) overlaps a cached chunk without being
	// an exact offset+size match (partial overlaps are unsupported by this test FS).
	bool partially_overlaps_existing_chunk(const std::map<idx_t, std::string> &chunks, idx_t offset, size_t length) {
		idx_t end = offset + length;
		for (const auto &off_data : chunks) {
			auto off = off_data.first;
			const auto &data = off_data.second;
			idx_t chunk_end = off + data.size();
			// Check for any overlap
			bool overlap = offset < chunk_end && off < end;
			// Exclude full containment and exact match
			bool exact_match = (offset == off && length == data.size());
			if (overlap && !exact_match)
				return true;
		}
		return false;
	}
};
// Asserts that the query result `r` matches the expectation: with an empty
// expected_err_message the result must have succeeded; otherwise it must have
// failed and its error text must contain expected_err_message.
template <class ResultT>
void validate(ResultT &r, std::string expected_err_message = "") {
	const bool expect_error = !expected_err_message.empty();
	// For debugging: surface the actual error when a success was expected
	if (r.HasError() && !expect_error) {
		PRINT_INFO("Unexpected: query failed with " << r.GetError());
	}
	REQUIRE(expect_error == r.HasError());
	if (r.HasError()) {
		REQUIRE(r.GetError().find(expected_err_message) != std::string::npos);
	}
}
// Deletes the database file; passes when the file was removed or did not exist
// (ENOENT), and fails the test on any other removal error.
void cleanup_db_file(const std::string &filename) {
	const int rc = std::remove(filename.c_str());
	const bool gone = (rc == 0) || (errno == ENOENT);
	REQUIRE(gone);
}
TEST_CASE("simple fault injection storage test", "[storage][.]") {
	if (!TestConfiguration::TestRunStorageFuzzer()) {
		SKIP_TEST("storage-fuzzer not enabled");
		return;
	}
	duckdb::DBConfig config;
	// keep a raw pointer to inject faults later; ownership moves into the VFS below
	LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
	config.file_system = duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
	std::string file_path = TestCreatePath("pig.db");
	cleanup_db_file(file_path);
	{
		duckdb::DuckDB db(file_path, &config);
		duckdb::Connection con(db);
		// seed the table with 1000 + 1000000 rows
		validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000)"));
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"));
		auto res = con.Query("SELECT count(*) FROM t");
		validate(*res);
		REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
		// Writes are ok - fsync are not ok
		raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
		// the insert must fail at commit with the injected fault in the error chain
		validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"),
		         "TransactionContext Error: Failed to commit: Injected fault");
		// Check that the tx was rolled back
		auto res2 = con.Query("SELECT count(*) FROM t");
		validate(*res2);
		REQUIRE(res2->GetValue(0, 0).ToString() == "1001000");
	}
	// reopen the database: the previously committed state must survive the fault
	{
		duckdb::DuckDB db(file_path, &config);
		duckdb::Connection con(db);
		auto res = con.Query("SELECT count(*) FROM t");
		validate(*res);
		REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
	}
}
// Actions that the fuzzed storage test can perform. The numeric values are
// sparse (1 is unused); only the identity of each enumerator matters.
enum ActionType {
	// This action will simply flip the setting true -> false or false -> true
	TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT = 0,
	// insert a small batch (100 rows) into the table
	SMALL_WRITE = 2,
	// insert a large batch (1M rows) into the table
	LARGE_WRITE = 3,
	// large insert with an fsync fault injected, expected to fail and roll back
	LARGE_WRITE_WITH_FAULT = 4,
	// double the values in a random contiguous range
	UPDATE = 5,
	// delete a random contiguous range of values
	DELETE = 6,
	// NOTE(review): handler is outside this excerpt; presumably clears/recreates the table
	RESET_TABLE = 7,
};
TEST_CASE("fuzzed storage test", "[storage][.]") {
if (!TestConfiguration::TestRunStorageFuzzer()) {
SKIP_TEST("storage-fuzzer not enabled");
return;
}
// DuckDB Configurations
duckdb::DBConfig config;
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(true);
config.options.trim_free_blocks = true;
config.options.checkpoint_on_shutdown = false;
std::string file_path = TestCreatePath("pig.db");
cleanup_db_file(file_path);
{
duckdb::DuckDB db(file_path, &config);
duckdb::Connection con(db);
validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
}
std::map<double, ActionType> pct_to_action = {{0.1, ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT},
{0.3, ActionType::LARGE_WRITE},
{0.5, ActionType::SMALL_WRITE},
{0.7, ActionType::UPDATE},
{0.85, ActionType::DELETE},
{1.0, ActionType::LARGE_WRITE_WITH_FAULT}};
// Randomly generated sequence of actions
std::vector<ActionType> actions = {};
int NUM_ACTIONS = 30;
for (int i = 0; i < NUM_ACTIONS; i++) {
double selection = (rand() % 100) / 100.0;
for (const auto &prob_type : pct_to_action) {
auto prob = prob_type.first;
auto type = prob_type.second;
if (selection > prob) {
continue;
}
actions.push_back(type);
break;
}
}
actions.push_back(RESET_TABLE);
for (int i = 0; i < NUM_ACTIONS; i++) {
double selection = (rand() % 100) / 100.0;
for (const auto &prob_type : pct_to_action) {
auto prob = prob_type.first;
auto type = prob_type.second;
if (selection > prob) {
continue;
}
actions.push_back(type);
break;
}
}
uint64_t offset = 0;
bool skip_checkpoint_on_commit = true;
std::string expected_checksum = "";
duckdb::unique_ptr<QueryResult> previous_result;
for (const auto &action : actions) {
// Note: the injected file system has to be reset each time. DuckDB construction seems to be std::move'ing them
/*
LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
config.file_system =
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
*/
FaultInjectionFileSystem *raw_fs = new FaultInjectionFileSystem();
config.file_system =
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<FaultInjectionFileSystem>(raw_fs));
duckdb::DuckDB db(file_path, &config);
duckdb::Connection con(db);
// Compute a checksum
if (!expected_checksum.empty()) {
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
validate(*checksum);
auto computed_checksum = checksum->GetValue(0, 0).ToString();
PRINT_INFO("Verifying checksum computed=" << computed_checksum << ", actual=" << expected_checksum);
if (computed_checksum != expected_checksum) {
auto result = con.Query("SELECT * FROM t ORDER BY ALL");
string error;
ColumnDataCollection::ResultEquals(previous_result->Cast<MaterializedQueryResult>().Collection(),
result->Cast<MaterializedQueryResult>().Collection(), error);
Printer::PrintF("Checksum failure\nResult comparison:\n%s", error);
REQUIRE(computed_checksum == expected_checksum);
}
}
previous_result = con.Query("SELECT * FROM t ORDER BY ALL");
switch (action) {
case ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT:
skip_checkpoint_on_commit = !skip_checkpoint_on_commit;
PRINT_INFO("Setting skip commit=" << skip_checkpoint_on_commit);
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(skip_checkpoint_on_commit);
break;
case ActionType::SMALL_WRITE: {
std::string small_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 100) + ")";
PRINT_INFO("RUN: " << small_insert);
validate(*con.Query(small_insert));
offset += 100;
break;
}
case ActionType::LARGE_WRITE: {
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 1000 * 1000) + ")";
PRINT_INFO("RUN: " << large_insert);
validate(*con.Query(large_insert));
offset += 1000 * 1000;
break;
}
case ActionType::UPDATE: {
if (offset != 0) {
uint64_t begin = rand() % offset;
uint64_t length = rand() % (offset - begin);
std::string update_query = "UPDATE t SET i = i * 2 WHERE i > " + std::to_string(begin) + " and i <" +
std::to_string(begin + length);
PRINT_INFO("RUN: " << update_query);
validate(*con.Query(update_query));
}
break;
}
case ActionType::DELETE: {
if (offset != 0) {
uint64_t begin = rand() % offset;
uint64_t length = rand() % (offset - begin);
std::string delete_query =
"DELETE FROM t WHERE i > " + std::to_string(begin) + " and i <" + std::to_string(begin + length);
PRINT_INFO("RUN: " << delete_query);
validate(*con.Query(delete_query));
break;
}
}
case ActionType::LARGE_WRITE_WITH_FAULT: {
raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
std::to_string(offset + 1000 * 1000) + ")";
PRINT_INFO("RUN with fault: " << large_insert);
validate(*con.Query(large_insert), "Injected fault");
break;
}
case ActionType::RESET_TABLE: {
std::string replace_query = "CREATE OR REPLACE TABLE t(i INTEGER)";
PRINT_INFO("RUN with fault: " << replace_query);
validate(*con.Query(replace_query));
break;
}
}
// Compute a checksum (unless we injected a fault - which will invalidate the database)
if (action != ActionType::LARGE_WRITE_WITH_FAULT) {
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
validate(*checksum);
expected_checksum = checksum->GetValue(0, 0).ToString();
PRINT_INFO("Computed new checksum: " << expected_checksum);
} else {
PRINT_INFO("Keeping old checksum due to faults: " << expected_checksum);
}
}
}

View File

@@ -0,0 +1,14 @@
#include "catch.hpp"
#include "duckdb/function/scalar/strftime_format.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/types/date.hpp"
#include <vector>
using namespace duckdb;
using namespace std;
// Sanity check for StrfTimeFormat::Format: midnight of 1992-01-01 rendered
// with a compact %Y%m%d pattern must yield "19920101".
TEST_CASE("Test that strftime format works", "[strftime]") {
	// NOTE(review): the local was previously named `string`, which shadows
	// std::string (pulled into scope by `using namespace std;`) — renamed.
	auto formatted = StrfTimeFormat::Format(Timestamp::FromDatetime(Date::FromDate(1992, 1, 1), dtime_t(0)), "%Y%m%d");
	REQUIRE(formatted == "19920101");
}

View File

@@ -0,0 +1,499 @@
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/types/value.hpp"
#include "duckdb/common/to_string.hpp"
#include "duckdb/common/exception/parser_exception.hpp"
#include "catch.hpp"
#include "duckdb/common/vector.hpp"
#include <string>
#include <cstring>
using namespace duckdb;
// Verify that the platform's strcmp() orders "HXR" < "NUT" < "ZZZ", and that
// duckdb::Value string comparison operators agree with that ordering.
TEST_CASE("Test strcmp() to ensure platform sanity", "[comparison]") {
	// Raw strcmp ordering checks, one per string pair.
	REQUIRE(strcmp("ZZZ", "ZZZ") == 0);
	REQUIRE(strcmp("ZZZ", "HXR") > 0);
	REQUIRE(strcmp("ZZZ", "NUT") > 0);
	REQUIRE(strcmp("HXR", "ZZZ") < 0);
	REQUIRE(strcmp("HXR", "HXR") == 0);
	REQUIRE(strcmp("HXR", "NUT") < 0);
	REQUIRE(strcmp("NUT", "ZZZ") < 0);
	REQUIRE(strcmp("NUT", "HXR") > 0);
	REQUIRE(strcmp("NUT", "NUT") == 0);

	Value zzz("ZZZ");
	Value hxr("HXR");
	Value nut("NUT");

	// Each helper asserts the exact operator outcomes (>, >=, <=, ==, !=)
	// that must hold for a pair in the given relation.
	auto check_equal = [](const Value &lhs, const Value &rhs) {
		REQUIRE_FALSE(lhs > rhs);
		REQUIRE(lhs >= rhs);
		REQUIRE(lhs <= rhs);
		REQUIRE(lhs == rhs);
		REQUIRE_FALSE(lhs != rhs);
	};
	auto check_greater = [](const Value &lhs, const Value &rhs) {
		REQUIRE(lhs > rhs);
		REQUIRE(lhs >= rhs);
		REQUIRE_FALSE(lhs <= rhs);
		REQUIRE_FALSE(lhs == rhs);
		REQUIRE(lhs != rhs);
	};
	auto check_less = [](const Value &lhs, const Value &rhs) {
		REQUIRE_FALSE(lhs > rhs);
		REQUIRE_FALSE(lhs >= rhs);
		REQUIRE(lhs <= rhs);
		REQUIRE_FALSE(lhs == rhs);
		REQUIRE(lhs != rhs);
	};

	// All nine ordered pairs, mirroring the strcmp checks above.
	check_equal(zzz, zzz);
	check_greater(zzz, hxr);
	check_greater(zzz, nut);
	check_less(hxr, zzz);
	check_equal(hxr, hxr);
	check_less(hxr, nut);
	check_less(nut, zzz);
	check_greater(nut, hxr);
	check_equal(nut, nut);
}
// StringUtil::Join must concatenate items with the separator, with no
// leading/trailing separator, for three, one and zero items — for both the
// plain string overload and the formatter-callback overload.
TEST_CASE("Test join vector items", "[string_util]") {
	// Shared formatter for the integer overload of StringUtil::Join.
	const auto format_int = [](const int &item) {
		return to_string(item);
	};
	SECTION("Three string items") {
		duckdb::vector<std::string> parts {"abc", "def", "ghi"};
		REQUIRE(StringUtil::Join(parts, ",") == "abc,def,ghi");
	}
	SECTION("One string item") {
		duckdb::vector<std::string> parts {"abc"};
		REQUIRE(StringUtil::Join(parts, ",") == "abc");
	}
	SECTION("No string items") {
		duckdb::vector<std::string> parts;
		REQUIRE(StringUtil::Join(parts, ",") == "");
	}
	SECTION("Three int items") {
		duckdb::vector<int> numbers {1, 2, 3};
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "1, 2, 3");
	}
	SECTION("One int item") {
		duckdb::vector<int> numbers {1};
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "1");
	}
	SECTION("No int items") {
		duckdb::vector<int> numbers;
		REQUIRE(StringUtil::Join(numbers, numbers.size(), ", ", format_int) == "");
	}
}
// StringUtil::SplitWithParentheses splits on a separator (default ',') but
// never inside a (possibly nested) bracket pair, so composite type strings
// such as STRUCT(...) stay intact. Separator and bracket characters are
// configurable; mismatched brackets must throw.
TEST_CASE("Test SplitWithParentheses", "[string_util]") {
SECTION("Standard split") {
// With no brackets present it behaves like a plain split; empty input yields no items.
REQUIRE(StringUtil::SplitWithParentheses("") == duckdb::vector<string> {});
REQUIRE(StringUtil::SplitWithParentheses("x") == duckdb::vector<string> {"x"});
REQUIRE(StringUtil::SplitWithParentheses("hello") == duckdb::vector<string> {"hello"});
REQUIRE(StringUtil::SplitWithParentheses("hello,world") == duckdb::vector<string> {"hello", "world"});
}
SECTION("Single item with parentheses") {
// Separators inside parentheses do not split the item.
REQUIRE(StringUtil::SplitWithParentheses("STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
duckdb::vector<string> {"STRUCT(year BIGINT, month BIGINT, day BIGINT)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple)") == duckdb::vector<string> {"(apple)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple, pear)") == duckdb::vector<string> {"(apple, pear)"});
REQUIRE(StringUtil::SplitWithParentheses("(apple, pear) banana") ==
duckdb::vector<string> {"(apple, pear) banana"});
REQUIRE(StringUtil::SplitWithParentheses("banana (apple, pear)") ==
duckdb::vector<string> {"banana (apple, pear)"});
REQUIRE(StringUtil::SplitWithParentheses("banana (apple, pear) banana") ==
duckdb::vector<string> {"banana (apple, pear) banana"});
}
SECTION("Multiple items with parentheses") {
// Splitting resumes once the bracket depth returns to zero.
REQUIRE(StringUtil::SplitWithParentheses("map::MAP(ANY,ANY),key::ANY") ==
duckdb::vector<string> {"map::MAP(ANY,ANY)", "key::ANY"});
REQUIRE(StringUtil::SplitWithParentheses("extra,STRUCT(year BIGINT, month BIGINT, day BIGINT)") ==
duckdb::vector<string> {"extra", "STRUCT(year BIGINT, month BIGINT, day BIGINT)"});
REQUIRE(StringUtil::SplitWithParentheses("extra,STRUCT(year BIGINT, month BIGINT, day BIGINT),extra") ==
duckdb::vector<string> {"extra", "STRUCT(year BIGINT, month BIGINT, day BIGINT)", "extra"});
REQUIRE(StringUtil::SplitWithParentheses("aa(bb)cc,dd(ee)ff") ==
duckdb::vector<string> {"aa(bb)cc", "dd(ee)ff"});
REQUIRE(StringUtil::SplitWithParentheses("aa(bb cc,dd),ee(f,,f)gg") ==
duckdb::vector<string> {"aa(bb cc,dd)", "ee(f,,f)gg"});
}
SECTION("Leading and trailing separators") {
// A leading separator produces a leading empty item; a single trailing
// separator after a non-empty item does not (see "aa," vs ",aa").
REQUIRE(StringUtil::SplitWithParentheses(",") == duckdb::vector<string> {""});
REQUIRE(StringUtil::SplitWithParentheses(",,") == duckdb::vector<string> {"", ""});
REQUIRE(StringUtil::SplitWithParentheses("aa,") == duckdb::vector<string> {"aa"});
REQUIRE(StringUtil::SplitWithParentheses(",aa") == duckdb::vector<string> {"", "aa"});
REQUIRE(StringUtil::SplitWithParentheses(",(aa,),") == duckdb::vector<string> {"", "(aa,)"});
}
SECTION("Leading and trailing spaces") {
// Whitespace is preserved verbatim — no trimming is performed.
REQUIRE(StringUtil::SplitWithParentheses(" ") == duckdb::vector<string> {" "});
REQUIRE(StringUtil::SplitWithParentheses("  ") == duckdb::vector<string> {"  "});
REQUIRE(StringUtil::SplitWithParentheses(" , ") == duckdb::vector<string> {" ", " "});
REQUIRE(StringUtil::SplitWithParentheses("aa, bb") == duckdb::vector<string> {"aa", " bb"});
REQUIRE(StringUtil::SplitWithParentheses(" aa,(bb, cc) ") == duckdb::vector<string> {" aa", "(bb, cc) "});
}
SECTION("Nested parentheses") {
// Bracket depth is tracked, so nested pairs stay within one item.
REQUIRE(StringUtil::SplitWithParentheses("STRUCT(aa BIGINT, bb STRUCT(cc BIGINT, dd BIGINT, BIGINT))") ==
duckdb::vector<string> {"STRUCT(aa BIGINT, bb STRUCT(cc BIGINT, dd BIGINT, BIGINT))"});
REQUIRE(StringUtil::SplitWithParentheses("(((aa)))") == duckdb::vector<string> {"(((aa)))"});
REQUIRE(StringUtil::SplitWithParentheses("((aa, bb))") == duckdb::vector<string> {"((aa, bb))"});
REQUIRE(StringUtil::SplitWithParentheses("aa,(bb,(cc,dd)),ee") ==
duckdb::vector<string> {"aa", "(bb,(cc,dd))", "ee"});
}
SECTION("other parentheses") {
// Custom bracket pair: '[' and ']'; a lone ')' is then just a character.
REQUIRE(StringUtil::SplitWithParentheses(" aa,[bb, cc] )", ',', '[', ']') ==
duckdb::vector<string> {" aa", "[bb, cc] )"});
}
SECTION("other separators") {
// Custom separator '|': ',' is then an ordinary character.
REQUIRE(StringUtil::SplitWithParentheses(" aa|(bb| cc),dd", '|') ==
duckdb::vector<string> {" aa", "(bb| cc),dd"});
}
SECTION("incongruent parentheses") {
// Unbalanced or mismatched brackets must be rejected.
REQUIRE_THROWS(StringUtil::SplitWithParentheses("("));
REQUIRE_THROWS(StringUtil::SplitWithParentheses(")"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("aa(bb"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("aa)bb"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("(aa)bb)"));
REQUIRE_THROWS(StringUtil::SplitWithParentheses("(aa(bb)"));
}
}
// StringUtil::SplitWithQuote splits on ',' while honoring double-quoted
// items: quotes may contain separators, quoted items are not trimmed, and
// malformed quoting (unclosed, unexpected, or back-to-back quoted items
// without a separator) must raise a ParserException.
TEST_CASE("Test split quoted strings", "[string_util]") {
SECTION("Empty string") {
REQUIRE(StringUtil::SplitWithQuote("") == duckdb::vector<string> {});
}
SECTION("Empty string with space") {
// Unquoted whitespace-only input yields no items.
REQUIRE(StringUtil::SplitWithQuote("   ") == duckdb::vector<string> {});
}
SECTION("One item") {
REQUIRE(StringUtil::SplitWithQuote("x") == duckdb::vector<string> {"x"});
}
SECTION("One item with space") {
// Unquoted items are trimmed.
REQUIRE(StringUtil::SplitWithQuote("  x  ") == duckdb::vector<string> {"x"});
}
SECTION("One item with quote") {
REQUIRE(StringUtil::SplitWithQuote("\"x\"") == duckdb::vector<string> {"x"});
}
SECTION("One empty item with quote") {
// "" is a present-but-empty item, unlike fully empty input above.
REQUIRE(StringUtil::SplitWithQuote("\"\"") == duckdb::vector<string> {""});
}
SECTION("One empty item, followed by non-empty one - Or vise versa") {
REQUIRE(StringUtil::SplitWithQuote("\"\",hello") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote(",\"hello\"") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote(",hello") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote("\"\",\"hello\"") == duckdb::vector<string> {"", "hello"});
REQUIRE(StringUtil::SplitWithQuote("\"hello\",") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("hello,\"\"") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("hello,") == duckdb::vector<string> {"hello", ""});
REQUIRE(StringUtil::SplitWithQuote("\"hello\",\"\"") == duckdb::vector<string> {"hello", ""});
}
SECTION("One quoted item with spaces") {
// Whitespace inside quotes is preserved; whitespace outside is trimmed.
REQUIRE(StringUtil::SplitWithQuote("  \" x y \"  ") == duckdb::vector<string> {" x y "});
}
SECTION("One quoted item with a delimiter") {
// A separator inside quotes does not split.
REQUIRE(StringUtil::SplitWithQuote("\"x,y\"") == duckdb::vector<string> {"x,y"});
}
SECTION("Three items") {
REQUIRE(StringUtil::SplitWithQuote("x,y,z") == duckdb::vector<string> {"x", "y", "z"});
}
SECTION("Three items, with and without quote") {
REQUIRE(StringUtil::SplitWithQuote("x,\"y\",z") == duckdb::vector<string> {"x", "y", "z"});
}
SECTION("Even more items, with and without quote") {
REQUIRE(StringUtil::SplitWithQuote("a,b,c,d,e,f,g") ==
duckdb::vector<string> {"a", "b", "c", "d", "e", "f", "g"});
}
SECTION("Three empty items") {
REQUIRE(StringUtil::SplitWithQuote(",,") == duckdb::vector<string> {"", "", ""});
}
SECTION("Three empty quoted items") {
REQUIRE(StringUtil::SplitWithQuote("\"\",\"\",\"\"") == duckdb::vector<string> {"", "", ""});
}
SECTION("Unclosed quote") {
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x"), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x "), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\","), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x,"), ParserException);
}
SECTION("Unexpected quote") {
// A quote opening in the middle of an unquoted item is invalid.
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("abc\"def"), ParserException);
}
SECTION("Missing delimiter") {
// Two items must be separated by the delimiter, not just by quotes/space.
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x\"\"y\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("\"x\" \"y\""), ParserException);
REQUIRE_THROWS_AS(StringUtil::SplitWithQuote("x y"), ParserException);
}
}
// StringUtil path helpers: GetFileName, GetFileExtension, GetFileStem and
// GetFilePath. All of them accept both '/' and '\\' as separators and
// tolerate trailing separators and trailing "." components.
TEST_CASE("Test path utilities", "[string_util]") {
SECTION("File name") {
// Last path component; trailing separators and "." components are ignored.
REQUIRE("bin" == StringUtil::GetFileName("/usr/bin/"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("tmp/foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("tmp\\foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("/tmp/foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("\\tmp\\foo.txt"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/."));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/./"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt/.//"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\."));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\.\\"));
REQUIRE("foo.txt" == StringUtil::GetFileName("foo.txt\\.\\\\"));
// ".." is returned as-is; the root has no file name.
REQUIRE(".." == StringUtil::GetFileName(".."));
REQUIRE("" == StringUtil::GetFileName("/"));
}
SECTION("File extension") {
// Text after the last dot; dotfiles and extension-less names yield "".
REQUIRE("cpp" == StringUtil::GetFileExtension("test.cpp"));
REQUIRE("gz" == StringUtil::GetFileExtension("test.cpp.gz"));
REQUIRE("" == StringUtil::GetFileExtension("test"));
REQUIRE("" == StringUtil::GetFileExtension(".gitignore"));
}
SECTION("File stem (base name)") {
// File name with the last extension removed; dotfiles keep their name.
REQUIRE("test" == StringUtil::GetFileStem("test.cpp"));
REQUIRE("test.cpp" == StringUtil::GetFileStem("test.cpp.gz"));
REQUIRE("test" == StringUtil::GetFileStem("test"));
REQUIRE(".gitignore" == StringUtil::GetFileStem(".gitignore"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/."));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/./"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp/.//"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\."));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\.\\"));
REQUIRE("test" == StringUtil::GetFileStem("test.cpp\\.\\\\"));
REQUIRE(".." == StringUtil::GetFileStem(".."));
REQUIRE("" == StringUtil::GetFileStem("/"));
REQUIRE("test" == StringUtil::GetFileStem("tmp/test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("tmp\\test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("/tmp/test.txt"));
REQUIRE("test" == StringUtil::GetFileStem("\\tmp\\test.txt"));
}
SECTION("File path") {
// Directory part without the trailing separator; repeated and trailing
// separators are collapsed/ignored.
REQUIRE("/usr/local/bin" == StringUtil::GetFilePath("/usr/local/bin/test.cpp"));
REQUIRE("\\usr\\local\\bin" == StringUtil::GetFilePath("\\usr\\local\\bin\\test.cpp"));
REQUIRE("tmp" == StringUtil::GetFilePath("tmp/test.txt"));
REQUIRE("tmp" == StringUtil::GetFilePath("tmp\\test.txt"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp/test.txt"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\test.txt"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp/test.txt/"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\test.txt\\"));
REQUIRE("/tmp" == StringUtil::GetFilePath("/tmp//test.txt"));
REQUIRE("\\tmp" == StringUtil::GetFilePath("\\tmp\\\\test.txt"));
}
}
// Smoke test for StringUtil::ParseJSONMap: feed it structurally complex
// documents and require that parsing completes without throwing. The parsed
// contents themselves are not inspected here.
// NOTE(review): previously the results were stored in a variable that was
// never read (and immediately overwritten); REQUIRE_NOTHROW now makes the
// "must parse cleanly" intent explicit and removes the dead local.
TEST_CASE("Test JSON Parsing", "[string_util]") {
	// Real-world PROJJSON CRS document: deeply nested objects and arrays.
	REQUIRE_NOTHROW(StringUtil::ParseJSONMap(R"JSON_LITERAL(
{
"crs": {
"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json",
"type": "GeographicCRS",
"name": "WGS 84",
"datum_ensemble": {
"name": "World Geodetic System 1984 ensemble",
"members": [
{
"name": "World Geodetic System 1984 (Transit)",
"id": {
"authority": "EPSG",
"code": 1166
}
},
{
"name": "World Geodetic System 1984 (G730)",
"id": {
"authority": "EPSG",
"code": 1152
}
},
{
"name": "World Geodetic System 1984 (G873)",
"id": {
"authority": "EPSG",
"code": 1153
}
},
{
"name": "World Geodetic System 1984 (G1150)",
"id": {
"authority": "EPSG",
"code": 1154
}
},
{
"name": "World Geodetic System 1984 (G1674)",
"id": {
"authority": "EPSG",
"code": 1155
}
},
{
"name": "World Geodetic System 1984 (G1762)",
"id": {
"authority": "EPSG",
"code": 1156
}
},
{
"name": "World Geodetic System 1984 (G2139)",
"id": {
"authority": "EPSG",
"code": 1309
}
},
{
"name": "World Geodetic System 1984 (G2296)",
"id": {
"authority": "EPSG",
"code": 1383
}
}
],
"ellipsoid": {
"name": "WGS 84",
"semi_major_axis": 6378137,
"inverse_flattening": 298.257223563
},
"accuracy": "2.0",
"id": {
"authority": "EPSG",
"code": 6326
}
},
"coordinate_system": {
"subtype": "ellipsoidal",
"axis": [
{
"name": "Geodetic latitude",
"abbreviation": "Lat",
"direction": "north",
"unit": "degree"
},
{
"name": "Geodetic longitude",
"abbreviation": "Lon",
"direction": "east",
"unit": "degree"
}
]
},
"scope": "Horizontal component of 3D system.",
"area": "World.",
"bbox": {
"south_latitude": -90,
"west_longitude": -180,
"north_latitude": 90,
"east_longitude": 180
},
"id": {
"authority": "EPSG",
"code": 4326
}
},
"crs_type": "projjson"
} )JSON_LITERAL"));
	// Scalar type coverage: ints, negatives, reals, null, arrays, nested
	// objects, empty arrays and both booleans.
	REQUIRE_NOTHROW(StringUtil::ParseJSONMap(R"JSON_LITERAL(
{
"int": 42,
"signed_int": -42,
"real": 1.5,
"null_val": null,
"arr": [1, 2, 3],
"obj": {
"str_val": "val"
},
"empty_arr": [],
"bool_t": true,
"bool_f": false
}
)JSON_LITERAL"));
}

View File

@@ -0,0 +1,33 @@
#include "catch.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/main/appender.hpp"
#include "test_helpers.hpp"
using namespace duckdb;
using namespace std;
// Assert that `str` is accepted as valid UTF-8 by the vector: storing it
// must not throw, and reading it back must round-trip to the same value.
static void test_valid_str(Vector &a, const char *str) {
	Value expected(str);
	REQUIRE_NOTHROW(a.SetValue(0, expected));
	REQUIRE(a.GetValue(0) == expected);
}
// UTF-8 validation when storing strings in a VARCHAR vector: well-formed
// 1- to 4-byte sequences must round-trip, malformed byte sequences must throw.
TEST_CASE("UTF8 error checking", "[utf8]") {
Vector a(LogicalType::VARCHAR);
// Valid sequences of every encoded length.
test_valid_str(a, "a"); // 1-byte ASCII
test_valid_str(a, "\xc3\xb1"); // 2-byte sequence (U+00F1)
test_valid_str(a, "\xE2\x82\xA1"); // 3-byte sequence (U+20A1)
test_valid_str(a, "\xF0\x9F\xA6\x86"); // 4-byte sequence — a duck!
test_valid_str(a, "\xf0\x90\x8c\xbc"); // 4-byte sequence (U+1033C)
// Malformed sequences must be rejected.
REQUIRE_THROWS(a.SetValue(0, Value("\xc3\x28"))); // 2-byte lead, invalid continuation byte
REQUIRE_THROWS(a.SetValue(0, Value("\xa0\xa1"))); // stray continuation bytes, no lead byte
REQUIRE_THROWS(a.SetValue(0, Value("\xe2\x28\xa1"))); // 3-byte lead, bad 2nd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xe2\x82\x28"))); // 3-byte lead, bad 3rd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x28\x8c\xbc"))); // 4-byte lead, bad 2nd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x90\x28\xbc"))); // 4-byte lead, bad 3rd byte
REQUIRE_THROWS(a.SetValue(0, Value("\xf0\x28\x8c\x28"))); // 4-byte lead, multiple bad bytes
REQUIRE_THROWS(a.SetValue(0, Value("\xf8\xa1\xa1\xa1\xa1"))); // 5-byte form: disallowed by RFC 3629
REQUIRE_THROWS(a.SetValue(0, Value("\xfc\xa1\xa1\xa1\xa1\xa1"))); // 6-byte form: disallowed by RFC 3629
}