should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions
--- a/external/duckdb/extension/icu/CMakeLists.txt
+++ b/external/duckdb/extension/icu/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Build script for the ICU extension: collects the extension sources, builds
+# the icu_extension and icu_loadable_extension targets, and installs
+# icu_extension.
+cmake_minimum_required(VERSION 2.8.12...3.29)
+
+project(ICUExtension)
+
+include_directories(include)
+# TRUE (the default): use the headers under third_party/icu and add the
+# third_party subdirectory to the build.
+# FALSE: ICU is located with find_package() further down and linked in.
+option(WITH_INTERNAL_ICU "Use vendored copy of icu" TRUE)
+if(WITH_INTERNAL_ICU)
+  include_directories(third_party/icu/common)
+  include_directories(third_party/icu/i18n)
+
+  add_subdirectory(third_party)
+endif()
+
+# ICU_LIBRARY_FILES is not set in this file; presumably it is provided by the
+# third_party subdirectory when the vendored copy is used - TODO confirm.
+set(ICU_EXTENSION_FILES
+    ${ICU_LIBRARY_FILES}
+    icu_extension.cpp
+    icu-current.cpp
+    icu-dateadd.cpp
+    icu-datefunc.cpp
+    icu-datepart.cpp
+    icu-datesub.cpp
+    icu-datetrunc.cpp
+    icu-makedate.cpp
+    icu-list-range.cpp
+    icu-table-range.cpp
+    icu-strptime.cpp
+    icu-timebucket.cpp
+    icu-timezone.cpp)
+
+# build_static_extension, link_threads, disable_target_warnings and
+# build_loadable_extension are helpers defined outside this file.
+build_static_extension(icu ${ICU_EXTENSION_FILES})
+link_threads(icu_extension "")
+if(NOT WITH_INTERNAL_ICU)
+  # REQUIRED makes configuration fail when a system ICU cannot be found.
+  find_package(
+    ICU
+    COMPONENTS i18n uc data
+    REQUIRED)
+  target_link_libraries(icu_extension ICU::i18n ICU::uc ICU::data)
+endif()
+disable_target_warnings(icu_extension)
+# NOTE(review): "-no-warnings" is passed through to build_loadable_extension;
+# its exact meaning is defined by that helper - confirm there.
+set(PARAMETERS "-no-warnings")
+build_loadable_extension(icu ${PARAMETERS} ${ICU_EXTENSION_FILES})
+if(NOT WITH_INTERNAL_ICU)
+  # The loadable variant needs the same system ICU libraries.
+  target_link_libraries(icu_loadable_extension ICU::i18n ICU::uc ICU::data)
+endif()
+install(
+  TARGETS icu_extension
+  EXPORT "${DUCKDB_EXPORT_SET}"
+  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
+  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
--- a/external/duckdb/extension/icu/README.md
+++ b/external/duckdb/extension/icu/README.md
--- a/external/duckdb/extension/icu/filters.json
+++ b/external/duckdb/extension/icu/filters.json
@@ -0,0 +1,24 @@
+{
+    "featureFilters": {
+        "brkitr_rules" : "exclude",
+        "brkitr_dictionaries" : "exclude",
+        "brkitr_tree"   : "exclude",
+        "conversion_mappings"   : "exclude",
+        "confusables"   : "exclude",
+        "curr_supplemental" : "exclude",
+        "curr_tree" : "exclude",
+        "lang_tree" : "exclude",
+        "normalization" : "exclude",
+        "region_tree"   : "exclude",
+        "rbnf_tree" : "exclude",
+        "stringprep"    : "exclude",
+        "zone_tree" : "exclude",
+        "translit"  : "exclude",
+        "unames"    : "exclude",
+        "ulayout"   : "exclude",
+        "unit_tree" : "exclude",
+        "cnvalias" : "exclude",
+        "locales_tree"  : "exclude"
+
+    }
+}
--- a/external/duckdb/extension/icu/icu-current.cpp
+++ b/external/duckdb/extension/icu/icu-current.cpp
@@ -0,0 +1,63 @@
+#include "include/icu-dateadd.hpp"
+
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "duckdb/transaction/meta_transaction.hpp"
+#include "include/icu-current.hpp"
+#include "include/icu-casts.hpp"
+
+namespace duckdb {
+
+// Returns the start timestamp stored on the MetaTransaction of the client
+// context that is evaluating `state`.
+static timestamp_t GetTransactionTimestamp(ExpressionState &state) {
+	auto &context = state.GetContext();
+	auto &transaction = MetaTransaction::Get(context);
+	return transaction.start_timestamp;
+}
+
+// Scalar implementation of the zero-argument current-time function.
+// Converts the transaction start timestamp to a TIMETZ value using a calendar
+// built from the client context, then makes `result` reference that value.
+static void CurrentTimeFunction(DataChunk &input, ExpressionState &state, Vector &result) {
+	D_ASSERT(input.ColumnCount() == 0);
+
+	auto transaction_start = GetTransactionTimestamp(state);
+	ICUDateFunc::BindData bind_data(state.GetContext());
+
+	// Filled in by ToTimeTZ below
+	dtime_tz_t time_tz(dtime_t(0), 0);
+	ICUToTimeTZ::ToTimeTZ(bind_data.calendar.get(), transaction_start, time_tz);
+
+	auto constant = Value::TIMETZ(time_tz);
+	result.Reference(constant);
+}
+
+// Scalar implementation of the zero-argument current-date function.
+// Converts the transaction start timestamp to a DATE through ICUMakeDate and
+// makes `result` reference that value.
+static void CurrentDateFunction(DataChunk &input, ExpressionState &state, Vector &result) {
+	D_ASSERT(input.ColumnCount() == 0);
+
+	auto &context = state.GetContext();
+	auto today = ICUMakeDate::ToDate(context, GetTransactionTimestamp(state));
+
+	auto constant = Value::DATE(today);
+	result.Reference(constant);
+}
+
+// Builds the argument-less scalar function that yields a TIME_TZ value,
+// marked as consistent within a query.
+ScalarFunction GetCurrentTimeFun() {
+	ScalarFunction fun({}, LogicalType::TIME_TZ, CurrentTimeFunction);
+	fun.stability = FunctionStability::CONSISTENT_WITHIN_QUERY;
+	return fun;
+}
+
+// Builds the argument-less scalar function that yields a DATE value,
+// marked as consistent within a query.
+ScalarFunction GetCurrentDateFun() {
+	ScalarFunction fun({}, LogicalType::DATE, CurrentDateFunction);
+	fun.stability = FunctionStability::CONSISTENT_WITHIN_QUERY;
+	return fun;
+}
+
+// Registers the current-time and current-date functions with the loader:
+// "get_current_time", "current_date" and its alias "today".
+void RegisterICUCurrentFunctions(ExtensionLoader &loader) {
+	//	current time
+	ScalarFunctionSet time_set("get_current_time");
+	time_set.AddFunction(GetCurrentTimeFun());
+	loader.RegisterFunction(time_set);
+
+	//	current date
+	ScalarFunctionSet date_set("current_date");
+	date_set.AddFunction(GetCurrentDateFun());
+	loader.RegisterFunction(date_set);
+
+	//	the same set again, under a second name
+	date_set.name = "today";
+	loader.RegisterFunction(date_set);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-dateadd.cpp
+++ b/external/duckdb/extension/icu/icu-dateadd.cpp
@@ -0,0 +1,330 @@
+#include "include/icu-dateadd.hpp"
+
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/common/operator/add.hpp"
+#include "duckdb/common/operator/multiply.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "include/icu-datefunc.hpp"
+#include "icu-helpers.hpp"
+
+namespace duckdb {
+
+// Bind callback for the interval arithmetic functions. Runs the regular ICU
+// bind and then rejects calendars for which TZCalendar::SupportsIntervals()
+// reports false.
+//
+// Returns the bind data produced by ICUDateFunc::Bind.
+// Throws NotImplementedException when the calendar does not support intervals.
+static duckdb::unique_ptr<FunctionData> ICUBindIntervalMonths(ClientContext &context, ScalarFunction &bound_function,
+                                                              vector<duckdb::unique_ptr<Expression>> &arguments) {
+	auto result = ICUDateFunc::Bind(context, bound_function, arguments);
+	auto &info = result->Cast<ICUDateFunc::BindData>();
+	TZCalendar calendar(*info.calendar, info.cal_setting);
+	if (!calendar.SupportsIntervals()) {
+		throw NotImplementedException("INTERVALs do not work with 13 month calendars. Try using DATE_DIFF instead.");
+	}
+	// `result` already has the return type, so it is returned directly:
+	// wrapping a local in std::move here would only inhibit copy elision.
+	return result;
+}
+
+// Calendar-aware addition. The primary template is a catch-all that raises an
+// InternalException; supported type combinations are provided as explicit
+// specializations.
+struct ICUCalendarAdd {
+	template <typename TA, typename TB, typename TR>
+	static TR Operation(TA left, TB right, TZCalendar &calendar_p) {
+		throw InternalException("Unimplemented type for ICUCalendarAdd");
+	}
+};
+
+// Calendar-aware subtraction. The primary template is a catch-all that raises
+// an InternalException; supported type combinations are provided as explicit
+// specializations.
+struct ICUCalendarSub : public ICUDateFunc {
+	template <typename TA, typename TB, typename TR>
+	static TR Operation(TA left, TB right, TZCalendar &calendar_p) {
+		throw InternalException("Unimplemented type for ICUCalendarSub");
+	}
+};
+
+// Calendar-aware age computation. The primary template is a catch-all that
+// raises an InternalException; supported type combinations are provided as
+// explicit specializations.
+struct ICUCalendarAge : public ICUDateFunc {
+	template <typename TA, typename TB, typename TR>
+	static TR Operation(TA left, TB right, TZCalendar &calendar_p) {
+		throw InternalException("Unimplemented type for ICUCalendarAge");
+	}
+};
+
+// Adds a 64-bit number of hours to the calendar. Each call to add() is given
+// an int32_t amount, so the total is fed in as a sequence of pieces that each
+// fit into that range.
+static inline void CalendarAddHour(icu::Calendar *calendar, int64_t interval_hour, UErrorCode &status) {
+	const int64_t largest_step = NumericLimits<int32_t>::Maximum();
+	const int64_t smallest_step = NumericLimits<int32_t>::Minimum();
+
+	// Each loop removes exactly what it added, so the remainder reaches zero
+	// without changing sign and at most one of the two loops ever runs.
+	while (interval_hour > 0) {
+		const int64_t step = interval_hour > largest_step ? largest_step : interval_hour;
+		calendar->add(UCAL_HOUR, static_cast<int32_t>(step), status);
+		interval_hour -= step;
+	}
+	while (interval_hour < 0) {
+		const int64_t step = interval_hour < smallest_step ? smallest_step : interval_hour;
+		calendar->add(UCAL_HOUR, static_cast<int32_t>(step), status);
+		interval_hour -= step;
+	}
+}
+
+// Adds `interval` to `timestamp` using the ICU calendar wrapped by `calendar_p`.
+//
+// Non-finite timestamps are returned unchanged. The sub-millisecond part is
+// carried by hand in `micros`; all other parts are applied with calendar->add().
+// The order in which the fields are applied depends on whether any interval
+// component is negative (see the two branches below).
+// The final value is produced by ICUDateFunc::GetTime.
+// NOTE(review): `status` is handed to every calendar call but never inspected
+// in this function - confirm that this is intentional.
+template <>
+timestamp_t ICUCalendarAdd::Operation(timestamp_t timestamp, interval_t interval, TZCalendar &calendar_p) {
+	if (!Timestamp::IsFinite(timestamp)) {
+		return timestamp;
+	}
+	auto calendar = calendar_p.GetICUCalendar();
+
+	// Split the timestamp into whole milliseconds and the leftover microseconds
+	int64_t millis = timestamp.value / Interval::MICROS_PER_MSEC;
+	int64_t micros = timestamp.value % Interval::MICROS_PER_MSEC;
+
+	// Manually move the µs
+	micros += interval.micros % Interval::MICROS_PER_MSEC;
+	if (micros >= Interval::MICROS_PER_MSEC) {
+		// Carry into the milliseconds
+		micros -= Interval::MICROS_PER_MSEC;
+		++millis;
+	} else if (micros < 0) {
+		// Borrow from the milliseconds
+		micros += Interval::MICROS_PER_MSEC;
+		--millis;
+	}
+
+	// Make sure the value is still in range
+	// (d and t only receive the outputs of Timestamp::Convert; they are not read afterwards)
+	date_t d;
+	dtime_t t;
+	auto us = MultiplyOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(millis, Interval::MICROS_PER_MSEC);
+	Timestamp::Convert(timestamp_t(us), d, t);
+
+	// Now use the calendar to add the other parts
+	UErrorCode status = U_ZERO_ERROR;
+	const auto udate = UDate(millis);
+	calendar->setTime(udate, status);
+
+	// Break units apart to avoid overflow
+	// (interval_h starts out as the interval in milliseconds and is divided down
+	// step by step until only whole hours remain)
+	auto interval_h = interval.micros / Interval::MICROS_PER_MSEC;
+
+	const auto interval_ms = static_cast<int32_t>(interval_h % Interval::MSECS_PER_SEC);
+	interval_h /= Interval::MSECS_PER_SEC;
+
+	const auto interval_s = static_cast<int32_t>(interval_h % Interval::SECS_PER_MINUTE);
+	interval_h /= Interval::SECS_PER_MINUTE;
+
+	const auto interval_m = static_cast<int32_t>(interval_h % Interval::MINS_PER_HOUR);
+	interval_h /= Interval::MINS_PER_HOUR;
+
+	if (interval.months < 0 || interval.days < 0 || interval.micros < 0) {
+		// Add interval fields from lowest to highest (non-ragged to ragged)
+		calendar->add(UCAL_MILLISECOND, interval_ms, status);
+		calendar->add(UCAL_SECOND, interval_s, status);
+		calendar->add(UCAL_MINUTE, interval_m, status);
+		CalendarAddHour(calendar, interval_h, status);
+
+		// PG Adds months before days
+		calendar->add(UCAL_MONTH, interval.months, status);
+		calendar->add(UCAL_DATE, interval.days, status);
+	} else {
+		// PG Adds months before days
+		calendar->add(UCAL_MONTH, interval.months, status);
+		calendar->add(UCAL_DATE, interval.days, status);
+
+		// Add interval fields from highest to lowest (ragged to non-ragged)
+		CalendarAddHour(calendar, interval_h, status);
+		calendar->add(UCAL_MINUTE, interval_m, status);
+		calendar->add(UCAL_SECOND, interval_s, status);
+		calendar->add(UCAL_MILLISECOND, interval_ms, status);
+	}
+
+	// Read the calendar back and re-attach the sub-millisecond part
+	return ICUDateFunc::GetTime(calendar, micros);
+}
+
+// interval + timestamp: forwards to the timestamp + interval specialization
+// with the operands swapped.
+template <>
+timestamp_t ICUCalendarAdd::Operation(interval_t interval, timestamp_t timestamp, TZCalendar &calendar) {
+	return ICUCalendarAdd::Operation<timestamp_t, interval_t, timestamp_t>(timestamp, interval, calendar);
+}
+
+// timestamp - interval: negates every field of the interval and delegates to
+// ICUCalendarAdd.
+template <>
+timestamp_t ICUCalendarSub::Operation(timestamp_t timestamp, interval_t interval, TZCalendar &calendar) {
+	interval_t inverse;
+	inverse.months = -interval.months;
+	inverse.days = -interval.days;
+	inverse.micros = -interval.micros;
+	return ICUCalendarAdd::template Operation<timestamp_t, interval_t, timestamp_t>(timestamp, inverse, calendar);
+}
+
+// Computes end_date - start_date as an interval made of days plus a time
+// part; the months field is always 0.
+//
+// Throws InvalidInputException when either argument is not finite.
+// When start_date is later than end_date the operands are swapped and every
+// field of the result is negated.
+template <>
+interval_t ICUCalendarSub::Operation(timestamp_t end_date, timestamp_t start_date, TZCalendar &calendar_p) {
+	if (!Timestamp::IsFinite(end_date) || !Timestamp::IsFinite(start_date)) {
+		throw InvalidInputException("Cannot subtract infinite timestamps");
+	}
+	if (start_date > end_date) {
+		auto negated = Operation<timestamp_t, timestamp_t, interval_t>(start_date, end_date, calendar_p);
+		return {-negated.months, -negated.days, -negated.micros};
+	}
+	auto calendar = calendar_p.GetICUCalendar();
+
+	// Position the calendar at start_date; SetTime returns the leftover µs
+	auto start_micros = ICUDateFunc::SetTime(calendar, start_date);
+	// NOTE(review): for a negative end_date.value the remainder is negative before
+	// this cast and becomes a very large unsigned number - confirm that the
+	// unsigned wrap-around in the arithmetic below is intended.
+	auto end_micros = (uint64_t)(end_date.value % Interval::MICROS_PER_MSEC);
+
+	// Borrow 1ms from end_date if we wrap. This works because start_date <= end_date
+	// and if the µs are out of order, then there must be an extra ms.
+	if (start_micros > (idx_t)end_micros) {
+		end_date.value -= Interval::MICROS_PER_MSEC;
+		end_micros += Interval::MICROS_PER_MSEC;
+	}
+
+	//	Timestamp differences do not use months, so start with days
+	//	(presumably each SubtractField call also advances the calendar by the amount
+	//	it returns, so the following calls see only the remainder - see ICU's
+	//	Calendar::fieldDifference)
+	interval_t result;
+	result.months = 0;
+	result.days = SubtractField(calendar, UCAL_DATE, end_date);
+
+	auto hour_diff = SubtractField(calendar, UCAL_HOUR_OF_DAY, end_date);
+	auto min_diff = SubtractField(calendar, UCAL_MINUTE, end_date);
+	auto sec_diff = SubtractField(calendar, UCAL_SECOND, end_date);
+	auto ms_diff = SubtractField(calendar, UCAL_MILLISECOND, end_date);
+	// Combine the millisecond difference with the sub-millisecond difference
+	auto micros_diff = UnsafeNumericCast<int32_t>(ms_diff * Interval::MICROS_PER_MSEC + (end_micros - start_micros));
+	result.micros = Time::FromTime(hour_diff, min_diff, sec_diff, micros_diff).micros;
+
+	return result;
+}
+
+// Computes the age between start_date and end_date as an interval.
+//
+// For Gregorian calendars the components of both timestamps are extracted
+// with ICUHelpers::GetComponents and handed to Interval::GetAge. For all other
+// calendars the result is built field by field (months, days, time) from the
+// ICU calendar; when start_date is later the operands are swapped and every
+// field of the result is negated.
+template <>
+interval_t ICUCalendarAge::Operation(timestamp_t end_date, timestamp_t start_date, TZCalendar &calendar_p) {
+	auto calendar = calendar_p.GetICUCalendar();
+	if (calendar_p.IsGregorian()) {
+		auto start_data = ICUHelpers::GetComponents(timestamp_tz_t(start_date.value), calendar);
+		auto end_data = ICUHelpers::GetComponents(timestamp_tz_t(end_date.value), calendar);
+		return Interval::GetAge(end_data, start_data, start_date > end_date);
+	}
+	// fallback for non-gregorian calendars, since Interval::GetAge does not handle them
+	if (start_date > end_date) {
+		auto negated = Operation<timestamp_t, timestamp_t, interval_t>(start_date, end_date, calendar_p);
+		return {-negated.months, -negated.days, -negated.micros};
+	}
+
+	// Position the calendar at start_date; SetTime returns the leftover µs
+	auto start_micros = ICUDateFunc::SetTime(calendar, start_date);
+	// NOTE(review): for a negative end_date.value the remainder is negative before
+	// this cast and becomes a very large unsigned number - confirm that the
+	// unsigned wrap-around in the arithmetic below is intended.
+	auto end_micros = (uint64_t)(end_date.value % Interval::MICROS_PER_MSEC);
+
+	// Borrow 1ms from end_date if we wrap. This works because start_date <= end_date
+	// and if the µs are out of order, then there must be an extra ms.
+	if (start_micros > (idx_t)end_micros) {
+		end_date.value -= Interval::MICROS_PER_MSEC;
+		end_micros += Interval::MICROS_PER_MSEC;
+	}
+
+	//	Lunar calendars have uneven numbers of months, so we just diff months, not years
+	//	(presumably each SubtractField call also advances the calendar by the amount
+	//	it returns, so the following calls see only the remainder - see ICU's
+	//	Calendar::fieldDifference)
+	interval_t result;
+	result.months = SubtractField(calendar, UCAL_MONTH, end_date);
+	result.days = SubtractField(calendar, UCAL_DATE, end_date);
+
+	auto hour_diff = SubtractField(calendar, UCAL_HOUR_OF_DAY, end_date);
+	auto min_diff = SubtractField(calendar, UCAL_MINUTE, end_date);
+	auto sec_diff = SubtractField(calendar, UCAL_SECOND, end_date);
+	auto ms_diff = SubtractField(calendar, UCAL_MILLISECOND, end_date);
+	// Combine the millisecond difference with the sub-millisecond difference
+	auto micros_diff = UnsafeNumericCast<int32_t>(ms_diff * Interval::MICROS_PER_MSEC + (end_micros - start_micros));
+	result.micros = Time::FromTime(hour_diff, min_diff, sec_diff, micros_diff).micros;
+
+	return result;
+}
+
+// Glue between the ICUCalendarAdd / ICUCalendarSub / ICUCalendarAge operations
+// and the scalar function machinery: vectorised executors, ScalarFunction
+// factories and the registration helpers for "+", "-" and "age".
+// Every function built here uses ICUBindIntervalMonths as its bind callback.
+struct ICUDateAdd : public ICUDateFunc {
+
+	// Executes OP once per row of the single argument column, with the value
+	// returned by CurrentMidnight as the left operand.
+	template <typename TA, typename TR, typename OP>
+	static void ExecuteUnary(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 1);
+
+		// Build a calendar from the bind data of the bound function expression
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BindData>();
+		TZCalendar calendar(*info.calendar, info.cal_setting);
+
+		//	Subtract argument from current_date (at midnight)
+		const auto end_date = CurrentMidnight(calendar.GetICUCalendar(), state);
+
+		UnaryExecutor::Execute<TA, TR>(args.data[0], result, args.size(), [&](TA start_date) {
+			return OP::template Operation<timestamp_t, TA, TR>(end_date, start_date, calendar);
+		});
+	}
+
+	// Creates a one-argument ScalarFunction backed by ExecuteUnary<TA, TR, OP>.
+	template <typename TA, typename TR, typename OP>
+	inline static ScalarFunction GetUnaryDateFunction(const LogicalTypeId &left_type,
+	                                                  const LogicalTypeId &result_type) {
+		return ScalarFunction({left_type}, result_type, ExecuteUnary<TA, TR, OP>, ICUBindIntervalMonths);
+	}
+
+	// Executes OP once per row of the two argument columns.
+	template <typename TA, typename TB, typename TR, typename OP>
+	static void ExecuteBinary(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+
+		// Build a calendar from the bind data of the bound function expression
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BindData>();
+		TZCalendar calendar(*info.calendar, info.cal_setting);
+
+		BinaryExecutor::Execute<TA, TB, TR>(args.data[0], args.data[1], result, args.size(), [&](TA left, TB right) {
+			return OP::template Operation<TA, TB, TR>(left, right, calendar);
+		});
+	}
+
+	// Creates a two-argument ScalarFunction backed by ExecuteBinary<TA, TB, TR, OP>.
+	template <typename TA, typename TB, typename TR, typename OP>
+	inline static ScalarFunction GetBinaryDateFunction(const LogicalTypeId &left_type, const LogicalTypeId &right_type,
+	                                                   const LogicalTypeId &result_type) {
+		return ScalarFunction({left_type, right_type}, result_type, ExecuteBinary<TA, TB, TR, OP>,
+		                      ICUBindIntervalMonths);
+	}
+
+	// Binary function whose result type is TIMESTAMP_TZ.
+	template <typename TA, typename TB, typename OP>
+	static ScalarFunction GetDateAddFunction(const LogicalTypeId &left_type, const LogicalTypeId &right_type) {
+		return GetBinaryDateFunction<TA, TB, timestamp_t, OP>(left_type, right_type, LogicalType::TIMESTAMP_TZ);
+	}
+
+	// Registers `name` with both argument orders of TIMESTAMP_TZ and INTERVAL.
+	static void AddDateAddOperators(const string &name, ExtensionLoader &loader) {
+		//	temporal + interval
+		ScalarFunctionSet set(name);
+		set.AddFunction(GetDateAddFunction<timestamp_t, interval_t, ICUCalendarAdd>(LogicalType::TIMESTAMP_TZ,
+		                                                                            LogicalType::INTERVAL));
+		//	interval + temporal
+		set.AddFunction(GetDateAddFunction<interval_t, timestamp_t, ICUCalendarAdd>(LogicalType::INTERVAL,
+		                                                                            LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(set);
+	}
+
+	// Unary function whose result type is INTERVAL.
+	template <typename TA, typename OP>
+	static ScalarFunction GetUnaryAgeFunction(const LogicalTypeId &left_type) {
+		return GetUnaryDateFunction<TA, interval_t, OP>(left_type, LogicalType::INTERVAL);
+	}
+
+	// Binary function whose result type is INTERVAL.
+	template <typename TA, typename TB, typename OP>
+	static ScalarFunction GetBinaryAgeFunction(const LogicalTypeId &left_type, const LogicalTypeId &right_type) {
+		return GetBinaryDateFunction<TA, TB, interval_t, OP>(left_type, right_type, LogicalType::INTERVAL);
+	}
+
+	// Registers `name` for TIMESTAMP_TZ - INTERVAL and TIMESTAMP_TZ - TIMESTAMP_TZ.
+	static void AddDateSubOperators(const string &name, ExtensionLoader &loader) {
+		//	temporal - interval
+		ScalarFunctionSet set(name);
+		set.AddFunction(GetDateAddFunction<timestamp_t, interval_t, ICUCalendarSub>(LogicalType::TIMESTAMP_TZ,
+		                                                                            LogicalType::INTERVAL));
+
+		//	temporal - temporal
+		set.AddFunction(GetBinaryAgeFunction<timestamp_t, timestamp_t, ICUCalendarSub>(LogicalType::TIMESTAMP_TZ,
+		                                                                               LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(set);
+	}
+
+	// Registers `name` in its two-argument and one-argument TIMESTAMP_TZ forms.
+	static void AddDateAgeFunctions(const string &name, ExtensionLoader &loader) {
+		//	age(temporal, temporal)
+		ScalarFunctionSet set(name);
+		set.AddFunction(GetBinaryAgeFunction<timestamp_t, timestamp_t, ICUCalendarAge>(LogicalType::TIMESTAMP_TZ,
+		                                                                               LogicalType::TIMESTAMP_TZ));
+		//	age(temporal)
+		set.AddFunction(GetUnaryAgeFunction<timestamp_t, ICUCalendarAge>(LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(set);
+	}
+};
+
+// Returns timestamp + interval, evaluated in the given calendar.
+timestamp_t ICUDateFunc::Add(TZCalendar &calendar, timestamp_t timestamp, interval_t interval) {
+	const auto sum = ICUCalendarAdd::Operation<timestamp_t, interval_t, timestamp_t>(timestamp, interval, calendar);
+	return sum;
+}
+
+// Returns timestamp - interval, evaluated in the given calendar.
+timestamp_t ICUDateFunc::Sub(TZCalendar &calendar, timestamp_t timestamp, interval_t interval) {
+	const auto shifted = ICUCalendarSub::Operation<timestamp_t, interval_t, timestamp_t>(timestamp, interval, calendar);
+	return shifted;
+}
+
+// Returns end_date - start_date as an interval, evaluated in the given calendar.
+interval_t ICUDateFunc::Sub(TZCalendar &calendar, timestamp_t end_date, timestamp_t start_date) {
+	const auto elapsed = ICUCalendarSub::Operation<timestamp_t, timestamp_t, interval_t>(end_date, start_date, calendar);
+	return elapsed;
+}
+
+// Registers the "+" and "-" operators and the "age" function built by
+// ICUDateAdd with the extension loader.
+void RegisterICUDateAddFunctions(ExtensionLoader &loader) {
+	using Registrar = ICUDateAdd;
+	Registrar::AddDateAddOperators("+", loader);
+	Registrar::AddDateSubOperators("-", loader);
+	Registrar::AddDateAgeFunctions("age", loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-datefunc.cpp
+++ b/external/duckdb/extension/icu/icu-datefunc.cpp
@@ -0,0 +1,172 @@
+#include "include/icu-datefunc.hpp"
+
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/common/operator/add.hpp"
+#include "duckdb/common/operator/multiply.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/exception/conversion_exception.hpp"
+#include "icu-helpers.hpp"
+#include "unicode/ucal.h"
+
+namespace duckdb {
+
+// Copy constructor: copies both settings and clones the other object's calendar,
+// so the copy owns a calendar object of its own.
+ICUDateFunc::BindData::BindData(const BindData &other)
+    : tz_setting(other.tz_setting), cal_setting(other.cal_setting), calendar(other.calendar->clone()) {
+}
+
+// Builds bind data from an explicit time zone name and calendar name, then
+// creates the matching ICU calendar.
+ICUDateFunc::BindData::BindData(const string &tz_setting_p, const string &cal_setting_p)
+    : tz_setting(tz_setting_p), cal_setting(cal_setting_p) {
+
+	InitCalendar();
+}
+
+// Builds bind data from the "TimeZone" and "Calendar" settings of the client
+// context. A missing "Calendar" setting falls back to "gregorian"; a missing
+// "TimeZone" setting leaves tz_setting untouched.
+ICUDateFunc::BindData::BindData(ClientContext &context) {
+	Value zone_value;
+	if (context.TryGetCurrentSetting("TimeZone", zone_value)) {
+		tz_setting = zone_value.ToString();
+	}
+
+	// Start from the default and override it when the setting is present
+	cal_setting = "gregorian";
+	Value calendar_value;
+	if (context.TryGetCurrentSetting("Calendar", calendar_value)) {
+		cal_setting = calendar_value.ToString();
+	}
+
+	InitCalendar();
+}
+
+// (Re)creates `calendar` from tz_setting and cal_setting.
+// Throws InternalException when ICU cannot create the calendar.
+void ICUDateFunc::BindData::InitCalendar() {
+	const auto zone_name = icu::UnicodeString::fromUTF8(icu::StringPiece(tz_setting));
+	auto zone = icu::TimeZone::createTimeZone(zone_name);
+
+	// The calendar type is selected through a locale keyword
+	const string locale_id = "@calendar=" + cal_setting;
+	icu::Locale locale(locale_id.c_str());
+
+	UErrorCode status = U_ZERO_ERROR;
+	calendar.reset(icu::Calendar::createInstance(zone, locale, status));
+	if (U_FAILURE(status)) {
+		throw InternalException("Unable to create ICU calendar.");
+	}
+
+	//	Postgres always assumes times are given in the proleptic Gregorian calendar.
+	//	ICU switches to the Gregorian calendar in 1582 by default, so the change date
+	//	is moved to the minimum date to make every date proleptic Gregorian.
+	//	This can only fail for a non-Gregorian calendar; that error is deliberately ignored.
+	ucal_setGregorianChange((UCalendar *)calendar.get(), U_DATE_MIN, &status); // NOLINT
+}
+
+// Two bind data objects compare equal when ICU reports their calendars as equivalent.
+bool ICUDateFunc::BindData::Equals(const FunctionData &other_p) const {
+	const auto &rhs = other_p.Cast<const BindData>();
+	const auto &rhs_calendar = *rhs.calendar;
+	return calendar->isEquivalentTo(rhs_calendar);
+}
+
+// Returns a newly allocated copy of this bind data, made with the copy constructor.
+unique_ptr<FunctionData> ICUDateFunc::BindData::Copy() const {
+	return make_uniq<BindData>(*this);
+}
+
+// Default bind callback for the ICU functions: creates BindData from the
+// client context. `bound_function` and `arguments` are not used.
+unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunction &bound_function,
+                                           vector<duckdb::unique_ptr<Expression>> &arguments) {
+	return make_uniq<BindData>(context);
+}
+
+// Switches the calendar to the named time zone.
+// Returns false, leaving the calendar untouched, when ICUHelpers::TryGetTimeZone
+// yields no zone for the name.
+bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
+	string zone_name = tz_id.GetString();
+	if (auto zone = ICUHelpers::TryGetTimeZone(zone_name)) {
+		// The calendar takes over the released pointer
+		calendar->adoptTimeZone(zone.release());
+		return true;
+	}
+	return false;
+}
+
+// Switches the calendar to the named time zone. The lookup is delegated to
+// ICUHelpers::GetTimeZone, which also receives `error_message`; when it yields
+// no zone the calendar is left untouched.
+void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id, string *error_message) {
+	string zone_name = tz_id.GetString();
+	auto zone = ICUHelpers::GetTimeZone(zone_name, error_message);
+	if (!zone) {
+		return;
+	}
+	// The calendar takes over the released pointer
+	calendar->adoptTimeZone(zone.release());
+}
+
+// Reads the calendar's time and combines it with `micros` into a timestamp
+// without any overflow or range checks.
+// Throws InternalException when ICU cannot report the time.
+timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {
+	UErrorCode status = U_ZERO_ERROR;
+	const auto calendar_time = calendar->getTime(status);
+	if (U_FAILURE(status)) {
+		throw InternalException("Unable to get ICU calendar time.");
+	}
+
+	// Calendar time is in milliseconds; scale it and add the sub-millisecond part
+	const auto whole_ms = int64_t(calendar_time);
+	return timestamp_t(whole_ms * Interval::MICROS_PER_MSEC + int64_t(micros));
+}
+
+// Reads the calendar's time, combines it with `micros` and stores the
+// timestamp in `result`.
+// Returns false when ICU reports an error, when the conversion to microseconds
+// overflows, or when the resulting date fails the final range check. `result`
+// is written as soon as the two overflow checks pass, so it may already be
+// modified when that final check returns false.
+bool ICUDateFunc::TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result) {
+	UErrorCode status = U_ZERO_ERROR;
+	const auto whole_ms = int64_t(calendar->getTime(status));
+	if (U_FAILURE(status)) {
+		return false;
+	}
+
+	// UDate is a double, so it can't overflow (it just loses accuracy), but converting back to µs can.
+	int64_t scaled;
+	if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(whole_ms, Interval::MICROS_PER_MSEC, scaled)) {
+		return false;
+	}
+	int64_t total;
+	if (!TryAddOperator::Operation<int64_t, int64_t, int64_t>(scaled, int64_t(micros), total)) {
+		return false;
+	}
+
+	// Range check: the date part, expressed in microseconds, must not overflow either
+	result = timestamp_t(total);
+	const date_t result_date = Timestamp::GetDate(result);
+	int64_t date_micros;
+	return TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(result_date.days, Interval::MICROS_PER_DAY,
+	                                                                 date_micros);
+}
+
+// Throwing counterpart of TryGetTime.
+// Throws ConversionException when TryGetTime reports failure.
+timestamp_t ICUDateFunc::GetTime(icu::Calendar *calendar, uint64_t micros) {
+	timestamp_t converted;
+	if (TryGetTime(calendar, micros, converted)) {
+		return converted;
+	}
+	throw ConversionException("ICU date overflows timestamp range");
+}
+
+// Points the calendar at `date` and returns the microseconds that do not fit
+// into whole milliseconds, always in [0, Interval::MICROS_PER_MSEC).
+// Throws InternalException when the calendar rejects the time.
+uint64_t ICUDateFunc::SetTime(icu::Calendar *calendar, timestamp_t date) {
+	int64_t whole_ms = date.value / Interval::MICROS_PER_MSEC;
+	int64_t sub_ms = date.value % Interval::MICROS_PER_MSEC;
+	// Division truncates towards zero, so move a negative remainder into the
+	// previous millisecond
+	if (sub_ms < 0) {
+		sub_ms += Interval::MICROS_PER_MSEC;
+		--whole_ms;
+	}
+
+	UErrorCode status = U_ZERO_ERROR;
+	calendar->setTime(UDate(whole_ms), status);
+	if (U_FAILURE(status)) {
+		throw InternalException("Unable to set ICU calendar time.");
+	}
+	return uint64_t(sub_ms);
+}
+
+// Returns the value of a single calendar field.
+// Throws InternalException when ICU reports an error.
+int32_t ICUDateFunc::ExtractField(icu::Calendar *calendar, UCalendarDateFields field) {
+	UErrorCode status = U_ZERO_ERROR;
+	const auto value = calendar->get(field, status);
+	if (!U_FAILURE(status)) {
+		return value;
+	}
+	throw InternalException("Unable to extract ICU calendar part.");
+}
+
+// Returns the difference in `field` between the calendar's current time and
+// `end_date` (truncated to milliseconds), as computed by fieldDifference.
+// Throws InternalException when ICU reports an error.
+int32_t ICUDateFunc::SubtractField(icu::Calendar *calendar, UCalendarDateFields field, timestamp_t end_date) {
+	UErrorCode status = U_ZERO_ERROR;
+	const auto target = UDate(end_date.value / Interval::MICROS_PER_MSEC);
+	const auto difference = calendar->fieldDifference(target, field, status);
+	if (!U_FAILURE(status)) {
+		return difference;
+	}
+	throw InternalException("Unable to subtract ICU calendar part.");
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-datepart.cpp
+++ b/external/duckdb/extension/icu/icu-datepart.cpp
@@ -0,0 +1,721 @@
+#include "include/icu-datepart.hpp"
+#include "include/icu-datefunc.hpp"
+
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/common/enums/date_part_specifier.hpp"
+#include "duckdb/common/types/date.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/vector_operations/unary_executor.hpp"
+#include "duckdb/common/vector_operations/binary_executor.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+
+namespace duckdb {
+
+struct ICUDatePart : public ICUDateFunc {
+	typedef int64_t (*part_bigint_t)(icu::Calendar *calendar, const uint64_t micros);
+	typedef double (*part_double_t)(icu::Calendar *calendar, const uint64_t micros);
+
+	// Date part adapters
+	// Era field of the calendar's current time (micros is not used).
+	static int64_t ExtractEra(icu::Calendar *calendar, const uint64_t micros) {
+		const auto era = ExtractField(calendar, UCAL_ERA);
+		return era;
+	}
+
+	// Year field (UCAL_YEAR) of the calendar's current time (micros is not used).
+	static int64_t ExtractYear(icu::Calendar *calendar, const uint64_t micros) {
+		const auto year = ExtractField(calendar, UCAL_YEAR);
+		return year;
+	}
+
+	// Decade: the extracted year divided by ten (integer division).
+	static int64_t ExtractDecade(icu::Calendar *calendar, const uint64_t micros) {
+		const auto year = ExtractYear(calendar, micros);
+		return year / 10;
+	}
+
+	// Century number (years 1-100 are century 1), negated when the era field is not positive.
+	static int64_t ExtractCentury(icu::Calendar *calendar, const uint64_t micros) {
+		const auto era = ExtractEra(calendar, micros);
+		const auto year = ExtractYear(calendar, micros);
+		const auto century = ((year - 1) / 100) + 1;
+		if (era > 0) {
+			return century;
+		}
+		return -century;
+	}
+
+	// Millennium number (years 1-1000 are millennium 1), negated when the era field is not positive.
+	static int64_t ExtractMillenium(icu::Calendar *calendar, const uint64_t micros) {
+		const auto era = ExtractEra(calendar, micros);
+		const auto year = ExtractYear(calendar, micros);
+		const auto millennium = ((year - 1) / 1000) + 1;
+		if (era > 0) {
+			return millennium;
+		}
+		return -millennium;
+	}
+
+	// One-based month: the calendar's month field plus one.
+	static int64_t ExtractMonth(icu::Calendar *calendar, const uint64_t micros) {
+		const auto month_field = ExtractField(calendar, UCAL_MONTH);
+		return month_field + 1;
+	}
+
+	// One-based quarter derived from the calendar's month field.
+	static int64_t ExtractQuarter(icu::Calendar *calendar, const uint64_t micros) {
+		const auto month_field = ExtractField(calendar, UCAL_MONTH);
+		return month_field / Interval::MONTHS_PER_QUARTER + 1;
+	}
+
+	// Day of the month (UCAL_DATE) of the calendar's current time.
+	static int64_t ExtractDay(icu::Calendar *calendar, const uint64_t micros) {
+		const auto day_of_month = ExtractField(calendar, UCAL_DATE);
+		return day_of_month;
+	}
+
+	// Day of the week counted from Sunday: [Sun(0), Sat(6)]
+	static int64_t ExtractDayOfWeek(icu::Calendar *calendar, const uint64_t micros) {
+		const auto icu_dow = ExtractField(calendar, UCAL_DAY_OF_WEEK);
+		// Shift ICU's numbering so that Sunday becomes zero
+		return icu_dow - UCAL_SUNDAY;
+	}
+
+	// ISO day of the week: [Mon(1), Sun(7)]
+	static int64_t ExtractISODayOfWeek(icu::Calendar *calendar, const uint64_t micros) {
+		const auto icu_dow = ExtractField(calendar, UCAL_DAY_OF_WEEK);
+		// Days elapsed since Monday, wrapped into [0, 6]
+		const auto since_monday = (icu_dow + 7 - UCAL_MONDAY) % 7;
+		return 1 + since_monday;
+	}
+
+	// Week of the year, with the calendar configured for Monday-first weeks and
+	// at least four days in the first week. Note that this changes the calendar's week settings.
+	static int64_t ExtractWeek(icu::Calendar *calendar, const uint64_t micros) {
+		calendar->setMinimalDaysInFirstWeek(4);
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		const auto week = ExtractField(calendar, UCAL_WEEK_OF_YEAR);
+		return week;
+	}
+
+	// Week-based year (UCAL_YEAR_WOY), with the calendar configured for Monday-first weeks and
+	// at least four days in the first week. Note that this changes the calendar's week settings.
+	static int64_t ExtractISOYear(icu::Calendar *calendar, const uint64_t micros) {
+		calendar->setMinimalDaysInFirstWeek(4);
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		const auto week_year = ExtractField(calendar, UCAL_YEAR_WOY);
+		return week_year;
+	}
+
+	// Combines the week-based year and the week number into a single value: year * 100 + week.
+	// The week carries the sign of the year so both parts point the same way.
+	// Note that this changes the calendar's week settings.
+	static int64_t ExtractYearWeek(icu::Calendar *calendar, const uint64_t micros) {
+		calendar->setMinimalDaysInFirstWeek(4);
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		const auto week_year = ExtractField(calendar, UCAL_YEAR_WOY);
+		const auto week = ExtractField(calendar, UCAL_WEEK_OF_YEAR);
+		const auto signed_week = (week_year > 0) ? week : -week;
+		return week_year * 100 + signed_week;
+	}
+
+	// Day of the year (UCAL_DAY_OF_YEAR) of the calendar's current time.
+	static int64_t ExtractDayOfYear(icu::Calendar *calendar, const uint64_t micros) {
+		const auto day_of_year = ExtractField(calendar, UCAL_DAY_OF_YEAR);
+		return day_of_year;
+	}
+
+	// Hour of the day (UCAL_HOUR_OF_DAY) of the calendar's current time.
+	static int64_t ExtractHour(icu::Calendar *calendar, const uint64_t micros) {
+		const auto hour = ExtractField(calendar, UCAL_HOUR_OF_DAY);
+		return hour;
+	}
+
+	// Minute field of the calendar's current time.
+	static int64_t ExtractMinute(icu::Calendar *calendar, const uint64_t micros) {
+		const auto minute = ExtractField(calendar, UCAL_MINUTE);
+		return minute;
+	}
+
+	// Second field of the calendar's current time.
+	static int64_t ExtractSecond(icu::Calendar *calendar, const uint64_t micros) {
+		const auto second = ExtractField(calendar, UCAL_SECOND);
+		return second;
+	}
+
+	// Milliseconds within the minute: the seconds field scaled to milliseconds plus the millisecond field.
+	static int64_t ExtractMillisecond(icu::Calendar *calendar, const uint64_t micros) {
+		const auto seconds = ExtractSecond(calendar, micros);
+		const auto millis = ExtractField(calendar, UCAL_MILLISECOND);
+		return seconds * Interval::MSECS_PER_SEC + millis;
+	}
+
+	// Microseconds within the minute: ExtractMillisecond scaled to microseconds,
+	// plus the sub-millisecond remainder passed in as micros.
+	static int64_t ExtractMicrosecond(icu::Calendar *calendar, const uint64_t micros) {
+		const auto millis = ExtractMillisecond(calendar, micros);
+		return millis * Interval::MICROS_PER_MSEC + micros;
+	}
+
+	// Epoch seconds as a DOUBLE: the calendar's millisecond time scaled to seconds,
+	// plus the sub-millisecond remainder passed in as micros.
+	// Throws an InternalException if ICU cannot report the calendar's time.
+	static double ExtractEpoch(icu::Calendar *calendar, const uint64_t micros) {
+		UErrorCode status = U_ZERO_ERROR;
+		const auto millis = calendar->getTime(status);
+		// Check the status like every other extractor instead of silently using a failed result
+		if (U_FAILURE(status)) {
+			throw InternalException("Unable to extract ICU calendar time.");
+		}
+		auto result = millis / Interval::MSECS_PER_SEC;
+		result += micros / double(Interval::MICROS_PER_SEC);
+		return result;
+	}
+
+	// Total offset in seconds: the zone offset plus the DST offset, converted from milliseconds.
+	static int64_t ExtractTimezone(icu::Calendar *calendar, const uint64_t micros) {
+		const auto zone_millis = ExtractField(calendar, UCAL_ZONE_OFFSET);
+		const auto dst_millis = ExtractField(calendar, UCAL_DST_OFFSET);
+		return (zone_millis + dst_millis) / Interval::MSECS_PER_SEC;
+	}
+
+	// Whole-hour component of the offset returned by ExtractTimezone.
+	static int64_t ExtractTimezoneHour(icu::Calendar *calendar, const uint64_t micros) {
+		const auto offset_secs = ExtractTimezone(calendar, micros);
+		const auto offset_hours = offset_secs / Interval::SECS_PER_HOUR;
+		return offset_hours;
+	}
+
+	// Minute component of the offset returned by ExtractTimezone, after removing whole hours.
+	static int64_t ExtractTimezoneMinute(icu::Calendar *calendar, const uint64_t micros) {
+		const auto offset_secs = ExtractTimezone(calendar, micros);
+		const auto within_hour = offset_secs % Interval::SECS_PER_HOUR;
+		return within_hour / Interval::SECS_PER_MINUTE;
+	}
+
+	//	Julian Day as a DOUBLE: the calendar's UCAL_JULIAN_DAY field plus the fraction of the day elapsed.
+	//	PG uses doubles for JDs so we can only use them with other double types
+	static double ExtractJulianDay(icu::Calendar *calendar, const uint64_t micros) {
+		//	Whole days
+		const auto julian_day = ExtractField(calendar, UCAL_JULIAN_DAY);
+
+		//	Time of day in microseconds (ExtractMicrosecond already includes the seconds)
+		const auto hours = ExtractHour(calendar, micros);
+		const auto minutes = hours * Interval::MINS_PER_HOUR + ExtractMinute(calendar, micros);
+		const auto day_micros = minutes * Interval::MICROS_PER_MINUTE + ExtractMicrosecond(calendar, micros);
+
+		//	days + fraction
+		return double(day_micros) / Interval::MICROS_PER_DAY + julian_day;
+	}
+
+	// Maps a date part specifier to the adapter that computes it as a BIGINT.
+	// Throws an InternalException for specifiers that have no BIGINT adapter.
+	static part_bigint_t PartCodeBigintFactory(DatePartSpecifier part) {
+		switch (part) {
+		//	Calendar parts, coarsest first
+		case DatePartSpecifier::ERA:
+			return ExtractEra;
+		case DatePartSpecifier::MILLENNIUM:
+			return ExtractMillenium;
+		case DatePartSpecifier::CENTURY:
+			return ExtractCentury;
+		case DatePartSpecifier::DECADE:
+			return ExtractDecade;
+		case DatePartSpecifier::YEAR:
+			return ExtractYear;
+		case DatePartSpecifier::QUARTER:
+			return ExtractQuarter;
+		case DatePartSpecifier::MONTH:
+			return ExtractMonth;
+		case DatePartSpecifier::DAY:
+			return ExtractDay;
+		//	Week and day-numbering parts
+		case DatePartSpecifier::ISOYEAR:
+			return ExtractISOYear;
+		case DatePartSpecifier::YEARWEEK:
+			return ExtractYearWeek;
+		case DatePartSpecifier::WEEK:
+			return ExtractWeek;
+		case DatePartSpecifier::ISODOW:
+			return ExtractISODayOfWeek;
+		case DatePartSpecifier::DOW:
+			return ExtractDayOfWeek;
+		case DatePartSpecifier::DOY:
+			return ExtractDayOfYear;
+		//	Time of day parts
+		case DatePartSpecifier::HOUR:
+			return ExtractHour;
+		case DatePartSpecifier::MINUTE:
+			return ExtractMinute;
+		case DatePartSpecifier::SECOND:
+			return ExtractSecond;
+		case DatePartSpecifier::MILLISECONDS:
+			return ExtractMillisecond;
+		case DatePartSpecifier::MICROSECONDS:
+			return ExtractMicrosecond;
+		//	Time zone parts
+		case DatePartSpecifier::TIMEZONE:
+			return ExtractTimezone;
+		case DatePartSpecifier::TIMEZONE_HOUR:
+			return ExtractTimezoneHour;
+		case DatePartSpecifier::TIMEZONE_MINUTE:
+			return ExtractTimezoneMinute;
+		default:
+			throw InternalException("Unsupported ICU BIGINT extractor");
+		}
+	}
+
+	// Maps a date part specifier to the adapter that computes it as a DOUBLE.
+	// Throws an InternalException for specifiers that have no DOUBLE adapter.
+	static part_double_t PartCodeDoubleFactory(DatePartSpecifier part) {
+		switch (part) {
+		case DatePartSpecifier::JULIAN_DAY:
+			return ExtractJulianDay;
+		case DatePartSpecifier::EPOCH:
+			return ExtractEpoch;
+		default:
+			throw InternalException("Unsupported ICU DOUBLE extractor");
+		}
+	}
+
+	// Returns the date of the last day of the month that contains the calendar's current time.
+	// The calendar itself is moved to midnight on that day; micros is not used.
+	// Throws an InternalException if ICU reports a failure.
+	static date_t MakeLastDay(icu::Calendar *calendar, const uint64_t micros) {
+		// Set the calendar to midnight on the last day of the month
+		calendar->set(UCAL_MILLISECOND, 0);
+		calendar->set(UCAL_SECOND, 0);
+		calendar->set(UCAL_MINUTE, 0);
+		calendar->set(UCAL_HOUR_OF_DAY, 0);
+
+		UErrorCode status = U_ZERO_ERROR;
+		const auto dd = calendar->getActualMaximum(UCAL_DATE, status);
+		if (U_FAILURE(status)) {
+			throw InternalException("Unable to extract ICU last day.");
+		}
+
+		calendar->set(UCAL_DATE, dd);
+
+		//	Offset to UTC
+		auto millis = calendar->getTime(status);
+		// getTime recomputes the instant from the fields set above and can fail, so check it too
+		if (U_FAILURE(status)) {
+			throw InternalException("Unable to extract ICU last day.");
+		}
+		millis += ExtractField(calendar, UCAL_ZONE_OFFSET);
+		millis += ExtractField(calendar, UCAL_DST_OFFSET);
+
+		return Date::EpochToDate(millis / Interval::MSECS_PER_SEC);
+	}
+
+	// Name of the calendar's current month, looked up in Date::MONTH_NAMES.
+	// A zero-based month index of 12 is reported as "Undecimber".
+	static string_t MonthName(icu::Calendar *calendar, const uint64_t micros) {
+		const auto month_index = ExtractMonth(calendar, micros) - 1;
+		if (month_index != 12) {
+			return Date::MONTH_NAMES[month_index];
+		}
+		return "Undecimber";
+	}
+
+	// Name of the calendar's current weekday, looked up in Date::DAY_NAMES by the Sunday-based index.
+	static string_t DayName(icu::Calendar *calendar, const uint64_t micros) {
+		const auto day_index = ExtractDayOfWeek(calendar, micros);
+		return Date::DAY_NAMES[day_index];
+	}
+
+	// Bind data that carries, on top of the base BindData, the adapter function(s)
+	// used to compute a RESULT_TYPE value from a positioned calendar.
+	template <typename RESULT_TYPE>
+	struct BindAdapterData : public BindData {
+		using result_t = RESULT_TYPE;
+		using adapter_t = result_t (*)(icu::Calendar *calendar, const uint64_t micros);
+		using adapters_t = vector<adapter_t>;
+
+		// The adapters to apply; the single-adapter constructor stores exactly one entry
+		adapters_t adapters;
+
+		BindAdapterData(ClientContext &context, adapter_t adapter_p) : BindData(context), adapters(1, adapter_p) {
+		}
+		BindAdapterData(ClientContext &context, adapters_t &adapters_p) : BindData(context), adapters(adapters_p) {
+		}
+		BindAdapterData(const BindAdapterData &other) : BindData(other), adapters(other.adapters) {
+		}
+
+		// Equal when the base bind data matches and the same adapters are bound
+		bool Equals(const FunctionData &other_p) const override {
+			const auto &rhs = other_p.Cast<BindAdapterData>();
+			return BindData::Equals(other_p) && adapters == rhs.adapters;
+		}
+
+		duckdb::unique_ptr<FunctionData> Copy() const override {
+			return make_uniq<BindAdapterData>(*this);
+		}
+	};
+
+	// Scalar function body for single-argument parts: applies the bound adapter to each input.
+	// Non-finite inputs produce NULL.
+	template <typename INPUT_TYPE, typename RESULT_TYPE>
+	static void UnaryTimestampFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		using BIND_TYPE = BindAdapterData<RESULT_TYPE>;
+		D_ASSERT(args.ColumnCount() == 1);
+		auto &date_arg = args.data[0];
+
+		// Work on a private clone of the bound calendar
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BIND_TYPE>();
+		CalendarPtr calendar_ptr(info.calendar->clone());
+		auto calendar = calendar_ptr.get();
+
+		UnaryExecutor::ExecuteWithNulls<INPUT_TYPE, RESULT_TYPE>(
+		    date_arg, result, args.size(), [&](INPUT_TYPE input, ValidityMask &mask, idx_t idx) {
+			    if (!Timestamp::IsFinite(input)) {
+				    mask.SetInvalid(idx);
+				    return RESULT_TYPE();
+			    }
+			    const auto micros = SetTime(calendar, input);
+			    return info.adapters[0](calendar, micros);
+		    });
+	}
+
+	// Scalar function body for date_part(part_name, ts): the BIGINT adapter is looked up
+	// from the part name for every row. Non-finite inputs produce NULL.
+	template <typename INPUT_TYPE, typename RESULT_TYPE>
+	static void BinaryTimestampFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		using BIND_TYPE = BindAdapterData<int64_t>;
+		D_ASSERT(args.ColumnCount() == 2);
+		auto &part_arg = args.data[0];
+		auto &date_arg = args.data[1];
+
+		// Work on a private clone of the bound calendar
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BIND_TYPE>();
+		CalendarPtr calendar_ptr(info.calendar->clone());
+		auto calendar = calendar_ptr.get();
+
+		BinaryExecutor::ExecuteWithNulls<string_t, INPUT_TYPE, RESULT_TYPE>(
+		    part_arg, date_arg, result, args.size(),
+		    [&](string_t specifier, INPUT_TYPE input, ValidityMask &mask, idx_t idx) {
+			    if (!Timestamp::IsFinite(input)) {
+				    mask.SetInvalid(idx);
+				    return RESULT_TYPE(0);
+			    }
+			    const auto micros = SetTime(calendar, input);
+			    const auto part_code = GetDatePartSpecifier(specifier.GetString());
+			    auto adapter = PartCodeBigintFactory(part_code);
+			    return adapter(calendar, micros);
+		    });
+	}
+
+	// Bind data for the STRUCT form of date_part: the requested part codes plus, per column,
+	// the BIGINT or DOUBLE adapter that computes it (the other slot stays nullptr).
+	struct BindStructData : public BindData {
+		using part_codes_t = vector<DatePartSpecifier>;
+		using bigints_t = vector<part_bigint_t>;
+		using doubles_t = vector<part_double_t>;
+
+		// The part codes arrive as rvalue references, so move them in rather than copying
+		BindStructData(ClientContext &context, part_codes_t &&part_codes_p)
+		    : BindData(context), part_codes(std::move(part_codes_p)) {
+			InitFactories();
+		}
+		BindStructData(const string &tz_setting_p, const string &cal_setting_p, part_codes_t &&part_codes_p)
+		    : BindData(tz_setting_p, cal_setting_p), part_codes(std::move(part_codes_p)) {
+			InitFactories();
+		}
+		BindStructData(const BindStructData &other)
+		    : BindData(other), part_codes(other.part_codes), bigints(other.bigints), doubles(other.doubles) {
+		}
+
+		part_codes_t part_codes;
+		bigints_t bigints;
+		doubles_t doubles;
+
+		// The adapters are derived from the part codes, so comparing the codes is sufficient
+		bool Equals(const FunctionData &other_p) const override {
+			const auto &other = other_p.Cast<BindStructData>();
+			return BindData::Equals(other_p) && part_codes == other.part_codes;
+		}
+
+		duckdb::unique_ptr<FunctionData> Copy() const override {
+			return make_uniq<BindStructData>(*this);
+		}
+
+		// Rebuilds the per-column adapter tables from part_codes
+		void InitFactories() {
+			bigints.clear();
+			bigints.resize(part_codes.size(), nullptr);
+			doubles.clear();
+			doubles.resize(part_codes.size(), nullptr);
+			for (size_t col = 0; col < part_codes.size(); ++col) {
+				const auto part_code = part_codes[col];
+				if (IsBigintDatepart(part_code)) {
+					bigints[col] = PartCodeBigintFactory(part_code);
+				} else {
+					doubles[col] = PartCodeDoubleFactory(part_code);
+				}
+			}
+		}
+	};
+
+	// Scalar function body for the STRUCT form of date_part: fills one child vector per requested part.
+	// A NULL input gives a NULL struct; a non-finite input gives a valid struct whose children are all NULL.
+	template <typename INPUT_TYPE>
+	static void StructFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		// Work on a private clone of the bound calendar
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BindStructData>();
+		CalendarPtr calendar_ptr(info.calendar->clone());
+		auto calendar = calendar_ptr.get();
+
+		D_ASSERT(args.ColumnCount() == 1);
+		const auto count = args.size();
+		Vector &input = args.data[0];
+
+		if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+
+			if (ConstantVector::IsNull(input)) {
+				ConstantVector::SetNull(result, true);
+			} else {
+				ConstantVector::SetNull(result, false);
+				auto tdata = ConstantVector::GetData<INPUT_TYPE>(input);
+				const auto is_finite = Timestamp::IsFinite(*tdata);
+				// Only point the calendar at finite values, as the unary and binary functions do
+				const uint64_t micros = is_finite ? SetTime(calendar, tdata[0]) : 0;
+				auto &child_entries = StructVector::GetEntries(result);
+				for (size_t col = 0; col < child_entries.size(); ++col) {
+					auto &child_entry = child_entries[col];
+					if (is_finite) {
+						ConstantVector::SetNull(*child_entry, false);
+						if (IsBigintDatepart(info.part_codes[col])) {
+							auto pdata = ConstantVector::GetData<int64_t>(*child_entry);
+							auto adapter = info.bigints[col];
+							pdata[0] = adapter(calendar, micros);
+						} else {
+							auto pdata = ConstantVector::GetData<double>(*child_entry);
+							auto adapter = info.doubles[col];
+							pdata[0] = adapter(calendar, micros);
+						}
+					} else {
+						ConstantVector::SetNull(*child_entry, true);
+					}
+				}
+			}
+		} else {
+			UnifiedVectorFormat rdata;
+			input.ToUnifiedFormat(count, rdata);
+
+			const auto &arg_valid = rdata.validity;
+			auto tdata = UnifiedVectorFormat::GetData<INPUT_TYPE>(rdata);
+
+			result.SetVectorType(VectorType::FLAT_VECTOR);
+			auto &child_entries = StructVector::GetEntries(result);
+			for (auto &child_entry : child_entries) {
+				child_entry->SetVectorType(VectorType::FLAT_VECTOR);
+			}
+
+			auto &res_valid = FlatVector::Validity(result);
+			for (idx_t i = 0; i < count; ++i) {
+				const auto idx = rdata.sel->get_index(i);
+				if (arg_valid.RowIsValid(idx)) {
+					res_valid.SetValid(i);
+					const auto is_finite = Timestamp::IsFinite(tdata[idx]);
+					// Only point the calendar at finite values, as the unary and binary functions do
+					const uint64_t micros = is_finite ? SetTime(calendar, tdata[idx]) : 0;
+					for (size_t col = 0; col < child_entries.size(); ++col) {
+						auto &child_entry = child_entries[col];
+						if (is_finite) {
+							FlatVector::Validity(*child_entry).SetValid(i);
+							// The children were made FLAT above, so use the flat accessor
+							if (IsBigintDatepart(info.part_codes[col])) {
+								auto pdata = FlatVector::GetData<int64_t>(*child_entry);
+								auto adapter = info.bigints[col];
+								pdata[i] = adapter(calendar, micros);
+							} else {
+								auto pdata = FlatVector::GetData<double>(*child_entry);
+								auto adapter = info.doubles[col];
+								pdata[i] = adapter(calendar, micros);
+							}
+						} else {
+							FlatVector::Validity(*child_entry).SetInvalid(i);
+						}
+					}
+				} else {
+					res_valid.SetInvalid(i);
+					for (auto &child_entry : child_entries) {
+						FlatVector::Validity(*child_entry).SetInvalid(i);
+					}
+				}
+			}
+		}
+
+		result.Verify(count);
+	}
+
+	// Shared bind helper: creates BIND_TYPE bind data from the client context and a single adapter.
+	// bound_function and arguments are not used here.
+	template <typename BIND_TYPE>
+	static duckdb::unique_ptr<FunctionData> BindAdapter(ClientContext &context, ScalarFunction &bound_function,
+	                                                    vector<duckdb::unique_ptr<Expression>> &arguments,
+	                                                    typename BIND_TYPE::adapter_t adapter) {
+		return make_uniq<BIND_TYPE>(context, adapter);
+	}
+
+	// Binds a single-argument part function: the part is taken from the function's own name,
+	// and the matching BIGINT or DOUBLE adapter is stored in the bind data.
+	static duckdb::unique_ptr<FunctionData> BindUnaryDatePart(ClientContext &context, ScalarFunction &bound_function,
+	                                                          vector<duckdb::unique_ptr<Expression>> &arguments) {
+		const auto part_code = GetDatePartSpecifier(bound_function.name);
+		if (!IsBigintDatepart(part_code)) {
+			auto double_adapter = PartCodeDoubleFactory(part_code);
+			return BindAdapter<BindAdapterData<double>>(context, bound_function, arguments, double_adapter);
+		}
+
+		auto bigint_adapter = PartCodeBigintFactory(part_code);
+		return BindAdapter<BindAdapterData<int64_t>>(context, bound_function, arguments, bigint_adapter);
+	}
+
+	// Binds date_part(part_name, ts). The binary function always returns BIGINT, so when the part
+	// name is a non-NULL constant naming a DOUBLE part, the call is rewritten into the unary
+	// DOUBLE function for that part. Otherwise the adapter is resolved per row at execution time.
+	static duckdb::unique_ptr<FunctionData> BindBinaryDatePart(ClientContext &context, ScalarFunction &bound_function,
+	                                                           vector<duckdb::unique_ptr<Expression>> &arguments) {
+		//	Only constant part names can be inspected at bind time
+		if (!arguments[0]->HasParameter() && arguments[0]->IsFoldable()) {
+			Value part_value = ExpressionExecutor::EvaluateScalar(context, *arguments[0]);
+			if (!part_value.IsNull()) {
+				const auto part_name = part_value.ToString();
+				const auto part_code = GetDatePartSpecifier(part_name);
+				if (!IsBigintDatepart(part_code)) {
+					//	Drop the part argument and patch in the unary function.
+					arguments.erase(arguments.begin());
+					bound_function.arguments.erase(bound_function.arguments.begin());
+					bound_function.name = part_name;
+					bound_function.return_type = LogicalType::DOUBLE;
+					bound_function.function = UnaryTimestampFunction<timestamp_t, double>;
+
+					return BindUnaryDatePart(context, bound_function, arguments);
+				}
+			}
+		}
+
+		//	No adapter is bound here: the binary function looks it up from the part name
+		return BindAdapter<BindAdapterData<int64_t>>(context, bound_function, arguments, nullptr);
+	}
+
+	// Binds the STRUCT form of date_part: validates the constant list of part names, builds the
+	// STRUCT return type with one BIGINT or DOUBLE child per part, and removes the list argument.
+	// Throws a BinderException for non-constant, non-list, empty, NULL or duplicate part names.
+	static duckdb::unique_ptr<FunctionData> BindStruct(ClientContext &context, ScalarFunction &bound_function,
+	                                                   vector<duckdb::unique_ptr<Expression>> &arguments) {
+		if (arguments[0]->HasParameter()) {
+			throw ParameterNotResolvedException();
+		}
+		if (!arguments[0]->IsFoldable()) {
+			throw BinderException("%s can only take constant lists of part names", bound_function.name);
+		}
+
+		Value parts_list = ExpressionExecutor::EvaluateScalar(context, *arguments[0]);
+		if (parts_list.type().id() != LogicalTypeId::LIST) {
+			throw BinderException("%s can only take constant lists of part names", bound_function.name);
+		}
+
+		auto &list_children = ListValue::GetChildren(parts_list);
+		if (list_children.empty()) {
+			throw BinderException("%s requires non-empty lists of part names", bound_function.name);
+		}
+
+		// collect names and deconflict, construct return type
+		case_insensitive_set_t seen_names;
+		child_list_t<LogicalType> struct_children;
+		BindStructData::part_codes_t part_codes;
+		for (const auto &part_value : list_children) {
+			if (part_value.IsNull()) {
+				throw BinderException("NULL struct entry name in %s", bound_function.name);
+			}
+			const auto part_name = part_value.ToString();
+			const auto part_code = GetDatePartSpecifier(part_name);
+			if (seen_names.find(part_name) != seen_names.end()) {
+				throw BinderException("Duplicate struct entry name \"%s\" in %s", part_name, bound_function.name);
+			}
+			seen_names.insert(part_name);
+			part_codes.emplace_back(part_code);
+			if (IsBigintDatepart(part_code)) {
+				struct_children.emplace_back(make_pair(part_name, LogicalType::BIGINT));
+			} else {
+				struct_children.emplace_back(make_pair(part_name, LogicalType::DOUBLE));
+			}
+		}
+
+		Function::EraseArgument(bound_function, arguments, 0);
+		bound_function.return_type = LogicalType::STRUCT(std::move(struct_children));
+		return make_uniq<BindStructData>(context, std::move(part_codes));
+	}
+
+	// Writes the struct bind data as three properties: time zone setting, calendar setting and part codes.
+	// The adapter tables are not written.
+	static void SerializeStructFunction(Serializer &serializer, const optional_ptr<FunctionData> bind_data,
+	                                    const ScalarFunction &function) {
+		D_ASSERT(bind_data);
+		auto &struct_info = bind_data->Cast<BindStructData>();
+		serializer.WriteProperty(100, "tz_setting", struct_info.tz_setting);
+		serializer.WriteProperty(101, "cal_setting", struct_info.cal_setting);
+		serializer.WriteProperty(102, "part_codes", struct_info.part_codes);
+	}
+
+	// Reads back the three properties written by SerializeStructFunction and rebuilds the bind data;
+	// the BindStructData constructor recreates the adapter tables from the part codes.
+	static duckdb::unique_ptr<FunctionData> DeserializeStructFunction(Deserializer &deserializer,
+	                                                                  ScalarFunction &bound_function) {
+		auto tz_name = deserializer.ReadProperty<string>(100, "tz_setting");
+		auto cal_name = deserializer.ReadProperty<string>(101, "cal_setting");
+		auto codes = deserializer.ReadProperty<vector<DatePartSpecifier>>(102, "part_codes");
+		return make_uniq<BindStructData>(tz_name, cal_name, std::move(codes));
+	}
+
+	// Builds the single-argument part function for the given temporal type and result type.
+	template <typename INPUT_TYPE, typename RESULT_TYPE>
+	static ScalarFunction GetUnaryPartCodeFunction(const LogicalType &temporal_type,
+	                                               const LogicalType &result_type = LogicalType::BIGINT) {
+		ScalarFunction part_function({temporal_type}, result_type, UnaryTimestampFunction<INPUT_TYPE, RESULT_TYPE>,
+		                             BindUnaryDatePart);
+		return part_function;
+	}
+
+	// Registers the single-argument part function `name` for TIMESTAMP WITH TIME ZONE inputs.
+	template <typename RESULT_TYPE = int64_t>
+	static void AddUnaryPartCodeFunctions(const string &name, ExtensionLoader &loader,
+	                                      const LogicalType &result_type = LogicalType::BIGINT) {
+		ScalarFunctionSet part_set(name);
+		auto tz_function = GetUnaryPartCodeFunction<timestamp_t, RESULT_TYPE>(LogicalType::TIMESTAMP_TZ, result_type);
+		part_set.AddFunction(tz_function);
+		loader.RegisterFunction(part_set);
+	}
+
+	// Builds the (part name, temporal value) form of date_part, which is declared to return BIGINT.
+	template <typename INPUT_TYPE, typename RESULT_TYPE>
+	static ScalarFunction GetBinaryPartCodeFunction(const LogicalType &temporal_type) {
+		ScalarFunction part_function({LogicalType::VARCHAR, temporal_type}, LogicalType::BIGINT,
+		                             BinaryTimestampFunction<INPUT_TYPE, RESULT_TYPE>, BindBinaryDatePart);
+		return part_function;
+	}
+
+	// Builds the (list of part names, temporal value) form of date_part, including its
+	// serialization callbacks. The empty STRUCT return type is replaced by BindStruct.
+	template <typename INPUT_TYPE>
+	static ScalarFunction GetStructFunction(const LogicalType &temporal_type) {
+		ScalarFunction result({LogicalType::LIST(LogicalType::VARCHAR), temporal_type}, LogicalType::STRUCT({}),
+		                      StructFunction<INPUT_TYPE>, BindStruct);
+		result.deserialize = DeserializeStructFunction;
+		result.serialize = SerializeStructFunction;
+		return result;
+	}
+
+	// Registers both date_part overloads (single part name and list of part names) under `name`
+	// for TIMESTAMP WITH TIME ZONE inputs, after passing each through SetReturnsError.
+	static void AddDatePartFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet date_part_set(name);
+		date_part_set.AddFunction(GetBinaryPartCodeFunction<timestamp_t, int64_t>(LogicalType::TIMESTAMP_TZ));
+		date_part_set.AddFunction(GetStructFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ));
+		for (auto &overload : date_part_set.functions) {
+			BaseScalarFunction::SetReturnsError(overload);
+		}
+		loader.RegisterFunction(date_part_set);
+	}
+
+	// Binds last_day: stores MakeLastDay as the DATE-valued adapter.
+	static duckdb::unique_ptr<FunctionData> BindLastDate(ClientContext &context, ScalarFunction &bound_function,
+	                                                     vector<duckdb::unique_ptr<Expression>> &arguments) {
+		return BindAdapter<BindAdapterData<date_t>>(context, bound_function, arguments, MakeLastDay);
+	}
+
+	// Builds the last_day function for the given temporal type; it returns a DATE.
+	template <typename INPUT_TYPE>
+	static ScalarFunction GetLastDayFunction(const LogicalType &temporal_type) {
+		ScalarFunction last_day({temporal_type}, LogicalType::DATE, UnaryTimestampFunction<INPUT_TYPE, date_t>,
+		                        BindLastDate);
+		return last_day;
+	}
+	// Registers the last_day function under `name` for TIMESTAMP WITH TIME ZONE inputs.
+	static void AddLastDayFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet last_day_set(name);
+		auto tz_function = GetLastDayFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ);
+		last_day_set.AddFunction(tz_function);
+		loader.RegisterFunction(last_day_set);
+	}
+
+	// Binds monthname: stores MonthName as the VARCHAR-valued adapter.
+	static unique_ptr<FunctionData> BindMonthName(ClientContext &context, ScalarFunction &bound_function,
+	                                              vector<unique_ptr<Expression>> &arguments) {
+		return BindAdapter<BindAdapterData<string_t>>(context, bound_function, arguments, MonthName);
+	}
+
+	// Builds the monthname function for the given temporal type; it returns a VARCHAR.
+	template <typename INPUT_TYPE>
+	static ScalarFunction GetMonthNameFunction(const LogicalType &temporal_type) {
+		ScalarFunction month_name({temporal_type}, LogicalType::VARCHAR, UnaryTimestampFunction<INPUT_TYPE, string_t>,
+		                          BindMonthName);
+		return month_name;
+	}
+	// Registers the monthname function under `name` for TIMESTAMP WITH TIME ZONE inputs.
+	static void AddMonthNameFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet month_name_set(name);
+		auto tz_function = GetMonthNameFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ);
+		month_name_set.AddFunction(tz_function);
+		loader.RegisterFunction(month_name_set);
+	}
+
+	// Binds dayname: stores DayName as the VARCHAR-valued adapter.
+	static unique_ptr<FunctionData> BindDayName(ClientContext &context, ScalarFunction &bound_function,
+	                                            vector<unique_ptr<Expression>> &arguments) {
+		return BindAdapter<BindAdapterData<string_t>>(context, bound_function, arguments, DayName);
+	}
+
+	// Builds the dayname function for the given temporal type; it returns a VARCHAR.
+	template <typename INPUT_TYPE>
+	static ScalarFunction GetDayNameFunction(const LogicalType &temporal_type) {
+		ScalarFunction day_name({temporal_type}, LogicalType::VARCHAR, UnaryTimestampFunction<INPUT_TYPE, string_t>,
+		                        BindDayName);
+		return day_name;
+	}
+	// Registers the dayname function under `name` for TIMESTAMP WITH TIME ZONE inputs.
+	static void AddDayNameFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet day_name_set(name);
+		auto tz_function = GetDayNameFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ);
+		day_name_set.AddFunction(tz_function);
+		loader.RegisterFunction(day_name_set);
+	}
+};
+
+// Registers every ICU date part function with the extension loader: the single-part functions,
+// last_day, monthname/dayname, and finally date_part/datepart themselves.
+void RegisterICUDatePartFunctions(ExtensionLoader &loader) {
+	// register the individual operators
+
+	//	BIGINTs
+	//	Note that "week" is ISO-8601, not US
+	static const char *const BIGINT_PARTS[] = {
+	    "era",         "year",        "month",    "day",           "decade",         "century", "millennium",
+	    "microsecond", "millisecond", "second",   "minute",        "hour",           "dayofweek", "isodow",
+	    "week",        "dayofyear",   "quarter",  "isoyear",       "timezone",       "timezone_hour",
+	    "timezone_minute"};
+	for (const auto &part_name : BIGINT_PARTS) {
+		ICUDatePart::AddUnaryPartCodeFunctions(part_name, loader);
+	}
+
+	//	DOUBLEs
+	static const char *const DOUBLE_PARTS[] = {"epoch", "julian"};
+	for (const auto &part_name : DOUBLE_PARTS) {
+		ICUDatePart::AddUnaryPartCodeFunctions<double>(part_name, loader, LogicalType::DOUBLE);
+	}
+
+	//  register combinations (yearweek is ISO year and week), then the various aliases
+	static const char *const COMBINED_AND_ALIASES[] = {"yearweek", "dayofmonth", "weekday", "weekofyear"};
+	for (const auto &part_name : COMBINED_AND_ALIASES) {
+		ICUDatePart::AddUnaryPartCodeFunctions(part_name, loader);
+	}
+
+	//  register the last_day function
+	ICUDatePart::AddLastDayFunctions("last_day", loader);
+
+	// register the dayname/monthname functions
+	ICUDatePart::AddMonthNameFunctions("monthname", loader);
+	ICUDatePart::AddDayNameFunctions("dayname", loader);
+
+	// finally the actual date_part function, under both spellings
+	static const char *const DATE_PART_NAMES[] = {"date_part", "datepart"};
+	for (const auto &function_name : DATE_PART_NAMES) {
+		ICUDatePart::AddDatePartFunctions(function_name, loader);
+	}
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-datesub.cpp
+++ b/external/duckdb/extension/icu/icu-datesub.cpp
@@ -0,0 +1,293 @@
+#include "include/icu-datesub.hpp"
+#include "include/icu-datefunc.hpp"
+
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/common/enums/date_part_specifier.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+
+namespace duckdb {
+
+struct ICUCalendarSub : public ICUDateFunc {
+	//	Part-wise differences (end_date - start_date) between two instants, as used by date_sub/datesub.
+	//	ICU only has 32 bit precision for date parts, so a high resolution difference can overflow it.
+	//	ICU and the obvious arithmetic agree for the sub-hour parts, so those are computed
+	//	directly from the microsecond values of the DuckDB timestamps.
+	static int64_t SubtractMicrosecond(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return end_date.value - start_date.value;
+	}
+
+	static int64_t SubtractMillisecond(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return (end_date.value - start_date.value) / Interval::MICROS_PER_MSEC;
+	}
+
+	static int64_t SubtractSecond(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return (end_date.value - start_date.value) / Interval::MICROS_PER_SEC;
+	}
+
+	static int64_t SubtractMinute(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return (end_date.value - start_date.value) / Interval::MICROS_PER_MINUTE;
+	}
+
+	//	Anchor the calendar at start_date, then let SubtractField measure `field` up to end_date.
+	static int64_t FieldDelta(icu::Calendar *calendar, UCalendarDateFields field, timestamp_t start_date,
+	                          timestamp_t end_date) {
+		SetTime(calendar, start_date);
+		return SubtractField(calendar, field, end_date);
+	}
+
+	static int64_t SubtractHour(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return FieldDelta(calendar, UCAL_HOUR_OF_DAY, start_date, end_date);
+	}
+
+	static int64_t SubtractDay(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return FieldDelta(calendar, UCAL_DATE, start_date, end_date);
+	}
+
+	static int64_t SubtractWeek(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		calendar->setMinimalDaysInFirstWeek(4);
+		return FieldDelta(calendar, UCAL_WEEK_OF_YEAR, start_date, end_date);
+	}
+
+	static int64_t SubtractMonth(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return FieldDelta(calendar, UCAL_MONTH, start_date, end_date);
+	}
+
+	static int64_t SubtractQuarter(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		//	ICU has no quarter field, so quarters are derived from the month difference.
+		//	This will not work for lunar calendars!
+		return FieldDelta(calendar, UCAL_MONTH, start_date, end_date) / 3;
+	}
+
+	static int64_t SubtractYear(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return FieldDelta(calendar, UCAL_YEAR, start_date, end_date);
+	}
+
+	static int64_t SubtractISOYear(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		calendar->setMinimalDaysInFirstWeek(4);
+		return FieldDelta(calendar, UCAL_YEAR_WOY, start_date, end_date);
+	}
+
+	static int64_t SubtractDecade(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		//	ICU has no decade field, so decades are derived from the year difference.
+		return FieldDelta(calendar, UCAL_YEAR, start_date, end_date) / 10;
+	}
+
+	static int64_t SubtractCentury(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		//	ICU has no century field, so centuries are derived from the year difference.
+		return FieldDelta(calendar, UCAL_YEAR, start_date, end_date) / 100;
+	}
+
+	static int64_t SubtractMillenium(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		//	ICU has no millennium field, so millennia are derived from the year difference.
+		return FieldDelta(calendar, UCAL_YEAR, start_date, end_date) / 1000;
+	}
+
+	static int64_t SubtractEra(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date) {
+		return FieldDelta(calendar, UCAL_ERA, start_date, end_date);
+	}
+
+	template <typename T>
+	static void ICUDateSubFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+		auto &part_arg = args.data[0];
+		auto &startdate_arg = args.data[1];
+		auto &enddate_arg = args.data[2];
+
+		//	Every invocation works on its own clone of the bound calendar.
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		CalendarPtr calendar(bind_data.calendar->clone());
+
+		if (part_arg.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			//	The part varies by row: look the routine up per finite pair, infinite endpoints give NULL.
+			TernaryExecutor::ExecuteWithNulls<string_t, T, T, int64_t>(
+			    part_arg, startdate_arg, enddate_arg, result, args.size(),
+			    [&](string_t specifier, T start_date, T end_date, ValidityMask &mask, idx_t idx) {
+				    if (!Timestamp::IsFinite(start_date) || !Timestamp::IsFinite(end_date)) {
+					    mask.SetInvalid(idx);
+					    return int64_t(0);
+				    }
+				    const auto part_func = SubtractFactory(GetDatePartSpecifier(specifier.GetString()));
+				    return part_func(calendar.get(), start_date, end_date);
+			    });
+			return;
+		}
+		if (ConstantVector::IsNull(part_arg)) {
+			//	A NULL part yields a constant NULL result.
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+		//	Common case of a constant part: resolve the routine once for the whole chunk.
+		const auto part_name = ConstantVector::GetData<string_t>(part_arg)->GetString();
+		const auto part_func = SubtractFactory(GetDatePartSpecifier(part_name));
+		BinaryExecutor::ExecuteWithNulls<T, T, int64_t>(
+		    startdate_arg, enddate_arg, result, args.size(),
+		    [&](T start_date, T end_date, ValidityMask &mask, idx_t idx) {
+			    if (!Timestamp::IsFinite(start_date) || !Timestamp::IsFinite(end_date)) {
+				    mask.SetInvalid(idx);
+				    return int64_t(0);
+			    }
+			    return part_func(calendar.get(), start_date, end_date);
+		    });
+	}
+	//	Builds the (part VARCHAR, start, end) -> BIGINT overload for the given argument type.
+	template <typename TA>
+	static ScalarFunction GetFunction(const LogicalTypeId &type) {
+		return ScalarFunction({LogicalType::VARCHAR, type, type}, LogicalType::BIGINT, ICUDateSubFunction<TA>, Bind);
+	}
+	//	Registers `name` with a single TIMESTAMPTZ overload.
+	static void AddFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(GetFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(overloads);
+	}
+};
+
+ICUDateFunc::part_sub_t ICUDateFunc::SubtractFactory(DatePartSpecifier type) {
+	switch (type) { //	ordered from the finest to the coarsest part
+	case DatePartSpecifier::MICROSECONDS:
+		return ICUCalendarSub::SubtractMicrosecond;
+	case DatePartSpecifier::MILLISECONDS:
+		return ICUCalendarSub::SubtractMillisecond;
+	case DatePartSpecifier::SECOND:
+	case DatePartSpecifier::EPOCH: //	epoch differences are counted in seconds
+		return ICUCalendarSub::SubtractSecond;
+	case DatePartSpecifier::MINUTE:
+		return ICUCalendarSub::SubtractMinute;
+	case DatePartSpecifier::HOUR:
+		return ICUCalendarSub::SubtractHour;
+	case DatePartSpecifier::DAY:
+	case DatePartSpecifier::DOW: //	every day-numbering part is counted in days
+	case DatePartSpecifier::ISODOW:
+	case DatePartSpecifier::DOY:
+	case DatePartSpecifier::JULIAN_DAY:
+		return ICUCalendarSub::SubtractDay;
+	case DatePartSpecifier::WEEK:
+	case DatePartSpecifier::YEARWEEK: //	counted in weeks as well
+		return ICUCalendarSub::SubtractWeek;
+	case DatePartSpecifier::MONTH:
+		return ICUCalendarSub::SubtractMonth;
+	case DatePartSpecifier::QUARTER:
+		return ICUCalendarSub::SubtractQuarter;
+	case DatePartSpecifier::YEAR:
+		return ICUCalendarSub::SubtractYear;
+	case DatePartSpecifier::ISOYEAR:
+		return ICUCalendarSub::SubtractISOYear;
+	case DatePartSpecifier::DECADE:
+		return ICUCalendarSub::SubtractDecade;
+	case DatePartSpecifier::CENTURY:
+		return ICUCalendarSub::SubtractCentury;
+	case DatePartSpecifier::MILLENNIUM:
+		return ICUCalendarSub::SubtractMillenium;
+	case DatePartSpecifier::ERA:
+		return ICUCalendarSub::SubtractEra;
+	default: //	any other specifier has no ICU subtraction
+		throw NotImplementedException("Specifier type not implemented for ICU subtraction");
+	}
+}
+
+// MS-SQL differences can be computed using ICU by truncating both arguments
+// to the desired part precision and then applying ICU subtraction/difference
+struct ICUCalendarDiff : public ICUDateFunc {
+
+	template <typename T>
+	static int64_t DifferenceFunc(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date,
+	                              part_trunc_t trunc_func, part_sub_t sub_func) {
+		//	Truncate an instant to its part boundary. This is safe because we will stay in range.
+		const auto truncate = [&](timestamp_t instant) {
+			auto micros = SetTime(calendar, instant);
+			trunc_func(calendar, micros);
+			return GetTimeUnsafe(calendar, micros);
+		};
+		const auto truncated_start = truncate(start_date);
+		const auto truncated_end = truncate(end_date);
+
+		//	The truncated instants are then handed to the ICU subtraction.
+		return sub_func(calendar, truncated_start, truncated_end);
+	}
+
+	static part_trunc_t DiffTruncationFactory(DatePartSpecifier type) {
+		//	Weeks are computed without anchors, so for WEEK both arguments
+		//	are only truncated to the day (not to the start of their week).
+		if (type == DatePartSpecifier::WEEK) {
+			return TruncationFactory(DatePartSpecifier::DAY);
+		}
+
+		//	Every other part uses its regular truncation.
+		return TruncationFactory(type);
+	}
+
+	template <typename T>
+	static void ICUDateDiffFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+		auto &part_arg = args.data[0];
+		auto &startdate_arg = args.data[1];
+		auto &enddate_arg = args.data[2];
+
+		//	Every invocation works on its own clone of the bound calendar.
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		CalendarPtr calendar_ptr(bind_data.calendar->clone());
+		const auto calendar = calendar_ptr.get();
+
+		if (part_arg.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			//	The part varies by row: look both routines up per finite pair, infinite endpoints give NULL.
+			TernaryExecutor::ExecuteWithNulls<string_t, T, T, int64_t>(
+			    part_arg, startdate_arg, enddate_arg, result, args.size(),
+			    [&](string_t specifier, T start_date, T end_date, ValidityMask &mask, idx_t idx) {
+				    if (!Timestamp::IsFinite(start_date) || !Timestamp::IsFinite(end_date)) {
+					    mask.SetInvalid(idx);
+					    return int64_t(0);
+				    }
+				    const auto part = GetDatePartSpecifier(specifier.GetString());
+				    const auto trunc_func = DiffTruncationFactory(part);
+				    const auto sub_func = SubtractFactory(part);
+				    return DifferenceFunc<T>(calendar, start_date, end_date, trunc_func, sub_func);
+			    });
+			return;
+		}
+		if (ConstantVector::IsNull(part_arg)) {
+			//	A NULL part yields a constant NULL result.
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		//	Common case of a constant part: resolve both routines once for the whole chunk.
+		const auto part = GetDatePartSpecifier(ConstantVector::GetData<string_t>(part_arg)->GetString());
+		const auto trunc_func = DiffTruncationFactory(part);
+		const auto sub_func = SubtractFactory(part);
+		BinaryExecutor::ExecuteWithNulls<T, T, int64_t>(
+		    startdate_arg, enddate_arg, result, args.size(),
+		    [&](T start_date, T end_date, ValidityMask &mask, idx_t idx) {
+			    if (!Timestamp::IsFinite(start_date) || !Timestamp::IsFinite(end_date)) {
+				    mask.SetInvalid(idx);
+				    return int64_t(0);
+			    }
+			    return DifferenceFunc<T>(calendar, start_date, end_date, trunc_func, sub_func);
+		    });
+	}
+	//	Builds the (part VARCHAR, start, end) -> BIGINT overload for the given argument type.
+	template <typename TA>
+	static ScalarFunction GetFunction(const LogicalTypeId &type) {
+		return ScalarFunction({LogicalType::VARCHAR, type, type}, LogicalType::BIGINT, ICUDateDiffFunction<TA>, Bind);
+	}
+	//	Registers `name` with a single TIMESTAMPTZ overload.
+	static void AddFunctions(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(GetFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(overloads);
+	}
+};
+
+void RegisterICUDateSubFunctions(ExtensionLoader &loader) {
+	ICUCalendarSub::AddFunctions("date_sub", loader); //	part-wise subtraction of two TIMESTAMPTZ values
+	ICUCalendarSub::AddFunctions("datesub", loader);  //	same function under the alias spelling
+	//	date_diff truncates both arguments to the part precision before subtracting (see ICUCalendarDiff).
+	ICUCalendarDiff::AddFunctions("date_diff", loader);
+	ICUCalendarDiff::AddFunctions("datediff", loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-datetrunc.cpp
+++ b/external/duckdb/extension/icu/icu-datetrunc.cpp
@@ -0,0 +1,239 @@
+#include "include/icu-datetrunc.hpp"
+#include "include/icu-datefunc.hpp"
+
+#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/vector_operations/binary_executor.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+
+namespace duckdb {
+
+struct ICUDateTrunc : public ICUDateFunc { //	calendar truncators behind date_trunc (also used by date_diff)
+	static void PreserveOffsets(icu::Calendar *calendar) {
+		//	Pins the current zone and DST offsets by reading both fields and setting them back explicitly.
+		//	We have to extract _everything_ before setting anything,
+		//	otherwise ICU will clear the fStamp fields.
+		//	This also means this method must be called before a truncator sets any other field.
+		//	Force reuse of offsets when reassembling truncated sub-hour times.
+		const auto zone_offset = ExtractField(calendar, UCAL_ZONE_OFFSET);
+		const auto dst_offset = ExtractField(calendar, UCAL_DST_OFFSET);
+
+		calendar->set(UCAL_ZONE_OFFSET, zone_offset);
+		calendar->set(UCAL_DST_OFFSET, dst_offset);
+	}
+	//	Truncating to microseconds changes nothing; this is the bottom of the chain of *Internal helpers.
+	static void TruncMicrosecondInternal(icu::Calendar *calendar, uint64_t &micros) {
+	}
+	//	The public sub-hour truncators (microsecond to minute) pin the offsets first, then run their *Internal part.
+	static void TruncMicrosecond(icu::Calendar *calendar, uint64_t &micros) {
+		PreserveOffsets(calendar);
+		TruncMicrosecondInternal(calendar, micros);
+	}
+	//	`micros` is the value SetTime returned to the caller (presumably the sub-millisecond rest - confirm).
+	static void TruncMillisecondInternal(icu::Calendar *calendar, uint64_t &micros) {
+		TruncMicrosecondInternal(calendar, micros);
+		micros = 0;
+	}
+
+	static void TruncMillisecond(icu::Calendar *calendar, uint64_t &micros) {
+		PreserveOffsets(calendar);
+		TruncMillisecondInternal(calendar, micros);
+	}
+	//	From here on each truncator first runs the next finer one, then resets one more calendar field.
+	static void TruncSecondInternal(icu::Calendar *calendar, uint64_t &micros) {
+		TruncMillisecondInternal(calendar, micros);
+		calendar->set(UCAL_MILLISECOND, 0);
+	}
+
+	static void TruncSecond(icu::Calendar *calendar, uint64_t &micros) {
+		PreserveOffsets(calendar);
+		TruncSecondInternal(calendar, micros);
+	}
+
+	static void TruncMinuteInternal(icu::Calendar *calendar, uint64_t &micros) {
+		TruncSecondInternal(calendar, micros);
+		calendar->set(UCAL_SECOND, 0);
+	}
+
+	static void TruncMinute(icu::Calendar *calendar, uint64_t &micros) {
+		PreserveOffsets(calendar);
+		TruncMinuteInternal(calendar, micros);
+	}
+	//	Unlike the sub-hour truncators, TruncHour and the coarser truncators built on it never call PreserveOffsets.
+	static void TruncHour(icu::Calendar *calendar, uint64_t &micros) {
+		TruncMinuteInternal(calendar, micros);
+		calendar->set(UCAL_MINUTE, 0);
+	}
+
+	static void TruncDay(icu::Calendar *calendar, uint64_t &micros) {
+		TruncHour(calendar, micros);
+		calendar->set(UCAL_HOUR_OF_DAY, 0);
+	}
+	//	Weeks start on Monday and the first week of a year needs at least 4 days; truncation lands on Monday.
+	static void TruncWeek(icu::Calendar *calendar, uint64_t &micros) {
+		calendar->setFirstDayOfWeek(UCAL_MONDAY);
+		calendar->setMinimalDaysInFirstWeek(4);
+		TruncDay(calendar, micros);
+		calendar->set(UCAL_DAY_OF_WEEK, UCAL_MONDAY);
+	}
+
+	static void TruncMonth(icu::Calendar *calendar, uint64_t &micros) {
+		TruncDay(calendar, micros);
+		calendar->set(UCAL_DATE, 1);
+	}
+	//	(mm / 3) * 3 rounds the extracted UCAL_MONTH value down to a multiple of three.
+	static void TruncQuarter(icu::Calendar *calendar, uint64_t &micros) {
+		TruncMonth(calendar, micros);
+		auto mm = ExtractField(calendar, UCAL_MONTH);
+		calendar->set(UCAL_MONTH, (mm / 3) * 3);
+	}
+
+	static void TruncYear(icu::Calendar *calendar, uint64_t &micros) {
+		TruncMonth(calendar, micros);
+		calendar->set(UCAL_MONTH, UCAL_JANUARY);
+	}
+	//	Truncates to the Monday of the current week, then selects week 1 via UCAL_WEEK_OF_YEAR.
+	static void TruncISOYear(icu::Calendar *calendar, uint64_t &micros) {
+		TruncWeek(calendar, micros);
+		calendar->set(UCAL_WEEK_OF_YEAR, 1);
+	}
+	//	Decade, century and millennium truncate to the year, then integer-divide UCAL_YEAR to a multiple of 10/100/1000.
+	static void TruncDecade(icu::Calendar *calendar, uint64_t &micros) {
+		TruncYear(calendar, micros);
+		auto yyyy = ExtractField(calendar, UCAL_YEAR) / 10;
+		calendar->set(UCAL_YEAR, yyyy * 10);
+	}
+
+	static void TruncCentury(icu::Calendar *calendar, uint64_t &micros) {
+		TruncYear(calendar, micros);
+		auto yyyy = ExtractField(calendar, UCAL_YEAR) / 100;
+		calendar->set(UCAL_YEAR, yyyy * 100);
+	}
+
+	static void TruncMillenium(icu::Calendar *calendar, uint64_t &micros) {
+		TruncYear(calendar, micros);
+		auto yyyy = ExtractField(calendar, UCAL_YEAR) / 1000;
+		calendar->set(UCAL_YEAR, yyyy * 1000);
+	}
+	//	Reads the era before touching the year, then sets UCAL_YEAR to 0 and writes the era back.
+	static void TruncEra(icu::Calendar *calendar, uint64_t &micros) {
+		TruncYear(calendar, micros);
+		auto era = ExtractField(calendar, UCAL_ERA);
+		calendar->set(UCAL_YEAR, 0);
+		calendar->set(UCAL_ERA, era);
+	}
+	//	Executor for date_trunc(part, ts): finite inputs are truncated, non-finite inputs are returned unchanged.
+	template <typename T>
+	static void ICUDateTruncFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+		auto &part_arg = args.data[0];
+		auto &date_arg = args.data[1];
+
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BindData>();
+		CalendarPtr calendar(info.calendar->clone()); //	work on a clone of the bound calendar
+		//	A constant part is resolved to its truncator once, a variable part once per finite row.
+		if (part_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+			// Common case of constant part.
+			if (ConstantVector::IsNull(part_arg)) {
+				result.SetVectorType(VectorType::CONSTANT_VECTOR);
+				ConstantVector::SetNull(result, true);
+			} else {
+				const auto specifier = ConstantVector::GetData<string_t>(part_arg)->GetString();
+				auto truncator = TruncationFactory(GetDatePartSpecifier(specifier));
+				UnaryExecutor::Execute<T, timestamp_t>(date_arg, result, args.size(), [&](T input) {
+					if (Timestamp::IsFinite(input)) {
+						auto micros = SetTime(calendar.get(), input); //	load the instant, keep SetTime's remainder
+						truncator(calendar.get(), micros);
+						return GetTimeUnsafe(calendar.get(), micros);
+					} else {
+						return input;
+					}
+				});
+			}
+		} else {
+			BinaryExecutor::Execute<string_t, T, timestamp_t>(
+			    part_arg, date_arg, result, args.size(), [&](string_t specifier, T input) {
+				    if (Timestamp::IsFinite(input)) {
+					    auto truncator = TruncationFactory(GetDatePartSpecifier(specifier.GetString()));
+					    auto micros = SetTime(calendar.get(), input);
+					    truncator(calendar.get(), micros);
+					    return GetTimeUnsafe(calendar.get(), micros);
+				    } else {
+					    return input;
+				    }
+			    });
+		}
+	}
+	//	Builds the (part VARCHAR, ts) -> TIMESTAMPTZ overload for the given argument type.
+	template <typename TA>
+	static ScalarFunction GetDateTruncFunction(const LogicalTypeId &type) {
+		return ScalarFunction({LogicalType::VARCHAR, type}, LogicalType::TIMESTAMP_TZ, ICUDateTruncFunction<TA>, Bind);
+	}
+	//	Registers `name` with a single TIMESTAMPTZ overload.
+	static void AddBinaryTimestampFunction(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet set(name);
+		set.AddFunction(GetDateTruncFunction<timestamp_t>(LogicalType::TIMESTAMP_TZ));
+		loader.RegisterFunction(set);
+	}
+};
+
+ICUDateFunc::part_trunc_t ICUDateFunc::TruncationFactory(DatePartSpecifier type) {
+	switch (type) { //	ordered from the finest to the coarsest part
+	case DatePartSpecifier::MICROSECONDS:
+		return ICUDateTrunc::TruncMicrosecond;
+	case DatePartSpecifier::MILLISECONDS:
+		return ICUDateTrunc::TruncMillisecond;
+	case DatePartSpecifier::SECOND:
+	case DatePartSpecifier::EPOCH: //	epoch truncates to the second
+		return ICUDateTrunc::TruncSecond;
+	case DatePartSpecifier::MINUTE:
+		return ICUDateTrunc::TruncMinute;
+	case DatePartSpecifier::HOUR:
+		return ICUDateTrunc::TruncHour;
+	case DatePartSpecifier::DAY:
+	case DatePartSpecifier::DOW: //	every day-numbering part truncates to the day
+	case DatePartSpecifier::ISODOW:
+	case DatePartSpecifier::DOY:
+	case DatePartSpecifier::JULIAN_DAY:
+		return ICUDateTrunc::TruncDay;
+	case DatePartSpecifier::WEEK:
+	case DatePartSpecifier::YEARWEEK: //	truncates to the week as well
+		return ICUDateTrunc::TruncWeek;
+	case DatePartSpecifier::MONTH:
+		return ICUDateTrunc::TruncMonth;
+	case DatePartSpecifier::QUARTER:
+		return ICUDateTrunc::TruncQuarter;
+	case DatePartSpecifier::YEAR:
+		return ICUDateTrunc::TruncYear;
+	case DatePartSpecifier::ISOYEAR:
+		return ICUDateTrunc::TruncISOYear;
+	case DatePartSpecifier::DECADE:
+		return ICUDateTrunc::TruncDecade;
+	case DatePartSpecifier::CENTURY:
+		return ICUDateTrunc::TruncCentury;
+	case DatePartSpecifier::MILLENNIUM:
+		return ICUDateTrunc::TruncMillenium;
+	case DatePartSpecifier::ERA:
+		return ICUDateTrunc::TruncEra;
+	default: //	any other specifier has no ICU truncation
+		throw NotImplementedException("Specifier type not implemented for ICU DATETRUNC");
+	}
+}
+
+timestamp_t ICUDateFunc::CurrentMidnight(icu::Calendar *calendar, ExpressionState &state) {
+	//	Load the start timestamp of the current transaction, then truncate it to the day on `calendar`.
+	auto micros = SetTime(calendar, MetaTransaction::Get(state.GetContext()).start_timestamp);
+	ICUDateTrunc::TruncDay(calendar, micros);
+	return GetTime(calendar);
+}
+
+void RegisterICUDateTruncFunctions(ExtensionLoader &loader) {
+	ICUDateTrunc::AddBinaryTimestampFunction("date_trunc", loader); //	(VARCHAR, TIMESTAMPTZ) -> TIMESTAMPTZ
+	ICUDateTrunc::AddBinaryTimestampFunction("datetrunc", loader);  //	same overload under the alias spelling
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-list-range.cpp
+++ b/external/duckdb/extension/icu/icu-list-range.cpp
@@ -0,0 +1,205 @@
+#include "duckdb/common/exception.hpp"
+#include "duckdb/common/types/interval.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/types/vector.hpp"
+#include "duckdb/function/function_set.hpp"
+#include "duckdb/function/scalar_function.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "include/icu-datefunc.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+
+namespace duckdb {
+
+struct ICUListRange : public ICUDateFunc { //	TIMESTAMPTZ overloads of range and generate_series
+	template <bool INCLUSIVE_BOUND>
+	class RangeInfoStruct {
+	public:
+		explicit RangeInfoStruct(DataChunk &args_p) : args(args_p) { //	expects (start, end, increment) columns
+			if (args.ColumnCount() == 3) {
+				args.data[0].ToUnifiedFormat(args.size(), vdata[0]);
+				args.data[1].ToUnifiedFormat(args.size(), vdata[1]);
+				args.data[2].ToUnifiedFormat(args.size(), vdata[2]);
+			} else {
+				throw InternalException("Unsupported number of parameters for range");
+			}
+		}
+		//	True when none of the arguments is NULL for this row.
+		bool RowIsValid(idx_t row_idx) {
+			for (idx_t i = 0; i < args.ColumnCount(); i++) {
+				auto idx = vdata[i].sel->get_index(row_idx);
+				if (!vdata[i].validity.RowIsValid(idx)) {
+					return false;
+				}
+			}
+			return true;
+		}
+		//	Typed accessors: each resolves the row through the selection vector of its unified format.
+		timestamp_t StartListValue(idx_t row_idx) {
+			auto data = UnifiedVectorFormat::GetData<timestamp_t>(vdata[0]);
+			auto idx = vdata[0].sel->get_index(row_idx);
+			return data[idx];
+		}
+
+		timestamp_t EndListValue(idx_t row_idx) {
+			auto data = UnifiedVectorFormat::GetData<timestamp_t>(vdata[1]);
+			auto idx = vdata[1].sel->get_index(row_idx);
+			return data[idx];
+		}
+
+		interval_t ListIncrementValue(idx_t row_idx) {
+			auto data = UnifiedVectorFormat::GetData<interval_t>(vdata[2]);
+			auto idx = vdata[2].sel->get_index(row_idx);
+			return data[idx];
+		}
+		//	Fetches all three argument values of a row; validity is not checked here.
+		void GetListValues(idx_t row_idx, timestamp_t &start_value, timestamp_t &end_value,
+		                   interval_t &increment_value) {
+			start_value = StartListValue(row_idx);
+			end_value = EndListValue(row_idx);
+			increment_value = ListIncrementValue(row_idx);
+		}
+		//	Number of list elements for the row; INCLUSIVE_BOUND decides whether the end value itself counts.
+		uint64_t ListLength(idx_t row_idx, TZCalendar &calendar) {
+			timestamp_t start_value;
+			timestamp_t end_value;
+			interval_t increment_value;
+			GetListValues(row_idx, start_value, end_value, increment_value);
+			return ListLength(start_value, end_value, increment_value, INCLUSIVE_BOUND, calendar);
+		}
+		//	Advances `input` by one increment via Add on the given calendar.
+		void Increment(timestamp_t &input, interval_t increment, TZCalendar &calendar) {
+			input = Add(calendar, input, increment);
+		}
+
+	private:
+		DataChunk &args;
+		UnifiedVectorFormat vdata[3];
+		//	Counts elements by repeatedly adding the increment; invalid inputs raise InvalidInputException.
+		uint64_t ListLength(timestamp_t start_value, timestamp_t end_value, interval_t increment_value,
+		                    bool inclusive_bound, TZCalendar &calendar) {
+			bool is_positive = increment_value.months > 0 || increment_value.days > 0 || increment_value.micros > 0;
+			bool is_negative = increment_value.months < 0 || increment_value.days < 0 || increment_value.micros < 0;
+			if (!is_negative && !is_positive) {
+				// interval is 0: no result
+				return 0;
+			}
+			// We don't allow infinite bounds because they generate errors or infinite loops
+			if (!Timestamp::IsFinite(start_value) || !Timestamp::IsFinite(end_value)) {
+				throw InvalidInputException("Interval infinite bounds not supported");
+			}
+
+			if (is_negative && is_positive) {
+				// we don't allow a mix of negative and positive interval fields
+				throw InvalidInputException("Interval with mix of negative/positive entries not supported");
+			}
+			if (start_value > end_value && is_positive) {
+				return 0; //	stepping up can never reach a smaller end
+			}
+			if (start_value < end_value && is_negative) {
+				return 0; //	stepping down can never reach a larger end
+			}
+			int64_t total_values = 0;
+			if (is_negative) {
+				// negative interval, start_value is going down
+				while (inclusive_bound ? start_value >= end_value : start_value > end_value) {
+					start_value = Add(calendar, start_value, increment_value);
+					total_values++;
+					if (total_values > NumericLimits<uint32_t>::Maximum()) {
+						throw InvalidInputException("Lists larger than 2^32 elements are not supported");
+					}
+				}
+			} else {
+				// positive interval, start_value is going up
+				while (inclusive_bound ? start_value <= end_value : start_value < end_value) {
+					start_value = Add(calendar, start_value, increment_value);
+					total_values++;
+					if (total_values > NumericLimits<uint32_t>::Maximum()) {
+						throw InvalidInputException("Lists larger than 2^32 elements are not supported");
+					}
+				}
+			}
+			return total_values;
+		}
+	};
+	//	Scalar kernel: first sizes the list of every row, then fills the shared child vector.
+	template <bool INCLUSIVE_BOUND>
+	static void ICUListRangeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
+		D_ASSERT(args.ColumnCount() == 3);
+
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &bind_info = func_expr.bind_info->Cast<BindData>();
+		TZCalendar calendar(*bind_info.calendar, bind_info.cal_setting);
+
+		RangeInfoStruct<INCLUSIVE_BOUND> info(args);
+		idx_t args_size = 1; //	all-constant input: a single row is computed and the result stays constant
+		auto result_type = VectorType::CONSTANT_VECTOR;
+		for (idx_t i = 0; i < args.ColumnCount(); i++) {
+			if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) {
+				args_size = args.size();
+				result_type = VectorType::FLAT_VECTOR;
+				break;
+			}
+		}
+		auto list_data = FlatVector::GetData<list_entry_t>(result);
+		auto &result_validity = FlatVector::Validity(result);
+		idx_t total_size = 0; //	running child offset; unsigned like the list offsets and lengths it feeds
+		for (idx_t i = 0; i < args_size; i++) {
+			if (!info.RowIsValid(i)) {
+				result_validity.SetInvalid(i);
+				list_data[i].offset = total_size;
+				list_data[i].length = 0;
+			} else {
+				list_data[i].offset = total_size;
+				list_data[i].length = info.ListLength(i, calendar);
+				total_size += list_data[i].length;
+			}
+		}
+
+		// now construct the child vector of the list
+		ListVector::Reserve(result, total_size);
+		auto range_data = FlatVector::GetData<timestamp_t>(ListVector::GetEntry(result));
+		idx_t total_idx = 0;
+		for (idx_t i = 0; i < args_size; i++) { //	NULL rows have length 0 and contribute nothing
+			timestamp_t start_value = info.StartListValue(i);
+			interval_t increment = info.ListIncrementValue(i);
+
+			timestamp_t range_value = start_value;
+			for (idx_t range_idx = 0; range_idx < list_data[i].length; range_idx++) {
+				if (range_idx > 0) {
+					info.Increment(range_value, increment, calendar);
+				}
+				range_data[total_idx++] = range_value;
+			}
+		}
+
+		ListVector::SetListSize(result, total_size);
+		result.SetVectorType(result_type);
+
+		result.Verify(args.size());
+	}
+
+	static void AddICUListRangeFunction(ExtensionLoader &loader) {
+		//	range: the end bound is exclusive (INCLUSIVE_BOUND = false)
+		ScalarFunctionSet range("range");
+		range.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
+		                                 LogicalType::LIST(LogicalType::TIMESTAMP_TZ), ICUListRangeFunction<false>,
+		                                 Bind));
+		loader.RegisterFunction(range);
+
+		// generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS
+		ScalarFunctionSet generate_series("generate_series");
+		generate_series.AddFunction(
+		    ScalarFunction({LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
+		                   LogicalType::LIST(LogicalType::TIMESTAMP_TZ), ICUListRangeFunction<true>, Bind));
+
+		loader.RegisterFunction(generate_series);
+	}
+};
+
+void RegisterICUListRangeFunctions(ExtensionLoader &loader) {
+	ICUListRange::AddICUListRangeFunction(loader); //	adds the TIMESTAMPTZ range and generate_series overloads
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-makedate.cpp
+++ b/external/duckdb/extension/icu/icu-makedate.cpp
@@ -0,0 +1,176 @@
+#include "duckdb/common/operator/add.hpp"
+#include "duckdb/common/operator/cast_operators.hpp"
+#include "duckdb/common/operator/subtract.hpp"
+#include "duckdb/common/types/date.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/vector_operations/senary_executor.hpp"
+#include "duckdb/common/vector_operations/septenary_executor.hpp"
+#include "duckdb/function/cast/cast_function_set.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "include/icu-casts.hpp"
+#include "include/icu-datefunc.hpp"
+#include "include/icu-datetrunc.hpp"
+
+#include <cmath>
+
+namespace duckdb {
+
+date_t ICUMakeDate::Operation(icu::Calendar *calendar, timestamp_t instant) {
+	if (!Timestamp::IsFinite(instant)) {
+		return Timestamp::GetDate(instant);
+	}
+	//	Read the date fields of the instant as seen by the calendar (UCAL_MONTH is shifted up by one).
+	SetTime(calendar, instant);
+	const auto era = ExtractField(calendar, UCAL_ERA);
+	auto yyyy = ExtractField(calendar, UCAL_YEAR);
+	const auto mm = ExtractField(calendar, UCAL_MONTH) + 1;
+	const auto dd = ExtractField(calendar, UCAL_DATE);
+	if (!era) {
+		//	Era 0 years become non-positive: 1 -> 0, 2 -> -1, ...
+		yyyy = -yyyy + 1;
+	}
+	date_t result;
+	if (Date::TryFromDate(yyyy, mm, dd, result)) {
+		return result;
+	}
+	throw ConversionException("Unable to convert TIMESTAMPTZ to DATE");
+}
+
+date_t ICUMakeDate::ToDate(ClientContext &context, timestamp_t instant) {
+	ICUDateFunc::BindData data(context); //	bind data built from the context; supplies the calendar used below
+	return Operation(data.calendar.get(), instant);
+}
+
+bool ICUMakeDate::CastToDate(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+	//	Convert with a private clone of the calendar stored in the cast's bind data.
+	auto &bind_data = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+	CalendarPtr calendar_ptr(bind_data.calendar->clone());
+	const auto calendar = calendar_ptr.get();
+	const auto to_date = [&](timestamp_t input) { return Operation(calendar, input); };
+	UnaryExecutor::Execute<timestamp_t, date_t>(source, result, count, to_date);
+	return true;
+}
+
+BoundCastInfo ICUMakeDate::BindCastToDate(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
+	//	The bind data is constructed from the client context, so the cast cannot be bound without one.
+	if (!input.context) {
+		throw InternalException("Missing context for TIMESTAMPTZ to DATE cast.");
+	}
+	//	The bind data travels with the cast; CastToDate clones its calendar on every call.
+	auto bind_data = make_uniq<BindData>(*input.context);
+	return BoundCastInfo(CastToDate, make_uniq<CastData>(std::move(bind_data)));
+}
+
+void ICUMakeDate::AddCasts(ExtensionLoader &loader) {
+	loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::DATE, BindCastToDate); //	TIMESTAMPTZ -> DATE
+}
+
+struct ICUMakeTimestampTZFunc : public ICUDateFunc {
+	template <typename T>
+	static inline timestamp_t Operation(icu::Calendar *calendar, T yyyy, T mm, T dd, T hr, T mn, double ss) {
+		//	Negative years are shifted up by one and the month is reduced by one before the fields are cast.
+		const auto cal_year = Cast::Operation<T, int32_t>(AddOperator::Operation<T, T, T>(yyyy, (yyyy < 0)));
+		const auto cal_month = Cast::Operation<T, int32_t>(SubtractOperatorOverflowCheck::Operation<T, T, T>(mm, 1));
+		const auto cal_day = Cast::Operation<T, int32_t>(dd);
+		const auto cal_hour = Cast::Operation<T, int32_t>(hr);
+		const auto cal_minute = Cast::Operation<T, int32_t>(mn);
+		//	Split the seconds into whole seconds, whole milliseconds and the remaining microseconds.
+		const auto whole_secs = Cast::Operation<double, int32_t>(ss);
+		ss -= whole_secs;
+		ss *= Interval::MSECS_PER_SEC;
+		const auto whole_millis = int32_t(ss);
+		int64_t micros = std::round((ss - whole_millis) * Interval::MICROS_PER_MSEC);
+
+		calendar->set(UCAL_YEAR, cal_year);
+		calendar->set(UCAL_MONTH, cal_month);
+		calendar->set(UCAL_DATE, cal_day);
+		calendar->set(UCAL_HOUR_OF_DAY, cal_hour);
+		calendar->set(UCAL_MINUTE, cal_minute);
+		calendar->set(UCAL_SECOND, whole_secs);
+		calendar->set(UCAL_MILLISECOND, whole_millis);
+		return GetTime(calendar, micros);
+	}
+	//	Interprets the single argument as a raw microsecond value; non-finite timestamps are rejected.
+	template <typename T>
+	static void FromMicros(DataChunk &input, ExpressionState &state, Vector &result) {
+		UnaryExecutor::Execute<T, timestamp_t>(input.data[0], result, input.size(), [&](T micros) {
+			const auto ts = timestamp_t(micros);
+			if (Timestamp::IsFinite(ts)) {
+				return ts;
+			}
+			throw ConversionException("Timestamp microseconds out of range: %ld", micros);
+		});
+	}
+
+	template <typename T>
+	static void Execute(DataChunk &input, ExpressionState &state, Vector &result) {
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		CalendarPtr calendar_ptr(bind_data.calendar->clone());
+		const auto calendar = calendar_ptr.get();
+
+		//	Kernel shared by both six-argument executions below.
+		const auto from_parts = [&](T yyyy, T mm, T dd, T hr, T mn, double ss) {
+			return Operation<T>(calendar, yyyy, mm, dd, hr, mn, ss);
+		};
+
+		//	Three cases: no TZ, constant TZ, variable TZ
+		if (input.ColumnCount() == SenaryExecutor::NCOLS) {
+			SenaryExecutor::Execute<T, T, T, T, T, double, timestamp_t>(input, result, from_parts);
+			return;
+		}
+
+		D_ASSERT(input.ColumnCount() == SeptenaryExecutor::NCOLS);
+		auto &tz_vec = input.data.back();
+		if (tz_vec.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			//	Variable TZ: switch the calendar's zone for every row.
+			SeptenaryExecutor::Execute<T, T, T, T, T, double, string_t, timestamp_t>(
+			    input, result, [&](T yyyy, T mm, T dd, T hr, T mn, double ss, string_t tz_id) {
+				    SetTimeZone(calendar, tz_id);
+				    return Operation<T>(calendar, yyyy, mm, dd, hr, mn, ss);
+			    });
+		} else if (ConstantVector::IsNull(tz_vec)) {
+			//	A NULL TZ yields a constant NULL result.
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+		} else {
+			//	Constant TZ: set the zone once, then run the six-argument kernel.
+			SetTimeZone(calendar, *ConstantVector::GetData<string_t>(tz_vec));
+			SenaryExecutor::Execute<T, T, T, T, T, double, timestamp_t>(input, result, from_parts);
+		}
+	}
+	//	(year, month, day, hour, minute, seconds DOUBLE) -> TIMESTAMPTZ
+	template <typename TA>
+	static ScalarFunction GetSenaryFunction(const LogicalTypeId &type) {
+		const vector<LogicalType> arg_types {type, type, type, type, type, LogicalType::DOUBLE};
+		ScalarFunction from_parts(arg_types, LogicalType::TIMESTAMP_TZ, Execute<TA>, Bind);
+		BaseScalarFunction::SetReturnsError(from_parts);
+		return from_parts;
+	}
+	//	Same as the six-argument form, with a trailing VARCHAR time zone argument.
+	template <typename TA>
+	static ScalarFunction GetSeptenaryFunction(const LogicalTypeId &type) {
+		const vector<LogicalType> arg_types {type, type, type, type, type, LogicalType::DOUBLE, LogicalType::VARCHAR};
+		ScalarFunction from_parts_tz(arg_types, LogicalType::TIMESTAMP_TZ, Execute<TA>, Bind);
+		BaseScalarFunction::SetReturnsError(from_parts_tz);
+		return from_parts_tz;
+	}
+	//	Registers `name` with three BIGINT-based overloads: six parts, six parts plus zone, raw microseconds.
+	static void AddFunction(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(GetSenaryFunction<int64_t>(LogicalType::BIGINT));
+		overloads.AddFunction(GetSeptenaryFunction<int64_t>(LogicalType::BIGINT));
+		ScalarFunction from_micros({LogicalType::BIGINT}, LogicalType::TIMESTAMP_TZ, FromMicros<int64_t>);
+		BaseScalarFunction::SetReturnsError(from_micros);
+		overloads.AddFunction(from_micros);
+		loader.RegisterFunction(overloads);
+	}
+};
+
+void RegisterICUMakeDateFunctions(ExtensionLoader &loader) {
+	ICUMakeTimestampTZFunc::AddFunction("make_timestamptz", loader); //	from date/time parts or raw microseconds
+	ICUMakeDate::AddCasts(loader);                                    //	TIMESTAMPTZ -> DATE cast
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-strptime.cpp
+++ b/external/duckdb/extension/icu/icu-strptime.cpp
@@ -0,0 +1,550 @@
+#include "include/icu-strptime.hpp"
+#include "include/icu-datefunc.hpp"
+#include "include/icu-helpers.hpp"
+
+#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
+#include "duckdb/common/operator/cast_operators.hpp"
+#include "duckdb/common/types/cast_helpers.hpp"
+#include "duckdb/common/types/date.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/vector_operations/binary_executor.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/function/scalar/strftime_format.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "duckdb/function/cast/default_casts.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+
+namespace duckdb {
+
+// Breaks a TIMESTAMPTZ value into calendar fields as seen through `calendar`.
+// ICUDateFunc::SetTime positions the calendar and returns a remainder that is added to the
+// millisecond field (scaled by MICROS_PER_MSEC) to form the microsecond component.
+TimestampComponents ICUHelpers::GetComponents(timestamp_tz_t ts, icu::Calendar *calendar) {
+	const uint64_t remainder = ICUDateFunc::SetTime(calendar, timestamp_t(ts.value));
+	const auto field = [calendar](UCalendarDateFields which) {
+		return ICUDateFunc::ExtractField(calendar, which);
+	};
+
+	TimestampComponents parts;
+	// Date fields; the month read from the calendar is shifted up by one.
+	parts.year = field(UCAL_EXTENDED_YEAR);
+	parts.month = field(UCAL_MONTH) + 1;
+	parts.day = field(UCAL_DATE);
+
+	// Time-of-day fields
+	parts.hour = field(UCAL_HOUR_OF_DAY);
+	parts.minute = field(UCAL_MINUTE);
+	parts.second = field(UCAL_SECOND);
+	parts.microsecond =
+	    UnsafeNumericCast<int32_t>(field(UCAL_MILLISECOND) * Interval::MICROS_PER_MSEC + remainder);
+	return parts;
+}
+
+// Reassembles a timestamp from separate components: the date part is built first, then the
+// time of day, and the two are combined with Timestamp::FromDatetime.
+timestamp_t ICUHelpers::ToTimestamp(TimestampComponents data) {
+	const auto day_part = Date::FromDate(data.year, data.month, data.day);
+	const auto clock_part = Time::FromTime(data.hour, data.minute, data.second, data.microsecond);
+	return Timestamp::FromDatetime(day_part, clock_part);
+}
+struct ICUStrptime : public ICUDateFunc {
+	using ParseResult = StrpTimeFormat::ParseResult;
+
+	// Bind data for the ICU strptime variants: whatever BindData carries plus the list of
+	// candidate formats that are tried in order when parsing.
+	struct ICUStrptimeBindData : public BindData {
+		// The formats to try, in order.
+		vector<StrpTimeFormat> formats;
+
+		// Single-format constructor: stores one copy of `format`.
+		ICUStrptimeBindData(ClientContext &context, const StrpTimeFormat &format)
+		    : BindData(context), formats(1, format) {
+		}
+		// Multi-format constructor: takes over the supplied list.
+		ICUStrptimeBindData(ClientContext &context, vector<StrpTimeFormat> formats_p)
+		    : BindData(context), formats(std::move(formats_p)) {
+		}
+		ICUStrptimeBindData(const ICUStrptimeBindData &other) : BindData(other), formats(other.formats) {
+		}
+
+		// Two instances are equal when they hold the same number of formats and the format
+		// specifier strings match pairwise.
+		// NOTE(review): only the specifier strings are compared; base-class state is not
+		// consulted here - confirm that this is intended.
+		bool Equals(const FunctionData &other_p) const override {
+			const auto &rhs = other_p.Cast<ICUStrptimeBindData>().formats;
+			const auto count = formats.size();
+			if (count != rhs.size()) {
+				return false;
+			}
+			size_t matched = 0;
+			while (matched < count && formats[matched].format_specifier == rhs[matched].format_specifier) {
+				++matched;
+			}
+			return matched == count;
+		}
+
+		duckdb::unique_ptr<FunctionData> Copy() const override {
+			return make_uniq<ICUStrptimeBindData>(*this);
+		}
+	};
+
+	// Copies the specifier text into `format` and parses it.
+	// Throws InvalidInputException when StrTimeFormat::ParseFormatSpecifier reports an error.
+	static void ParseFormatSpecifier(string_t &format_specifier, StrpTimeFormat &format) {
+		auto &spec = format.format_specifier;
+		spec = format_specifier.GetString();
+		const auto failure = StrTimeFormat::ParseFormatSpecifier(spec, format);
+		if (failure.empty()) {
+			return;
+		}
+		throw InvalidInputException("Failed to parse format specifier %s: %s", spec, failure);
+	}
+
+	// Loads the parsed date/time parts into `calendar` and returns the microseconds that remain
+	// below one millisecond (the calendar only receives whole milliseconds).
+	static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
+		const uint64_t total_micros = parsed.GetMicros();
+		const auto &parts = parsed.data;
+
+		// Year goes into the extended year field because strptime doesn't understand eras.
+		calendar->set(UCAL_EXTENDED_YEAR, parts[0]);
+		// The parsed month is shifted down by one before it is stored.
+		calendar->set(UCAL_MONTH, parts[1] - 1);
+		calendar->set(UCAL_DATE, parts[2]);
+		calendar->set(UCAL_HOUR_OF_DAY, parts[3]);
+		calendar->set(UCAL_MINUTE, parts[4]);
+		calendar->set(UCAL_SECOND, parts[5]);
+		calendar->set(UCAL_MILLISECOND, UnsafeNumericCast<int32_t>(total_micros / Interval::MICROS_PER_MSEC));
+
+		// Setting the zone offset overrides the TZ setting, so do it only if an offset was parsed.
+		// The DST setting is left alone because the two just combine.
+		const bool has_utc_offset = format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET);
+		if (has_utc_offset) {
+			calendar->set(UCAL_ZONE_OFFSET, UnsafeNumericCast<int32_t>(parts[7] * Interval::MSECS_PER_SEC));
+		}
+
+		return total_micros % Interval::MICROS_PER_MSEC;
+	}
+
+	// Scalar implementation installed for strptime when the bind step selects the ICU path.
+	// args.data[0] holds the strings to parse and args.data[1] the (constant) format argument.
+	// Each input is tried against the bound formats in order; the first format that parses wins.
+	// Throws InvalidInputException when no format matches an input.
+	static void Parse(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+		auto &str_arg = args.data[0];
+		auto &fmt_arg = args.data[1];
+
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<ICUStrptimeBindData>();
+		// Work on a private clone so the calendar held by the bind data is never modified.
+		CalendarPtr calendar_ptr(info.calendar->clone());
+		auto calendar = calendar_ptr.get();
+
+		D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR);
+
+		if (ConstantVector::IsNull(fmt_arg)) {
+			// A NULL format makes the whole result a constant NULL.
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+		} else {
+			UnaryExecutor::Execute<string_t, timestamp_t>(str_arg, result, args.size(), [&](string_t input) {
+				ParseResult parsed;
+				for (auto &format : info.formats) {
+					if (format.Parse(input, parsed)) {
+						if (parsed.is_special) {
+							// Special values bypass the calendar entirely.
+							return parsed.ToTimestamp();
+						} else {
+							// Set TZ first, if any.
+							// NOTE(review): the cloned calendar is shared by every row of this call, so a
+							// zone set here appears to stay in effect for later rows that parse without
+							// one - confirm against SetTimeZone/GetTime.
+							if (!parsed.tz.empty()) {
+								SetTimeZone(calendar, parsed.tz);
+							}
+
+							return GetTime(calendar, ToMicros(calendar, parsed, format));
+						}
+					}
+				}
+
+				// Nothing matched: the error is always reported against the first format.
+				throw InvalidInputException(parsed.FormatError(input, info.formats[0].format_specifier));
+			});
+		}
+	}
+
+	// Scalar implementation installed for try_strptime when the bind step selects the ICU path.
+	// Behaves like Parse, except that an input no format can handle (parse failure, a time zone
+	// that TrySetTimeZone rejects, or a time that TryGetTime rejects) yields NULL instead of an error.
+	static void TryParse(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+		auto &strings = args.data[0];
+		auto &format_vec = args.data[1];
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<ICUStrptimeBindData>();
+		// Private clone; the calendar held by the bind data is left untouched.
+		CalendarPtr owned_calendar(bind_data.calendar->clone());
+		auto calendar = owned_calendar.get();
+
+		D_ASSERT(format_vec.GetVectorType() == VectorType::CONSTANT_VECTOR);
+
+		if (ConstantVector::IsNull(format_vec)) {
+			// A NULL format makes the whole result a constant NULL.
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		UnaryExecutor::ExecuteWithNulls<string_t, timestamp_t>(
+		    strings, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
+			    ParseResult parsed;
+			    for (auto &format : bind_data.formats) {
+				    if (!format.Parse(input, parsed)) {
+					    continue;
+				    }
+				    if (parsed.is_special) {
+					    return parsed.ToTimestamp();
+				    }
+				    // A zone that cannot be applied disqualifies this format; move on to the next one.
+				    if (!parsed.tz.empty() && !TrySetTimeZone(calendar, parsed.tz)) {
+					    continue;
+				    }
+				    timestamp_t converted;
+				    if (TryGetTime(calendar, ToMicros(calendar, parsed, format), converted)) {
+					    return converted;
+				    }
+			    }
+
+			    // No format produced a value.
+			    mask.SetInvalid(idx);
+			    return timestamp_t();
+		    });
+	}
+
+	static bind_scalar_function_t bind_strptime; // NOLINT
+
+	// Bind callback patched onto strptime/try_strptime by TailPatch.
+	// The format argument must be a foldable constant. When it is a VARCHAR containing a time zone
+	// name, or a LIST(VARCHAR) in which any entry has a time zone name or UTC offset, the function is
+	// rebound to the ICU implementation (Parse or TryParse, chosen by function name) and returns
+	// TIMESTAMPTZ. In every other case binding is handed back to the saved original callback.
+	static duckdb::unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function,
+	                                                             vector<duckdb::unique_ptr<Expression>> &arguments) {
+		auto &format_expr = *arguments[1];
+		if (format_expr.HasParameter()) {
+			throw ParameterNotResolvedException();
+		}
+		if (!format_expr.IsFoldable()) {
+			throw InvalidInputException("strptime format must be a constant");
+		}
+		scalar_function_t icu_function = (bound_function.name == "try_strptime") ? TryParse : Parse;
+		Value format_value = ExpressionExecutor::EvaluateScalar(context, format_expr);
+
+		// One format object is reused for every specifier that gets parsed below.
+		StrpTimeFormat format;
+		const auto parse_specifier = [&format](const string &specifier) {
+			format.format_specifier = specifier;
+			const string error = StrTimeFormat::ParseFormatSpecifier(specifier, format);
+			if (!error.empty()) {
+				throw InvalidInputException("Failed to parse format specifier %s: %s", specifier, error);
+			}
+		};
+		// Rebinds to the ICU implementation with the given candidate formats.
+		const auto bind_icu = [&](vector<StrpTimeFormat> candidates) -> duckdb::unique_ptr<FunctionData> {
+			bound_function.function = icu_function;
+			bound_function.return_type = LogicalType::TIMESTAMP_TZ;
+			return make_uniq<ICUStrptimeBindData>(context, std::move(candidates));
+		};
+
+		// A NULL format is left to the fallback below.
+		if (!format_value.IsNull()) {
+			if (format_value.type().id() == LogicalTypeId::VARCHAR) {
+				parse_specifier(format_value.ToString());
+
+				// If we have a time zone, we should use ICU for parsing and return a TSTZ instead.
+				if (format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME)) {
+					return bind_icu(vector<StrpTimeFormat>(1, format));
+				}
+			} else if (format_value.type() == LogicalType::LIST(LogicalType::VARCHAR)) {
+				const auto &children = ListValue::GetChildren(format_value);
+				if (children.empty()) {
+					throw InvalidInputException("strptime format list must not be empty");
+				}
+
+				vector<StrpTimeFormat> formats;
+				bool needs_icu = false;
+				for (const auto &child : children) {
+					parse_specifier(child.ToString());
+					// If any format has UTC offsets or names, then we have to produce TSTZ
+					if (!needs_icu) {
+						needs_icu = format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME) ||
+						            format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET);
+					}
+					formats.emplace_back(format);
+				}
+				if (needs_icu) {
+					return bind_icu(std::move(formats));
+				}
+			}
+		}
+
+		// Fall back to faster, non-TZ parsing
+		bound_function.bind = bind_strptime;
+		return bind_strptime(context, bound_function, arguments);
+	}
+
+	// Finds the already-registered overload of `name` whose argument types equal `types`, remembers
+	// its bind callback in bind_strptime and replaces it with StrpTimeBindFunction.
+	// Throws InternalException when no such overload exists.
+	static void TailPatch(const string &name, ExtensionLoader &loader, const vector<LogicalType> &types) {
+		auto &overloads = loader.GetFunction(name).functions.functions;
+		for (auto &candidate : overloads) {
+			if (types == candidate.arguments) {
+				// Only the first matching overload is patched.
+				bind_strptime = candidate.bind;
+				candidate.bind = StrpTimeBindFunction;
+				return;
+			}
+		}
+		throw InternalException("ICU - Function for TailPatch not found");
+	}
+
+	// Patches both two-argument overloads of `name`: (VARCHAR, VARCHAR) first, then
+	// (VARCHAR, LIST(VARCHAR)).
+	static void AddBinaryTimestampFunction(const string &name, ExtensionLoader &loader) {
+		const vector<LogicalType> single_format {LogicalType::VARCHAR, LogicalType::VARCHAR};
+		TailPatch(name, loader, single_format);
+
+		const vector<LogicalType> format_list {LogicalType::VARCHAR, LogicalType::LIST(LogicalType::VARCHAR)};
+		TailPatch(name, loader, format_list);
+	}
+
+	// Cast implementation for VARCHAR -> TIMESTAMPTZ.
+	// Each string is parsed with Timestamp::TryConvertTimestampTZ. Parse failures are reported through
+	// HandleCastError::AssignError and the row is marked invalid. When the string carried no UTC
+	// offset, the parsed value is passed through FromNaive with the calendar, after switching the
+	// calendar to the zone named in the string if there was one.
+	// NOTE(review): this always returns true, even when rows failed - confirm that callers do not
+	// rely on the return value to detect failed rows.
+	static bool VarcharToTimestampTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+		auto &cast_data = parameters.cast_data->Cast<CastData>();
+		auto &info = cast_data.info->Cast<BindData>();
+		// Private clone of the calendar held by the bind data.
+		CalendarPtr cal(info.calendar->clone());
+
+		UnaryExecutor::ExecuteWithNulls<string_t, timestamp_tz_t>(
+		    source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
+			    timestamp_tz_t result;
+			    const auto str = input.GetData();
+			    const auto len = input.GetSize();
+			    // Receives the time zone text found in the input, if any; empty otherwise.
+			    string_t tz(nullptr, 0);
+			    bool has_offset = false;
+			    auto success = Timestamp::TryConvertTimestampTZ(str, len, result, true, has_offset, tz);
+			    if (success != TimestampCastResult::SUCCESS) {
+				    // Pick the message that matches the failure kind.
+				    string msg;
+				    if (success == TimestampCastResult::ERROR_RANGE) {
+					    msg = Timestamp::RangeError(string(str, len));
+				    } else {
+					    msg = Timestamp::FormatError(string(str, len));
+				    }
+				    HandleCastError::AssignError(msg, parameters);
+				    mask.SetInvalid(idx);
+			    } else if (!has_offset) {
+				    // Convert parts to a TZ (default or parsed) if no offset was provided
+				    auto calendar = cal.get();
+
+				    // Change TZ if one was provided.
+				    // NOTE(review): the calendar is shared by all rows of this call, so a zone set here
+				    // appears to remain in effect for later rows without a zone - confirm.
+				    if (tz.GetSize()) {
+					    string error_msg;
+					    SetTimeZone(calendar, tz, &error_msg);
+					    if (!error_msg.empty()) {
+						    HandleCastError::AssignError(error_msg, parameters);
+						    mask.SetInvalid(idx);
+					    }
+				    }
+
+				    // Now get the parts in the given time zone
+				    // NOTE(review): this also runs after a zone error was recorded above; the row is
+				    // already invalid at that point.
+				    result = timestamp_tz_t(FromNaive(calendar, result));
+			    }
+
+			    return result;
+		    });
+		return true;
+	}
+
+	// Cast implementation for VARCHAR -> TIMETZ.
+	// Each string is parsed with Time::TryConvertTimeTZ; failures are reported through
+	// HandleCastError::AssignError and the row is marked invalid. When the string carried no offset,
+	// the offset is taken from the calendar (zone offset plus DST offset, converted to seconds).
+	// Always returns true.
+	static bool VarcharToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+		auto &bind_data = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+		CalendarPtr owned_calendar(bind_data.calendar->clone());
+
+		UnaryExecutor::ExecuteWithNulls<string_t, dtime_tz_t>(
+		    source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
+			    dtime_tz_t parsed;
+			    const auto text = input.GetData();
+			    const auto text_len = input.GetSize();
+			    bool has_offset = false;
+			    idx_t pos = 0;
+
+			    if (!Time::TryConvertTimeTZ(text, text_len, pos, parsed, has_offset, false)) {
+				    auto message = Time::ConversionError(string(text, text_len));
+				    HandleCastError::AssignError(message, parameters);
+				    mask.SetInvalid(idx);
+				    return parsed;
+			    }
+			    if (has_offset) {
+				    // The string supplied its own offset; keep the parsed value as is.
+				    return parsed;
+			    }
+
+			    // No offset in the string: read it from the calendar (milliseconds -> seconds).
+			    auto calendar = owned_calendar.get();
+			    auto zone_seconds = ExtractField(calendar, UCAL_ZONE_OFFSET);
+			    zone_seconds += ExtractField(calendar, UCAL_DST_OFFSET);
+			    zone_seconds /= Interval::MSECS_PER_SEC;
+
+			    // Apply it to the offset +00 time we parsed.
+			    return dtime_tz_t(parsed.time(), zone_seconds);
+		    });
+		return true;
+	}
+
+	// Cast binder for VARCHAR sources: selects the cast routine that matches the target type and
+	// attaches calendar bind data created from the client context.
+	// Throws InternalException when no context is available or the target is neither TIMESTAMPTZ
+	// nor TIMETZ.
+	static BoundCastInfo BindCastFromVarchar(BindCastInput &input, const LogicalType &source,
+	                                         const LogicalType &target) {
+		if (!input.context) {
+			throw InternalException("Missing context for VARCHAR to TIME/TIMESTAMPTZ cast.");
+		}
+
+		auto cast_data = make_uniq<CastData>(make_uniq<BindData>(*input.context));
+
+		const auto target_id = target.id();
+		if (target_id == LogicalTypeId::TIMESTAMP_TZ) {
+			return BoundCastInfo(VarcharToTimestampTZ, std::move(cast_data));
+		}
+		if (target_id == LogicalTypeId::TIME_TZ) {
+			return BoundCastInfo(VarcharToTimeTZ, std::move(cast_data));
+		}
+		throw InternalException("Unsupported type for VARCHAR to TIME/TIMESTAMPTZ cast.");
+	}
+
+	// Registers BindCastFromVarchar as the cast binder for VARCHAR -> TIMESTAMPTZ and
+	// VARCHAR -> TIMETZ.
+	static void AddCasts(ExtensionLoader &loader) {
+		loader.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ, BindCastFromVarchar);
+		loader.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIME_TZ, BindCastFromVarchar);
+	}
+};
+
+bind_scalar_function_t ICUStrptime::bind_strptime = nullptr; // NOLINT
+
+struct ICUStrftime : public ICUDateFunc {
+	// Parses the specifier text into `format`.
+	// Throws InvalidInputException when StrTimeFormat::ParseFormatSpecifier reports an error.
+	static void ParseFormatSpecifier(string_t &format_str, StrfTimeFormat &format) {
+		const auto specifier = format_str.GetString();
+		const auto failure = StrTimeFormat::ParseFormatSpecifier(specifier, format);
+		if (failure.empty()) {
+			return;
+		}
+		throw InvalidInputException("Failed to parse format specifier %s: %s", specifier, failure);
+	}
+
+	// Formats one timestamp with `format`, using the fields the calendar reports for it.
+	// Non-finite inputs are rendered with Timestamp::ToString instead. The returned string is
+	// allocated in `result`.
+	static string_t Operation(icu::Calendar *calendar, timestamp_t input, const char *tz_name, StrfTimeFormat &format,
+	                          Vector &result) {
+		// Infinity is always formatted the same way
+		if (!Timestamp::IsFinite(input)) {
+			return StringVector::AddString(result, Timestamp::ToString(input));
+		}
+
+		// Position the calendar; the returned remainder is added to the microsecond slot below.
+		const uint64_t remainder = SetTime(calendar, input);
+		const auto field = [calendar](UCalendarDateFields which) {
+			return ExtractField(calendar, which);
+		};
+
+		// Slots: year, month, day, hour, minute, second, microsecond, UTC offset in seconds.
+		int32_t data[8];
+		data[0] = field(UCAL_EXTENDED_YEAR); // strftime doesn't understand eras.
+		data[1] = field(UCAL_MONTH) + 1;
+		data[2] = field(UCAL_DATE);
+		data[3] = field(UCAL_HOUR_OF_DAY);
+		data[4] = field(UCAL_MINUTE);
+		data[5] = field(UCAL_SECOND);
+		data[6] = UnsafeNumericCast<int32_t>(field(UCAL_MILLISECOND) * Interval::MICROS_PER_MSEC + remainder);
+
+		int32_t utc_offset = field(UCAL_ZONE_OFFSET) + field(UCAL_DST_OFFSET);
+		utc_offset /= Interval::MSECS_PER_SEC;
+		data[7] = utc_offset;
+
+		const auto date = Date::FromDate(data[0], data[1], data[2]);
+		const auto time = Time::FromTime(data[3], data[4], data[5], data[6]);
+
+		// Size the output first, then format directly into the vector's string buffer.
+		const auto length = format.GetLength(date, time, data[7], tz_name);
+		string_t formatted = StringVector::EmptyString(result, length);
+		format.FormatString(date, data, tz_name, formatted.GetDataWriteable());
+		formatted.Finalize();
+
+		return formatted;
+	}
+
+	// Scalar implementation of strftime(TIMESTAMPTZ, VARCHAR).
+	// A constant format is parsed once for the whole chunk (a NULL constant gives a constant NULL
+	// result); otherwise the format is parsed per row, and only for finite timestamps.
+	static void ICUStrftimeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+		auto &timestamps = args.data[0];
+		auto &format_vec = args.data[1];
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		CalendarPtr calendar(bind_data.calendar->clone());
+		const auto tz_name = bind_data.tz_setting.c_str();
+
+		if (format_vec.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			// Per-row formats
+			BinaryExecutor::ExecuteWithNulls<timestamp_t, string_t, string_t>(
+			    timestamps, format_vec, result, args.size(),
+			    [&](timestamp_t input, string_t format_specifier, ValidityMask &mask, idx_t idx) {
+				    // Non-finite values are rendered without looking at the format at all.
+				    if (!Timestamp::IsFinite(input)) {
+					    return StringVector::AddString(result, Timestamp::ToString(input));
+				    }
+				    StrfTimeFormat format;
+				    ParseFormatSpecifier(format_specifier, format);
+				    return Operation(calendar.get(), input, tz_name, format, result);
+			    });
+			return;
+		}
+
+		// Common case of constant part.
+		if (ConstantVector::IsNull(format_vec)) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		StrfTimeFormat format;
+		ParseFormatSpecifier(*ConstantVector::GetData<string_t>(format_vec), format);
+
+		UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
+		    timestamps, result, args.size(), [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
+			    // Operation renders non-finite inputs via Timestamp::ToString itself.
+			    return Operation(calendar.get(), input, tz_name, format, result);
+		    });
+	}
+
+	// Registers a function set called `name` with one overload:
+	// (TIMESTAMPTZ, VARCHAR) -> VARCHAR, executed by ICUStrftimeFunction and bound with Bind.
+	static void AddBinaryTimestampFunction(const string &name, ExtensionLoader &loader) {
+		ScalarFunction strftime_tz({LogicalType::TIMESTAMP_TZ, LogicalType::VARCHAR}, LogicalType::VARCHAR,
+		                           ICUStrftimeFunction, Bind);
+
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(strftime_tz);
+		loader.RegisterFunction(overloads);
+	}
+
+	// Renders one TIMESTAMPTZ value as "<date> <time><utc offset>" using the fields the calendar
+	// reports for it. Non-finite inputs are rendered with Timestamp::ToString instead.
+	// The returned string is allocated in `result`.
+	static string_t CastOperation(icu::Calendar *calendar, timestamp_t input, Vector &result) {
+		// Infinity is always formatted the same way
+		if (!Timestamp::IsFinite(input)) {
+			return StringVector::AddString(result, Timestamp::ToString(input));
+		}
+
+		// decompose the timestamp
+		auto ts_data = ICUHelpers::GetComponents(timestamp_tz_t(input.value), calendar);
+
+		// Work out the length of each piece first so the output can be allocated in one go.
+		idx_t year_length;
+		bool add_bc;
+		const auto date_len = DateToStringCast::YearLength(ts_data.year, year_length, add_bc);
+
+		char micro_buffer[6];
+		const auto time_len = TimeToStringCast::MicrosLength(ts_data.microsecond, micro_buffer);
+
+		// Total offset (zone + DST) is reduced from milliseconds to seconds and then to minutes.
+		auto offset = ExtractField(calendar, UCAL_ZONE_OFFSET) + ExtractField(calendar, UCAL_DST_OFFSET);
+		offset /= Interval::MSECS_PER_SEC;
+		offset /= Interval::SECS_PER_MINUTE;
+		// Integer division and remainder: for a negative offset both parts are negative or zero.
+		int hour_offset = offset / 60;
+		int minute_offset = offset % 60;
+		auto offset_str = Time::ToUTCOffset(hour_offset, minute_offset);
+		const auto offset_len = offset_str.size();
+
+		// The extra 1 is the space written between the date and the time.
+		const auto len = date_len + 1 + time_len + offset_len;
+		string_t target = StringVector::EmptyString(result, len);
+		auto buffer = target.GetDataWriteable();
+
+		// Write the pieces back to back, advancing the cursor after each one.
+		DateToStringCast::Format(buffer, ts_data.year, ts_data.month, ts_data.day, year_length, add_bc);
+		buffer += date_len;
+		*buffer++ = ' ';
+
+		TimeToStringCast::Format(buffer, time_len, ts_data.hour, ts_data.minute, ts_data.second, ts_data.microsecond,
+		                         micro_buffer);
+		buffer += time_len;
+
+		memcpy(buffer, offset_str.c_str(), offset_len);
+		buffer += offset_len;
+
+		target.Finalize();
+
+		return target;
+	}
+
+	// Cast implementation for TIMESTAMPTZ -> VARCHAR: applies CastOperation to every row using a
+	// private clone of the calendar held by the bind data. Always returns true.
+	static bool CastToVarchar(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+		auto &bind_data = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+		CalendarPtr calendar(bind_data.calendar->clone());
+
+		UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
+		    source, result, count, [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
+			    return CastOperation(calendar.get(), input, result);
+		    });
+		return true;
+	}
+
+	// Cast binder for TIMESTAMPTZ -> VARCHAR: pairs CastToVarchar with calendar bind data created
+	// from the client context. Throws InternalException when no context is available.
+	static BoundCastInfo BindCastToVarchar(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
+		if (input.context) {
+			auto bind_data = make_uniq<BindData>(*input.context);
+			auto cast_data = make_uniq<CastData>(std::move(bind_data));
+			return BoundCastInfo(CastToVarchar, std::move(cast_data));
+		}
+		throw InternalException("Missing context for TIMESTAMPTZ to VARCHAR cast.");
+	}
+
+	// Registers BindCastToVarchar as the cast binder for TIMESTAMPTZ -> VARCHAR.
+	static void AddCasts(ExtensionLoader &loader) {
+		loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::VARCHAR, BindCastToVarchar);
+	}
+};
+
+// Entry point for this file: patches the bind callbacks of the existing strptime and try_strptime
+// functions, registers the TIMESTAMPTZ overload of strftime, and adds the string casts defined by
+// ICUStrptime and ICUStrftime.
+void RegisterICUStrptimeFunctions(ExtensionLoader &loader) {
+	// These two are patched in place (see ICUStrptime::TailPatch), not newly registered.
+	ICUStrptime::AddBinaryTimestampFunction("strptime", loader);
+	ICUStrptime::AddBinaryTimestampFunction("try_strptime", loader);
+
+	ICUStrftime::AddBinaryTimestampFunction("strftime", loader);
+
+	// Add string casts
+	ICUStrptime::AddCasts(loader);
+	ICUStrftime::AddCasts(loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-table-range.cpp
+++ b/external/duckdb/extension/icu/icu-table-range.cpp
@@ -0,0 +1,259 @@
+#include "duckdb/common/exception.hpp"
+#include "duckdb/common/operator/subtract.hpp"
+#include "duckdb/common/types/interval.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/function/function_set.hpp"
+#include "duckdb/function/table_function.hpp"
+#include "include/icu-datefunc.hpp"
+#include "unicode/calendar.h"
+#include "tz_calendar.hpp"
+
+namespace duckdb {
+
+struct ICUTableRange {
+	using CalendarPtr = unique_ptr<icu::Calendar>;
+
+	// Bind-time state for the TIMESTAMPTZ range/generate_series table functions: the TimeZone and
+	// Calendar settings read from the client context, an ICU calendar built from them, and a rough
+	// cardinality estimate derived from the constant inputs.
+	struct ICURangeBindData : public TableFunctionData {
+		// Copies the settings and estimate and clones the calendar so each copy owns its own instance.
+		ICURangeBindData(const ICURangeBindData &other)
+		    : TableFunctionData(other), tz_setting(other.tz_setting), cal_setting(other.cal_setting),
+		      calendar(other.calendar->clone()), cardinality(other.cardinality) {
+		}
+
+		// Builds the calendar from the current settings and estimates the cardinality from `inputs`
+		// (start bound, end bound, step). The estimate stays 0 whenever it cannot be computed: fewer
+		// than three inputs, a NULL input, a step with no non-zero microsecond equivalent, or an
+		// overflowing bound difference.
+		// Throws InternalException when ICU cannot create the calendar.
+		explicit ICURangeBindData(ClientContext &context, const vector<Value> &inputs) {
+			Value tz_value;
+			if (context.TryGetCurrentSetting("TimeZone", tz_value)) {
+				tz_setting = tz_value.ToString();
+			}
+			auto tz = icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_setting)));
+
+			string cal_id("@calendar=");
+			Value cal_value;
+			if (context.TryGetCurrentSetting("Calendar", cal_value)) {
+				cal_setting = cal_value.ToString();
+				cal_id += cal_setting;
+			} else {
+				cal_id += "gregorian";
+			}
+
+			icu::Locale locale(cal_id.c_str());
+
+			UErrorCode success = U_ZERO_ERROR;
+			calendar.reset(icu::Calendar::createInstance(tz, locale, success));
+			if (U_FAILURE(success)) {
+				throw InternalException("Unable to create ICU calendar.");
+			}
+
+			// Both bounds and the step are needed; without them `bounds` and `step` would be read
+			// uninitialized below.
+			if (inputs.size() < 3) {
+				return;
+			}
+			timestamp_tz_t bounds[2];
+			interval_t step;
+			for (idx_t i = 0; i < inputs.size(); i++) {
+				if (inputs[i].IsNull()) {
+					return;
+				}
+				if (i >= 2) {
+					step = inputs[i].GetValue<interval_t>();
+				} else {
+					bounds[i] = inputs[i].GetValue<timestamp_tz_t>();
+				}
+			}
+			// Estimate cardinality using micros.
+			int64_t increment = 0;
+			if (!Interval::TryGetMicro(step, increment) || !increment) {
+				return;
+			}
+			int64_t delta = 0;
+			if (!TrySubtractOperator::Operation(bounds[1].value, bounds[0].value, delta)) {
+				return;
+			}
+
+			// A step pointing away from the end bound yields no rows; a negative quotient must not
+			// be converted to idx_t, where it would wrap around to a huge value.
+			if ((delta < 0) != (increment < 0)) {
+				return;
+			}
+			// Divide magnitudes in unsigned space so that INT64_MIN / -1 cannot overflow.
+			const auto magnitude = [](int64_t value) {
+				return value < 0 ? idx_t(0) - idx_t(value) : idx_t(value);
+			};
+			cardinality = magnitude(delta) / magnitude(increment);
+		}
+
+		string tz_setting;
+		string cal_setting;
+		CalendarPtr calendar;
+		// Estimated number of rows; 0 when no estimate could be made.
+		idx_t cardinality = 0;
+	};
+
+	// Per-thread iteration state: which input row is being expanded, the parameters loaded for it,
+	// and the next value to emit.
+	struct ICURangeLocalState : public LocalTableFunctionState {
+		ICURangeLocalState() {
+		}
+
+		// Whether the parameters below have been loaded for current_input_row.
+		bool initialized_row = false;
+		idx_t current_input_row = 0;
+		// Next value to emit for the current row.
+		timestamp_t current_state;
+
+		timestamp_t start;
+		timestamp_t end;
+		interval_t increment;
+		// True when `end` itself is part of the output.
+		bool inclusive_bound;
+		// True when iteration stops once the value passes `end` going upwards; false for downwards.
+		bool greater_than_check;
+
+		bool empty_range = false;
+
+		// Tells whether `current_value` is already beyond the end bound for the current direction
+		// and inclusiveness.
+		bool Finished(timestamp_t current_value) const {
+			if (greater_than_check) {
+				return inclusive_bound ? current_value > end : current_value >= end;
+			}
+			return inclusive_bound ? current_value < end : current_value <= end;
+		}
+	};
+
+	// Loads start, end and increment for input row `row_id` into `result` and derives the iteration
+	// direction, bound inclusiveness (inclusive for GENERATE_SERIES) and the empty-range flag.
+	// If any of the row's columns is NULL, parameters are set so that Finished() is immediately true
+	// and the row produces no output.
+	// Throws BinderException for infinite bounds, a zero interval, or an interval whose components
+	// have mixed signs.
+	template <bool GENERATE_SERIES>
+	static void GenerateRangeDateTimeParameters(DataChunk &input, idx_t row_id, ICURangeLocalState &result) {
+		input.Flatten();
+
+		// The local state is reused for every input row, so clear the flag a previous row may have
+		// left behind. Without this, one empty range suppresses the output of every row after it.
+		result.empty_range = false;
+
+		for (idx_t c = 0; c < input.ColumnCount(); c++) {
+			if (FlatVector::IsNull(input.data[c], row_id)) {
+				// start == end with an exclusive upward check: nothing is emitted.
+				result.start = timestamp_t(0);
+				result.end = timestamp_t(0);
+				result.increment = interval_t();
+				result.greater_than_check = true;
+				result.inclusive_bound = false;
+				return;
+			}
+		}
+
+		result.start = FlatVector::GetValue<timestamp_t>(input.data[0], row_id);
+		result.end = FlatVector::GetValue<timestamp_t>(input.data[1], row_id);
+		result.increment = FlatVector::GetValue<interval_t>(input.data[2], row_id);
+
+		// Infinities either cause errors or infinite loops, so just ban them
+		if (!Timestamp::IsFinite(result.start) || !Timestamp::IsFinite(result.end)) {
+			throw BinderException("RANGE with infinite bounds is not supported");
+		}
+
+		if (result.increment.months == 0 && result.increment.days == 0 && result.increment.micros == 0) {
+			throw BinderException("interval cannot be 0!");
+		}
+		// all elements should point in the same direction
+		if (result.increment.months > 0 || result.increment.days > 0 || result.increment.micros > 0) {
+			if (result.increment.months < 0 || result.increment.days < 0 || result.increment.micros < 0) {
+				throw BinderException("RANGE with composite interval that has mixed signs is not supported");
+			}
+			// Stepping upwards: the range is empty when it starts beyond its end.
+			result.greater_than_check = true;
+			if (result.start > result.end) {
+				result.empty_range = true;
+			}
+		} else {
+			// Stepping downwards: the range is empty when it starts below its end.
+			result.greater_than_check = false;
+			if (result.start < result.end) {
+				result.empty_range = true;
+			}
+		}
+		result.inclusive_bound = GENERATE_SERIES;
+	}
+
+	// Bind callback: creates the bind data from the context and constant inputs and declares a single
+	// TIMESTAMPTZ output column named after the function variant.
+	template <bool GENERATE_SERIES>
+	static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
+	                                     vector<LogicalType> &return_types, vector<string> &names) {
+		auto bind_data = make_uniq<ICURangeBindData>(context, input.inputs);
+
+		return_types.push_back(LogicalType::TIMESTAMP_TZ);
+		names.emplace_back(GENERATE_SERIES ? "generate_series" : "range");
+		return std::move(bind_data);
+	}
+
+	// Local-state initializer: returns a fresh ICURangeLocalState. None of the arguments are used.
+	static unique_ptr<LocalTableFunctionState> RangeDateTimeLocalInit(ExecutionContext &context,
+	                                                                  TableFunctionInitInput &input,
+	                                                                  GlobalTableFunctionState *global_state) {
+		return make_uniq<ICURangeLocalState>();
+	}
+
+	// Cardinality callback: reports the estimate stored in the bind data, passing it for both
+	// NodeStatistics arguments. Returns nullptr when there is no bind data.
+	static unique_ptr<NodeStatistics> Cardinality(ClientContext &context, const FunctionData *bind_data_p) {
+		if (bind_data_p) {
+			const auto estimate = bind_data_p->Cast<ICURangeBindData>().cardinality;
+			return make_uniq<NodeStatistics>(estimate, estimate);
+		}
+		return nullptr;
+	}
+
+	// In-out table function body. For each row of `input` it emits the values from start towards end
+	// in steps of the row's interval, at most STANDARD_VECTOR_SIZE values per call. Progress is kept
+	// in the local state, so the function can be re-entered to continue the current row.
+	// Returns NEED_MORE_INPUT once every row of `input` has been consumed, HAVE_MORE_OUTPUT otherwise.
+	template <bool GENERATE_SERIES>
+	static OperatorResultType ICUTableRangeFunction(ExecutionContext &context, TableFunctionInput &data_p,
+	                                                DataChunk &input, DataChunk &output) {
+		auto &bind_data = data_p.bind_data->Cast<ICURangeBindData>();
+		auto &state = data_p.local_state->Cast<ICURangeLocalState>();
+		// Calendar wrapper built from the bind data's calendar and calendar setting; used for the
+		// interval addition below.
+		TZCalendar calendar(*bind_data.calendar, bind_data.cal_setting);
+		while (true) {
+			if (!state.initialized_row) {
+				// initialize for the current input row
+				if (state.current_input_row >= input.size()) {
+					// ran out of rows
+					state.current_input_row = 0;
+					state.initialized_row = false;
+					return OperatorResultType::NEED_MORE_INPUT;
+				}
+				GenerateRangeDateTimeParameters<GENERATE_SERIES>(input, state.current_input_row, state);
+				state.initialized_row = true;
+				state.current_state = state.start;
+			}
+			if (state.empty_range) {
+				// empty range
+				output.SetCardinality(0);
+				state.current_input_row++;
+				state.initialized_row = false;
+				return OperatorResultType::HAVE_MORE_OUTPUT;
+			}
+			// Fill the output vector until the row is exhausted or the vector is full.
+			idx_t size = 0;
+			auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
+			while (true) {
+				if (state.Finished(state.current_state)) {
+					break;
+				}
+				data[size++] = state.current_state;
+				state.current_state = ICUDateFunc::Add(calendar, state.current_state, state.increment);
+				if (size >= STANDARD_VECTOR_SIZE) {
+					break;
+				}
+			}
+			if (size == 0) {
+				// move to next row
+				state.current_input_row++;
+				state.initialized_row = false;
+				continue;
+			}
+			// The row stays current: the next call either continues it or finds it finished.
+			output.SetCardinality(size);
+			return OperatorResultType::HAVE_MORE_OUTPUT;
+		}
+	}
+
+	// Registers the (TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) overloads of the "range" and
+	// "generate_series" table functions, in that order. Both are in-out functions that share the
+	// local-state initializer and cardinality callback and differ only in the template flag.
+	static void AddICUTableRangeFunction(ExtensionLoader &loader) {
+		const vector<LogicalType> signature {LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ,
+		                                     LogicalType::INTERVAL};
+
+		// range: exclusive bound on the RHS
+		TableFunction range_overload(signature, nullptr, Bind<false>, nullptr, RangeDateTimeLocalInit);
+		range_overload.in_out_function = ICUTableRangeFunction<false>;
+		range_overload.cardinality = Cardinality;
+
+		TableFunctionSet range_set("range");
+		range_set.AddFunction(range_overload);
+		loader.RegisterFunction(range_set);
+
+		// generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS
+		TableFunction series_overload(signature, nullptr, Bind<true>, nullptr, RangeDateTimeLocalInit);
+		series_overload.in_out_function = ICUTableRangeFunction<true>;
+		series_overload.cardinality = Cardinality;
+
+		TableFunctionSet series_set("generate_series");
+		series_set.AddFunction(series_overload);
+		loader.RegisterFunction(series_set);
+	}
+};
+
+// Entry point for this file: registers the TIMESTAMPTZ range and generate_series table functions.
+void RegisterICUTableRangeFunctions(ExtensionLoader &loader) {
+	ICUTableRange::AddICUTableRangeFunction(loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-timebucket.cpp
+++ b/external/duckdb/extension/icu/icu-timebucket.cpp
@@ -0,0 +1,643 @@
+#include "duckdb/common/exception.hpp"
+#include "duckdb/common/limits.hpp"
+#include "duckdb/common/operator/cast_operators.hpp"
+#include "duckdb/common/operator/subtract.hpp"
+#include "duckdb/common/types/interval.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/types/value.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/common/vector_operations/binary_executor.hpp"
+#include "duckdb/common/vector_operations/ternary_executor.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "include/icu-datefunc.hpp"
+
+namespace duckdb {
+
+struct ICUTimeBucket : public ICUDateFunc {
+
+	// Default origin, in microseconds since 1970-01-01, for bucket widths expressed in days or microseconds:
+	// 2000-01-03 00:00:00 (a Monday), chosen for TimescaleDB compatibility.
+	// There are 10959 days between 1970-01-01 and 2000-01-03.
+	constexpr static const int64_t DEFAULT_ORIGIN_MICROS_1 = 10959 * Interval::MICROS_PER_DAY;
+	// Default origin, in microseconds since 1970-01-01, for bucket widths expressed in months:
+	// 2000-01-01 00:00:00, chosen for TimescaleDB compatibility.
+	// There are 10957 days between 1970-01-01 and 2000-01-01.
+	constexpr static const int64_t DEFAULT_ORIGIN_MICROS_2 = 10957 * Interval::MICROS_PER_DAY;
+
+	// Which single interval component a bucket width is made of. UNCLASSIFIED is returned by
+	// ClassifyBucketWidth for every width that does not fit one of the three shapes; the vectorized
+	// entry points then fall back to the per-row operators, which re-classify with
+	// ClassifyBucketWidthErrorThrow and raise a descriptive error.
+	enum struct BucketWidthType { CONVERTIBLE_TO_MICROS, CONVERTIBLE_TO_DAYS, CONVERTIBLE_TO_MONTHS, UNCLASSIFIED };
+
+	// Non-throwing classification of a bucket width by the single interval component it uses.
+	// Returns UNCLASSIFIED for anything that is not purely micros (> 0), purely days (>= 0) or
+	// purely months (> 0).
+	static inline BucketWidthType ClassifyBucketWidth(const interval_t bucket_width) {
+		const bool no_months = bucket_width.months == 0;
+		const bool no_days = bucket_width.days == 0;
+		const bool no_micros = bucket_width.micros == 0;
+
+		if (no_months && no_days && bucket_width.micros > 0) {
+			return BucketWidthType::CONVERTIBLE_TO_MICROS;
+		}
+		// NOTE: the `>= 0` test means the all-zero interval is classified as days; the zero width is then
+		// rejected by WidthConvertibleToDaysCommon.
+		if (no_months && no_micros && bucket_width.days >= 0) {
+			return BucketWidthType::CONVERTIBLE_TO_DAYS;
+		}
+		if (no_days && no_micros && bucket_width.months > 0) {
+			return BucketWidthType::CONVERTIBLE_TO_MONTHS;
+		}
+		return BucketWidthType::UNCLASSIFIED;
+	}
+
+	// Throwing classification used by the per-row operators. Never returns UNCLASSIFIED:
+	//  - a width mixing components raises NotImplementedException naming the offending mix;
+	//  - a single-component width that is zero or negative raises "Period must be greater than 0".
+	static inline BucketWidthType ClassifyBucketWidthErrorThrow(const interval_t bucket_width) {
+		const bool no_months = bucket_width.months == 0;
+		const bool no_days = bucket_width.days == 0;
+		const bool no_micros = bucket_width.micros == 0;
+
+		// Pick the one component that carries the width, together with its classification.
+		BucketWidthType width_type = BucketWidthType::UNCLASSIFIED;
+		int64_t period = 0;
+		if (no_months && no_days) {
+			width_type = BucketWidthType::CONVERTIBLE_TO_MICROS;
+			period = bucket_width.micros;
+		} else if (no_months && no_micros) {
+			width_type = BucketWidthType::CONVERTIBLE_TO_DAYS;
+			period = bucket_width.days;
+		} else if (no_days && no_micros) {
+			width_type = BucketWidthType::CONVERTIBLE_TO_MONTHS;
+			period = bucket_width.months;
+		} else if (no_months) {
+			throw NotImplementedException("Day intervals cannot have time component");
+		} else {
+			throw NotImplementedException("Month intervals cannot have day or time component");
+		}
+
+		if (period <= 0) {
+			throw NotImplementedException("Period must be greater than 0");
+		}
+		return width_type;
+	}
+
+	// Buckets `ts` into windows of `bucket_width_micros` microseconds anchored at `origin`.
+	// The offset from the origin is rounded down (towards negative infinity) to a multiple of the width
+	// and added back onto the origin. Throws OutOfRangeException for a zero width; the subtractions are
+	// performed with SubtractOperatorOverflowCheck.
+	static inline timestamp_t WidthConvertibleToMicrosCommon(int64_t bucket_width_micros, const timestamp_t ts,
+	                                                         const timestamp_t origin, TZCalendar &calendar) {
+		if (bucket_width_micros == 0) {
+			throw OutOfRangeException("Can't bucket using zero microseconds");
+		}
+		const auto ts_epoch = Timestamp::GetEpochMicroSeconds(ts);
+		const auto origin_epoch = Timestamp::GetEpochMicroSeconds(origin);
+		const int64_t delta =
+		    SubtractOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(ts_epoch, origin_epoch);
+
+		// Integer division truncates towards zero, so step back one bucket when `ts` precedes the origin
+		// and does not sit exactly on a bucket boundary.
+		int64_t bucket_offset = (delta / bucket_width_micros) * bucket_width_micros;
+		const bool before_origin = delta < 0;
+		if (before_origin && delta % bucket_width_micros != 0) {
+			bucket_offset =
+			    SubtractOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(bucket_offset, bucket_width_micros);
+		}
+
+		return Add(calendar, origin, interval_t {0, 0, bucket_offset});
+	}
+
+	// Buckets `ts` into windows of `bucket_width_days` calendar days anchored at `origin`, using the
+	// calendar's day difference so that day arithmetic follows the calendar's time zone.
+	// Throws OutOfRangeException for a zero width or when the day offset does not fit in 32 bits.
+	static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
+	                                                       const timestamp_t origin, TZCalendar &calendar) {
+		if (bucket_width_days == 0) {
+			throw OutOfRangeException("Can't bucket using zero days");
+		}
+		const auto day_difference = SubtractFactory(DatePartSpecifier::DAY);
+
+		const int64_t elapsed_days = day_difference(calendar.GetICUCalendar(), origin, ts);
+		const int64_t bucket_days = (elapsed_days / bucket_width_days) * bucket_width_days;
+		const bool fits_int32 =
+		    bucket_days >= NumericLimits<int32_t>::Minimum() && bucket_days <= NumericLimits<int32_t>::Maximum();
+		if (!fits_int32) {
+			throw OutOfRangeException("Timestamp out of range");
+		}
+
+		timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(bucket_days), 0});
+		if (ts < bucket) {
+			// The truncating division produced a candidate after `ts`; move one width back.
+			D_ASSERT(ts < origin);
+			bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
+			D_ASSERT(ts > bucket);
+		}
+
+		return bucket;
+	}
+
+	// Buckets `ts` into windows of `bucket_width_months` months anchored at `origin`.
+	// Both `ts` and `origin` are first truncated to the start of their month; the month difference is
+	// then rounded down to a multiple of the width and added to the truncated origin.
+	// Throws OutOfRangeException for a zero width.
+	static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
+	                                                         const timestamp_t origin, TZCalendar &calendar_p) {
+		if (bucket_width_months == 0) {
+			throw OutOfRangeException("Can't bucket using zero months");
+		}
+		const auto truncate_to_month = TruncationFactory(DatePartSpecifier::MONTH);
+		const auto month_difference = SubtractFactory(DatePartSpecifier::MONTH);
+
+		auto calendar = calendar_p.GetICUCalendar();
+
+		// Loads `value` into the calendar, truncates it to its month and reads the instant back.
+		const auto month_start = [&](const timestamp_t value) -> timestamp_t {
+			uint64_t micros = SetTime(calendar, value);
+			truncate_to_month(calendar, micros);
+			return GetTimeUnsafe(calendar, micros);
+		};
+		const timestamp_t truncated_ts = month_start(ts);
+		const timestamp_t truncated_origin = month_start(origin);
+
+		const int32_t elapsed_months =
+		    NumericCast<int64_t, int32_t>(month_difference(calendar, truncated_origin, truncated_ts)); // NOLINT
+		auto bucket_months = (elapsed_months / bucket_width_months) * bucket_width_months;
+		if (bucket_months < NumericLimits<int32_t>::Minimum() || bucket_months > NumericLimits<int32_t>::Maximum()) {
+			throw OutOfRangeException("Timestamp out of range");
+		}
+		// Truncating division rounds towards zero: step back one bucket for timestamps before the origin
+		// that are not on a bucket boundary.
+		const bool before_origin = elapsed_months < 0;
+		if (before_origin && elapsed_months % bucket_width_months != 0) {
+			bucket_months =
+			    SubtractOperatorOverflowCheck::Operation<int32_t, int32_t, int32_t>(bucket_months, bucket_width_months);
+		}
+
+		return Add(calendar_p, truncated_origin, interval_t {static_cast<int32_t>(bucket_months), 0, 0});
+	}
+
+	// Generic two-argument driver: clones the bound calendar and applies
+	// OP::Operation<TA, TB, TR>(left, right, calendar) to every row of the two input columns.
+	template <typename TA, typename TB, typename TR, typename OP>
+	static void ExecuteBinary(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		// Work on a private copy so the bound calendar is never mutated.
+		CalendarPtr calendar(bind_data.calendar->clone());
+
+		const auto apply = [&](TA left, TB right) {
+			return OP::template Operation<TA, TB, TR>(left, right, calendar);
+		};
+		BinaryExecutor::Execute<TA, TB, TR>(args.data[0], args.data[1], result, args.size(), apply);
+	}
+
+	// Generic three-argument driver: clones the bound calendar and applies
+	// OP::Operation<TA, TB, TC, TR>(ta, tb, tc, calendar) to every row of the three input columns.
+	template <typename TA, typename TB, typename TC, typename TR, typename OP>
+	static void ExecuteTernary(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+
+		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+		auto &info = func_expr.bind_info->Cast<BindData>();
+		// Work on a private copy so the bound calendar is never mutated.
+		CalendarPtr calendar(info.calendar->clone());
+
+		// Forward the per-row values handed to the lambda. Passing the input Vectors (args.data[i])
+		// here would not match an operation declared over the element types TA/TB/TC.
+		// NOTE(review): ExecuteBinary forwards the CalendarPtr itself while this helper forwards the raw
+		// pointer; confirm which form the OP implementations expect.
+		TernaryExecutor::Execute<TA, TB, TC, TR>(
+		    args.data[0], args.data[1], args.data[2], result, args.size(), [&](TA ta, TB tb, TC tc) {
+			    return OP::template Operation<TA, TB, TC, TR>(ta, tb, tc, calendar.get());
+		    });
+	}
+
+	// time_bucket(width, ts) for a microsecond-only width, anchored at the default 2000-01-03 origin.
+	struct WidthConvertibleToMicrosBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts)
+			           ? WidthConvertibleToMicrosCommon(bucket_width.micros, ts,
+			                                            Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1),
+			                                            calendar)
+			           : ts;
+		}
+	};
+
+	// time_bucket(width, ts) for a day-only width, anchored at the default 2000-01-03 origin.
+	struct WidthConvertibleToDaysBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts)
+			           ? WidthConvertibleToDaysCommon(bucket_width.days, ts,
+			                                          Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1),
+			                                          calendar)
+			           : ts;
+		}
+	};
+
+	// time_bucket(width, ts) for a month-only width, anchored at the default 2000-01-01 origin.
+	struct WidthConvertibleToMonthsBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts)
+			           ? WidthConvertibleToMonthsCommon(bucket_width.months, ts,
+			                                            Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_2),
+			                                            calendar)
+			           : ts;
+		}
+	};
+
+	// Per-row time_bucket(width, ts): classifies the width (throwing on invalid widths) and forwards
+	// to the matching width-specific operator.
+	struct BinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, TZCalendar &calendar) {
+			const auto width_type = ClassifyBucketWidthErrorThrow(bucket_width);
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MICROS) {
+				return WidthConvertibleToMicrosBinaryOperator::Operation(bucket_width, ts, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_DAYS) {
+				return WidthConvertibleToDaysBinaryOperator::Operation(bucket_width, ts, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MONTHS) {
+				return WidthConvertibleToMonthsBinaryOperator::Operation(bucket_width, ts, calendar);
+			}
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	};
+
+	// time_bucket(width, ts, offset) for a microsecond-only width: the offset is subtracted from `ts`,
+	// the shifted value is bucketed against the default 2000-01-03 origin, and the offset is added back.
+	struct OffsetWidthConvertibleToMicrosTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, interval_t offset,
+		                                    TZCalendar &calendar) {
+			if (!Value::IsFinite(ts)) {
+				// Non-finite timestamps are returned unchanged.
+				return ts;
+			}
+			const auto origin = Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1);
+			const auto shifted_ts = Sub(calendar, ts, offset);
+			const auto bucket = WidthConvertibleToMicrosCommon(bucket_width.micros, shifted_ts, origin, calendar);
+			return Add(calendar, bucket, offset);
+		}
+	};
+
+	// time_bucket(width, ts, offset) for a day-only width: the offset is subtracted from `ts`,
+	// the shifted value is bucketed against the default 2000-01-03 origin, and the offset is added back.
+	struct OffsetWidthConvertibleToDaysTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, interval_t offset,
+		                                    TZCalendar &calendar) {
+			if (!Value::IsFinite(ts)) {
+				// Non-finite timestamps are returned unchanged.
+				return ts;
+			}
+			const auto origin = Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1);
+			const auto shifted_ts = Sub(calendar, ts, offset);
+			const auto bucket = WidthConvertibleToDaysCommon(bucket_width.days, shifted_ts, origin, calendar);
+			return Add(calendar, bucket, offset);
+		}
+	};
+
+	// time_bucket(width, ts, offset) for a month-only width: the offset is subtracted from `ts`,
+	// the shifted value is bucketed against the default 2000-01-01 origin, and the offset is added back.
+	struct OffsetWidthConvertibleToMonthsTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, interval_t offset,
+		                                    TZCalendar &calendar) {
+			if (!Value::IsFinite(ts)) {
+				// Non-finite timestamps are returned unchanged.
+				return ts;
+			}
+			const auto origin = Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_2);
+			const auto shifted_ts = Sub(calendar, ts, offset);
+			const auto bucket = WidthConvertibleToMonthsCommon(bucket_width.months, shifted_ts, origin, calendar);
+			return Add(calendar, bucket, offset);
+		}
+	};
+
+	// Per-row time_bucket(width, ts, offset): classifies the width (throwing on invalid widths) and
+	// forwards to the matching width-specific offset operator.
+	struct OffsetTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, interval_t offset,
+		                                    TZCalendar &calendar) {
+			const auto width_type = ClassifyBucketWidthErrorThrow(bucket_width);
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MICROS) {
+				return OffsetWidthConvertibleToMicrosTernaryOperator::Operation(bucket_width, ts, offset, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_DAYS) {
+				return OffsetWidthConvertibleToDaysTernaryOperator::Operation(bucket_width, ts, offset, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MONTHS) {
+				return OffsetWidthConvertibleToMonthsTernaryOperator::Operation(bucket_width, ts, offset, calendar);
+			}
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	};
+
+	// time_bucket(width, ts, origin) for a microsecond-only width with a caller-supplied origin.
+	struct OriginWidthConvertibleToMicrosTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToMicrosCommon(bucket_width.micros, ts, origin, calendar)
+			                           : ts;
+		}
+	};
+
+	// time_bucket(width, ts, origin) for a day-only width with a caller-supplied origin.
+	struct OriginWidthConvertibleToDaysTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToDaysCommon(bucket_width.days, ts, origin, calendar) : ts;
+		}
+	};
+
+	// time_bucket(width, ts, origin) for a month-only width with a caller-supplied origin.
+	struct OriginWidthConvertibleToMonthsTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToMonthsCommon(bucket_width.months, ts, origin, calendar)
+			                           : ts;
+		}
+	};
+
+	// Per-row time_bucket(width, ts, origin). A non-finite origin marks the row as NULL in `mask`;
+	// otherwise the width is classified (throwing on invalid widths) and the matching
+	// width-specific origin operator is applied.
+	struct OriginTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    ValidityMask &mask, idx_t idx, TZCalendar &calendar) {
+			if (!Value::IsFinite(origin)) {
+				// The returned value is a placeholder; the row is invalid.
+				mask.SetInvalid(idx);
+				return timestamp_t(0);
+			}
+
+			const auto width_type = ClassifyBucketWidthErrorThrow(bucket_width);
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MICROS) {
+				return OriginWidthConvertibleToMicrosTernaryOperator::Operation(bucket_width, ts, origin, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_DAYS) {
+				return OriginWidthConvertibleToDaysTernaryOperator::Operation(bucket_width, ts, origin, calendar);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MONTHS) {
+				return OriginWidthConvertibleToMonthsTernaryOperator::Operation(bucket_width, ts, origin, calendar);
+			}
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	};
+
+	// time_bucket(width, ts, tz) for a microsecond-only width; `origin` is the default origin already
+	// converted by the caller for the requested time zone.
+	struct TimeZoneWidthConvertibleToMicrosBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToMicrosCommon(bucket_width.micros, ts, origin, calendar)
+			                           : ts;
+		}
+	};
+
+	// time_bucket(width, ts, tz) for a day-only width; `origin` is the default origin already
+	// converted by the caller for the requested time zone.
+	struct TimeZoneWidthConvertibleToDaysBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToDaysCommon(bucket_width.days, ts, origin, calendar) : ts;
+		}
+	};
+
+	// time_bucket(width, ts, tz) for a month-only width; `origin` is the default origin already
+	// converted by the caller for the requested time zone.
+	struct TimeZoneWidthConvertibleToMonthsBinaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin,
+		                                    TZCalendar &calendar) {
+			// Non-finite timestamps are returned unchanged.
+			return Value::IsFinite(ts) ? WidthConvertibleToMonthsCommon(bucket_width.months, ts, origin, calendar)
+			                           : ts;
+		}
+	};
+
+	// Per-row time_bucket(width, ts, tz): switches the calendar to `tz`, classifies the width (throwing
+	// on invalid widths), converts the matching default origin with FromNaive and buckets `ts` against it.
+	struct TimeZoneTernaryOperator {
+		static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, string_t tz,
+		                                    TZCalendar &calendar_p) {
+			auto calendar = calendar_p.GetICUCalendar();
+			SetTimeZone(calendar, tz);
+
+			const auto width_type = ClassifyBucketWidthErrorThrow(bucket_width);
+
+			// Month widths are anchored at 2000-01-01, everything else at 2000-01-03.
+			int64_t naive_origin_micros = 0;
+			switch (width_type) {
+			case BucketWidthType::CONVERTIBLE_TO_MICROS:
+			case BucketWidthType::CONVERTIBLE_TO_DAYS:
+				naive_origin_micros = DEFAULT_ORIGIN_MICROS_1;
+				break;
+			case BucketWidthType::CONVERTIBLE_TO_MONTHS:
+				naive_origin_micros = DEFAULT_ORIGIN_MICROS_2;
+				break;
+			default:
+				throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+			}
+			const timestamp_t origin =
+			    ICUDateFunc::FromNaive(calendar, Timestamp::FromEpochMicroSeconds(naive_origin_micros));
+
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_MICROS) {
+				return TimeZoneWidthConvertibleToMicrosBinaryOperator::Operation(bucket_width, ts, origin, calendar_p);
+			}
+			if (width_type == BucketWidthType::CONVERTIBLE_TO_DAYS) {
+				return TimeZoneWidthConvertibleToDaysBinaryOperator::Operation(bucket_width, ts, origin, calendar_p);
+			}
+			return TimeZoneWidthConvertibleToMonthsBinaryOperator::Operation(bucket_width, ts, origin, calendar_p);
+		}
+	};
+
+	// Scalar implementation of time_bucket(INTERVAL, TIMESTAMPTZ), evaluated with the calendar set to UTC.
+	// A constant bucket width is classified once so the whole vector runs through a width-specific
+	// operator; a constant NULL width yields a constant NULL result; otherwise every row is classified
+	// individually through BinaryOperator.
+	static void ICUTimeBucketFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 2);
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		TZCalendar calendar(*bind_data.calendar, bind_data.cal_setting);
+		SetTimeZone(calendar.GetICUCalendar(), string_t("UTC"));
+
+		auto &width_vec = args.data[0];
+		auto &ts_vec = args.data[1];
+		const auto count = args.size();
+
+		// Varying widths: classify per row.
+		if (width_vec.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count,
+			    [&](interval_t width, timestamp_t ts) { return BinaryOperator::Operation(width, ts, calendar); });
+			return;
+		}
+
+		// Constant NULL width: the whole result is NULL.
+		if (ConstantVector::IsNull(width_vec)) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		// Constant width: classify once and use the specialised operator.
+		switch (ClassifyBucketWidth(*ConstantVector::GetData<interval_t>(width_vec))) {
+		case BucketWidthType::CONVERTIBLE_TO_MICROS:
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return WidthConvertibleToMicrosBinaryOperator::Operation(width, ts, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_DAYS:
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return WidthConvertibleToDaysBinaryOperator::Operation(width, ts, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_MONTHS:
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return WidthConvertibleToMonthsBinaryOperator::Operation(width, ts, calendar);
+			    });
+			break;
+		case BucketWidthType::UNCLASSIFIED:
+			// Let the per-row operator raise the descriptive error.
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count,
+			    [&](interval_t width, timestamp_t ts) { return BinaryOperator::Operation(width, ts, calendar); });
+			break;
+		default:
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	}
+
+	// Scalar implementation of time_bucket(INTERVAL, TIMESTAMPTZ, INTERVAL offset), evaluated with the
+	// calendar set to UTC. A constant bucket width is classified once so the whole vector runs through a
+	// width-specific operator; a constant NULL width yields a constant NULL result; otherwise every row
+	// is classified individually through OffsetTernaryOperator.
+	static void ICUTimeBucketOffsetFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		TZCalendar calendar(*bind_data.calendar, bind_data.cal_setting);
+		SetTimeZone(calendar.GetICUCalendar(), string_t("UTC"));
+
+		auto &width_vec = args.data[0];
+		auto &ts_vec = args.data[1];
+		auto &offset_vec = args.data[2];
+		const auto count = args.size();
+
+		// Varying widths: classify per row.
+		if (width_vec.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			TernaryExecutor::Execute<interval_t, timestamp_t, interval_t, timestamp_t>(
+			    width_vec, ts_vec, offset_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, interval_t offset) {
+				    return OffsetTernaryOperator::Operation(width, ts, offset, calendar);
+			    });
+			return;
+		}
+
+		// Constant NULL width: the whole result is NULL.
+		if (ConstantVector::IsNull(width_vec)) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		// Constant width: classify once and use the specialised operator.
+		switch (ClassifyBucketWidth(*ConstantVector::GetData<interval_t>(width_vec))) {
+		case BucketWidthType::CONVERTIBLE_TO_MICROS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, interval_t, timestamp_t>(
+			    width_vec, ts_vec, offset_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, interval_t offset) {
+				    return OffsetWidthConvertibleToMicrosTernaryOperator::Operation(width, ts, offset, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_DAYS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, interval_t, timestamp_t>(
+			    width_vec, ts_vec, offset_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, interval_t offset) {
+				    return OffsetWidthConvertibleToDaysTernaryOperator::Operation(width, ts, offset, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_MONTHS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, interval_t, timestamp_t>(
+			    width_vec, ts_vec, offset_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, interval_t offset) {
+				    return OffsetWidthConvertibleToMonthsTernaryOperator::Operation(width, ts, offset, calendar);
+			    });
+			break;
+		case BucketWidthType::UNCLASSIFIED:
+			// Let the per-row operator raise the descriptive error.
+			TernaryExecutor::Execute<interval_t, timestamp_t, interval_t, timestamp_t>(
+			    width_vec, ts_vec, offset_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, interval_t offset) {
+				    return OffsetTernaryOperator::Operation(width, ts, offset, calendar);
+			    });
+			break;
+		default:
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	}
+
+	// Scalar implementation of time_bucket(INTERVAL, TIMESTAMPTZ, TIMESTAMPTZ origin), evaluated with
+	// the calendar set to UTC. When both the width and the origin are constant, the width is classified
+	// once and a width-specific operator is used; a constant NULL width, NULL origin or non-finite
+	// origin yields a constant NULL result. In every other case rows are handled one at a time by
+	// OriginTernaryOperator, which NULLs out rows with a non-finite origin.
+	static void ICUTimeBucketOriginFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		TZCalendar calendar(*bind_data.calendar, bind_data.cal_setting);
+		SetTimeZone(calendar.GetICUCalendar(), string_t("UTC"));
+
+		auto &width_vec = args.data[0];
+		auto &ts_vec = args.data[1];
+		auto &origin_vec = args.data[2];
+		const auto count = args.size();
+
+		const bool constant_inputs = width_vec.GetVectorType() == VectorType::CONSTANT_VECTOR &&
+		                             origin_vec.GetVectorType() == VectorType::CONSTANT_VECTOR;
+
+		// Varying width or origin: classify and validate per row.
+		if (!constant_inputs) {
+			TernaryExecutor::ExecuteWithNulls<interval_t, timestamp_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, origin_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, timestamp_t origin, ValidityMask &mask, idx_t idx) {
+				    return OriginTernaryOperator::Operation(width, ts, origin, mask, idx, calendar);
+			    });
+			return;
+		}
+
+		// Constant NULL width / NULL origin / non-finite origin: the whole result is NULL.
+		if (ConstantVector::IsNull(width_vec) || ConstantVector::IsNull(origin_vec) ||
+		    !Value::IsFinite(*ConstantVector::GetData<timestamp_t>(origin_vec))) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		// Constant width: classify once and use the specialised operator.
+		switch (ClassifyBucketWidth(*ConstantVector::GetData<interval_t>(width_vec))) {
+		case BucketWidthType::CONVERTIBLE_TO_MICROS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, origin_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, timestamp_t origin) {
+				    return OriginWidthConvertibleToMicrosTernaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_DAYS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, origin_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, timestamp_t origin) {
+				    return OriginWidthConvertibleToDaysTernaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		case BucketWidthType::CONVERTIBLE_TO_MONTHS:
+			TernaryExecutor::Execute<interval_t, timestamp_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, origin_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, timestamp_t origin) {
+				    return OriginWidthConvertibleToMonthsTernaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		case BucketWidthType::UNCLASSIFIED:
+			// Let the per-row operator raise the descriptive error.
+			TernaryExecutor::ExecuteWithNulls<interval_t, timestamp_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, origin_vec, result, count,
+			    [&](interval_t width, timestamp_t ts, timestamp_t origin, ValidityMask &mask, idx_t idx) {
+				    return OriginTernaryOperator::Operation(width, ts, origin, mask, idx, calendar);
+			    });
+			break;
+		default:
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	}
+
+	// Scalar implementation of time_bucket(INTERVAL, TIMESTAMPTZ, VARCHAR timezone).
+	// When both the width and the time zone are constant, the calendar is switched to that zone once,
+	// the default origin is converted once with FromNaive, and a width-specific operator is run over the
+	// vector; a constant NULL width or NULL zone yields a constant NULL result. In every other case
+	// rows are handled one at a time by TimeZoneTernaryOperator.
+	static void ICUTimeBucketTimeZoneFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+		D_ASSERT(args.ColumnCount() == 3);
+
+		auto &bind_data = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		TZCalendar calendar(*bind_data.calendar, bind_data.cal_setting);
+
+		auto &width_vec = args.data[0];
+		auto &ts_vec = args.data[1];
+		auto &tz_vec = args.data[2];
+		const auto count = args.size();
+
+		const bool constant_inputs = width_vec.GetVectorType() == VectorType::CONSTANT_VECTOR &&
+		                             tz_vec.GetVectorType() == VectorType::CONSTANT_VECTOR;
+
+		// Varying width or zone: resolve both per row.
+		if (!constant_inputs) {
+			TernaryExecutor::Execute<interval_t, timestamp_t, string_t, timestamp_t>(
+			    width_vec, ts_vec, tz_vec, result, count, [&](interval_t width, timestamp_t ts, string_t tz) {
+				    return TimeZoneTernaryOperator::Operation(width, ts, tz, calendar);
+			    });
+			return;
+		}
+
+		// Constant NULL width / NULL zone: the whole result is NULL.
+		if (ConstantVector::IsNull(width_vec) || ConstantVector::IsNull(tz_vec)) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		const interval_t constant_width = *ConstantVector::GetData<interval_t>(width_vec);
+		SetTimeZone(calendar.GetICUCalendar(), *ConstantVector::GetData<string_t>(tz_vec));
+
+		// Constant width: classify once, convert the default origin once, use the specialised operator.
+		switch (ClassifyBucketWidth(constant_width)) {
+		case BucketWidthType::CONVERTIBLE_TO_MICROS: {
+			const timestamp_t origin = ICUDateFunc::FromNaive(
+			    calendar.GetICUCalendar(), Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1));
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return TimeZoneWidthConvertibleToMicrosBinaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		}
+		case BucketWidthType::CONVERTIBLE_TO_DAYS: {
+			const timestamp_t origin = ICUDateFunc::FromNaive(
+			    calendar.GetICUCalendar(), Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_1));
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return TimeZoneWidthConvertibleToDaysBinaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		}
+		case BucketWidthType::CONVERTIBLE_TO_MONTHS: {
+			const timestamp_t origin = ICUDateFunc::FromNaive(
+			    calendar.GetICUCalendar(), Timestamp::FromEpochMicroSeconds(DEFAULT_ORIGIN_MICROS_2));
+			BinaryExecutor::Execute<interval_t, timestamp_t, timestamp_t>(
+			    width_vec, ts_vec, result, count, [&](interval_t width, timestamp_t ts) {
+				    return TimeZoneWidthConvertibleToMonthsBinaryOperator::Operation(width, ts, origin, calendar);
+			    });
+			break;
+		}
+		case BucketWidthType::UNCLASSIFIED:
+			// Let the per-row operator raise the descriptive error.
+			TernaryExecutor::Execute<interval_t, timestamp_t, string_t, timestamp_t>(
+			    width_vec, ts_vec, tz_vec, result, count, [&](interval_t width, timestamp_t ts, string_t tz) {
+				    return TimeZoneTernaryOperator::Operation(width, ts, tz, calendar);
+			    });
+			break;
+		default:
+			throw NotImplementedException("Bucket type not implemented for ICU TIME_BUCKET");
+		}
+	}
+
+	// Registers the four TIMESTAMPTZ overloads of `time_bucket`:
+	//   (width, ts), (width, ts, offset INTERVAL), (width, ts, origin TIMESTAMPTZ), (width, ts, tz VARCHAR).
+	// Every overload is flagged via BaseScalarFunction::SetReturnsError before registration.
+	static void AddTimeBucketFunction(ExtensionLoader &loader) {
+		ScalarFunction plain_overload({LogicalType::INTERVAL, LogicalType::TIMESTAMP_TZ}, LogicalType::TIMESTAMP_TZ,
+		                              ICUTimeBucketFunction, Bind);
+		ScalarFunction offset_overload({LogicalType::INTERVAL, LogicalType::TIMESTAMP_TZ, LogicalType::INTERVAL},
+		                               LogicalType::TIMESTAMP_TZ, ICUTimeBucketOffsetFunction, Bind);
+		ScalarFunction origin_overload({LogicalType::INTERVAL, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ},
+		                               LogicalType::TIMESTAMP_TZ, ICUTimeBucketOriginFunction, Bind);
+		ScalarFunction zone_overload({LogicalType::INTERVAL, LogicalType::TIMESTAMP_TZ, LogicalType::VARCHAR},
+		                             LogicalType::TIMESTAMP_TZ, ICUTimeBucketTimeZoneFunction, Bind);
+
+		ScalarFunctionSet overloads("time_bucket");
+		overloads.AddFunction(plain_overload);
+		overloads.AddFunction(offset_overload);
+		overloads.AddFunction(origin_overload);
+		overloads.AddFunction(zone_overload);
+
+		for (auto &overload : overloads.functions) {
+			BaseScalarFunction::SetReturnsError(overload);
+		}
+		loader.RegisterFunction(overloads);
+	}
+};
+
+// Extension entry point: registers the ICU-backed `time_bucket` overloads
+// by forwarding to ICUTimeBucket::AddTimeBucketFunction.
+void RegisterICUTimeBucketFunctions(ExtensionLoader &loader) {
+	ICUTimeBucket::AddTimeBucketFunction(loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu-timezone.cpp
+++ b/external/duckdb/extension/icu/icu-timezone.cpp
@@ -0,0 +1,481 @@
+#include "duckdb/common/types/date.hpp"
+#include "duckdb/common/types/time.hpp"
+#include "duckdb/common/types/timestamp.hpp"
+#include "duckdb/common/exception/conversion_exception.hpp"
+#include "duckdb/function/cast/cast_function_set.hpp"
+#include "duckdb/function/cast_rules.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "include/icu-casts.hpp"
+#include "include/icu-datefunc.hpp"
+#include "duckdb/transaction/meta_transaction.hpp"
+#include "duckdb/common/operator/cast_operators.hpp"
+#include "duckdb/main/settings.hpp"
+
+namespace duckdb {
+
+//! Generic fallback: a value of any type without a dedicated specialization is reported as finite.
+template <typename T>
+static bool ICUIsFinite(const T &t) {
+	return true;
+}
+
+//! timestamp_t specialization: defers to Timestamp::IsFinite.
+template <>
+bool ICUIsFinite(const timestamp_t &t) {
+	return Timestamp::IsFinite(t);
+}
+
+//! Global scan state of the time zone table function: the enumeration of zone IDs that
+//! still have to be emitted, plus one "now" captured at construction so that every row
+//! is evaluated against the same instant.
+struct ICUTimeZoneData : public GlobalTableFunctionState {
+	//	icu::Calendar::getNow() is a static member, so it is called directly. Creating a
+	//	Calendar instance just to reach it cost an allocation and dereferenced the result
+	//	of createInstance without checking the status or the pointer.
+	ICUTimeZoneData() : tzs(icu::TimeZone::createEnumeration()), now(icu::Calendar::getNow()) {
+	}
+
+	//	Remaining zone IDs; consumed by the scan function via snext()
+	duckdb::unique_ptr<icu::StringEnumeration> tzs;
+	//	Instant used for all offset lookups of this scan
+	UDate now;
+};
+
+//! Declares the four output columns (name, abbrev, utc_offset, is_dst). No bind data is produced.
+static duckdb::unique_ptr<FunctionData> ICUTimeZoneBind(ClientContext &context, TableFunctionBindInput &input,
+                                                        vector<LogicalType> &return_types, vector<string> &names) {
+	//	Appends one column: its name and its type are pushed in lock step
+	const auto add_column = [&](const char *column_name, const LogicalType &column_type) {
+		names.emplace_back(column_name);
+		return_types.emplace_back(column_type);
+	};
+
+	add_column("name", LogicalType::VARCHAR);
+	add_column("abbrev", LogicalType::VARCHAR);
+	add_column("utc_offset", LogicalType::INTERVAL);
+	add_column("is_dst", LogicalType::BOOLEAN);
+
+	return nullptr;
+}
+
+//! Global-state initializer: returns a freshly constructed ICUTimeZoneData.
+//! Neither the context nor the init input is consulted.
+static duckdb::unique_ptr<GlobalTableFunctionState> ICUTimeZoneInit(ClientContext &context,
+                                                                    TableFunctionInitInput &input) {
+	return make_uniq<ICUTimeZoneData>();
+}
+
+//! Emits up to STANDARD_VECTOR_SIZE rows, one per zone ID taken from the scan state's enumeration.
+//! Columns: 0 = zone ID, 1 = short name, 2 = total UTC offset as INTERVAL, 3 = whether DST is in effect.
+//! The chunk ends early when the enumeration is exhausted or an ICU call reports a failure.
+static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
+	auto &scan = data_p.global_state->Cast<ICUTimeZoneData>();
+	idx_t row = 0;
+	for (; row < STANDARD_VECTOR_SIZE; ++row) {
+		UErrorCode status = U_ZERO_ERROR;
+		const auto zone_id = scan.tzs->snext(status);
+		if (!zone_id || U_FAILURE(status)) {
+			break;
+		}
+
+		//	Column 0: the ID exactly as enumerated
+		std::string long_name;
+		zone_id->toUTF8String(long_name);
+		output.SetValue(0, row, Value(long_name));
+
+		//	Column 1: there is no zone tree to look abbreviations up in, so pick the shortest
+		//	(ties broken lexicographically) equivalent ID without a slash; default is the long name.
+		std::string short_name = long_name;
+		const auto equivalent_count = icu::TimeZone::countEquivalentIDs(*zone_id);
+		for (int32_t eq = 0; eq < equivalent_count; ++eq) {
+			const auto equivalent = icu::TimeZone::getEquivalentID(*zone_id, eq);
+			if (equivalent.indexOf(char16_t('/')) >= 0) {
+				continue;
+			}
+			std::string candidate;
+			equivalent.toUTF8String(candidate);
+			const bool is_shorter = candidate.size() < short_name.size();
+			const bool wins_tie = candidate.size() == short_name.size() && candidate < short_name;
+			if (is_shorter || wins_tie) {
+				short_name = candidate;
+			}
+		}
+		output.SetValue(1, row, Value(short_name));
+
+		//	Offsets of this zone at the instant captured in the scan state
+		duckdb::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createTimeZone(*zone_id));
+		int32_t raw_offset_ms;
+		int32_t dst_offset_ms;
+		zone->getOffset(scan.now, false, raw_offset_ms, dst_offset_ms, status);
+		if (U_FAILURE(status)) {
+			break;
+		}
+
+		//	Column 2 follows PG: the reported offset is raw offset plus DST offset
+		const auto total_offset_ms = raw_offset_ms + dst_offset_ms;
+		output.SetValue(2, row,
+		                Value::INTERVAL(Interval::FromMicro(total_offset_ms * Interval::MICROS_PER_MSEC)));
+		//	Column 3: DST is considered active when its offset is non-zero
+		output.SetValue(3, row, Value(dst_offset_ms != 0));
+	}
+	output.SetCardinality(row);
+}
+
+//! Conversion of naive (zone-less) temporal values into TIMESTAMPTZ instants: the naive
+//! fields are written into an ICU calendar and the resulting time is read back.
+struct ICUFromNaiveTimestamp : public ICUDateFunc {
+	//! Writes the date/time fields of naive into calendar and returns GetTime's result.
+	//! Non-finite inputs are passed through untouched.
+	static inline timestamp_t Operation(icu::Calendar *calendar, timestamp_t naive) {
+		if (!ICUIsFinite(naive)) {
+			return naive;
+		}
+
+		//	Split into a date part and a time-of-day part
+		date_t date_part;
+		dtime_t time_part;
+		Timestamp::Convert(naive, date_part, time_part);
+
+		//	Date part -> year / month / day
+		int32_t year_value;
+		int32_t month_value;
+		int32_t day_value;
+		Date::Convert(date_part, year_value, month_value, day_value);
+
+		//	Time part -> hour / minute / second / sub-second remainder
+		int32_t hour_value;
+		int32_t minute_value;
+		int32_t second_value;
+		int32_t subsecond;
+		Time::Convert(time_part, hour_value, minute_value, second_value, subsecond);
+
+		//	The calendar receives whole milliseconds; what is left over goes to GetTime
+		const int32_t whole_millis = subsecond / int32_t(Interval::MICROS_PER_MSEC);
+		const uint64_t leftover_micros = subsecond % Interval::MICROS_PER_MSEC;
+
+		//	UCAL_MONTH is given month - 1
+		calendar->set(UCAL_YEAR, year_value);
+		calendar->set(UCAL_MONTH, int32_t(month_value - 1));
+		calendar->set(UCAL_DATE, day_value);
+		calendar->set(UCAL_HOUR_OF_DAY, hour_value);
+		calendar->set(UCAL_MINUTE, minute_value);
+		calendar->set(UCAL_SECOND, second_value);
+		calendar->set(UCAL_MILLISECOND, whole_millis);
+
+		return GetTime(calendar, leftover_micros);
+	}
+
+	//! Identity "cast" used for the TIMESTAMP source type, so it can share CastFromNaive
+	//! with the other source types.
+	struct CastTimestampUsToUs {
+		template <class SRC, class DST>
+		static inline DST Operation(SRC input) {
+			return input;
+		}
+	};
+
+	//! Vectorized cast: each input is first mapped through OP::Operation<T, timestamp_t>
+	//! and then through Operation, using a private clone of the bound calendar.
+	template <class OP, class T = timestamp_t>
+	static bool CastFromNaive(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+		auto &bind_info = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+		CalendarPtr zone_calendar(bind_info.calendar->clone());
+
+		UnaryExecutor::Execute<T, timestamp_t>(source, result, count, [&](T value) {
+			const timestamp_t as_micros = OP::template Operation<T, timestamp_t>(value);
+			return Operation(zone_calendar.get(), as_micros);
+		});
+		return true;
+	}
+
+	//! Cast binder: validates the context and the DisableTimestamptzCastsSetting, then
+	//! selects the CastFromNaive instantiation that matches the source type.
+	static BoundCastInfo BindCastFromNaive(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
+		if (!input.context) {
+			throw InternalException("Missing context for TIMESTAMP to TIMESTAMPTZ cast.");
+		}
+		auto &context = *input.context;
+		if (DBConfig::GetSetting<DisableTimestamptzCastsSetting>(context)) {
+			throw BinderException("Casting from TIMESTAMP to TIMESTAMP WITH TIME ZONE without an explicit time zone "
+			                      "has been disabled  - use \"AT TIME ZONE ...\"");
+		}
+
+		auto bound_data = make_uniq<CastData>(make_uniq<BindData>(context));
+		switch (source.id()) {
+		case LogicalTypeId::DATE:
+			return BoundCastInfo(CastFromNaive<Cast, date_t>, std::move(bound_data));
+		case LogicalTypeId::TIMESTAMP_SEC:
+			return BoundCastInfo(CastFromNaive<CastTimestampSecToUs>, std::move(bound_data));
+		case LogicalTypeId::TIMESTAMP_MS:
+			return BoundCastInfo(CastFromNaive<CastTimestampMsToUs>, std::move(bound_data));
+		case LogicalTypeId::TIMESTAMP:
+			return BoundCastInfo(CastFromNaive<CastTimestampUsToUs>, std::move(bound_data));
+		case LogicalTypeId::TIMESTAMP_NS:
+			return BoundCastInfo(CastFromNaive<CastTimestampNsToUs>, std::move(bound_data));
+		default:
+			throw InternalException("Type %s not handled in BindCastFromNaive", LogicalTypeIdToString(source.id()));
+		}
+	}
+
+	//! Registers BindCastFromNaive for source -> target with the cost given by CastRules::ImplicitCast.
+	static void AddCast(CastFunctionSet &casts, const LogicalType &source, const LogicalType &target) {
+		casts.RegisterCastFunction(source, target, BindCastFromNaive, CastRules::ImplicitCast(source, target));
+	}
+
+	//! Registers the casts from every supported naive type to TIMESTAMPTZ in the database's cast set.
+	static void AddCasts(ExtensionLoader &loader) {
+		auto &casts = DBConfig::GetConfig(loader.GetDatabaseInstance()).GetCastFunctions();
+		const LogicalType target = LogicalType::TIMESTAMP_TZ;
+
+		AddCast(casts, LogicalType::TIMESTAMP, target);
+		AddCast(casts, LogicalType::TIMESTAMP_MS, target);
+		AddCast(casts, LogicalType::TIMESTAMP_NS, target);
+		AddCast(casts, LogicalType::TIMESTAMP_S, target);
+		AddCast(casts, LogicalType::DATE, target);
+	}
+};
+
+//! Conversion of TIMESTAMPTZ instants into naive timestamps built from the field values
+//! that an ICU calendar reports for the instant.
+struct ICUToNaiveTimestamp : public ICUDateFunc {
+	//! Loads instant into calendar, reads the calendar fields back and assembles a naive timestamp.
+	//! Non-finite inputs are passed through untouched.
+	//! Throws ConversionException when the fields do not form a valid date or timestamp.
+	static inline timestamp_t Operation(icu::Calendar *calendar, timestamp_t instant) {
+		if (!ICUIsFinite(instant)) {
+			return instant;
+		}
+
+		//	SetTime's return value is later combined with the millisecond field to form the microseconds
+		auto sub_second = int32_t(SetTime(calendar, instant));
+
+		//	Date fields; UCAL_MONTH is shifted by one to get the month number
+		const auto era_field = ExtractField(calendar, UCAL_ERA);
+		const auto year_field = ExtractField(calendar, UCAL_YEAR);
+		const auto month_value = ExtractField(calendar, UCAL_MONTH) + 1;
+		const auto day_value = ExtractField(calendar, UCAL_DATE);
+
+		//	A zero era turns year y into (1 - y)
+		const auto signed_year = era_field ? year_field : (-year_field + 1);
+		date_t date_part;
+		if (!Date::TryFromDate(signed_year, month_value, day_value, date_part)) {
+			throw ConversionException("Unable to convert TIMESTAMPTZ to local date");
+		}
+
+		//	Time-of-day fields
+		const auto hour_value = ExtractField(calendar, UCAL_HOUR_OF_DAY);
+		const auto minute_value = ExtractField(calendar, UCAL_MINUTE);
+		const auto second_value = ExtractField(calendar, UCAL_SECOND);
+		const auto milli_value = ExtractField(calendar, UCAL_MILLISECOND);
+
+		sub_second += milli_value * int32_t(Interval::MICROS_PER_MSEC);
+		const dtime_t time_part = Time::FromTime(hour_value, minute_value, second_value, sub_second);
+
+		timestamp_t naive;
+		if (!Timestamp::TryFromDatetime(date_part, time_part, naive)) {
+			throw ConversionException("Unable to convert TIMESTAMPTZ to local TIMESTAMP");
+		}
+		return naive;
+	}
+
+	//! Vectorized cast: applies Operation to every input using a private clone of the bound calendar.
+	static bool CastToNaive(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+		auto &bind_info = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+		CalendarPtr zone_calendar(bind_info.calendar->clone());
+		const auto raw_calendar = zone_calendar.get();
+
+		UnaryExecutor::Execute<timestamp_t, timestamp_t>(
+		    source, result, count, [&](timestamp_t instant) { return Operation(raw_calendar, instant); });
+		return true;
+	}
+
+	//! Cast binder: validates the context and the DisableTimestamptzCastsSetting, then binds CastToNaive.
+	static BoundCastInfo BindCastToNaive(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
+		if (!input.context) {
+			throw InternalException("Missing context for TIMESTAMPTZ to TIMESTAMP cast.");
+		}
+		auto &context = *input.context;
+		if (DBConfig::GetSetting<DisableTimestamptzCastsSetting>(context)) {
+			throw BinderException("Casting from TIMESTAMP WITH TIME ZONE to TIMESTAMP without an explicit time zone "
+			                      "has been disabled  - use \"AT TIME ZONE ...\"");
+		}
+
+		return BoundCastInfo(CastToNaive, make_uniq<CastData>(make_uniq<BindData>(context)));
+	}
+
+	//! Registers the TIMESTAMPTZ -> TIMESTAMP cast with the loader.
+	static void AddCasts(ExtensionLoader &loader) {
+		loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP, BindCastToNaive);
+	}
+};
+
+//! Zero-argument scalar function that returns the transaction start timestamp converted
+//! to a naive timestamp through the bound calendar.
+struct ICULocalTimestampFunc : public ICUDateFunc {
+
+	//! Bind data that additionally remembers the start timestamp of the meta transaction.
+	struct BindDataNow : public BindData {
+		explicit BindDataNow(ClientContext &context)
+		    : BindData(context), now(MetaTransaction::Get(context).start_timestamp) {
+		}
+
+		BindDataNow(const BindDataNow &other) : BindData(other), now(other.now) {
+		}
+
+		//! Equal when the captured timestamps match and the base class also compares equal.
+		bool Equals(const FunctionData &other_p) const override {
+			const auto &rhs = other_p.Cast<const BindDataNow>();
+			return now == rhs.now && BindData::Equals(other_p);
+		}
+
+		duckdb::unique_ptr<FunctionData> Copy() const override {
+			return make_uniq<BindDataNow>(*this);
+		}
+
+		//	Transaction start, captured at bind time
+		timestamp_t now;
+	};
+
+	//! Binder shared with ICULocalTimeFunc: only captures the context in a BindDataNow.
+	static duckdb::unique_ptr<FunctionData> BindNow(ClientContext &context, ScalarFunction &bound_function,
+	                                                vector<duckdb::unique_ptr<Expression>> &arguments) {
+		return make_uniq<BindDataNow>(context);
+	}
+
+	//! Converts the captured timestamp to a naive timestamp using a clone of the bound calendar.
+	static timestamp_t GetLocalTimestamp(ExpressionState &state) {
+		auto &bind_info = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindDataNow>();
+		CalendarPtr zone_calendar(bind_info.calendar->clone());
+		return ICUToNaiveTimestamp::Operation(zone_calendar.get(), bind_info.now);
+	}
+
+	//! Produces a constant vector holding the local timestamp.
+	static void Execute(DataChunk &input, ExpressionState &state, Vector &result) {
+		D_ASSERT(input.ColumnCount() == 0);
+		result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		ConstantVector::GetData<timestamp_t>(result)[0] = GetLocalTimestamp(state);
+	}
+
+	//! Registers the function under the given name: no arguments, returns TIMESTAMP.
+	static void AddFunction(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(ScalarFunction({}, LogicalType::TIMESTAMP, Execute, BindNow));
+		loader.RegisterFunction(overloads);
+	}
+};
+
+//! Zero-argument scalar function that returns the time-of-day part of the local timestamp
+//! computed by ICULocalTimestampFunc.
+struct ICULocalTimeFunc : public ICUDateFunc {
+	//! Produces a constant vector holding Timestamp::GetTime of the local timestamp.
+	static void Execute(DataChunk &input, ExpressionState &state, Vector &result) {
+		D_ASSERT(input.ColumnCount() == 0);
+		result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		const auto local_now = ICULocalTimestampFunc::GetLocalTimestamp(state);
+		ConstantVector::GetData<dtime_t>(result)[0] = Timestamp::GetTime(local_now);
+	}
+
+	//! Registers the function under the given name: no arguments, returns TIME.
+	//! The binder is borrowed from ICULocalTimestampFunc.
+	static void AddFunction(const string &name, ExtensionLoader &loader) {
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(ScalarFunction({}, LogicalType::TIME, Execute, ICULocalTimestampFunc::BindNow));
+		loader.RegisterFunction(overloads);
+	}
+};
+
+//! Re-expresses a TIMETZ in the calendar's zone: the value is normalised, shifted by the
+//! calendar's zone offset plus DST offset, and tagged with that offset.
+dtime_tz_t ICUToTimeTZ::Operation(icu::Calendar *calendar, dtime_tz_t timetz) {
+	//	Bring the input to +00:00
+	const auto normalized = Time::NormalizeTimeTZ(timetz);
+
+	//	Zone offset + DST offset, divided by MSECS_PER_SEC
+	auto tz_offset = ExtractField(calendar, UCAL_ZONE_OFFSET);
+	tz_offset += ExtractField(calendar, UCAL_DST_OFFSET);
+	tz_offset /= Interval::MSECS_PER_SEC;
+
+	//	Shift by the offset; the date argument only serves as the reference Interval::Add requires
+	date_t scratch_date(0);
+	const auto shifted = Interval::Add(normalized, {0, 0, tz_offset * Interval::MICROS_PER_SEC}, scratch_date);
+	return dtime_tz_t(shifted, tz_offset);
+}
+
+//! Extracts the time of day and the UTC offset that the calendar reports for instant.
+//! Returns false (leaving result untouched) for non-finite instants or when the
+//! extracted fields do not form a valid time; otherwise stores the TIMETZ and returns true.
+bool ICUToTimeTZ::ToTimeTZ(icu::Calendar *calendar, timestamp_t instant, dtime_tz_t &result) {
+	if (!ICUIsFinite(instant)) {
+		return false;
+	}
+
+	//	Time-of-day fields in the calendar's zone
+	auto sub_second = int32_t(SetTime(calendar, instant));
+	const auto hour_value = ExtractField(calendar, UCAL_HOUR_OF_DAY);
+	const auto minute_value = ExtractField(calendar, UCAL_MINUTE);
+	const auto second_value = ExtractField(calendar, UCAL_SECOND);
+	const auto milli_value = ExtractField(calendar, UCAL_MILLISECOND);
+	sub_second += milli_value * int32_t(Interval::MICROS_PER_MSEC);
+	if (!Time::IsValidTime(hour_value, minute_value, second_value, sub_second)) {
+		return false;
+	}
+	const auto local_time = Time::FromTime(hour_value, minute_value, second_value, sub_second);
+
+	//	Zone offset + DST offset, divided by MSECS_PER_SEC
+	auto tz_offset = ExtractField(calendar, UCAL_ZONE_OFFSET);
+	tz_offset += ExtractField(calendar, UCAL_DST_OFFSET);
+	tz_offset /= Interval::MSECS_PER_SEC;
+
+	result = dtime_tz_t(local_time, tz_offset);
+	return true;
+}
+
+//! Vectorized TIMESTAMPTZ -> TIMETZ cast. Rows for which ToTimeTZ fails are marked
+//! invalid (NULL) in the result instead of raising an error. Always returns true.
+bool ICUToTimeTZ::CastToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
+	auto &bind_info = parameters.cast_data->Cast<CastData>().info->Cast<BindData>();
+	CalendarPtr zone_calendar(bind_info.calendar->clone());
+
+	const auto convert = [&](timestamp_t instant, ValidityMask &validity, idx_t row) {
+		dtime_tz_t converted;
+		if (!ToTimeTZ(zone_calendar.get(), instant, converted)) {
+			validity.SetInvalid(row);
+			return dtime_tz_t();
+		}
+		return converted;
+	};
+	UnaryExecutor::ExecuteWithNulls<timestamp_t, dtime_tz_t>(source, result, count, convert);
+	return true;
+}
+
+//! Cast binder for TIMESTAMPTZ -> TIMETZ: requires a client context, from which the
+//! calendar bind data handed to CastToTimeTZ is created.
+BoundCastInfo ICUToTimeTZ::BindCastToTimeTZ(BindCastInput &input, const LogicalType &source,
+                                            const LogicalType &target) {
+	if (!input.context) {
+		throw InternalException("Missing context for TIMESTAMPTZ to TIMETZ cast.");
+	}
+
+	auto &context = *input.context;
+	return BoundCastInfo(CastToTimeTZ, make_uniq<CastData>(make_uniq<BindData>(context)));
+}
+
+//! Registers the TIMESTAMPTZ -> TIMETZ cast with the loader, using the cost that
+//! CastRules::ImplicitCast reports for this type pair.
+void ICUToTimeTZ::AddCasts(ExtensionLoader &loader) {
+	const auto implicit_cost = CastRules::ImplicitCast(LogicalType::TIMESTAMP_TZ, LogicalType::TIME_TZ);
+	loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::TIME_TZ, BindCastToTimeTZ, implicit_cost);
+}
+
+//! Scalar function taking (VARCHAR zone, temporal value): a private clone of the bound
+//! calendar is switched to the requested zone and OP::Operation is applied to the value.
+struct ICUTimeZoneFunc : public ICUDateFunc {
+	//! OP supplies the conversion, T is both the input and the output physical type.
+	template <typename OP, typename T>
+	static void Execute(DataChunk &input, ExpressionState &state, Vector &result) {
+		auto &bind_info = state.expr.Cast<BoundFunctionExpression>().bind_info->Cast<BindData>();
+		CalendarPtr owned_calendar(bind_info.calendar->clone());
+		auto calendar = owned_calendar.get();
+
+		D_ASSERT(input.ColumnCount() == 2);
+		auto &zones = input.data[0];
+		auto &values = input.data[1];
+
+		if (zones.GetVectorType() != VectorType::CONSTANT_VECTOR) {
+			//	Zone may differ per row: set it before each conversion, skipping non-finite values
+			BinaryExecutor::Execute<string_t, T, T>(zones, values, result, input.size(),
+			                                        [&](string_t zone_name, T value) {
+				                                        if (!ICUIsFinite(value)) {
+					                                        return value;
+				                                        }
+				                                        SetTimeZone(calendar, zone_name);
+				                                        return OP::Operation(calendar, value);
+			                                        });
+			return;
+		}
+
+		//	Constant zone: a NULL zone yields a constant NULL result
+		if (ConstantVector::IsNull(zones)) {
+			result.SetVectorType(VectorType::CONSTANT_VECTOR);
+			ConstantVector::SetNull(result, true);
+			return;
+		}
+
+		//	Constant zone: set it once, then convert every value
+		SetTimeZone(calendar, *ConstantVector::GetData<string_t>(zones));
+		UnaryExecutor::Execute<T, T>(values, result, input.size(),
+		                             [&](T value) { return OP::Operation(calendar, value); });
+	}
+
+	//! Registers the three overloads under the given name:
+	//! (VARCHAR, TIMESTAMP) -> TIMESTAMPTZ, (VARCHAR, TIMESTAMPTZ) -> TIMESTAMP, (VARCHAR, TIMETZ) -> TIMETZ.
+	static void AddFunction(const string &name, ExtensionLoader &loader) {
+		const LogicalType zone_type = LogicalType::VARCHAR;
+
+		ScalarFunctionSet overloads(name);
+		overloads.AddFunction(ScalarFunction({zone_type, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP_TZ,
+		                                     Execute<ICUFromNaiveTimestamp, timestamp_t>, Bind));
+		overloads.AddFunction(ScalarFunction({zone_type, LogicalType::TIMESTAMP_TZ}, LogicalType::TIMESTAMP,
+		                                     Execute<ICUToNaiveTimestamp, timestamp_t>, Bind));
+		overloads.AddFunction(ScalarFunction({zone_type, LogicalType::TIME_TZ}, LogicalType::TIME_TZ,
+		                                     Execute<ICUToTimeTZ, dtime_tz_t>, Bind));
+		for (auto &overload : overloads.functions) {
+			BaseScalarFunction::SetReturnsError(overload);
+		}
+		loader.RegisterFunction(overloads);
+	}
+};
+
+//! Defined in this file because it forwards to ICUFromNaiveTimestamp::Operation, which lives here.
+timestamp_t ICUDateFunc::FromNaive(icu::Calendar *calendar, timestamp_t naive) {
+	return ICUFromNaiveTimestamp::Operation(calendar, naive);
+}
+
+//! Public entry point of this file: registers the pg_timezone_names table function,
+//! the time zone related scalar functions and the naive <-> TIMESTAMPTZ / TIMETZ casts.
+void RegisterICUTimeZoneFunctions(ExtensionLoader &loader) {
+	//	Table functions (pg_timezone_names takes no arguments)
+	TableFunction tz_names("pg_timezone_names", {}, ICUTimeZoneFunction, ICUTimeZoneBind, ICUTimeZoneInit);
+	loader.RegisterFunction(tz_names);
+
+	//	Scalar functions
+	ICUTimeZoneFunc::AddFunction("timezone", loader);
+	ICULocalTimestampFunc::AddFunction("current_localtimestamp", loader);
+	ICULocalTimeFunc::AddFunction("current_localtime", loader);
+
+	// 	Casts
+	ICUFromNaiveTimestamp::AddCasts(loader);
+	ICUToNaiveTimestamp::AddCasts(loader);
+	ICUToTimeTZ::AddCasts(loader);
+}
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/icu_config.py
+++ b/external/duckdb/extension/icu/icu_config.py
@@ -0,0 +1,18 @@
+import os
+
+# list all include directories
+include_directories = [
+    os.path.sep.join(x.split('/'))
+    for x in ['extension/icu/include', 'extension/icu/third_party/icu/common', 'extension/icu/third_party/icu/i18n']
+]
+# source files
+source_directories = [
+    os.path.sep.join(x.split('/'))
+    for x in ['.', 'third_party/icu/common', 'third_party/icu/i18n', 'third_party/icu/stubdata']
+]
+source_files = []
+base_path = os.path.dirname(os.path.abspath(__file__))
+for dir in source_directories:
+    source_files += [
+        os.path.join('extension', 'icu', dir, x) for x in os.listdir(os.path.join(base_path, dir)) if x.endswith('.cpp')
+    ]
--- a/external/duckdb/extension/icu/icu_extension.cpp
+++ b/external/duckdb/extension/icu/icu_extension.cpp
@@ -0,0 +1,456 @@
+#include "duckdb/catalog/catalog.hpp"
+#include "duckdb/common/string_util.hpp"
+#include "duckdb/common/vector_operations/unary_executor.hpp"
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/function/scalar_function.hpp"
+#include "duckdb/main/config.hpp"
+#include "duckdb/main/connection.hpp"
+#include "duckdb/main/database.hpp"
+#include "duckdb/main/extension/extension_loader.hpp"
+#include "duckdb/parser/parsed_data/create_collation_info.hpp"
+#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
+#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "include/icu-current.hpp"
+#include "include/icu-dateadd.hpp"
+#include "include/icu-datepart.hpp"
+#include "include/icu-datesub.hpp"
+#include "include/icu-datetrunc.hpp"
+#include "include/icu-list-range.hpp"
+#include "include/icu-makedate.hpp"
+#include "include/icu-strptime.hpp"
+#include "include/icu-table-range.hpp"
+#include "include/icu-timebucket.hpp"
+#include "include/icu-timezone.hpp"
+#include "include/icu_extension.hpp"
+#include "unicode/calendar.h"
+#include "unicode/coll.h"
+#include "unicode/errorcode.h"
+#include "unicode/sortkey.h"
+#include "unicode/stringpiece.h"
+#include "unicode/timezone.h"
+#include "unicode/ucol.h"
+#include "icu-helpers.hpp"
+
+#include <cassert>
+
+namespace duckdb {
+
+//! Bind data of the ICU collation functions: owns the collator and remembers the
+//! language/country pair or the tag it was created from, so it can be copied,
+//! compared and (de)serialized.
+struct IcuBindData : public FunctionData {
+	duckdb::unique_ptr<icu::Collator> collator;
+	string language;
+	string country;
+	string tag;
+
+	//! Adopts an existing collator; language, country and tag stay empty.
+	explicit IcuBindData(duckdb::unique_ptr<icu::Collator> collator_p) : collator(std::move(collator_p)) {
+	}
+
+	//! Creates the collator for the locale built from a language and a country.
+	//! Throws InvalidInputException for a bogus locale or when ICU reports a failure.
+	IcuBindData(string language_p, string country_p) : language(std::move(language_p)), country(std::move(country_p)) {
+		const icu::Locale locale(language.c_str(), country.c_str());
+		if (locale.isBogus()) {
+			throw InvalidInputException("Locale is bogus!?");
+		}
+
+		UErrorCode status = U_ZERO_ERROR;
+		collator.reset(icu::Collator::createInstance(locale, status));
+		if (U_FAILURE(status)) {
+			throw InvalidInputException("Failed to create ICU collator: %s (language: %s, country: %s)",
+			                            u_errorName(status), language, country);
+		}
+	}
+
+	//! Creates the collator by opening the given tag with ucol_open.
+	//! Throws InvalidInputException when ICU reports a failure.
+	explicit IcuBindData(string tag_p) : tag(std::move(tag_p)) {
+		UErrorCode status = U_ZERO_ERROR;
+		UCollator *opened = ucol_open(tag.c_str(), &status);
+		if (U_FAILURE(status)) {
+			throw InvalidInputException("Failed to create ICU collator with tag %s: %s", tag, u_errorName(status));
+		}
+		collator.reset(icu::Collator::fromUCollator(opened));
+	}
+
+	//! Factory used by Copy and Deserialize: a non-empty tag takes priority over language/country.
+	static duckdb::unique_ptr<FunctionData> CreateInstance(string language, string country, string tag) {
+		if (tag.empty()) {
+			return make_uniq<IcuBindData>(language, country);
+		}
+		return make_uniq<IcuBindData>(tag);
+	}
+
+	//! Rebuilds the bind data (and with it a new collator) from the stored identifiers.
+	duckdb::unique_ptr<FunctionData> Copy() const override {
+		return CreateInstance(language, country, tag);
+	}
+
+	//! Two bind data objects are equal when language, country and tag all match.
+	bool Equals(const FunctionData &other_p) const override {
+		const auto &rhs = other_p.Cast<IcuBindData>();
+		if (language != rhs.language) {
+			return false;
+		}
+		if (country != rhs.country) {
+			return false;
+		}
+		return tag == rhs.tag;
+	}
+
+	//! Writes language (100), country (101) and tag (102, with default) to the serializer.
+	static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
+	                      const ScalarFunction &function) {
+		auto &self = bind_data_p->Cast<IcuBindData>();
+		serializer.WriteProperty(100, "language", self.language);
+		serializer.WriteProperty(101, "country", self.country);
+		serializer.WritePropertyWithDefault<string>(102, "tag", self.tag);
+	}
+
+	//! Reads the properties written by Serialize and recreates the bind data from them.
+	static unique_ptr<FunctionData> Deserialize(Deserializer &deserializer, ScalarFunction &function) {
+		string read_language;
+		string read_country;
+		string read_tag;
+		deserializer.ReadProperty(100, "language", read_language);
+		deserializer.ReadProperty(101, "country", read_country);
+		deserializer.ReadPropertyWithDefault<string>(102, "tag", read_tag);
+		return CreateInstance(read_language, read_country, read_tag);
+	}
+
+	//! Prefix shared by the names of all collation functions.
+	static const string FUNCTION_PREFIX;
+
+	//! collation -> function name (prefix prepended)
+	static string EncodeFunctionName(const string &collation) {
+		return FUNCTION_PREFIX + collation;
+	}
+	//! function name -> collation (prefix length stripped from the front)
+	static string DecodeFunctionName(const string &fname) {
+		return fname.substr(FUNCTION_PREFIX.size());
+	}
+};
+
+//! Out-of-class definition of the prefix used by EncodeFunctionName / DecodeFunctionName.
+const string IcuBindData::FUNCTION_PREFIX = "icu_collate_";
+
+//! Writes the collation sort key of input into buffer and returns the size reported by
+//! Collator::getSortKey. When the reported size exceeds buffer_size, the buffer is
+//! replaced by one of exactly that size (buffer_size is updated) and the key is computed again.
+static int32_t ICUGetSortKey(icu::Collator &collator, string_t input, duckdb::unique_ptr<char[]> &buffer,
+                             int32_t &buffer_size) {
+	const icu::StringPiece utf8_view(input.GetData(), int32_t(input.GetSize()));
+	const icu::UnicodeString text = icu::UnicodeString::fromUTF8(utf8_view);
+
+	const int32_t needed = collator.getSortKey(text, reinterpret_cast<uint8_t *>(buffer.get()), buffer_size);
+	if (needed <= buffer_size) {
+		//	The key fitted into the existing buffer
+		return needed;
+	}
+
+	//	Too small: grow to the reported size and redo the computation
+	buffer_size = needed;
+	buffer = duckdb::unique_ptr<char[]>(new char[buffer_size]);
+	return collator.getSortKey(text, reinterpret_cast<uint8_t *>(buffer.get()), buffer_size);
+}
+
+//! Scalar implementation of the collation functions: maps every input string to the
+//! upper-case hexadecimal rendering of its ICU sort key, with the key's last byte dropped.
+//! Throws InvalidInputException when ICU does not report a positive key size.
+static void ICUCollateFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+	static const char HEX_TABLE[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
+	auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+	auto &info = func_expr.bind_info->Cast<IcuBindData>();
+	auto &collator = *info.collator;
+
+	//	Scratch buffer shared by all rows; ICUGetSortKey grows it on demand
+	duckdb::unique_ptr<char[]> buffer;
+	int32_t buffer_size = 0;
+	UnaryExecutor::Execute<string_t, string_t>(args.data[0], result, args.size(), [&](string_t input) {
+		// create a sort key from the string
+		const auto raw_size = ICUGetSortKey(collator, input, buffer, buffer_size);
+		// A non-positive size would make the unsigned "size - 1" below wrap around
+		// to a huge length, so reject it explicitly.
+		if (raw_size <= 0) {
+			throw InvalidInputException("Failed to compute ICU sort key");
+		}
+		// the last byte of the key is not part of the output
+		const auto key_size = idx_t(raw_size) - 1;
+
+		// convert the sort key to hexadecimal: two characters per byte
+		auto str_result = StringVector::EmptyString(result, key_size * 2);
+		auto str_data = str_result.GetDataWriteable();
+		for (idx_t i = 0; i < key_size; i++) {
+			uint8_t byte = uint8_t(buffer[i]);
+			D_ASSERT(byte != 0);
+			str_data[i * 2] = HEX_TABLE[byte / 16];
+			str_data[i * 2 + 1] = HEX_TABLE[byte % 16];
+		}
+		str_result.Finalize();
+		return str_result;
+	});
+}
+
+//! Binder of the per-collation functions. A non-empty extra_info is treated as a
+//! collation tag; otherwise the collation is decoded from the function name and split
+//! on "_" into a language and an optional country.
+static duckdb::unique_ptr<FunctionData> ICUCollateBind(ClientContext &context, ScalarFunction &bound_function,
+                                                       vector<duckdb::unique_ptr<Expression>> &arguments) {
+	const auto &tag = bound_function.extra_info;
+	if (!tag.empty()) {
+		return make_uniq<IcuBindData>(tag);
+	}
+
+	const auto parts = StringUtil::Split(IcuBindData::DecodeFunctionName(bound_function.name), "_");
+	switch (parts.size()) {
+	case 1:
+		//	language only
+		return make_uniq<IcuBindData>(parts[0], "");
+	case 2:
+		//	language + country
+		return make_uniq<IcuBindData>(parts[0], parts[1]);
+	default:
+		throw InvalidInputException("Expected one or two splits");
+	}
+}
+
+//! Binder for the two-argument sort key function: the second argument must be a
+//! foldable, non-NULL expression. A non-empty extra_info is treated as a collation tag;
+//! otherwise the evaluated collation is split on "_" into a language and an optional country.
+static duckdb::unique_ptr<FunctionData> ICUSortKeyBind(ClientContext &context, ScalarFunction &bound_function,
+                                                       vector<duckdb::unique_ptr<Expression>> &arguments) {
+	auto &collation_expr = *arguments[1];
+	if (!collation_expr.IsFoldable()) {
+		throw NotImplementedException("ICU_SORT_KEY(VARCHAR, VARCHAR) with non-constant collation is not supported");
+	}
+	Value collation = ExpressionExecutor::EvaluateScalar(context, collation_expr).CastAs(context, LogicalType::VARCHAR);
+	if (collation.IsNull()) {
+		throw NotImplementedException("ICU_SORT_KEY(VARCHAR, VARCHAR) expected a non-null collation");
+	}
+
+	//	A tag stored on the function wins over the collation argument
+	const auto &tag = bound_function.extra_info;
+	if (!tag.empty()) {
+		return make_uniq<IcuBindData>(tag);
+	}
+
+	const auto parts = StringUtil::Split(StringValue::Get(collation), "_");
+	switch (parts.size()) {
+	case 1:
+		//	language only
+		return make_uniq<IcuBindData>(parts[0], "");
+	case 2:
+		//	language + country
+		return make_uniq<IcuBindData>(parts[0], parts[1]);
+	default:
+		throw InvalidInputException("Expected one or two splits");
+	}
+}
+
+//! Builds the VARCHAR -> VARCHAR collation function for the given collation name.
+//! The tag is stored in the function's extra_info, and the IcuBindData (de)serializers are attached.
+static ScalarFunction GetICUCollateFunction(const string &collation, const string &tag) {
+	ScalarFunction collate(IcuBindData::EncodeFunctionName(collation), {LogicalType::VARCHAR}, LogicalType::VARCHAR,
+	                       ICUCollateFunction, ICUCollateBind);
+	collate.extra_info = tag;
+	collate.serialize = IcuBindData::Serialize;
+	collate.deserialize = IcuBindData::Deserialize;
+	return collate;
+}
+
+//! Resolves tz_str to an ICU time zone.
+//! 1. An ID that ICU knows as given is returned directly.
+//! 2. Otherwise all enumerated zone IDs are scanned for a case-insensitive match; on a hit
+//!    tz_str is overwritten with the matched spelling and that zone is returned.
+//! 3. Otherwise nullptr is returned and candidates holds every ID that was scanned, so the
+//!    caller can build a suggestion.
+unique_ptr<icu::TimeZone> GetTimeZoneInternal(string &tz_str, vector<string> &candidates) {
+	icu::StringPiece tz_name_utf8(tz_str);
+	const auto uid = icu::UnicodeString::fromUTF8(tz_name_utf8);
+	duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createTimeZone(uid));
+	if (tz && *tz != icu::TimeZone::getUnknown()) {
+		return tz;
+	}
+
+	// Try to be friendlier
+	// Go through all the zone names and look for a case insensitive match
+	// If we don't find one, make a suggestion
+	// FIXME: this is very inefficient
+	duckdb::unique_ptr<icu::StringEnumeration> tzs(icu::TimeZone::createEnumeration());
+	if (!tzs) {
+		// No enumeration available: nothing to match against and nothing to suggest
+		return nullptr;
+	}
+
+	UErrorCode status = U_ZERO_ERROR;
+	for (;;) {
+		auto long_id = tzs->snext(status);
+		if (U_FAILURE(status) || !long_id) {
+			break;
+		}
+		std::string candidate_tz_name;
+		long_id->toUTF8String(candidate_tz_name);
+		if (StringUtil::CIEquals(candidate_tz_name, tz_str)) {
+			// case insensitive match - return this timezone instead
+			tz_str = candidate_tz_name;
+			icu::StringPiece utf8(tz_str);
+			const auto tz_unicode_str = icu::UnicodeString::fromUTF8(utf8);
+			duckdb::unique_ptr<icu::TimeZone> insensitive_tz(icu::TimeZone::createTimeZone(tz_unicode_str));
+			return insensitive_tz;
+		}
+
+		candidates.emplace_back(candidate_tz_name);
+	}
+	return nullptr;
+}
+
+//! Non-throwing lookup: returns whatever GetTimeZoneInternal returns (nullptr when no zone
+//! matched) and discards the collected candidate names. tz_str may be rewritten to the
+//! matched spelling by GetTimeZoneInternal.
+unique_ptr<icu::TimeZone> ICUHelpers::TryGetTimeZone(string &tz_str) {
+	vector<string> candidates;
+	return GetTimeZoneInternal(tz_str, candidates);
+}
+
+//! Looks up tz_str and reports unknown zones together with a list of similar names.
+//! With a non-null error_message the text is stored there and nullptr is returned;
+//! with a null error_message a NotImplementedException carrying the same text is thrown.
+unique_ptr<icu::TimeZone> ICUHelpers::GetTimeZone(string &tz_str, string *error_message) {
+	vector<string> known_zones;
+	auto zone = GetTimeZoneInternal(tz_str, known_zones);
+	if (zone) {
+		return zone;
+	}
+
+	//	Rank the scanned zone names by similarity to the request
+	const string suggestions =
+	    StringUtil::CandidatesMessage(StringUtil::TopNJaroWinkler(known_zones, tz_str), "Candidate time zones");
+	if (!error_message) {
+		throw NotImplementedException("Unknown TimeZone '%s'!\n%s", tz_str, suggestions);
+	}
+
+	duckdb::stringstream message;
+	message << "Unknown TimeZone '" << tz_str << "'!\n" << suggestions;
+	*error_message = message.str();
+	return nullptr;
+}
+
+//! Setting callback with the (context, scope, parameter) signature: validates the requested
+//! zone name and writes the (possibly re-spelled) name back into parameter.
+static void SetICUTimeZone(ClientContext &context, SetScope scope, Value &parameter) {
+	auto tz_str = StringValue::Get(parameter);
+	//	Only the validation and the possible rewrite of tz_str matter; the returned zone is dropped.
+	//	NOTE(review): no error_message is passed, so this relies on the parameter's default
+	//	(declared outside this file) making GetTimeZone throw for unknown zones - confirm.
+	ICUHelpers::GetTimeZone(tz_str);
+	parameter = Value(tz_str);
+}
+
+//! Global scan state of the calendar table function: the enumeration of calendar keyword
+//! values for the default locale.
+struct ICUCalendarData : public GlobalTableFunctionState {
+	ICUCalendarData() {
+		// All calendars are available in all locales
+		//	The status is not inspected here; the scan function checks the pointer for null instead.
+		UErrorCode status = U_ZERO_ERROR;
+		calendars.reset(icu::Calendar::getKeywordValuesForLocale("calendar", icu::Locale::getDefault(), false, status));
+	}
+
+	//	Remaining calendar names; consumed by the scan function via snext()
+	duckdb::unique_ptr<icu::StringEnumeration> calendars;
+};
+
+//! Declares the single VARCHAR output column "name". No bind data is produced.
+static duckdb::unique_ptr<FunctionData> ICUCalendarBind(ClientContext &context, TableFunctionBindInput &input,
+                                                        vector<LogicalType> &return_types, vector<string> &names) {
+	names.emplace_back("name");
+	return_types.emplace_back(LogicalType::VARCHAR);
+
+	return nullptr;
+}
+
+//! Global-state initializer: returns a freshly constructed ICUCalendarData.
+//! Neither the context nor the init input is consulted.
+static duckdb::unique_ptr<GlobalTableFunctionState> ICUCalendarInit(ClientContext &context,
+                                                                    TableFunctionInitInput &input) {
+	return make_uniq<ICUCalendarData>();
+}
+
+//! Emits up to STANDARD_VECTOR_SIZE rows, one calendar name per row, taken from the scan
+//! state's enumeration. Produces an empty chunk when the enumeration is missing or exhausted.
+static void ICUCalendarFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
+	auto &scan = data_p.global_state->Cast<ICUCalendarData>();
+	idx_t row = 0;
+	//	The enumeration pointer does not change during the scan, so one null check suffices
+	if (scan.calendars) {
+		for (; row < STANDARD_VECTOR_SIZE; ++row) {
+			UErrorCode status = U_ZERO_ERROR;
+			const auto entry = scan.calendars->snext(status);
+			if (!entry || U_FAILURE(status)) {
+				break;
+			}
+
+			//	The name is the only information available per calendar
+			std::string calendar_name;
+			entry->toUTF8String(calendar_name);
+			output.SetValue(0, row, Value(calendar_name));
+		}
+	}
+	output.SetCardinality(row);
+}
+
+//! Setting callback that validates a calendar name.
+//! - A name for which ICU creates a calendar of exactly that type is accepted as is.
+//! - Otherwise the known calendar names are scanned for a case-insensitive match, and on a
+//!   hit parameter is rewritten to the matched spelling.
+//! - Otherwise a NotImplementedException listing similar calendar names is thrown.
+static void SetICUCalendar(ClientContext &context, SetScope scope, Value &parameter) {
+	const auto name = parameter.Value::GetValueUnsafe<string>();
+	string locale_key = "@calendar=" + name;
+	icu::Locale locale(locale_key.c_str());
+
+	UErrorCode status = U_ZERO_ERROR;
+	duckdb::unique_ptr<icu::Calendar> cal(icu::Calendar::createInstance(locale, status));
+	if (!U_FAILURE(status) && name == cal->getType()) {
+		return;
+	}
+
+	//	Try to be friendlier
+	//	Go through all the calendar names and look for a case insensitive match
+	//	If we don't find one, make a suggestion
+	status = U_ZERO_ERROR;
+	duckdb::unique_ptr<icu::StringEnumeration> calendars;
+	calendars.reset(icu::Calendar::getKeywordValuesForLocale("calendar", icu::Locale::getDefault(), false, status));
+
+	vector<string> candidates;
+	//	The enumeration can be null (ICUCalendarFunction guards against that as well);
+	//	in that case there is nothing to match, and the error below is raised without candidates.
+	if (calendars) {
+		for (;;) {
+			auto calendar = calendars->snext(status);
+			if (U_FAILURE(status) || !calendar) {
+				break;
+			}
+
+			std::string utf8;
+			calendar->toUTF8String(utf8);
+			if (StringUtil::CIEquals(utf8, name)) {
+				parameter = Value(utf8);
+				return;
+			}
+
+			candidates.emplace_back(utf8);
+		}
+	}
+
+	string candidate_str =
+	    StringUtil::CandidatesMessage(StringUtil::TopNJaroWinkler(candidates, name), "Candidate calendars");
+
+	throw NotImplementedException("Unknown Calendar '%s'!\n%s", name, candidate_str);
+}
+
+//! Registers everything this extension provides on the given loader:
+//!  - one collation per locale reported by icu::Collator::getAvailableLocales,
+//!    named "<language>" or "<language>_<country>" in lower case,
+//!  - the "icu_noaccent" collation and the "icu_sort_key" scalar function,
+//!  - the "TimeZone" and "Calendar" extension options (defaults taken from ICU),
+//!  - the ICU date/time function families,
+//!  - the "icu_calendar_names" table function.
+//! Throws InternalException if ICU cannot create the default calendar.
+static void LoadInternal(ExtensionLoader &loader) {
+
+	// iterate over all the collations
+	int32_t count = 0;
+	auto locales = icu::Collator::getAvailableLocales(count);
+	for (int32_t i = 0; i < count; i++) {
+		string collation;
+		if (string(locales[i].getCountry()).empty()) {
+			// language only
+			collation = locales[i].getLanguage();
+		} else {
+			// language + country
+			collation = locales[i].getLanguage() + string("_") + locales[i].getCountry();
+		}
+		collation = StringUtil::Lower(collation);
+
+		CreateCollationInfo info(collation, GetICUCollateFunction(collation, ""), false, false);
+		loader.RegisterCollation(info);
+	}
+
+	/**
+	 * This collation function is inspired by the Postgres "ignore_accents" collation:
+	 * See: https://www.postgresql.org/docs/current/collation.html
+	 * CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+	 *
+	 * Also, according to the source file postgres/src/backend/utils/adt/pg_locale.c,
+	 * "und-u-kc-ks-level1" is converted to the equivalent ICU format locale ID,
+	 * e.g. "und@colcaselevel=yes;colstrength=primary"
+	 *
+	 */
+	CreateCollationInfo info("icu_noaccent", GetICUCollateFunction("noaccent", "und-u-ks-level1-kc-true"), false,
+	                         false);
+	loader.RegisterCollation(info);
+
+	ScalarFunction sort_key("icu_sort_key", {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
+	                        ICUCollateFunction, ICUSortKeyBind);
+	loader.RegisterFunction(sort_key);
+
+	// Time Zones
+	// The option defaults to the ID of ICU's default time zone
+	auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance());
+	duckdb::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createDefault());
+	icu::UnicodeString tz_id;
+	std::string tz_string;
+	tz->getID(tz_id).toUTF8String(tz_string);
+	config.AddExtensionOption("TimeZone", "The current time zone", LogicalType::VARCHAR, Value(tz_string),
+	                          SetICUTimeZone);
+
+	RegisterICUCurrentFunctions(loader);
+	RegisterICUDateAddFunctions(loader);
+	RegisterICUDatePartFunctions(loader);
+	RegisterICUDateSubFunctions(loader);
+	RegisterICUDateTruncFunctions(loader);
+	RegisterICUMakeDateFunctions(loader);
+	RegisterICUTableRangeFunctions(loader);
+	RegisterICUListRangeFunctions(loader);
+	RegisterICUStrptimeFunctions(loader);
+	RegisterICUTimeBucketFunctions(loader);
+	RegisterICUTimeZoneFunctions(loader);
+
+	// Calendars
+	// The option defaults to the type of ICU's default calendar
+	UErrorCode status = U_ZERO_ERROR;
+	duckdb::unique_ptr<icu::Calendar> cal(icu::Calendar::createInstance(status));
+	if (U_FAILURE(status) || !cal) {
+		// Fail with a clear error rather than dereferencing a calendar ICU could not create
+		throw InternalException("Unable to create ICU calendar.");
+	}
+	config.AddExtensionOption("Calendar", "The current calendar", LogicalType::VARCHAR, Value(cal->getType()),
+	                          SetICUCalendar);
+
+	TableFunction cal_names("icu_calendar_names", {}, ICUCalendarFunction, ICUCalendarBind, ICUCalendarInit);
+	loader.RegisterFunction(cal_names);
+}
+
+//! Loads the extension; all registration work is delegated to LoadInternal.
+void IcuExtension::Load(ExtensionLoader &loader) {
+	LoadInternal(loader);
+}
+
+//! Name under which this extension identifies itself.
+std::string IcuExtension::Name() {
+	return std::string("icu");
+}
+
+//! Version string: the EXT_VERSION_ICU macro when the build defines it, otherwise empty.
+std::string IcuExtension::Version() const {
+#ifndef EXT_VERSION_ICU
+	return "";
+#else
+	return EXT_VERSION_ICU;
+#endif
+}
+
+} // namespace duckdb
+
+extern "C" {
+// Entry point declared through DUCKDB_CPP_EXTENSION_ENTRY (macro expansion not visible here); runs the same LoadInternal as IcuExtension::Load
+DUCKDB_CPP_EXTENSION_ENTRY(icu, loader) { // NOLINT
+	duckdb::LoadInternal(loader);
+}
+}
--- a/external/duckdb/extension/icu/include/icu-casts.hpp
+++ b/external/duckdb/extension/icu/include/icu-casts.hpp
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-casts.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "icu-datefunc.hpp"
+
+namespace duckdb {
+
+struct ICUMakeDate : public ICUDateFunc { // static helpers only; every member is declared here and defined elsewhere
+	static date_t Operation(icu::Calendar *calendar, timestamp_t instant);
+	//! NOTE(review): by its signature the vector-level cast routine (source -> result for count rows); presumably targets DATE - confirm
+	static bool CastToDate(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
+	//! NOTE(review): presumably the cast-binding hook that yields the BoundCastInfo for CastToDate - confirm
+	static BoundCastInfo BindCastToDate(BindCastInput &input, const LogicalType &source, const LogicalType &target);
+	//! NOTE(review): presumably registers the casts of this struct through the loader - confirm
+	static void AddCasts(ExtensionLoader &loader);
+	//! NOTE(review): takes a ClientContext where Operation takes a calendar; exact semantics to be confirmed in the definition
+	static date_t ToDate(ClientContext &context, timestamp_t instant);
+};
+
+struct ICUToTimeTZ : public ICUDateFunc { // static helpers only; every member is declared here and defined elsewhere
+	static dtime_tz_t Operation(icu::Calendar *calendar, dtime_tz_t timetz);
+	//! NOTE(review): writes through the result reference and returns bool; presumably false signals a failed conversion - confirm
+	static bool ToTimeTZ(icu::Calendar *calendar, timestamp_t instant, dtime_tz_t &result);
+	//! NOTE(review): by its signature the vector-level cast routine (source -> result for count rows) - confirm
+	static bool CastToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
+	//! NOTE(review): presumably the cast-binding hook that yields the BoundCastInfo for CastToTimeTZ - confirm
+	static BoundCastInfo BindCastToTimeTZ(BindCastInput &input, const LogicalType &source, const LogicalType &target);
+	//! NOTE(review): presumably registers the casts of this struct through the loader - confirm
+	static void AddCasts(ExtensionLoader &loader);
+};
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-current.hpp
+++ b/external/duckdb/extension/icu/include/icu-current.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-current.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU "current" functions - confirm.
+void RegisterICUCurrentFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-dateadd.hpp
+++ b/external/duckdb/extension/icu/include/icu-dateadd.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-dateadd.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU date-add functions - confirm.
+void RegisterICUDateAddFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-datefunc.hpp
+++ b/external/duckdb/extension/icu/include/icu-datefunc.hpp
@@ -0,0 +1,87 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-datefunc.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb.hpp"
+
+#include "duckdb/common/enums/date_part_specifier.hpp"
+#include "duckdb/planner/expression/bound_function_expression.hpp"
+#include "tz_calendar.hpp"
+
+namespace duckdb {
+
+struct ICUDateFunc { // base of ICUMakeDate and ICUToTimeTZ: bind/cast state plus static calendar helpers
+	struct BindData : public FunctionData {
+		explicit BindData(ClientContext &context);
+		BindData(const string &tz_setting, const string &cal_setting);
+		BindData(const BindData &other);
+
+		string tz_setting;
+		string cal_setting;
+		CalendarPtr calendar;
+
+		bool Equals(const FunctionData &other_p) const override;
+		duckdb::unique_ptr<FunctionData> Copy() const override;
+
+		void InitCalendar();
+	};
+
+	struct CastData : public BoundCastData { // cast state that owns a FunctionData
+		explicit CastData(duckdb::unique_ptr<FunctionData> info_p) : info(std::move(info_p)) {
+		}
+
+		duckdb::unique_ptr<BoundCastData> Copy() const override {
+			return make_uniq<CastData>(info->Copy()); // the wrapped FunctionData is duplicated through its own Copy()
+		}
+
+		duckdb::unique_ptr<FunctionData> info;
+	};
+
+	//! Binds a default calendar object for use by the function
+	static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
+	                                             vector<duckdb::unique_ptr<Expression>> &arguments);
+
+	//! Tries to set the time zone for the calendar and returns false if it is not valid.
+	static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
+	//! Sets the time zone for the calendar. Throws if it is not valid
+	static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id, string *error_message = nullptr);
+	//! Tries to get the timestamp from the calendar. NOTE(review): presumably returns false instead of throwing if it is not in range - confirm
+	static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
+	//! Gets the timestamp from the calendar, throwing if it is not in range.
+	static timestamp_t GetTime(icu::Calendar *calendar, uint64_t micros = 0);
+	//! Gets the timestamp from the calendar, assuming it is in range.
+	static timestamp_t GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros = 0);
+	//! Sets the calendar to the timestamp, returning the unused µs part
+	static uint64_t SetTime(icu::Calendar *calendar, timestamp_t date);
+	//! Extracts the field from the calendar
+	static int32_t ExtractField(icu::Calendar *calendar, UCalendarDateFields field);
+	//! Subtracts the field of the given date from the calendar
+	static int32_t SubtractField(icu::Calendar *calendar, UCalendarDateFields field, timestamp_t end_date);
+	//! Adds the timestamp and the interval using the calendar
+	static timestamp_t Add(TZCalendar &calendar, timestamp_t timestamp, interval_t interval);
+	//! Subtracts the interval from the timestamp using the calendar
+	static timestamp_t Sub(TZCalendar &calendar, timestamp_t timestamp, interval_t interval);
+	//! Subtracts the latter timestamp from the former timestamp using the calendar
+	static interval_t Sub(TZCalendar &calendar, timestamp_t end_date, timestamp_t start_date);
+	//! Pulls out the bin values from the timestamp assuming it is an instant,
+	//! constructs an ICU timestamp, and then converts that back to a DuckDB instant
+	//! Adding offset doesn't really work around DST because the bin values are ambiguous
+	static timestamp_t FromNaive(icu::Calendar *calendar, timestamp_t naive);
+
+	//! Truncates the calendar time to the given part precision
+	typedef void (*part_trunc_t)(icu::Calendar *calendar, uint64_t &micros);
+	static part_trunc_t TruncationFactory(DatePartSpecifier part);
+	static timestamp_t CurrentMidnight(icu::Calendar *calendar, ExpressionState &state);
+
+	//! Subtracts the two times at the given part precision
+	typedef int64_t (*part_sub_t)(icu::Calendar *calendar, timestamp_t start_date, timestamp_t end_date);
+	static part_sub_t SubtractFactory(DatePartSpecifier part);
+};
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-datepart.hpp
+++ b/external/duckdb/extension/icu/include/icu-datepart.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-datepart.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU date-part functions - confirm.
+void RegisterICUDatePartFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-datesub.hpp
+++ b/external/duckdb/extension/icu/include/icu-datesub.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-datesub.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU date-sub functions - confirm.
+void RegisterICUDateSubFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-datetrunc.hpp
+++ b/external/duckdb/extension/icu/include/icu-datetrunc.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-datetrunc.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU date-trunc functions - confirm.
+void RegisterICUDateTruncFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-helpers.hpp
+++ b/external/duckdb/extension/icu/include/icu-helpers.hpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-helpers.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb.hpp"
+#include "unicode/timezone.h"
+#include "duckdb/common/types/timestamp.hpp"
+
+namespace duckdb {
+
+struct ICUHelpers {
+	//! Tries to get a time zone - returns nullptr if the timezone is not found
+	static unique_ptr<icu::TimeZone> TryGetTimeZone(string &tz_str);
+	//! Gets a time zone - throws an error if the timezone is not found
+	static unique_ptr<icu::TimeZone> GetTimeZone(string &tz_str, string *error_message = nullptr);
+	//! NOTE(review): presumably breaks ts into TimestampComponents using the given calendar - confirm against the definition
+	static TimestampComponents GetComponents(timestamp_tz_t ts, icu::Calendar *calendar);
+	//! NOTE(review): presumably the opposite direction, building a timestamp_t from TimestampComponents - confirm
+	static timestamp_t ToTimestamp(TimestampComponents data);
+};
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-list-range.hpp
+++ b/external/duckdb/extension/icu/include/icu-list-range.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-list-range.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU list-range functions - confirm.
+void RegisterICUListRangeFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-makedate.hpp
+++ b/external/duckdb/extension/icu/include/icu-makedate.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-makedate.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU make-date functions - confirm.
+void RegisterICUMakeDateFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-strptime.hpp
+++ b/external/duckdb/extension/icu/include/icu-strptime.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-strptime.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU strptime functions - confirm.
+void RegisterICUStrptimeFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-table-range.hpp
+++ b/external/duckdb/extension/icu/include/icu-table-range.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-table-range.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU table-range functions - confirm.
+void RegisterICUTableRangeFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-timebucket.hpp
+++ b/external/duckdb/extension/icu/include/icu-timebucket.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-timebucket.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU time-bucket functions - confirm.
+void RegisterICUTimeBucketFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu-timezone.hpp
+++ b/external/duckdb/extension/icu/include/icu-timezone.hpp
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu-timezone.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace duckdb {
+
+class ExtensionLoader;
+//! Defined outside this header; LoadInternal calls it with the extension's loader. Presumably registers the ICU time-zone functions - confirm.
+void RegisterICUTimeZoneFunctions(ExtensionLoader &loader);
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/icu_extension.hpp
+++ b/external/duckdb/extension/icu/include/icu_extension.hpp
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// icu_extension.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb.hpp"
+
+namespace duckdb {
+
+class IcuExtension : public Extension { // the ICU extension; overrides Load, Name and Version of Extension
+public:
+	void Load(ExtensionLoader &loader) override; // receives the loader used for registration
+	std::string Name() override;
+	std::string Version() const override;
+};
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/include/tz_calendar.hpp
+++ b/external/duckdb/extension/icu/include/tz_calendar.hpp
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// tz_calendar.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "unicode/calendar.h"
+#include "duckdb/common/string_util.hpp"
+
+namespace duckdb {
+
+using CalendarPtr = duckdb::unique_ptr<icu::Calendar>;
+
+//! Bundles a private clone of an ICU calendar with two flags that are computed
+//! once, at construction time.
+struct TZCalendar {
+	//! Clones calendar_p. The calendar counts as Gregorian when cal_setting is
+	//! empty or equals "gregorian" ignoring case.
+	TZCalendar(icu::Calendar &calendar_p, const string &cal_setting)
+	    : calendar(calendar_p.clone()),
+	      is_gregorian(StringUtil::CIEquals(cal_setting, "gregorian") || cal_setting.empty()),
+	      // UCAL_MONTH is 0-based, so a maximum below 12 means at most twelve months
+	      supports_intervals(calendar->getMaximum(UCAL_MONTH) < 12) {
+	}
+
+	//! Non-owning pointer to the cloned calendar
+	icu::Calendar *GetICUCalendar() {
+		return calendar.get();
+	}
+	//! Flag derived from cal_setting in the constructor
+	bool IsGregorian() const {
+		return is_gregorian;
+	}
+	//! Flag derived from the calendar's maximum month in the constructor
+	bool SupportsIntervals() const {
+		return supports_intervals;
+	}
+
+	CalendarPtr calendar;
+	const bool is_gregorian;
+	const bool supports_intervals;
+};
+
+} // namespace duckdb
--- a/external/duckdb/extension/icu/scripts/inline-data.py
+++ b/external/duckdb/extension/icu/scripts/inline-data.py
@@ -0,0 +1,22 @@
+import sys
+
+contents = bytearray(sys.stdin.buffer.read())
+
+# encode the data into the stubdata file
+result_text = ",".join(map(str, contents))
+
+new_contents = """
+// This file is generated by scripts/inline-data.py, do not edit it manually //
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "unicode/uversion.h"
+
+extern "C" U_EXPORT const unsigned char U_ICUDATA_ENTRY_POINT [] = {
+	%s
+};
+""" % (
+    result_text,
+)
+
+sys.stdout.write(new_contents)
--- a/external/duckdb/extension/icu/scripts/makedata.sh
+++ b/external/duckdb/extension/icu/scripts/makedata.sh
@@ -0,0 +1,47 @@
+#! /bin/sh
+
+# ICU File Structure
+icu=https://github.com/unicode-org/icu/archive/refs/tags/release-version.zip
+zip_file=release-version.zip
+source_path=icu-release-version/icu4c/source
+data_path=$source_path"/data"
+
+#rm -rf build
+set -e
+mkdir -p build
+pushd build
+
+# download ICU 66
+code_version=66-1
+wget -nc ${icu/version/$code_version}
+unzip -o ${zip_file/version/$code_version}
+
+# download ICU 72 (replace with latest version)
+data_version=72-1
+wget -nc ${icu/version/$data_version}
+unzip -o ${zip_file/version/$data_version}
+
+# copy over the collation data
+find ${data_path/version/$data_version} -type f ! -iname "*.txt" -delete
+cp -r ${data_path/version/$data_version} ${source_path/version/$code_version}
+
+# download IANA and copy the latest Time Zone Data
+tz_version=2025b
+rm -rf icu-data
+git clone git@github.com:unicode-org/icu-data.git || true
+cp icu-data/tzdata/icunew/${tz_version}/44/*.txt ${data_path/version/$code_version}/misc
+
+# build the data, make sure to create "filters.json" first, see above
+cp ../filters.json ${source_path/version/$code_version}
+pushd ${source_path/version/$code_version}
+ICU_DATA_FILTER_FILE=filters.json ./runConfigureICU Linux --with-data-packaging=archive
+make
+popd
+
+# the data file will be located in icu-release-66-1/icu4c/source/data/out/icudt66l.dat
+# copy over the data to the minimal-icu-collation data repository
+# then run the following two commands:
+popd
+
+icudt=icudt${code_version/-[[:digit:]]/}l.dat
+python3 scripts/inline-data.py < build/${data_path/version/$code_version}/out/${icudt} > third_party/icu/stubdata/stubdata.cpp
--- a/external/duckdb/extension/icu/third_party/CMakeLists.txt
+++ b/external/duckdb/extension/icu/third_party/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_subdirectory(icu)
+# Pass ICU_LIBRARY_FILES, as visible in this scope after the call above, on to the including directory.
+set(ICU_LIBRARY_FILES
+    ${ICU_LIBRARY_FILES}
+    PARENT_SCOPE)
--- a/external/duckdb/extension/icu/third_party/icu/CMakeLists.txt
+++ b/external/duckdb/extension/icu/third_party/icu/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_subdirectory(common)
+add_subdirectory(i18n)
+add_subdirectory(stubdata)
+# Pass ICU_LIBRARY_FILES, as visible in this scope after the calls above, on to the including directory.
+set(ICU_LIBRARY_FILES
+    ${ICU_LIBRARY_FILES}
+    PARENT_SCOPE)
--- a/external/duckdb/extension/icu/third_party/icu/LICENSE
+++ b/external/duckdb/extension/icu/third_party/icu/LICENSE
@@ -0,0 +1,414 @@
+COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
+
+Copyright © 1991-2020 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+
+---------------------
+
+Third-Party Software Licenses
+
+This section contains third-party software notices and/or additional
+terms for licensed third-party software components included within ICU
+libraries.
+
+1. ICU License - ICU 1.8.1 to ICU 57.1
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2016 International Business Machines Corporation and others
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
+SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.
+
+All trademarks and registered trademarks mentioned herein are the
+property of their respective owners.
+
+2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
+
+ #     The Google Chrome software developed by Google is licensed under
+ # the BSD license. Other software included in this distribution is
+ # provided under other licenses, as set forth below.
+ #
+ #  The BSD License
+ #  http://opensource.org/licenses/bsd-license.php
+ #  Copyright (C) 2006-2008, Google Inc.
+ #
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #
+ #  Redistributions of source code must retain the above copyright notice,
+ # this list of conditions and the following disclaimer.
+ #  Redistributions in binary form must reproduce the above
+ # copyright notice, this list of conditions and the following
+ # disclaimer in the documentation and/or other materials provided with
+ # the distribution.
+ #  Neither the name of  Google Inc. nor the names of its
+ # contributors may be used to endorse or promote products derived from
+ # this software without specific prior written permission.
+ #
+ #
+ #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+ #
+ #  The word list in cjdict.txt are generated by combining three word lists
+ # listed below with further processing for compound word breaking. The
+ # frequency is generated with an iterative training against Google web
+ # corpora.
+ #
+ #  * Libtabe (Chinese)
+ #    - https://sourceforge.net/project/?group_id=1519
+ #    - Its license terms and conditions are shown below.
+ #
+ #  * IPADIC (Japanese)
+ #    - http://chasen.aist-nara.ac.jp/chasen/distribution.html
+ #    - Its license terms and conditions are shown below.
+ #
+ #  ---------COPYING.libtabe ---- BEGIN--------------------
+ #
+ #  /*
+ #   * Copyright (c) 1999 TaBE Project.
+ #   * Copyright (c) 1999 Pai-Hsiang Hsiao.
+ #   * All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the TaBE Project nor the names of its
+ #   *   contributors may be used to endorse or promote products derived
+ #   *   from this software without specific prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #
+ #  /*
+ #   * Copyright (c) 1999 Computer Systems and Communication Lab,
+ #   *                    Institute of Information Science, Academia
+ #       *                    Sinica. All rights reserved.
+ #   *
+ #   * Redistribution and use in source and binary forms, with or without
+ #   * modification, are permitted provided that the following conditions
+ #   * are met:
+ #   *
+ #   * . Redistributions of source code must retain the above copyright
+ #   *   notice, this list of conditions and the following disclaimer.
+ #   * . Redistributions in binary form must reproduce the above copyright
+ #   *   notice, this list of conditions and the following disclaimer in
+ #   *   the documentation and/or other materials provided with the
+ #   *   distribution.
+ #   * . Neither the name of the Computer Systems and Communication Lab
+ #   *   nor the names of its contributors may be used to endorse or
+ #   *   promote products derived from this software without specific
+ #   *   prior written permission.
+ #   *
+ #   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ #   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ #   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ #   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ #   * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ #   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ #   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ #   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ #   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ #   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ #   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ #   * OF THE POSSIBILITY OF SUCH DAMAGE.
+ #   */
+ #
+ #  Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
+ #      University of Illinois
+ #  c-tsai4@uiuc.edu  http://casper.beckman.uiuc.edu/~c-tsai4
+ #
+ #  ---------------COPYING.libtabe-----END--------------------------------
+ #
+ #
+ #  ---------------COPYING.ipadic-----BEGIN-------------------------------
+ #
+ #  Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
+ #  and Technology.  All Rights Reserved.
+ #
+ #  Use, reproduction, and distribution of this software is permitted.
+ #  Any copy of this software, whether in its original form or modified,
+ #  must include both the above copyright notice and the following
+ #  paragraphs.
+ #
+ #  Nara Institute of Science and Technology (NAIST),
+ #  the copyright holders, disclaims all warranties with regard to this
+ #  software, including all implied warranties of merchantability and
+ #  fitness, in no event shall NAIST be liable for
+ #  any special, indirect or consequential damages or any damages
+ #  whatsoever resulting from loss of use, data or profits, whether in an
+ #  action of contract, negligence or other tortuous action, arising out
+ #  of or in connection with the use or performance of this software.
+ #
+ #  A large portion of the dictionary entries
+ #  originate from ICOT Free Software.  The following conditions for ICOT
+ #  Free Software applies to the current dictionary as well.
+ #
+ #  Each User may also freely distribute the Program, whether in its
+ #  original form or modified, to any third party or parties, PROVIDED
+ #  that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
+ #  on, or be attached to, the Program, which is distributed substantially
+ #  in the same form as set out herein and that such intended
+ #  distribution, if actually made, will neither violate or otherwise
+ #  contravene any of the laws and regulations of the countries having
+ #  jurisdiction over the User or the intended distribution itself.
+ #
+ #  NO WARRANTY
+ #
+ #  The program was produced on an experimental basis in the course of the
+ #  research and development conducted during the project and is provided
+ #  to users as so produced on an experimental basis.  Accordingly, the
+ #  program is provided without any warranty whatsoever, whether express,
+ #  implied, statutory or otherwise.  The term "warranty" used herein
+ #  includes, but is not limited to, any warranty of the quality,
+ #  performance, merchantability and fitness for a particular purpose of
+ #  the program and the nonexistence of any infringement or violation of
+ #  any right of any third party.
+ #
+ #  Each user of the program will agree and understand, and be deemed to
+ #  have agreed and understood, that there is no warranty whatsoever for
+ #  the program and, accordingly, the entire risk arising from or
+ #  otherwise connected with the program is assumed by the user.
+ #
+ #  Therefore, neither ICOT, the copyright holder, or any other
+ #  organization that participated in or was otherwise related to the
+ #  development of the program and their respective officials, directors,
+ #  officers and other employees shall be held liable for any and all
+ #  damages, including, without limitation, general, special, incidental
+ #  and consequential damages, arising out of or otherwise in connection
+ #  with the use or inability to use the program or any product, material
+ #  or result produced or otherwise obtained by using the program,
+ #  regardless of whether they have been advised of, or otherwise had
+ #  knowledge of, the possibility of such damages at any time during the
+ #  project or thereafter.  Each user will be deemed to have agreed to the
+ #  foregoing by his or her commencement of use of the program.  The term
+ #  "use" as used herein includes, but is not limited to, the use,
+ #  modification, copying and distribution of the program and the
+ #  production of secondary products from the program.
+ #
+ #  In the case where the program, whether in its original form or
+ #  modified, was distributed or delivered to or received by a user from
+ #  any person, organization or entity other than ICOT, unless it makes or
+ #  grants independently of ICOT any specific warranty to the user in
+ #  writing, such person, organization or entity, will also be exempted
+ #  from and not be held liable to the user for any such damages as noted
+ #  above as far as the program is concerned.
+ #
+ #  ---------------COPYING.ipadic-----END----------------------------------
+
+3. Lao Word Break Dictionary Data (laodict.txt)
+
+ #  Copyright (c) 2013 International Business Machines Corporation
+ #  and others. All Rights Reserved.
+ #
+ # Project: http://code.google.com/p/lao-dictionary/
+ # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
+ # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
+ #              (copied below)
+ #
+ #  This file is derived from the above dictionary, with slight
+ #  modifications.
+ #  ----------------------------------------------------------------------
+ #  Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ #  modification,
+ #  are permitted provided that the following conditions are met:
+ #
+ #
+ # Redistributions of source code must retain the above copyright notice, this
+ #  list of conditions and the following disclaimer. Redistributions in
+ #  binary form must reproduce the above copyright notice, this list of
+ #  conditions and the following disclaimer in the documentation and/or
+ #  other materials provided with the distribution.
+ #
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
+ #  --------------------------------------------------------------------------
+
+4. Burmese Word Break Dictionary Data (burmesedict.txt)
+
+ #  Copyright (c) 2014 International Business Machines Corporation
+ #  and others. All Rights Reserved.
+ #
+ #  This list is part of a project hosted at:
+ #    github.com/kanyawtech/myanmar-karen-word-lists
+ #
+ #  --------------------------------------------------------------------------
+ #  Copyright (c) 2013, LeRoy Benjamin Sharon
+ #  All rights reserved.
+ #
+ #  Redistribution and use in source and binary forms, with or without
+ #  modification, are permitted provided that the following conditions
+ #  are met: Redistributions of source code must retain the above
+ #  copyright notice, this list of conditions and the following
+ #  disclaimer.  Redistributions in binary form must reproduce the
+ #  above copyright notice, this list of conditions and the following
+ #  disclaimer in the documentation and/or other materials provided
+ #  with the distribution.
+ #
+ #    Neither the name Myanmar Karen Word Lists, nor the names of its
+ #    contributors may be used to endorse or promote products derived
+ #    from this software without specific prior written permission.
+ #
+ #  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ #  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ #  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ #  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ #  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ #  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ #  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ #  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ #  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ #  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ #  THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ #  SUCH DAMAGE.
+ #  --------------------------------------------------------------------------
+
+5. Time Zone Database
+
+  ICU uses the public domain data and code derived from Time Zone
+Database for its time zone support. The ownership of the TZ database
+is explained in BCP 175: Procedure for Maintaining the Time Zone
+Database section 7.
+
+ # 7.  Database Ownership
+ #
+ #    The TZ database itself is not an IETF Contribution or an IETF
+ #    document.  Rather it is a pre-existing and regularly updated work
+ #    that is in the public domain, and is intended to remain in the
+ #    public domain.  Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
+ #    not apply to the TZ Database or contributions that individuals make
+ #    to it.  Should any claims be made and substantiated against the TZ
+ #    Database, the organization that is providing the IANA
+ #    Considerations defined in this RFC, under the memorandum of
+ #    understanding with the IETF, currently ICANN, may act in accordance
+ #    with all competent court orders.  No ownership claims will be made
+ #    by ICANN or the IETF Trust on the database or the code.  Any person
+ #    making a contribution to the database or code waives all rights to
+ #    future claims in that contribution or in the TZ Database.
+
+6. Google double-conversion
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/external/duckdb/extension/icu/third_party/icu/common/CMakeLists.txt
+++ b/external/duckdb/extension/icu/third_party/icu/common/CMakeLists.txt
@@ -0,0 +1,202 @@
+add_library_unity(  # add_library_unity is defined elsewhere in the build, not in this file
+  duckdb_icu_common  # target name; referenced again by the TARGET_OBJECTS expression below
+  OBJECT  # object-library keyword: the sources are not archived into a library here
+  appendable.cpp
+  bmpset.cpp
+  brkeng.cpp
+  brkiter.cpp
+  bytesinkutil.cpp
+  bytestream.cpp
+  bytestrie.cpp
+  bytestriebuilder.cpp
+  bytestrieiterator.cpp
+  caniter.cpp
+  characterproperties.cpp
+  chariter.cpp
+  charstr.cpp
+  cmemory.cpp
+  cstr.cpp
+  cstring.cpp
+  cwchar.cpp
+  dictbe.cpp
+  dictionarydata.cpp
+  dtintrv.cpp
+  edits.cpp
+  errorcode.cpp
+  filteredbrk.cpp
+  filterednormalizer2.cpp
+  icudataver.cpp
+  icuplug.cpp
+  loadednormalizer2impl.cpp
+  localebuilder.cpp
+  localematcher.cpp
+  localeprioritylist.cpp
+  locavailable.cpp
+  locbased.cpp
+  locdispnames.cpp
+  locdistance.cpp
+  locdspnm.cpp
+  locid.cpp
+  loclikely.cpp
+  loclikelysubtags.cpp
+  locresdata.cpp
+  locutil.cpp
+  lsr.cpp
+  messagepattern.cpp
+  normalizer2.cpp
+  normalizer2impl.cpp
+  normlzr.cpp
+  parsepos.cpp
+  patternprops.cpp
+  pluralmap.cpp
+  propname.cpp
+  propsvec.cpp
+  punycode.cpp
+  rbbi.cpp
+  rbbi_cache.cpp
+  rbbidata.cpp
+  rbbinode.cpp
+  rbbirb.cpp
+  rbbiscan.cpp
+  rbbisetb.cpp
+  rbbistbl.cpp
+  rbbitblb.cpp
+  resbund.cpp
+  resbund_cnv.cpp
+  resource.cpp
+  restrace.cpp
+  ruleiter.cpp
+  schriter.cpp
+  serv.cpp
+  servlk.cpp
+  servlkf.cpp
+  servls.cpp
+  servnotf.cpp
+  servrbf.cpp
+  servslkf.cpp
+  sharedobject.cpp
+  simpleformatter.cpp
+  static_unicode_sets.cpp
+  stringpiece.cpp
+  stringtriebuilder.cpp
+  uarrsort.cpp
+  ubidi.cpp
+  ubidi_props.cpp
+  ubidiln.cpp
+  ubiditransform.cpp
+  ubidiwrt.cpp
+  ubrk.cpp
+  ucase.cpp
+  ucasemap.cpp
+  ucasemap_titlecase_brkiter.cpp
+  ucat.cpp
+  uchar.cpp
+  ucharstrie.cpp
+  ucharstriebuilder.cpp
+  ucharstrieiterator.cpp
+  uchriter.cpp
+  ucln_cmn.cpp
+  ucmndata.cpp
+  ucnv.cpp
+  ucnv2022.cpp
+  ucnv_bld.cpp
+  ucnv_cb.cpp
+  ucnv_cnv.cpp
+  ucnv_ct.cpp
+  ucnv_err.cpp
+  ucnv_ext.cpp
+  ucnv_io.cpp
+  ucnv_lmb.cpp
+  ucnv_set.cpp
+  ucnv_u16.cpp
+  ucnv_u32.cpp
+  ucnv_u7.cpp
+  ucnv_u8.cpp
+  ucnvbocu.cpp
+  ucnvdisp.cpp
+  ucnvhz.cpp
+  ucnvisci.cpp
+  ucnvlat1.cpp
+  ucnvmbcs.cpp
+  ucnvscsu.cpp
+  ucnvsel.cpp
+  ucol_swp.cpp
+  ucptrie.cpp
+  ucurr.cpp
+  udata.cpp
+  udatamem.cpp
+  udataswp.cpp
+  uenum.cpp
+  uhash.cpp
+  uhash_us.cpp
+  uidna.cpp
+  uinit.cpp
+  uinvchar.cpp
+  uiter.cpp
+  ulist.cpp
+  uloc.cpp
+  uloc_keytype.cpp
+  uloc_tag.cpp
+  umath.cpp
+  umutablecptrie.cpp
+  umutex.cpp
+  unames.cpp
+  unifiedcache.cpp
+  unifilt.cpp
+  unifunct.cpp
+  uniset.cpp
+  uniset_closure.cpp
+  uniset_props.cpp
+  unisetspan.cpp
+  unistr.cpp
+  unistr_case.cpp
+  unistr_case_locale.cpp
+  unistr_cnv.cpp
+  unistr_props.cpp
+  unistr_titlecase_brkiter.cpp
+  unorm.cpp
+  unormcmp.cpp
+  uobject.cpp
+  uprops.cpp
+  ures_cnv.cpp
+  uresbund.cpp
+  uresdata.cpp
+  usc_impl.cpp
+  uscript.cpp
+  uscript_props.cpp
+  uset.cpp
+  uset_props.cpp
+  usetiter.cpp
+  ushape.cpp
+  usprep.cpp
+  ustack.cpp
+  ustr_cnv.cpp
+  ustr_titlecase_brkiter.cpp
+  ustr_wcs.cpp
+  ustrcase.cpp
+  ustrcase_locale.cpp
+  ustrenum.cpp
+  ustrfmt.cpp
+  ustring.cpp
+  ustrtrns.cpp
+  utext.cpp
+  utf_impl.cpp
+  util.cpp
+  util_props.cpp
+  utrace.cpp
+  utrie.cpp
+  utrie2.cpp
+  utrie2_builder.cpp
+  utrie_swap.cpp
+  uts46.cpp
+  utypes.cpp
+  uvector.cpp
+  uvectr32.cpp
+  uvectr64.cpp
+  locmap.cpp  # this and the next three entries sit outside the alphabetical run above (presumably platform-support sources; confirm)
+  putil.cpp
+  umapfile.cpp
+  wintz.cpp)
+set(ICU_LIBRARY_FILES  # append this target's object files to the list owned by the including directory
+    ${ICU_LIBRARY_FILES} $<TARGET_OBJECTS:duckdb_icu_common>  # keep earlier entries, add ours
+    PARENT_SCOPE)  # without PARENT_SCOPE the updated value would stay local to this directory scope
--- a/external/duckdb/extension/icu/third_party/icu/common/appendable.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/appendable.cpp
@@ -0,0 +1,74 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2011-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  appendable.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010dec07
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/appendable.h"
+#include "unicode/utf16.h"
+
+U_NAMESPACE_BEGIN
+
+Appendable::~Appendable() {}  // Empty body: no cleanup is performed here.
+
+UBool
+Appendable::appendCodePoint(UChar32 c) {
+    // Anything up to U+FFFF is passed through as one UTF-16 code unit.
+    if(c<=0xffff) { return appendCodeUnit((UChar)c); }
+    // Above that: lead surrogate first; && skips the trail if the lead fails.
+    const UBool leadAccepted=appendCodeUnit(U16_LEAD(c));
+    return leadAccepted && appendCodeUnit(U16_TRAIL(c));
+}
+
+UBool
+Appendable::appendString(const UChar *s, int32_t length) {
+    // length==0: nothing to append, s is never read.
+    if(length==0) {
+        return TRUE;
+    }
+    if(length>0) {
+        // Explicit length: forward exactly that many code units, in order.
+        for(const UChar *end=s+length; s<end; ++s) {
+            if(!appendCodeUnit(*s)) { return FALSE; }
+        }
+        return TRUE;
+    }
+    // Negative length: s is NUL-terminated; the terminator is not appended.
+    for(; *s!=0; ++s) {
+        if(!appendCodeUnit(*s)) { return FALSE; }
+    }
+    return TRUE;
+}
+
+UBool
+Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
+    return TRUE;  // This implementation ignores the requested capacity and always reports TRUE.
+}
+
+UChar *
+Appendable::getAppendBuffer(int32_t minCapacity,
+                            int32_t /*desiredCapacityHint*/,
+                            UChar *scratch, int32_t scratchCapacity,
+                            int32_t *resultCapacity) {
+    // Usable only for a positive request that the scratch buffer can satisfy.
+    const bool usable=minCapacity>=1 && scratchCapacity>=minCapacity;
+    // On success the whole scratch capacity is reported; on failure 0.
+    *resultCapacity=usable ? scratchCapacity : 0;
+    if(!usable) { return NULL; }
+    return scratch;
+}
+
+// UnicodeStringAppendable is implemented in unistr.cpp.
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bmpset.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bmpset.cpp
@@ -0,0 +1,741 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2007-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  bmpset.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan29
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "bmpset.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
+        list(parentList), listLength(parentListLength) {
+    uprv_memset(latin1Contains, 0, sizeof(latin1Contains));  // all three lookup tables start zeroed;
+    uprv_memset(table7FF, 0, sizeof(table7FF));              // initBits() below only ORs bits in
+    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
+
+    /*
+     * Set the list indexes for binary searches for
+     * U+0800, U+1000, U+2000, .., U+F000, U+10000
+     * (list4kStarts[0], [1], [2], .., [0xf], [0x10]; [0x11] is set separately below).
+     * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
+     * looked up in the bit tables.
+     * The last pair of indexes is for finding supplementary code points.
+     */
+    list4kStarts[0]=findCodePoint(0x800, 0, listLength-1);
+    int32_t i;
+    for(i=1; i<=0x10; ++i) {  // each search starts at the previous result instead of index 0
+        list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
+    }
+    list4kStarts[0x11]=listLength-1;  // upper search bound for supplementary code points
+    containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);  // read by overrideIllegal()
+
+    initBits();          // fill the bit tables from list[]
+    overrideIllegal();   // then patch entries for illegal UTF-8 lead bytes and surrogates
+}
+
+BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
+        containsFFFD(otherBMPSet.containsFFFD),
+        list(newParentList), listLength(newParentListLength) {
+    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));      // search indexes
+    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));      // per-block bits
+    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));                  // bits below U+0800
+    uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains)); // flags for 0..FF
+}
+
+BMPSet::~BMPSet() {  // Empty body: no cleanup is performed here.
+}
+
+/*
+ * Set bits in a bit rectangle in "vertical" bit organization: value v maps to bit (v>>6) of table[v&0x3f].
+ * Sets the bits for every v in the half-open range [start, limit); requires start<limit<=0x800 (asserted below).
+ */
+static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
+    U_ASSERT(start<limit);
+    U_ASSERT(limit<=0x800);
+
+    int32_t lead=start>>6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
+    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.
+
+    // Set one bit indicating an all-one block.
+    uint32_t bits=(uint32_t)1<<lead;
+    if((start+1)==limit) {  // Single-character shortcut.
+        table[trail]|=bits;
+        return;
+    }
+
+    int32_t limitLead=limit>>6;
+    int32_t limitTrail=limit&0x3f;
+
+    if(lead==limitLead) {
+        // Partial vertical bit column.
+        while(trail<limitTrail) {
+            table[trail++]|=bits;
+        }
+    } else {
+        // Partial vertical bit column,
+        // followed by a bit rectangle,
+        // followed by another partial vertical bit column.
+        if(trail>0) {
+            do {
+                table[trail++]|=bits;
+            } while(trail<64);
+            ++lead;  // the first column is done; the rectangle starts at the next one
+        }
+        if(lead<limitLead) {
+            bits=~(((unsigned)1<<lead)-1);  // all bits from position lead upward
+            if(limitLead<0x20) {
+                bits&=((unsigned)1<<limitLead)-1;  // drop bits at position limitLead and above
+            }
+            for(trail=0; trail<64; ++trail) {
+                table[trail]|=bits;
+            }
+        }
+        // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
+        // In that case 1<<limitLead would be undefined, so the shift count is clamped to 31;
+        // the resulting bits value is not used because trail<limitTrail is already false.
+        bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
+        for(trail=0; trail<limitTrail; ++trail) {
+            table[trail]|=bits;
+        }
+    }
+}
+
+void BMPSet::initBits() {  // Fills latin1Contains[], table7FF[] and bmpBlockBits[] from the range pairs in list[].
+    UChar32 start, limit;
+    int32_t listIndex=0;
+
+    // Set latin1Contains[]: 1 for every code point below 0x100 that lies in a [start, limit) pair.
+    do {
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        } else {
+            limit=0x110000;  // no limit entry left in list[]: the range runs to the end of Unicode
+        }
+        if(start>=0x100) {
+            break;
+        }
+        do {
+            latin1Contains[start++]=1;
+        } while(start<limit && start<0x100);
+    } while(limit<=0x100);  // a range ending above 0x100 is the last one that can reach below 0x100
+
+    // Find the first range overlapping with (or after) 80..FF again,
+    // to include them in table7FF as well.
+    for(listIndex=0;;) {
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        } else {
+            limit=0x110000;
+        }
+        if(limit>0x80) {
+            if(start<0x80) {
+                start=0x80;  // clip: table7FF is not filled below 0x80
+            }
+            break;
+        }
+    }
+
+    // Set table7FF[] for the ranges below 0x800, clipping the last one at 0x800.
+    while(start<0x800) {
+        set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
+        if(limit>0x800) {
+            start=0x800;  // the same range continues into the bmpBlockBits section below
+            break;
+        }
+
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        } else {
+            limit=0x110000;
+        }
+    }
+
+    // Set bmpBlockBits[] for 800..FFFF; minStart skips ranges inside a block already marked mixed.
+    int32_t minStart=0x800;
+    while(start<0x10000) {
+        if(limit>0x10000) {
+            limit=0x10000;
+        }
+
+        if(start<minStart) {
+            start=minStart;
+        }
+        if(start<limit) {  // Else: Another range entirely in a known mixed-value block.
+            if(start&0x3f) {
+                // Mixed-value block of 64 code points.
+                start>>=6;
+                bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);  // both bits set: contains() then falls back to the list search
+                start=(start+1)<<6;  // Round up to the next block boundary.
+                minStart=start;      // Ignore further ranges in this block.
+            }
+            if(start<limit) {
+                if(start<(limit&~0x3f)) {
+                    // Multiple all-ones blocks of 64 code points each.
+                    set32x64Bits(bmpBlockBits, start>>6, limit>>6);  // arguments are block numbers here, not code points
+                }
+
+                if(limit&0x3f) {
+                    // Mixed-value block of 64 code points.
+                    limit>>=6;
+                    bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
+                    limit=(limit+1)<<6;  // Round up to the next block boundary.
+                    minStart=limit;      // Ignore further ranges in this block.
+                }
+            }
+        }
+
+        if(limit==0x10000) {
+            break;
+        }
+
+        start=list[listIndex++];
+        if(listIndex<listLength) {
+            limit=list[listIndex++];
+        } else {
+            limit=0x110000;
+        }
+    }
+}
+
+/*
+ * Override some bits and bytes to the result of contains(FFFD)
+ * for faster validity checking at runtime.
+ * No need to set 0 values where they were reset to 0 in the constructor
+ * and not modified by initBits().
+ * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
+ * Need to set 0 values for surrogates D800..DFFF.
+ */
+void BMPSet::overrideIllegal() {
+    uint32_t bits, mask;
+    int32_t i;
+
+    if(containsFFFD) {
+        bits=3;                 // Lead bytes 0xC0 and 0xC1.
+        for(i=0; i<64; ++i) {
+            table7FF[i]|=bits;
+        }
+
+        bits=1;                 // Lead byte 0xE0.
+        for(i=0; i<32; ++i) {   // First half of 4k block.
+            bmpBlockBits[i]|=bits;
+        }
+
+        mask= static_cast<uint32_t>(~(0x10001<<0xd));   // Lead byte 0xED.
+        bits=1<<0xd;            // Low bit only: the block then reads as "all contained" in contains()/span().
+        for(i=32; i<64; ++i) {  // Second half of 4k block.
+            bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
+        }
+    } else {
+        mask= static_cast<uint32_t>(~(0x10001<<0xd));   // Lead byte 0xED.
+        for(i=32; i<64; ++i) {  // Second half of 4k block.
+            bmpBlockBits[i]&=mask;  // Both bits cleared: the block reads as "none contained".
+        }
+    }
+}
+
+int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
+    /* Binary search in list[lo..hi]; sample results when searching the whole list:
+                                       findCodePoint(c)
+       set              list[]         c=0 1 3 4 7 8
+       ===              ==============   ===========
+       []               [110000]         0 0 0 0 0 0
+       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
+       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
+       [:Any:]          [0, 110000]      1 1 1 1 1 1
+     */
+
+    // Result: the smallest index i with c < list[i]. Assumed, not checked:
+    // list[len - 1] == HIGH and c is legal (0..HIGH-1).
+    // Fast exit 1: c lies before the first boundary of the search window.
+    if (c < list[lo]) {
+        return lo;
+    }
+    // Fast exit 2: empty window, or c at/after the last boundary below
+    // list[hi] -- a frequent case, so testing it up front pays off.
+    if (lo >= hi || c >= list[hi - 1]) {
+        return hi;
+    }
+    // Bisect while keeping list[lo] <= c < list[hi]; once the midpoint
+    // collapses onto lo, the bounds are adjacent and hi is the answer.
+    for (int32_t mid = (lo + hi) >> 1; mid != lo; mid = (lo + hi) >> 1) {
+        if (c < list[mid]) {
+            hi = mid;
+        } else {
+            lo = mid;
+        }
+    }
+    return hi;
+}
+
+UBool
+BMPSet::contains(UChar32 c) const {
+    const uint32_t cp=(uint32_t)c;  // unsigned view: a negative c compares as a huge value
+    if(cp<=0xff) {
+        return (UBool)latin1Contains[c];
+    }
+    if(cp<=0x7ff) {
+        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
+    }
+    if(cp<0xd800 || (c>=0xe000 && c<=0xffff)) {
+        // BMP, not a surrogate: consult the two bits of the 64-code-point block.
+        int lead=c>>12;
+        uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+        // 0 or 1: every code point sharing bits 15..6 is out of / in the set.
+        if(twoBits<=1) { return (UBool)twoBits; }
+        // Mixed block: search the list inside this code point's 4k block.
+        return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
+    }
+    if(cp<=0x10ffff) {
+        // Surrogate or supplementary code point.
+        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
+    }
+    // Out-of-range code points get FALSE, consistent with the long-standing
+    // behavior of UnicodeSet::contains(c).
+    return FALSE;
+}
+
+/*
+ * Returns the first position at or after s where the span stops, or limit; *s is read before s<limit is tested.
+ * Check for sufficient length for trail unit for each surrogate pair; handle single surrogates as surrogate code points as usual in ICU.
+ */
+const UChar *
+BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
+    UChar c, c2;
+
+    if(spanCondition) {
+        // span: stop at the first code point that is not in the set
+        do {
+            c=*s;
+            if(c<=0xff) {
+                if(!latin1Contains[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits==0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {  // s[1] is read only if s+1<limit
+                // surrogate code point
+                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                ++s;  // skip the lead unit; the loop condition then skips the trail unit
+            }
+        } while(++s<limit);
+    } else {
+        // span not: stop at the first code point that is in the set
+        do {
+            c=*s;
+            if(c<=0xff) {
+                if(latin1Contains[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits!=0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {  // s[1] is read only if s+1<limit
+                // surrogate code point
+                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair
+                if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                ++s;  // skip the lead unit; the loop condition then skips the trail unit
+            }
+        } while(++s<limit);
+    }
+    return s;  // after a break on a surrogate pair, s still points at its lead unit
+}
+
+/* Symmetrical with span(): walks [s, limit) backward from limit; returns the pointer where the trailing span starts. */
+const UChar *
+BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
+    UChar c, c2;
+
+    if(spanCondition) {
+        // span: stop at the first character (seen from the end) that is NOT in the set
+        for(;;) {
+            c=*(--limit);  // code unit just before the current limit
+            if(c<=0xff) {
+                if(!latin1Contains[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // bit 0: block value, bit 16: block is mixed
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits==0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
+                // unpaired surrogate code point: lead unit at the end, or trail unit without a preceding lead unit
+                if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair: c2 is the lead unit, c is the trail unit
+                if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                --limit;  // also step over the lead surrogate
+            }
+            if(s==limit) {
+                return s;  // every character back to the start is part of the span
+            }
+        }
+    } else {
+        // span not: stop at the first character (seen from the end) that IS in the set
+        for(;;) {
+            c=*(--limit);  // code unit just before the current limit
+            if(c<=0xff) {
+                if(latin1Contains[c]) {
+                    break;
+                }
+            } else if(c<=0x7ff) {
+                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                    break;
+                }
+            } else if(c<0xd800 || c>=0xe000) {
+                int lead=c>>12;
+                uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // bit 0: block value, bit 16: block is mixed
+                if(twoBits<=1) {
+                    // All 64 code points with the same bits 15..6
+                    // are either in the set or not.
+                    if(twoBits!=0) {
+                        break;
+                    }
+                } else {
+                    // Look up the code point in its 4k block of code points.
+                    if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
+                        break;
+                    }
+                }
+            } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
+                // unpaired surrogate code point: lead unit at the end, or trail unit without a preceding lead unit
+                if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
+                    break;
+                }
+            } else {
+                // surrogate pair: c2 is the lead unit, c is the trail unit
+                if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
+                    break;
+                }
+                --limit;  // also step over the lead surrogate
+            }
+            if(s==limit) {
+                return s;  // every character back to the start is part of the span
+            }
+        }
+    }
+    return limit+1;  // limit points at the last code unit of the character that ended the span
+}
+
+/*
+ * Precheck for sufficient trail bytes at end of string only once per span.
+ * Check validity: ill-formed sequences are given the value of contains(U+FFFD).
+ */
+const uint8_t *
+BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+    const uint8_t *limit=s+length;
+    uint8_t b=*s;  // the first byte is read unconditionally, so length must be >0
+    if(U8_IS_SINGLE(b)) {
+        // Initial all-ASCII span.
+        if(spanCondition) {
+            do {
+                if(!latin1Contains[b] || ++s==limit) {
+                    return s;
+                }
+                b=*s;
+            } while(U8_IS_SINGLE(b));
+        } else {
+            do {
+                if(latin1Contains[b] || ++s==limit) {
+                    return s;
+                }
+                b=*s;
+            } while(U8_IS_SINGLE(b));
+        }
+        length=(int32_t)(limit-s);  // bytes remaining after the ASCII prefix
+    }
+
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    const uint8_t *limit0=limit;  // value returned when the span runs to the end of the input
+
+    /*
+     * Make sure that the last 1/2/3/4-byte sequence before limit is complete
+     * or runs into a lead byte.
+     * In the span loop compare s with limit only once
+     * per multi-byte character.
+     *
+     * Give a trailing illegal sequence the same value as the result of contains(FFFD),
+     * including it if that is part of the span, otherwise set limit0 to before
+     * the truncated sequence.
+     */
+    b=*(limit-1);
+    if((int8_t)b<0) {
+        // b>=0x80: lead or trail byte
+        if(b<0xc0) {
+            // single trail byte, check for preceding 3- or 4-byte lead byte
+            if(length>=2 && (b=*(limit-2))>=0xe0) {
+                limit-=2;
+                if(containsFFFD!=spanCondition) {
+                    limit0=limit;
+                }
+            } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
+                // 4-byte lead byte with only two trail bytes
+                limit-=3;
+                if(containsFFFD!=spanCondition) {
+                    limit0=limit;
+                }
+            }
+        } else {
+            // lead byte with no trail bytes
+            --limit;
+            if(containsFFFD!=spanCondition) {
+                limit0=limit;
+            }
+        }
+    }
+
+    uint8_t t1, t2, t3;
+
+    while(s<limit) {
+        b=*s;
+        if(U8_IS_SINGLE(b)) {
+            // ASCII
+            if(spanCondition) {
+                do {
+                    if(!latin1Contains[b]) {
+                        return s;
+                    } else if(++s==limit) {
+                        return limit0;
+                    }
+                    b=*s;
+                } while(U8_IS_SINGLE(b));
+            } else {
+                do {
+                    if(latin1Contains[b]) {
+                        return s;
+                    } else if(++s==limit) {
+                        return limit0;
+                    }
+                    b=*s;
+                } while(U8_IS_SINGLE(b));
+            }
+        }
+        ++s;  // Advance past the lead byte.
+        if(b>=0xe0) {
+            if(b<0xf0) {
+                if( /* handle U+0000..U+FFFF inline */
+                    (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+                    (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
+                ) {
+                    b&=0xf;  // low 4 bits of the lead byte become bits 15..12 of the code point
+                    uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;  // bit 0: block value, bit 16: block is mixed
+                    if(twoBits<=1) {
+                        // All 64 code points with this lead byte and middle trail byte
+                        // are either in the set or not.
+                        if(twoBits!=(uint32_t)spanCondition) {
+                            return s-1;  // s-1 is the lead byte of the character that ended the span
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        UChar32 c=(b<<12)|(t1<<6)|t2;
+                        if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
+                            return s-1;
+                        }
+                    }
+                    s+=2;
+                    continue;
+                }
+            } else if( /* handle U+10000..U+10FFFF inline */
+                (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
+                (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
+                (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
+            ) {
+                // Give an illegal sequence the same value as the result of contains(FFFD).
+                UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
+                if( (   (0x10000<=c && c<=0x10ffff) ?
+                            containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
+                            containsFFFD
+                    ) != spanCondition
+                ) {
+                    return s-1;
+                }
+                s+=3;
+                continue;
+            }
+        } else {
+            if( /* handle U+0000..U+07FF inline */
+                b>=0xc0 &&
+                (t1=(uint8_t)(*s-0x80)) <= 0x3f
+            ) {
+                if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
+                    return s-1;
+                }
+                ++s;
+                continue;
+            }
+        }
+
+        // Give an illegal sequence the same value as the result of contains(FFFD).
+        // Handle each byte of an illegal sequence separately to simplify the code;
+        // no need to optimize error handling.
+        if(containsFFFD!=spanCondition) {
+            return s-1;
+        }
+    }
+
+    return limit0;  // reached the (possibly shortened) limit without ending the span
+}
+
+/*
+ * While going backwards through UTF-8 optimize only for ASCII.
+ * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
+ * possible to tell from the last byte in a multi-byte sequence how many
+ * preceding bytes there should be. Therefore, going backwards through UTF-8
+ * is much harder than going forward. Returns the byte index where the trailing span starts.
+ */
+int32_t
+BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
+    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
+    }
+
+    uint8_t b;
+
+    do {
+        b=s[--length];  // from here on, length is the index of b
+        if(U8_IS_SINGLE(b)) {
+            // ASCII sub-span
+            if(spanCondition) {
+                do {
+                    if(!latin1Contains[b]) {
+                        return length+1;  // index just past the byte that ended the span
+                    } else if(length==0) {
+                        return 0;
+                    }
+                    b=s[--length];
+                } while(U8_IS_SINGLE(b));
+            } else {
+                do {
+                    if(latin1Contains[b]) {
+                        return length+1;  // index just past the byte that ended the span
+                    } else if(length==0) {
+                        return 0;
+                    }
+                    b=s[--length];
+                } while(U8_IS_SINGLE(b));
+            }
+        }
+
+        int32_t prev=length;  // index of the last byte of the character about to be decoded
+        UChar32 c;
+        // trail byte: collect a multi-byte character
+        // (or  lead byte in last-trail position)
+        c=utf8_prevCharSafeBody(s, 0, &length, b, -3);  // NOTE(review): presumably moves length back to the character start - confirm
+        // c is a valid code point, not ASCII, not a surrogate
+        if(c<=0x7ff) {
+            if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
+                return prev+1;  // index just past the character that ended the span
+            }
+        } else if(c<=0xffff) {
+            int lead=c>>12;
+            uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;  // bit 0: block value, bit 16: block is mixed
+            if(twoBits<=1) {
+                // All 64 code points with the same bits 15..6
+                // are either in the set or not.
+                if(twoBits!=(uint32_t)spanCondition) {
+                    return prev+1;
+                }
+            } else {
+                // Look up the code point in its 4k block of code points.
+                if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
+                    return prev+1;
+                }
+            }
+        } else {
+            if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
+                return prev+1;
+            }
+        }
+    } while(length>0);
+    return 0;  // the whole string is part of the span
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bmpset.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/bmpset.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  bmpset.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jan29
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __BMPSET_H__
+#define __BMPSET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Helper class for frozen UnicodeSets, implements contains() and span()
+ * optimized for BMP code points. Structured to be UTF-8-friendly.
+ *
+ * Latin-1: Look up bytes.
+ * 2-byte characters: Bits organized vertically.
+ * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
+ *                    with mixed for illegal ranges.
+ * Supplementary characters: Binary search over
+ * the supplementary part of the parent set's inversion list.
+ */
+class BMPSet : public UMemory {
+public:
+    BMPSet(const int32_t *parentList, int32_t parentListLength);
+    BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
+    virtual ~BMPSet();
+
+    virtual UBool contains(UChar32 c) const;
+
+    /*
+     * Span the initial substring for which each character c has spanCondition==contains(c).
+     * It must be s<limit and spanCondition==0 or 1.
+     * @return The string pointer which limits the span.
+     */
+    const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the trailing substring for which each character c has spanCondition==contains(c).
+     * It must be s<limit and spanCondition==0 or 1.
+     * @return The string pointer which starts the span.
+     */
+    const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the initial substring for which each character c has spanCondition==contains(c).
+     * It must be length>0 (the first byte is read unconditionally) and spanCondition==0 or 1.
+     * @return The string pointer which limits the span.
+     */
+    const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /*
+     * Span the trailing substring for which each character c has spanCondition==contains(c).
+     * It must be length>0 and spanCondition==0 or 1.
+     * @return The start of the span, as a byte index into s.
+     */
+    int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+private:
+    void initBits();
+    void overrideIllegal();
+
+    /**
+     * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
+     * binary search is restricted for finding code points in a certain range.
+     *
+     * For restricting the search for finding in the range start..end,
+     * pass in
+     *   lo=findCodePoint(start) and
+     *   hi=findCodePoint(end)
+     * with 0<=lo<=hi<len.
+     * findCodePoint(c) defaults to lo=0 and hi=len-1.
+     *
+     * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
+     * @param lo The lowest index to be returned.
+     * @param hi The highest index to be returned.
+     * @return the smallest integer i in the range lo..hi,
+     *         inclusive, such that c < list[i]
+     */
+    int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
+
+    inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
+
+    /*
+     * One byte 0 or 1 per Latin-1 character.
+     */
+    UBool latin1Contains[0x100];
+
+    /* TRUE if contains(U+FFFD); also the value given to ill-formed UTF-8 by spanUTF8(). */
+    UBool containsFFFD;
+
+    /*
+     * One bit per code point from U+0000..U+07FF.
+     * The bits are organized vertically; consecutive code points
+     * correspond to the same bit positions in consecutive table words.
+     * With code point parts
+     *   lead=c{10..6}
+     *   trail=c{5..0}
+     * it is set.contains(c)==(table7FF[trail] bit lead)
+     *
+     * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
+     * for faster validity checking at runtime.
+     */
+    uint32_t table7FF[64];
+
+    /*
+     * One bit per 64 BMP code points.
+     * The bits are organized vertically; consecutive 64-code point blocks
+     * correspond to the same bit position in consecutive table words.
+     * With code point parts
+     *   lead=c{15..12}
+     *   t1=c{11..6}
+     * test bits (lead+16) and lead in bmpBlockBits[t1].
+     * If the upper bit is 0, then the lower bit indicates if contains(c)
+     * for all code points in the 64-block.
+     * If the upper bit is 1, then the block is mixed and set.contains(c)
+     * must be called.
+     *
+     * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
+     * the result of contains(FFFD) for faster validity checking at runtime.
+     */
+    uint32_t bmpBlockBits[64];
+
+    /*
+     * Inversion list indexes for restricted binary searches in
+     * findCodePoint(), from
+     * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
+     * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
+     * always looked up in the bit tables.
+     * The last pair of indexes is for finding supplementary code points.
+     */
+    int32_t list4kStarts[18];  // BMP lookups use [c>>12] and [(c>>12)+1]; supplementary lookups use [0x10] and [0x11]
+
+    /*
+     * The inversion list of the parent set, for the slower contains() implementation
+     * for mixed BMP blocks and for supplementary code points.
+     * The list is terminated with list[listLength-1]=0x110000.
+     */
+    const int32_t *list;
+    int32_t listLength;
+};
+
+inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
+    return static_cast<UBool>(findCodePoint(c, lo, hi) & 1);  // odd inversion-list index: c lies inside a set range
+}
+
+U_NAMESPACE_END
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/brkeng.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/brkeng.cpp
@@ -0,0 +1,284 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ************************************************************************************
+ * Copyright (C) 2006-2016, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ ************************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+// #if !UCONFIG_NO_BREAK_ITERATION
+
+// #include "unicode/uchar.h"
+// #include "unicode/uniset.h"
+// #include "unicode/chariter.h"
+// #include "unicode/ures.h"
+// #include "unicode/udata.h"
+// #include "unicode/putil.h"
+// #include "unicode/ustring.h"
+// #include "unicode/uscript.h"
+// #include "unicode/ucharstrie.h"
+// #include "unicode/bytestrie.h"
+
+// #include "brkeng.h"
+// #include "cmemory.h"
+// #include "dictbe.h"
+// #include "charstr.h"
+// #include "dictionarydata.h"
+// #include "mutex.h"
+// #include "uvector.h"
+// #include "umutex.h"
+// #include "uresimp.h"
+// #include "ubrkimpl.h"
+
+// U_NAMESPACE_BEGIN
+
+// /*
+//  ******************************************************************
+//  */
+
+// LanguageBreakEngine::LanguageBreakEngine() {
+// }
+
+// LanguageBreakEngine::~LanguageBreakEngine() {
+// }
+
+// /*
+//  ******************************************************************
+//  */
+
+// LanguageBreakFactory::LanguageBreakFactory() {
+// }
+
+// LanguageBreakFactory::~LanguageBreakFactory() {
+// }
+
+// /*
+//  ******************************************************************
+//  */
+
+// UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
+//     (void)status;
+// }
+
+// UnhandledEngine::~UnhandledEngine() {
+//     delete fHandled;
+//     fHandled = nullptr;
+// }
+
+// UBool
+// UnhandledEngine::handles(UChar32 c) const {
+//     return fHandled && fHandled->contains(c);
+// }
+
+// int32_t
+// UnhandledEngine::findBreaks( UText *text,
+//                              int32_t /* startPos */,
+//                              int32_t endPos,
+//                              UVector32 &/*foundBreaks*/ ) const {
+//     UChar32 c = utext_current32(text);
+//     while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
+//         utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
+//         c = utext_current32(text);
+//     }
+//     return 0;
+// }
+
+// void
+// UnhandledEngine::handleCharacter(UChar32 c) {
+//     if (fHandled == nullptr) {
+//         fHandled = new UnicodeSet();
+//         if (fHandled == nullptr) {
+//             return;
+//         }
+//     }
+//     if (!fHandled->contains(c)) {
+//         UErrorCode status = U_ZERO_ERROR;
+//         // Apply the entire script of the character.
+//         int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
+//         fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
+//     }
+// }
+
+// /*
+//  ******************************************************************
+//  */
+
+// ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
+//     fEngines = 0;
+// }
+
+// ICULanguageBreakFactory::~ICULanguageBreakFactory() {
+//     if (fEngines != 0) {
+//         delete fEngines;
+//     }
+// }
+
+// U_NAMESPACE_END
+// U_CDECL_BEGIN
+// static void U_CALLCONV _deleteEngine(void *obj) {
+//     delete (const icu::LanguageBreakEngine *) obj;
+// }
+// U_CDECL_END
+// U_NAMESPACE_BEGIN
+
+// const LanguageBreakEngine *
+// ICULanguageBreakFactory::getEngineFor(UChar32 c) {
+//     const LanguageBreakEngine *lbe = NULL;
+//     UErrorCode  status = U_ZERO_ERROR;
+
+//     static UMutex gBreakEngineMutex;
+//     Mutex m(&gBreakEngineMutex);
+
+//     if (fEngines == NULL) {
+//         UStack  *engines = new UStack(_deleteEngine, NULL, status);
+//         if (U_FAILURE(status) || engines == NULL) {
+//             // Note: no way to return error code to caller.
+//             delete engines;
+//             return NULL;
+//         }
+//         fEngines = engines;
+//     } else {
+//         int32_t i = fEngines->size();
+//         while (--i >= 0) {
+//             lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
+//             if (lbe != NULL && lbe->handles(c)) {
+//                 return lbe;
+//             }
+//         }
+//     }
+
+//     // We didn't find an engine. Create one.
+//     lbe = loadEngineFor(c);
+//     if (lbe != NULL) {
+//         fEngines->push((void *)lbe, status);
+//     }
+//     return lbe;
+// }
+
+// const LanguageBreakEngine *
+// ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
+//     UErrorCode status = U_ZERO_ERROR;
+//     UScriptCode code = uscript_getScript(c, &status);
+//     if (U_SUCCESS(status)) {
+//         DictionaryMatcher *m = loadDictionaryMatcherFor(code);
+//         if (m != NULL) {
+//             const LanguageBreakEngine *engine = NULL;
+//             switch(code) {
+//             case USCRIPT_THAI:
+//                 engine = new ThaiBreakEngine(m, status);
+//                 break;
+//             case USCRIPT_LAO:
+//                 engine = new LaoBreakEngine(m, status);
+//                 break;
+//             case USCRIPT_MYANMAR:
+//                 engine = new BurmeseBreakEngine(m, status);
+//                 break;
+//             case USCRIPT_KHMER:
+//                 engine = new KhmerBreakEngine(m, status);
+//                 break;
+
+// #if !UCONFIG_NO_NORMALIZATION
+//                 // CJK not available w/o normalization
+//             case USCRIPT_HANGUL:
+//                 engine = new CjkBreakEngine(m, kKorean, status);
+//                 break;
+
+//             // use same BreakEngine and dictionary for both Chinese and Japanese
+//             case USCRIPT_HIRAGANA:
+//             case USCRIPT_KATAKANA:
+//             case USCRIPT_HAN:
+//                 engine = new CjkBreakEngine(m, kChineseJapanese, status);
+//                 break;
+// #if 0
+//             // TODO: Have to get some characters with script=common handled
+//             // by CjkBreakEngine (e.g. U+309B). Simply subjecting
+//             // them to CjkBreakEngine does not work. The engine has to
+//             // special-case them.
+//             case USCRIPT_COMMON:
+//             {
+//                 UBlockCode block = ublock_getCode(code);
+//                 if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
+//                    engine = new CjkBreakEngine(dict, kChineseJapanese, status);
+//                 break;
+//             }
+// #endif
+// #endif
+
+//             default:
+//                 break;
+//             }
+//             if (engine == NULL) {
+//                 delete m;
+//             }
+//             else if (U_FAILURE(status)) {
+//                 delete engine;
+//                 engine = NULL;
+//             }
+//             return engine;
+//         }
+//     }
+//     return NULL;
+// }
+
+// DictionaryMatcher *
+// ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
+//     UErrorCode status = U_ZERO_ERROR;
+//     // open root from brkitr tree.
+//     UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
+//     b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
+//     int32_t dictnlength = 0;
+//     const UChar *dictfname =
+//         ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
+//     if (U_FAILURE(status)) {
+//         ures_close(b);
+//         return NULL;
+//     }
+//     CharString dictnbuf;
+//     CharString ext;
+//     const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
+//     if (extStart != NULL) {
+//         int32_t len = (int32_t)(extStart - dictfname);
+//         ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
+//         dictnlength = len;
+//     }
+//     dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
+//     ures_close(b);
+
+//     UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
+//     if (U_SUCCESS(status)) {
+//         // build trie
+//         const uint8_t *data = (const uint8_t *)udata_getMemory(file);
+//         const int32_t *indexes = (const int32_t *)data;
+//         const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
+//         const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
+//         DictionaryMatcher *m = NULL;
+//         if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
+//             const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
+//             const char *characters = (const char *)(data + offset);
+//             m = new BytesDictionaryMatcher(characters, transform, file);
+//         }
+//         else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
+//             const UChar *characters = (const UChar *)(data + offset);
+//             m = new UCharsDictionaryMatcher(characters, file);
+//         }
+//         if (m == NULL) {
+//             // no matcher exists to take ownership - either we are an invalid
+//             // type or memory allocation failed
+//             udata_close(file);
+//         }
+//         return m;
+//     } else if (dictfname != NULL) {
+//         // we don't have a dictionary matcher.
+//         // returning NULL here will cause us to fail to find a dictionary break engine, as expected
+//         status = U_ZERO_ERROR;
+//         return NULL;
+//     }
+//     return NULL;
+// }
+
+// U_NAMESPACE_END
+
+// #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
--- a/external/duckdb/extension/icu/third_party/icu/common/brkeng.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/brkeng.h
@@ -0,0 +1,271 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ ************************************************************************************
+ * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
+ * All Rights Reserved.                                                             *
+ ************************************************************************************
+ */
+
+#ifndef BRKENG_H
+#define BRKENG_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/utext.h"
+#include "unicode/uscript.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UStack;
+class UVector32;
+class DictionaryMatcher;
+
+/*******************************************************************
+ * LanguageBreakEngine
+ */
+
+/**
+ * <p>LanguageBreakEngines implement language-specific knowledge for
+ * finding text boundaries within a run of characters belonging to a
+ * specific set. The boundaries will be of a specific kind, e.g. word,
+ * line, etc.</p>
+ *
+ * <p>LanguageBreakEngines should normally be implemented so as to
+ * be shared between threads without locking.</p>
+ */
+class LanguageBreakEngine : public UMemory {
+ public:
+
+  /**
+   * <p>Default constructor.</p>
+   *
+   */
+  LanguageBreakEngine();
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~LanguageBreakEngine();
+
+ /**
+  * <p>Indicate whether this engine handles a particular character for
+  * a particular kind of break. Pure virtual.</p>
+  *
+  * @param c A character which begins a run that the engine might handle
+  * @return TRUE if this engine handles the particular character; the kind
+  * of break is not a parameter of this method.
+  */
+  virtual UBool handles(UChar32 c) const = 0;
+
+ /**
+  * <p>Find any breaks within a run in the supplied text. Pure virtual.</p>
+  *
+  * @param text A UText representing the text. The
+  * iterator is left at the end of the run of characters which the engine
+  * is capable of handling.
+  * @param startPos The start of the run within the supplied text.
+  * @param endPos The end of the run within the supplied text.
+  * @param foundBreaks A UVector32 (vector of int32_t) to receive the breaks.
+  * @return The number of breaks found.
+  */
+  virtual int32_t findBreaks( UText *text,
+                              int32_t startPos,
+                              int32_t endPos,
+                              UVector32 &foundBreaks ) const = 0;
+
+};
+
+/*******************************************************************
+ * LanguageBreakFactory
+ */
+
+/**
+ * <p>LanguageBreakFactorys find and return a LanguageBreakEngine
+ * that can determine breaks for characters in a specific set, if
+ * such an object can be found.</p>
+ *
+ * <p>If a LanguageBreakFactory is to be shared between threads,
+ * appropriate synchronization must be used; there is none internal
+ * to the factory.</p>
+ *
+ * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
+ * normally be shared between threads without synchronization, unless
+ * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
+ *
+ * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
+ * it returns when it itself is deleted, unless the specific subclass of
+ * LanguageBreakFactory indicates otherwise. Naturally, the factory should
+ * not be deleted until the LanguageBreakEngines it has returned are no
+ * longer needed.</p>
+ */
+class LanguageBreakFactory : public UMemory {
+ public:
+
+  /**
+   * <p>Default constructor.</p>
+   *
+   */
+  LanguageBreakFactory();
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~LanguageBreakFactory();
+
+ /**
+  * <p>Find and return a LanguageBreakEngine that can find the desired
+  * kind of break for the set of characters to which the supplied
+  * character belongs. It is up to the set of available engines to
+  * determine what the sets of characters are. Pure virtual.</p>
+  *
+  * @param c A character that begins a run for which a LanguageBreakEngine is
+  * sought.
+  * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer).
+  */
+  virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
+
+};
+
+/*******************************************************************
+ * UnhandledEngine
+ */
+
+/**
+ * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
+ * handles characters that no other LanguageBreakEngine is available to
+ * handle. It is told the character and the type of break; at its
+ * discretion it may handle more than the specified character (e.g.,
+ * the entire script to which that character belongs).</p>
+ *
+ * <p>UnhandledEngines may not be shared between threads without
+ * external synchronization.</p>
+ */
+
+class UnhandledEngine : public LanguageBreakEngine {
+ private:
+
+    /**
+     * The sets of characters handled.
+     * @internal
+     */
+
+  UnicodeSet    *fHandled;
+
+ public:
+
+  /**
+   * <p>Constructor.</p>
+   * @param status Reference to an ICU error code.
+   */
+  UnhandledEngine(UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~UnhandledEngine();
+
+ /**
+  * <p>Indicate whether this engine handles a particular character for
+  * a particular kind of break.</p>
+  *
+  * @param c A character which begins a run that the engine might handle
+  * @return TRUE if this engine handles the particular character; the kind
+  * of break is not a parameter of this method.
+  */
+  virtual UBool handles(UChar32 c) const;
+
+ /**
+  * <p>Find any breaks within a run in the supplied text.</p>
+  *
+  * @param text A UText representing the text. The
+  * iterator is left at the end of the run of characters which the engine
+  * is capable of handling.
+  * @param startPos The start of the run within the supplied text.
+  * @param endPos The end of the run within the supplied text.
+  * @param foundBreaks A UVector32 to receive the breaks found, if any
+  * @return The number of breaks found.
+  */
+  virtual int32_t findBreaks( UText *text,
+                              int32_t startPos,
+                              int32_t endPos,
+                              UVector32 &foundBreaks ) const;
+
+ /**
+  * <p>Tell the engine to handle a particular character.</p>
+  *
+  * @param c A character which the engine should handle
+  */
+  virtual void handleCharacter(UChar32 c);
+
+};
+
+/*******************************************************************
+ * ICULanguageBreakFactory
+ */
+
+/**
+ * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
+ * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
+ * data in the ICU data file.</p>
+ */
+class ICULanguageBreakFactory : public LanguageBreakFactory {
+ private:
+
+    /**
+     * The stack of break engines created by this factory
+     * @internal
+     */
+
+  UStack    *fEngines;
+
+ public:
+
+  /**
+   * <p>Standard constructor.</p>
+   * @param status Reference to an ICU error code.
+   */
+  ICULanguageBreakFactory(UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~ICULanguageBreakFactory();
+
+ /**
+  * <p>Find and return a LanguageBreakEngine that can find the desired
+  * kind of break for the set of characters to which the supplied
+  * character belongs. It is up to the set of available engines to
+  * determine what the sets of characters are.</p>
+  *
+  * @param c A character that begins a run for which a LanguageBreakEngine is
+  * sought.
+  * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer).
+  */
+  virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
+
+protected:
+ /**
+  * <p>Create a LanguageBreakEngine for the set of characters to which
+  * the supplied character belongs.</p>
+  *
+  * @param c A character that begins a run for which a LanguageBreakEngine is
+  * sought.
+  * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer).
+  */
+  virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
+
+  /**
+   * <p>Create a DictionaryMatcher for the specified script.</p>
+   * @param script An ISO 15924 script code that identifies the dictionary to be
+   * created.
+   * @return A DictionaryMatcher with the desired characteristics, or NULL.
+   */
+  virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
+};
+
+U_NAMESPACE_END
+
+    /* BRKENG_H */
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/brkiter.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/brkiter.cpp
@@ -0,0 +1,508 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File brkiter.cpp
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/18/97    aliu        Converted from OpenClass.  Added DONE.
+*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
+*****************************************************************************************
+*/
+
+// *****************************************************************************
+// This file was generated from the java source file BreakIterator.java
+// *****************************************************************************
+
+#include "unicode/utypes.h"
+
+// #if !UCONFIG_NO_BREAK_ITERATION
+
+// #include "unicode/rbbi.h"
+// #include "unicode/brkiter.h"
+// #include "unicode/udata.h"
+// #include "unicode/ures.h"
+// #include "unicode/ustring.h"
+// #include "unicode/filteredbrk.h"
+// #include "ucln_cmn.h"
+// #include "cstring.h"
+// #include "umutex.h"
+// #include "servloc.h"
+// #include "locbased.h"
+// #include "uresimp.h"
+// #include "uassert.h"
+// #include "ubrkimpl.h"
+// #include "charstr.h"
+
+// // *****************************************************************************
+// // class BreakIterator
+// // This class implements methods for finding the location of boundaries in text.
+// // Instances of BreakIterator maintain a current position and scan over text
+// // returning the index of characters where boundaries occur.
+// // *****************************************************************************
+
+// U_NAMESPACE_BEGIN
+
+// // -------------------------------------
+
+// BreakIterator*
+// BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
+// {
+//     char fnbuff[256];
+//     char ext[4]={'\0'};
+//     CharString actualLocale;
+//     int32_t size;
+//     const UChar* brkfname = NULL;
+//     UResourceBundle brkRulesStack;
+//     UResourceBundle brkNameStack;
+//     UResourceBundle *brkRules = &brkRulesStack;
+//     UResourceBundle *brkName  = &brkNameStack;
+//     RuleBasedBreakIterator *result = NULL;
+
+//     if (U_FAILURE(status))
+//         return NULL;
+
+//     ures_initStackObject(brkRules);
+//     ures_initStackObject(brkName);
+
+//     // Get the locale
+//     UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
+
+//     // Get the "boundaries" array.
+//     if (U_SUCCESS(status)) {
+//         brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
+//         // Get the string object naming the rules file
+//         brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
+//         // Get the actual string
+//         brkfname = ures_getString(brkName, &size, &status);
+//         U_ASSERT((size_t)size<sizeof(fnbuff));
+//         if ((size_t)size>=sizeof(fnbuff)) {
+//             size=0;
+//             if (U_SUCCESS(status)) {
+//                 status = U_BUFFER_OVERFLOW_ERROR;
+//             }
+//         }
+
+//         // Use the string if we found it
+//         if (U_SUCCESS(status) && brkfname) {
+//             actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
+
+//             UChar* extStart=u_strchr(brkfname, 0x002e);
+//             int len = 0;
+//             if(extStart!=NULL){
+//                 len = (int)(extStart-brkfname);
+//                 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
+//                 u_UCharsToChars(brkfname, fnbuff, len);
+//             }
+//             fnbuff[len]=0; // nul terminate
+//         }
+//     }
+
+//     ures_close(brkRules);
+//     ures_close(brkName);
+
+//     UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
+//     if (U_FAILURE(status)) {
+//         ures_close(b);
+//         return NULL;
+//     }
+
+//     // Create a RuleBasedBreakIterator
+//     result = new RuleBasedBreakIterator(file, status);
+
+//     // If there is a result, set the valid locale and actual locale, and the kind
+//     if (U_SUCCESS(status) && result != NULL) {
+//         U_LOCALE_BASED(locBased, *(BreakIterator*)result);
+//         locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
+//                               actualLocale.data());
+//     }
+
+//     ures_close(b);
+
+//     if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
+//         delete result;
+//         return NULL;
+//     }
+
+//     if (result == NULL) {
+//         udata_close(file);
+//         if (U_SUCCESS(status)) {
+//             status = U_MEMORY_ALLOCATION_ERROR;
+//         }
+//     }
+
+//     return result;
+// }
+
+// // Creates a break iterator for word breaks.
+// BreakIterator* U_EXPORT2
+// BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
+// {
+//     return createInstance(key, UBRK_WORD, status);
+// }
+
+// // -------------------------------------
+
+// // Creates a break iterator  for line breaks.
+// BreakIterator* U_EXPORT2
+// BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
+// {
+//     return createInstance(key, UBRK_LINE, status);
+// }
+
+// // -------------------------------------
+
+// // Creates a break iterator  for character breaks.
+// BreakIterator* U_EXPORT2
+// BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
+// {
+//     return createInstance(key, UBRK_CHARACTER, status);
+// }
+
+// // -------------------------------------
+
+// // Creates a break iterator  for sentence breaks.
+// BreakIterator* U_EXPORT2
+// BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
+// {
+//     return createInstance(key, UBRK_SENTENCE, status);
+// }
+
+// // -------------------------------------
+
+// // Creates a break iterator for title casing breaks.
+// BreakIterator* U_EXPORT2
+// BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
+// {
+//     return createInstance(key, UBRK_TITLE, status);
+// }
+
+// // -------------------------------------
+
+// // Gets all the available locales that have localized text boundary data.
+// const Locale* U_EXPORT2
+// BreakIterator::getAvailableLocales(int32_t& count)
+// {
+//     return Locale::getAvailableLocales(count);
+// }
+
+// // ------------------------------------------
+// //
+// // Constructors, destructor and assignment operator
+// //
+// //-------------------------------------------
+
+// BreakIterator::BreakIterator()
+// {
+//     *validLocale = *actualLocale = 0;
+// }
+
+// BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
+//     uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
+//     uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+// }
+
+// BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
+//     if (this != &other) {
+//         uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
+//         uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+//     }
+//     return *this;
+// }
+
+// BreakIterator::~BreakIterator()
+// {
+// }
+
+// // ------------------------------------------
+// //
+// // Registration
+// //
+// //-------------------------------------------
+// #if !UCONFIG_NO_SERVICE
+
+// // -------------------------------------
+
+// class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
+// public:
+//     virtual ~ICUBreakIteratorFactory();
+// protected:
+//     virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
+//         return BreakIterator::makeInstance(loc, kind, status);
+//     }
+// };
+
+// ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
+
+// // -------------------------------------
+
+// class ICUBreakIteratorService : public ICULocaleService {
+// public:
+//     ICUBreakIteratorService()
+//         : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
+//     {
+//         UErrorCode status = U_ZERO_ERROR;
+//         registerFactory(new ICUBreakIteratorFactory(), status);
+//     }
+
+//     virtual ~ICUBreakIteratorService();
+
+//     virtual UObject* cloneInstance(UObject* instance) const {
+//         return ((BreakIterator*)instance)->clone();
+//     }
+
+//     virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
+//         LocaleKey& lkey = (LocaleKey&)key;
+//         int32_t kind = lkey.kind();
+//         Locale loc;
+//         lkey.currentLocale(loc);
+//         return BreakIterator::makeInstance(loc, kind, status);
+//     }
+
+//     virtual UBool isDefault() const {
+//         return countFactories() == 1;
+//     }
+// };
+
+// ICUBreakIteratorService::~ICUBreakIteratorService() {}
+
+// // -------------------------------------
+
+// // defined in ucln_cmn.h
+// U_NAMESPACE_END
+
+// static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER;
+// static icu::ICULocaleService* gService = NULL;
+
+
+
+// /**
+//  * Release all static memory held by breakiterator.
+//  */
+// U_CDECL_BEGIN
+// static UBool U_CALLCONV breakiterator_cleanup(void) {
+// #if !UCONFIG_NO_SERVICE
+//     if (gService) {
+//         delete gService;
+//         gService = NULL;
+//     }
+//     gInitOnceBrkiter.reset();
+// #endif
+//     return TRUE;
+// }
+// U_CDECL_END
+// U_NAMESPACE_BEGIN
+
+// static void U_CALLCONV
+// initService(void) {
+//     gService = new ICUBreakIteratorService();
+//     ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
+// }
+
+// static ICULocaleService*
+// getService(void)
+// {
+//     umtx_initOnce(gInitOnceBrkiter, &initService);
+//     return gService;
+// }
+
+
+// // -------------------------------------
+
+// static inline UBool
+// hasService(void)
+// {
+//     return !gInitOnceBrkiter.isReset() && getService() != NULL;
+// }
+
+// // -------------------------------------
+
+// URegistryKey U_EXPORT2
+// BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
+// {
+//     ICULocaleService *service = getService();
+//     if (service == NULL) {
+//         status = U_MEMORY_ALLOCATION_ERROR;
+//         return NULL;
+//     }
+//     return service->registerInstance(toAdopt, locale, kind, status);
+// }
+
+// // -------------------------------------
+
+// UBool U_EXPORT2
+// BreakIterator::unregister(URegistryKey key, UErrorCode& status)
+// {
+//     if (U_SUCCESS(status)) {
+//         if (hasService()) {
+//             return gService->unregister(key, status);
+//         }
+//         status = U_MEMORY_ALLOCATION_ERROR;
+//     }
+//     return FALSE;
+// }
+
+// // -------------------------------------
+
+// StringEnumeration* U_EXPORT2
+// BreakIterator::getAvailableLocales(void)
+// {
+//     ICULocaleService *service = getService();
+//     if (service == NULL) {
+//         return NULL;
+//     }
+//     return service->getAvailableLocales();
+// }
+// #endif /* UCONFIG_NO_SERVICE */
+
+// // -------------------------------------
+
+// BreakIterator*
+// BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
+// {
+//     if (U_FAILURE(status)) {
+//         return NULL;
+//     }
+
+// #if !UCONFIG_NO_SERVICE
+//     if (hasService()) {
+//         Locale actualLoc("");
+//         BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
+//         // TODO: The way the service code works in ICU 2.8 is that if
+//         // there is a real registered break iterator, the actualLoc
+//         // will be populated, but if the handleDefault path is taken
+//         // (because nothing is registered that can handle the
+//         // requested locale) then the actualLoc comes back empty.  In
+//         // that case, the returned object already has its actual/valid
+//         // locale data populated (by makeInstance, which is what
+//         // handleDefault calls), so we don't touch it.  YES, A COMMENT
+//         // THIS LONG is a sign of bad code -- so the action item is to
+//         // revisit this in ICU 3.0 and clean it up/fix it/remove it.
+//         if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
+//             U_LOCALE_BASED(locBased, *result);
+//             locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
+//         }
+//         return result;
+//     }
+//     else
+// #endif
+//     {
+//         return makeInstance(loc, kind, status);
+//     }
+// }
+
+// // -------------------------------------
+// enum { kKeyValueLenMax = 32 };
+
+// BreakIterator*
+// BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
+// {
+
+//     if (U_FAILURE(status)) {
+//         return NULL;
+//     }
+//     char lbType[kKeyValueLenMax];
+
+//     BreakIterator *result = NULL;
+//     switch (kind) {
+//     case UBRK_CHARACTER:
+//         result = BreakIterator::buildInstance(loc, "grapheme", status);
+//         break;
+//     case UBRK_WORD:
+//         result = BreakIterator::buildInstance(loc, "word", status);
+//         break;
+//     case UBRK_LINE:
+//         uprv_strcpy(lbType, "line");
+//         {
+//             char lbKeyValue[kKeyValueLenMax] = {0};
+//             UErrorCode kvStatus = U_ZERO_ERROR;
+//             int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
+//             if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
+//                 uprv_strcat(lbType, "_");
+//                 uprv_strcat(lbType, lbKeyValue);
+//             }
+//         }
+//         result = BreakIterator::buildInstance(loc, lbType, status);
+//         break;
+//     case UBRK_SENTENCE:
+//         result = BreakIterator::buildInstance(loc, "sentence", status);
+// #if !UCONFIG_NO_FILTERED_BREAK_ITERATION
+//         {
+//             char ssKeyValue[kKeyValueLenMax] = {0};
+//             UErrorCode kvStatus = U_ZERO_ERROR;
+//             int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
+//             if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
+//                 FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
+//                 if (U_SUCCESS(kvStatus)) {
+//                     result = fbiBuilder->build(result, status);
+//                     delete fbiBuilder;
+//                 }
+//             }
+//         }
+// #endif
+//         break;
+//     case UBRK_TITLE:
+//         result = BreakIterator::buildInstance(loc, "title", status);
+//         break;
+//     default:
+//         status = U_ILLEGAL_ARGUMENT_ERROR;
+//     }
+
+//     if (U_FAILURE(status)) {
+//         return NULL;
+//     }
+
+//     return result;
+// }
+
+// Locale
+// BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
+//     U_LOCALE_BASED(locBased, *this);
+//     return locBased.getLocale(type, status);
+// }
+
+// const char *
+// BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
+//     U_LOCALE_BASED(locBased, *this);
+//     return locBased.getLocaleID(type, status);
+// }
+
+
+// // This implementation of getRuleStatus is a do-nothing stub, here to
+// // provide a default implementation for any derived BreakIterator classes that
+// // do not implement it themselves.
+// int32_t BreakIterator::getRuleStatus() const {
+//     return 0;
+// }
+
+// // This implementation of getRuleStatusVec is a do-nothing stub, here to
+// // provide a default implementation for any derived BreakIterator classes that
+// // do not implement it themselves.
+// int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
+//     if (U_FAILURE(status)) {
+//         return 0;
+//     }
+//     if (capacity < 1) {
+//         status = U_BUFFER_OVERFLOW_ERROR;
+//         return 1;
+//     }
+//     *fillInVec = 0;
+//     return 1;
+// }
+
+// BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
+//   U_LOCALE_BASED(locBased, (*this));
+//   locBased.setLocaleIDs(valid, actual);
+// }
+
+// U_NAMESPACE_END
+
+// #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+//eof
--- a/external/duckdb/extension/icu/third_party/icu/common/bytesinkutil.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytesinkutil.cpp
@@ -0,0 +1,157 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.cpp
+// created: 2017sep14 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "unicode/stringoptions.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Converts the UTF-16 text (s16, s16Length) to UTF-8 and appends it to the
+ * sink, chunk by chunk, then optionally records the replacement in edits.
+ *
+ * @param length    Source length reported to edits->addReplace().
+ * @param s16       UTF-16 text to convert; read with the *_UNSAFE macros,
+ *                  so it is not validated here.
+ * @param s16Length Number of UTF-16 code units in s16.
+ * @param sink      Receives the UTF-8 bytes.
+ * @param edits     If non-null, gets addReplace(length, total UTF-8 bytes).
+ * @param errorCode In/out error code; set to U_INDEX_OUTOFBOUNDS_ERROR when
+ *                  the UTF-8 byte count would exceed INT32_MAX.
+ * @return FALSE if errorCode was already a failure or the byte count
+ *         overflowed, otherwise TRUE.
+ */
+UBool
+ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
+                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    char stackBuffer[200];
+    int32_t totalBytes = 0;
+    int32_t srcIndex = 0;
+    while (srcIndex < s16Length) {
+        // Size hint for the sink: up to 3 UTF-8 bytes per remaining UTF-16
+        // code unit, scaled down where the multiplication would overflow.
+        const int32_t remaining = s16Length - srcIndex;
+        int32_t capacityHint;
+        if (remaining >= (INT32_MAX / 2)) {
+            capacityHint = INT32_MAX;
+        } else if (remaining >= (INT32_MAX / 3)) {
+            capacityHint = remaining * 2;
+        } else {
+            capacityHint = remaining * 3;
+        }
+
+        int32_t usable;
+        char *out = sink.GetAppendBuffer(U8_MAX_LENGTH, capacityHint,
+                                         stackBuffer, UPRV_LENGTHOF(stackBuffer), &usable);
+        // Keep room so that a code point started below the limit still fits.
+        usable -= U8_MAX_LENGTH - 1;
+
+        int32_t written = 0;
+        while (srcIndex < s16Length && written < usable) {
+            UChar32 cp;
+            U16_NEXT_UNSAFE(s16, srcIndex, cp);
+            U8_APPEND_UNSAFE(out, written, cp);
+        }
+
+        // Refuse to let the running byte total wrap around.
+        if (written > (INT32_MAX - totalBytes)) {
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return FALSE;
+        }
+        sink.Append(out, written);
+        totalBytes += written;
+    }
+    if (edits != nullptr) {
+        edits->addReplace(length, totalBytes);
+    }
+    return TRUE;
+}
+
+/**
+ * Pointer-range overload: the source is the byte range [s, limit[.
+ * Checks that the range length fits into int32_t and then delegates to the
+ * length-based appendChange().
+ *
+ * @return FALSE if errorCode was already a failure or the range is longer
+ *         than INT32_MAX (errorCode is then U_INDEX_OUTOFBOUNDS_ERROR);
+ *         otherwise the result of the length-based overload.
+ */
+UBool
+ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
+                           const char16_t *s16, int32_t s16Length,
+                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const auto srcLength = limit - s;
+    if (srcLength > INT32_MAX) {
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    return appendChange((int32_t)srcLength, s16, s16Length, sink, edits, errorCode);
+}
+
+/**
+ * Encodes code point c as UTF-8 (unchecked, via U8_APPEND_UNSAFE) and
+ * appends it to the sink.
+ *
+ * @param length Source length reported to edits->addReplace().
+ * @param c      Code point to encode; not validated here.
+ * @param sink   Receives the UTF-8 bytes.
+ * @param edits  If non-null, gets addReplace(length, number of UTF-8 bytes)
+ *               before the bytes are appended.
+ */
+void
+ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
+    char utf8[U8_MAX_LENGTH];
+    int32_t utf8Length = 0;
+    U8_APPEND_UNSAFE(utf8, utf8Length, c);
+    if (edits) {
+        edits->addReplace(length, utf8Length);
+    }
+    sink.Append(utf8, utf8Length);
+}
+
+// See unicode/utf8.h U8_APPEND_UNSAFE().
+// Lead byte of a two-byte UTF-8 sequence: 0xC0 combined with the bits of c above the low six.
+inline uint8_t bytesinkutil_getTwoByteLead(UChar32 c) {
+    return (uint8_t)(0xc0 | (c >> 6));
+}
+// Trail byte of a two-byte UTF-8 sequence: 0x80 combined with the low six bits of c.
+inline uint8_t bytesinkutil_getTwoByteTrail(UChar32 c) {
+    return (uint8_t)(0x80 | (c & 0x3f));
+}
+
+/**
+ * Appends code point c to the sink as a two-byte UTF-8 sequence.
+ * The caller must pass U+0080..U+07FF; this is only checked by U_ASSERT.
+ */
+void
+ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
+    U_ASSERT(c >= 0x80 && c <= 0x7ff);  // range of 2-byte UTF-8
+    char encoded[2];
+    encoded[0] = (char)bytesinkutil_getTwoByteLead(c);
+    encoded[1] = (char)bytesinkutil_getTwoByteTrail(c);
+    sink.Append(encoded, 2);
+}
+
+/**
+ * Handles an unchanged, non-empty run of source bytes: records it in edits
+ * (if non-null) and copies it to the sink unless U_OMIT_UNCHANGED_TEXT is
+ * set in options.
+ *
+ * @param s       Start of the unchanged bytes.
+ * @param length  Number of bytes; must be > 0 (checked by U_ASSERT only).
+ * @param sink    Receives the bytes when they are not omitted.
+ * @param options Option bits; only U_OMIT_UNCHANGED_TEXT is examined here.
+ * @param edits   If non-null, gets addUnchanged(length).
+ */
+void
+ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+                                      ByteSink &sink, uint32_t options, Edits *edits) {
+    U_ASSERT(length > 0);
+    if (edits != nullptr) {
+        edits->addUnchanged(length);
+    }
+    const bool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
+    if (omitUnchanged) {
+        return;
+    }
+    sink.Append(reinterpret_cast<const char *>(s), length);
+}
+
+/**
+ * Pointer-range overload: treats the bytes at [s, limit[ as unchanged text.
+ * An empty (or reversed) range is accepted and does nothing.
+ *
+ * @return FALSE if errorCode was already a failure or the range is longer
+ *         than INT32_MAX (errorCode is then U_INDEX_OUTOFBOUNDS_ERROR);
+ *         TRUE otherwise.
+ */
+UBool
+ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                              ByteSink &sink, uint32_t options, Edits *edits,
+                              UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const auto span = limit - s;
+    if (span > INT32_MAX) {
+        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    const int32_t byteCount = (int32_t)span;
+    if (byteCount <= 0) {
+        return TRUE;
+    }
+    appendNonEmptyUnchanged(s, byteCount, sink, options, edits);
+    return TRUE;
+}
+
+// Binds this sink to the CharString that dest points to.
+// dest is dereferenced without a null check, so it must not be null.
+CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
+}
+
+// Defaulted destructor; no cleanup code is written here.
+CharStringByteSink::~CharStringByteSink() = default;
+
+/**
+ * Appends n bytes to the bound CharString.
+ * The ByteSink interface has no error channel here, so a failure reported
+ * by CharString::append() is deliberately dropped.
+ */
+void
+CharStringByteSink::Append(const char* bytes, int32_t n) {
+    UErrorCode ignoredStatus = U_ZERO_ERROR;  // result intentionally not inspected
+    dest_.append(bytes, n, ignoredStatus);
+}
+
+/**
+ * Returns a buffer for the caller to write into before calling Append().
+ *
+ * Asks the bound CharString for an append buffer first; if that reports an
+ * error, falls back to the caller-supplied scratch buffer.
+ *
+ * @param min_capacity          Required minimum size; must be >= 1.
+ * @param desired_capacity_hint Size hint forwarded to the CharString.
+ * @param scratch               Caller-provided fallback buffer.
+ * @param scratch_capacity      Size of scratch; must be >= min_capacity.
+ * @param result_capacity       Receives the capacity of the returned buffer,
+ *                              or 0 when the arguments are rejected.
+ * @return The buffer to write into, or nullptr when min_capacity < 1 or
+ *         scratch_capacity < min_capacity.
+ */
+char*
+CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
+                                    int32_t desired_capacity_hint,
+                                    char* scratch,
+                                    int32_t scratch_capacity,
+                                    int32_t* result_capacity) {
+    const bool argsValid = (min_capacity >= 1) && (scratch_capacity >= min_capacity);
+    if (!argsValid) {
+        *result_capacity = 0;
+        return nullptr;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    char* appendBuffer = dest_.getAppendBuffer(
+            min_capacity,
+            desired_capacity_hint,
+            *result_capacity,
+            status);
+    if (!U_SUCCESS(status)) {
+        // The CharString could not provide a buffer: hand back the scratch space.
+        *result_capacity = scratch_capacity;
+        return scratch;
+    }
+    return appendBuffer;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bytesinkutil.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytesinkutil.h
@@ -0,0 +1,85 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// bytesinkutil.h
+// created: 2017sep14 Markus W. Scherer
+
+#pragma once
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/edits.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class CharString;
+class Edits;
+
+/**
+ * Static-only helpers that append UTF-8 output to a ByteSink and, where an
+ * Edits pointer is supplied and non-null, record the change in it.
+ */
+class U_COMMON_API ByteSinkUtil {
+public:
+    ByteSinkUtil() = delete;  // not instantiable: every member is static
+
+    /**
+     * (length) source bytes were mapped to the valid UTF-16 text
+     * (s16, s16Length), which is appended to the sink as UTF-8.
+     */
+    static UBool appendChange(int32_t length,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /**
+     * Same as above, with the source given as the byte range [s, limit[.
+     */
+    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
+                              const char16_t *s16, int32_t s16Length,
+                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);
+
+    /** (length) source bytes were mapped/changed to the valid code point c. */
+    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
+
+    /**
+     * The few source bytes at [src, nextSrc[ were mapped/changed to the
+     * valid code point c.
+     */
+    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
+                                       ByteSink &sink, Edits *edits = nullptr) {
+        const int32_t srcLength = (int32_t)(nextSrc - src);
+        appendCodePoint(srcLength, c, sink, edits);
+    }
+
+    /** Appends the two-byte UTF-8 form of c (U+0080..U+07FF). */
+    static void appendTwoBytes(UChar32 c, ByteSink &sink);
+
+    /**
+     * Handles (s, length) as unchanged text. Does nothing for length <= 0.
+     * @return FALSE if errorCode already indicates a failure, else TRUE.
+     */
+    static UBool appendUnchanged(const uint8_t *s, int32_t length,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) {
+            return FALSE;
+        }
+        if (length <= 0) {
+            return TRUE;
+        }
+        appendNonEmptyUnchanged(s, length, sink, options, edits);
+        return TRUE;
+    }
+
+    /** Same as above, with the text given as the byte range [s, limit[. */
+    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
+                                 ByteSink &sink, uint32_t options, Edits *edits,
+                                 UErrorCode &errorCode);
+
+private:
+    /** Worker for appendUnchanged(); callers guarantee length > 0. */
+    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+                                        ByteSink &sink, uint32_t options, Edits *edits);
+};
+
+/**
+ * ByteSink subclass that is bound to a CharString, which it stores by
+ * reference (dest_). Not default-constructible, copyable, or assignable.
+ */
+class U_COMMON_API CharStringByteSink : public ByteSink {
+public:
+    /**
+     * @param dest The CharString to bind to. It is stored as a reference, so
+     *             it must be non-null and must outlive this sink.
+     */
+    CharStringByteSink(CharString* dest);
+    ~CharStringByteSink() override;
+
+    // A sink without a destination makes no sense, and copying would alias
+    // the same destination; all three are disabled.
+    CharStringByteSink() = delete;
+    CharStringByteSink(const CharStringByteSink&) = delete;
+    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
+
+    /** Overrides ByteSink::Append(); takes n bytes starting at bytes. */
+    void Append(const char* bytes, int32_t n) override;
+
+    /**
+     * Overrides ByteSink::GetAppendBuffer().
+     * @param result_capacity Receives the capacity of the returned buffer.
+     */
+    char* GetAppendBuffer(int32_t min_capacity,
+                          int32_t desired_capacity_hint,
+                          char* scratch,
+                          int32_t scratch_capacity,
+                          int32_t* result_capacity) override;
+
+private:
+    /** The bound destination; held by reference, not as a copy. */
+    CharString& dest_;
+};
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bytestream.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytestream.cpp
@@ -0,0 +1,85 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2011, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line destructor with an empty body.
+ByteSink::~ByteSink() {}
+
+// Default implementation: always hands back the caller's scratch buffer.
+// Returns NULL with *result_capacity == 0 when min_capacity < 1 or the
+// scratch buffer is smaller than min_capacity; the capacity hint is unused.
+char* ByteSink::GetAppendBuffer(int32_t min_capacity,
+                                int32_t /*desired_capacity_hint*/,
+                                char* scratch, int32_t scratch_capacity,
+                                int32_t* result_capacity) {
+  const bool scratchIsUsable =
+      (min_capacity >= 1) && (scratch_capacity >= min_capacity);
+  *result_capacity = scratchIsUsable ? scratch_capacity : 0;
+  return scratchIsUsable ? scratch : NULL;
+}
+
+// Default implementation does nothing.
+void ByteSink::Flush() {}
+
+// Wraps the caller's buffer (outbuf, capacity). A negative capacity is
+// treated as 0; all counters start at zero and the overflow flag is clear.
+CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
+    : outbuf_(outbuf),
+      capacity_(capacity > 0 ? capacity : 0),
+      size_(0),
+      appended_(0),
+      overflowed_(FALSE) {}
+
+// Empty destructor body; nothing is released here.
+CheckedArrayByteSink::~CheckedArrayByteSink() {}
+
+// Clears the counters and the overflow flag; outbuf_ and capacity_ are left
+// as they are. Returns *this.
+CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
+  overflowed_ = FALSE;
+  appended_ = 0;
+  size_ = 0;
+  return *this;
+}
+
+// Appends up to n bytes to the output array.
+//  - n <= 0 is a no-op.
+//  - appended_ counts every byte requested, saturating at INT32_MAX (in
+//    which case nothing is copied and overflowed_ is set).
+//  - Only as many bytes as still fit are copied; any truncation sets
+//    overflowed_.
+//  - The copy is skipped when bytes already points at the write position.
+void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
+  if (n <= 0) {
+    return;
+  }
+  const int32_t headroom = INT32_MAX - appended_;
+  if (headroom < n) {
+    // TODO: Report as integer overflow, not merely buffer overflow.
+    overflowed_ = TRUE;
+    appended_ = INT32_MAX;
+    return;
+  }
+  appended_ += n;
+
+  const int32_t room = capacity_ - size_;
+  int32_t toCopy = n;
+  if (room < toCopy) {
+    toCopy = room;
+    overflowed_ = TRUE;
+  }
+  if (toCopy > 0) {
+    char* target = outbuf_ + size_;
+    if (bytes != target) {
+      uprv_memcpy(target, bytes, toCopy);
+    }
+  }
+  size_ += toCopy;
+}
+
+// Returns the unused tail of the output array when it holds at least
+// min_capacity bytes, otherwise the caller's scratch buffer.
+// Returns NULL with *result_capacity == 0 when min_capacity < 1 or the
+// scratch buffer is smaller than min_capacity; the capacity hint is unused.
+char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
+                                            int32_t /*desired_capacity_hint*/,
+                                            char* scratch,
+                                            int32_t scratch_capacity,
+                                            int32_t* result_capacity) {
+  const bool argsValid =
+      (min_capacity >= 1) && (scratch_capacity >= min_capacity);
+  if (!argsValid) {
+    *result_capacity = 0;
+    return NULL;
+  }
+  const int32_t room = capacity_ - size_;
+  if (room < min_capacity) {
+    // Not enough space left in the array: let the caller use its scratch.
+    *result_capacity = scratch_capacity;
+    return scratch;
+  }
+  *result_capacity = room;
+  return outbuf_ + size_;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bytestrie.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytestrie.cpp
@@ -0,0 +1,441 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytestrie.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/bytestrie.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+#include "uassert.h"
+
+U_NAMESPACE_BEGIN
+
+BytesTrie::~BytesTrie() {
+    uprv_free(ownedArray_);  // frees ownedArray_; NOTE(review): presumably NULL when the trie only aliases caller-owned bytes — confirm in bytestrie.h
+}
+
+// Decodes a trie value; leadByte is the lead byte already shifted right by 1.
+int32_t
+BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
+    // The lead byte selects the width; wider values continue big-endian at pos.
+    if(leadByte<kMinTwoByteValueLead) {
+        return leadByte-kMinOneByteValueLead;
+    }
+    if(leadByte<kMinThreeByteValueLead) {
+        return ((leadByte-kMinTwoByteValueLead)<<8)|pos[0];
+    }
+    if(leadByte<kFourByteValueLead) {
+        return ((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
+    }
+    return leadByte==kFourByteValueLead ?
+            (pos[0]<<16)|(pos[1]<<8)|pos[2] :
+            (pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+}
+
+const uint8_t *
+BytesTrie::jumpByDelta(const uint8_t *pos) {
+    // The lead byte picks the width of the big-endian delta; the jump is
+    // relative to the first byte after the encoded delta.
+    const int32_t lead=*pos++;
+    if(lead<kMinTwoByteDeltaLead) {
+        return pos+lead;  // one-byte form: the lead byte is the delta itself
+    }
+    if(lead<kMinThreeByteDeltaLead) {
+        return pos+1+(((lead-kMinTwoByteDeltaLead)<<8)|pos[0]);
+    }
+    if(lead<kFourByteDeltaLead) {
+        return pos+2+(((lead-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1]);
+    }
+    if(lead==kFourByteDeltaLead) {
+        return pos+3+((pos[0]<<16)|(pos[1]<<8)|pos[2]);
+    }
+    return pos+4+((pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]);
+}
+
+UStringTrieResult
+BytesTrie::current() const {
+    if(pos_==NULL) {
+        return USTRINGTRIE_NO_MATCH;  // there is no current position
+    }
+    if(remainingMatchLength_>=0) {  // linear-match bytes are still pending: no value here
+        return USTRINGTRIE_NO_VALUE;
+    }
+    const int32_t node=*pos_;
+    return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+}
+
+UStringTrieResult
+BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {  // Follows the branch edge for inByte; updates pos_ on a match, calls stop() otherwise.
+    // Branch according to the current byte.
+    if(length==0) {
+        length=*pos++;  // a zero lead value means the length is stored in the following byte
+    }
+    ++length;  // the stored value is one less than the number of bytes to select from
+    // The length of the branch is the number of bytes to select from.
+    // The data structure encodes a binary search.
+    while(length>kMaxBranchLinearSubNodeLength) {
+        if(inByte<*pos++) {  // compare with the split byte
+            length>>=1;
+            pos=jumpByDelta(pos);  // smaller input: the lower half is reached through the encoded delta
+        } else {
+            length=length-(length>>1);
+            pos=skipDelta(pos);  // otherwise the upper half follows directly after the delta
+        }
+    }
+    // Drop down to linear search for the last few bytes.
+    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
+    // and divides length by 2.
+    do {
+        if(inByte==*pos++) {  // matched a comparison byte; its value node follows
+            UStringTrieResult result;
+            int32_t node=*pos;
+            U_ASSERT(node>=kMinValueLead);
+            if(node&kValueIsFinal) {
+                // Leave the final value for getValue() to read.
+                result=USTRINGTRIE_FINAL_VALUE;
+            } else {
+                // Use the non-final value as the jump delta.
+                ++pos;
+                // int32_t delta=readValue(pos, node>>1);
+                node>>=1;  // drop the final-flag bit to get the value lead byte
+                int32_t delta;
+                if(node<kMinTwoByteValueLead) {
+                    delta=node-kMinOneByteValueLead;
+                } else if(node<kMinThreeByteValueLead) {
+                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
+                } else if(node<kFourByteValueLead) {
+                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
+                    pos+=2;
+                } else if(node==kFourByteValueLead) {
+                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
+                    pos+=3;
+                } else {
+                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+                    pos+=4;
+                }
+                // end readValue()
+                pos+=delta;  // the delta is relative to the byte after the encoded value
+                node=*pos;
+                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+            }
+            pos_=pos;  // remember the new state
+            return result;
+        }
+        --length;
+        pos=skipValue(pos);  // no match: step over this entry's value to the next comparison byte
+    } while(length>1);
+    if(inByte==*pos++) {  // last comparison byte: no value is read here, the matched node follows directly
+        pos_=pos;
+        int32_t node=*pos;
+        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+    } else {
+        stop();  // inByte matches none of the branch bytes
+        return USTRINGTRIE_NO_MATCH;
+    }
+}
+
+UStringTrieResult
+BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {  // Matches inByte against the node at pos (not inside a pending linear match).
+    for(;;) {
+        int32_t node=*pos++;
+        if(node<kMinLinearMatch) {  // branch node: node carries the branch length
+            return branchNext(pos, node, inByte);
+        } else if(node<kMinValueLead) {  // linear-match node
+            // Match the first of length+1 bytes.
+            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
+            if(inByte==*pos++) {
+                remainingMatchLength_=--length;  // becomes negative once the whole sequence has matched
+                pos_=pos;
+                return (length<0 && (node=*pos)>=kMinValueLead) ?
+                        valueResult(node) : USTRINGTRIE_NO_VALUE;
+            } else {
+                // No match.
+                break;
+            }
+        } else if(node&kValueIsFinal) {  // final value node
+            // No further matching bytes.
+            break;
+        } else {
+            // Skip intermediate value.
+            pos=skipValue(pos, node);
+            // The next node must not also be a value node.
+            U_ASSERT(*pos<kMinValueLead);
+        }
+    }
+    stop();  // reached via break: the input byte cannot be matched
+    return USTRINGTRIE_NO_MATCH;
+}
+
+UStringTrieResult
+BytesTrie::next(int32_t inByte) {
+    const uint8_t *cursor=pos_;
+    if(cursor==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    // Negative input is shifted up by 0x100 before it is compared with trie bytes.
+    inByte= inByte<0 ? inByte+0x100 : inByte;
+    const int32_t pending=remainingMatchLength_;  // Actual remaining match length minus 1.
+    if(pending<0) {
+        // Not inside a linear-match node: interpret the node at the cursor.
+        return nextImpl(cursor, inByte);
+    }
+    // Remaining part of a linear-match node: the input must equal its next byte.
+    if(inByte!=*cursor) {
+        stop();
+        return USTRINGTRIE_NO_MATCH;
+    }
+    ++cursor;
+    remainingMatchLength_=pending-1;
+    pos_=cursor;
+    if(pending>0) { return USTRINGTRIE_NO_VALUE; }  // more match bytes are still pending
+    const int32_t node=*cursor;
+    return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
+}
+
+UStringTrieResult
+BytesTrie::next(const char *s, int32_t sLength) {  // Matches a whole byte string from the current state; sLength<0 means s is NUL-terminated.
+    if(sLength<0 ? *s==0 : sLength==0) {
+        // Empty input.
+        return current();
+    }
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        return USTRINGTRIE_NO_MATCH;
+    }
+    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
+    for(;;) {
+        // Fetch the next input byte, if there is one.
+        // Continue a linear-match node without rechecking sLength<0.
+        // Input bytes are read as uint8_t so that 0x80..0xff compare equal to
+        // the unsigned trie bytes even where plain char is signed.
+        int32_t inByte;
+        if(sLength<0) {
+            for(;;) {
+                if((inByte=(uint8_t)*s++)==0) {  // terminating NUL: end of input
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : USTRINGTRIE_NO_VALUE;
+                }
+                if(length<0) {  // the linear match is finished: handle inByte in the node loop below
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        } else {
+            for(;;) {
+                if(sLength==0) {  // end of input
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : USTRINGTRIE_NO_VALUE;
+                }
+                inByte=(uint8_t)*s++;
+                --sLength;
+                if(length<0) {  // the linear match is finished: handle inByte in the node loop below
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        }
+        for(;;) {
+            int32_t node=*pos++;
+            if(node<kMinLinearMatch) {  // branch node
+                UStringTrieResult result=branchNext(pos, node, inByte);
+                if(result==USTRINGTRIE_NO_MATCH) {
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                // Fetch the next input byte, if there is one.
+                if(sLength<0) {
+                    if((inByte=(uint8_t)*s++)==0) {
+                        return result;
+                    }
+                } else {
+                    if(sLength==0) {
+                        return result;
+                    }
+                    inByte=(uint8_t)*s++;
+                    --sLength;
+                }
+                if(result==USTRINGTRIE_FINAL_VALUE) {
+                    // No further matching bytes.
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
+            } else if(node<kMinValueLead) {  // linear-match node
+                // Match length+1 bytes.
+                length=node-kMinLinearMatch;  // Actual match length minus 1.
+                if(inByte!=*pos) {
+                    stop();
+                    return USTRINGTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+                break;  // back to the outer loop, which matches the rest of this node
+            } else if(node&kValueIsFinal) {
+                // No further matching bytes.
+                stop();
+                return USTRINGTRIE_NO_MATCH;
+            } else {
+                // Skip intermediate value.
+                pos=skipValue(pos, node);
+                // The next node must not also be a value node.
+                U_ASSERT(*pos<kMinValueLead);
+            }
+        }
+    }
+}
+
+const uint8_t *
+BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,  // Walks every edge of a branch; returns NULL as soon as two different values are seen.
+                                     UBool haveUniqueValue, int32_t &uniqueValue) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison byte
+        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {  // recurse into the half reached through the delta
+            return NULL;
+        }
+        length=length-(length>>1);  // continue with the other half, which follows the delta
+        pos=skipDelta(pos);
+    }
+    do {
+        ++pos;  // ignore a comparison byte
+        // handle its value
+        int32_t node=*pos++;
+        UBool isFinal=(UBool)(node&kValueIsFinal);
+        int32_t value=readValue(pos, node>>1);
+        pos=skipValue(pos, node);
+        if(isFinal) {
+            if(haveUniqueValue) {
+                if(value!=uniqueValue) {
+                    return NULL;  // a second, different value: not unique
+                }
+            } else {
+                uniqueValue=value;  // first value seen so far
+                haveUniqueValue=TRUE;
+            }
+        } else {
+            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {  // a non-final value is a jump delta to a sub-node
+                return NULL;
+            }
+            haveUniqueValue=TRUE;
+        }
+    } while(--length>1);
+    return pos+1;  // ignore the last comparison byte
+}
+
+UBool
+BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {  // TRUE if every value reachable from pos equals uniqueValue (set from the first value found).
+    for(;;) {
+        int32_t node=*pos++;
+        if(node<kMinLinearMatch) {  // branch node
+            if(node==0) {
+                node=*pos++;  // lead byte 0: the length is stored in the next byte
+            }
+            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
+            if(pos==NULL) {
+                return FALSE;
+            }
+            haveUniqueValue=TRUE;  // the branch walk succeeded, so uniqueValue is set
+        } else if(node<kMinValueLead) {
+            // linear-match node
+            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
+        } else {  // value node
+            UBool isFinal=(UBool)(node&kValueIsFinal);
+            int32_t value=readValue(pos, node>>1);
+            if(haveUniqueValue) {
+                if(value!=uniqueValue) {
+                    return FALSE;  // a second, different value
+                }
+            } else {
+                uniqueValue=value;
+                haveUniqueValue=TRUE;
+            }
+            if(isFinal) {
+                return TRUE;  // nothing follows a final value
+            }
+            pos=skipValue(pos, node);  // intermediate value: keep walking the node behind it
+        }
+    }
+}
+
+int32_t
+BytesTrie::getNextBytes(ByteSink &out) const {  // Appends to out each byte that can follow the current state; returns how many.
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        return 0;  // no current position, so nothing can follow
+    }
+    if(remainingMatchLength_>=0) {
+        append(out, *pos);  // Next byte of a pending linear-match node.
+        return 1;
+    }
+    int32_t node=*pos++;
+    if(node>=kMinValueLead) {  // positioned on a value node
+        if(node&kValueIsFinal) {
+            return 0;  // a final value has no continuation
+        } else {
+            pos=skipValue(pos, node);  // step over the intermediate value to the node behind it
+            node=*pos++;
+            U_ASSERT(node<kMinValueLead);
+        }
+    }
+    if(node<kMinLinearMatch) {  // branch node
+        if(node==0) {
+            node=*pos++;  // lead byte 0: the length is stored in the next byte
+        }
+        getNextBranchBytes(pos, ++node, out);  // ++node turns the stored length into the number of branch bytes
+        return node;
+    } else {
+        // First byte of the linear-match node.
+        append(out, *pos);
+        return 1;
+    }
+}
+
+void
+BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {  // Appends the comparison bytes of all `length` branch edges to out.
+    while(length>kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison byte
+        getNextBranchBytes(jumpByDelta(pos), length>>1, out);  // recurse into the half reached through the delta
+        length=length-(length>>1);  // then continue with the half that follows the delta
+        pos=skipDelta(pos);
+    }
+    do {
+        append(out, *pos++);  // emit this edge's comparison byte
+        pos=skipValue(pos);  // step over its value to the next comparison byte
+    } while(--length>1);
+    append(out, *pos);  // the last comparison byte; no value is skipped after it
+}
+
+void
+BytesTrie::append(ByteSink &out, int c) {
+    const char oneByte=(char)c;  // narrow to the single byte that is emitted
+    out.Append(&oneByte, 1);
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp
@@ -0,0 +1,504 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytestriebuilder.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/bytestriebuilder.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "uhash.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Note: This builder implementation stores (bytes, value) pairs with full copies
+ * of the byte sequences, until the BytesTrie is built.
+ * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
+ */
+
+class BytesTrieElement : public UMemory {
+public:
+    // No user-declared constructor: a new element's fields are uninitialized
+    // until setTo() is called.
+
+    // Appends s with a length prefix to strings and records its offset and val.
+    void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode);
+
+    // Returns a view of this element's bytes inside the shared strings buffer.
+    StringPiece getString(const CharString &strings) const {
+        return StringPiece(data(strings), getStringLength(strings));
+    }
+
+    // Returns the stored length: one length byte when stringOffset is
+    // non-negative, otherwise two big-endian length bytes at ~stringOffset.
+    int32_t getStringLength(const CharString &strings) const {
+        if(stringOffset>=0) {
+            return (uint8_t)strings[stringOffset];
+        }
+        const int32_t at=~stringOffset;
+        const int32_t high=(uint8_t)strings[at];
+        const int32_t low=(uint8_t)strings[at+1];
+        return (high<<8)|low;
+    }
+
+    // Returns the byte at index within this element's string.
+    char charAt(int32_t index, const CharString &strings) const {
+        return data(strings)[index];
+    }
+
+    // Returns the value stored by setTo().
+    int32_t getValue() const { return value; }
+
+    // Orders two elements by comparing their byte strings.
+    int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
+
+private:
+    // Returns a pointer to the first string byte, just past the length prefix.
+    const char *data(const CharString &strings) const {
+        const int32_t prefixStart= stringOffset>=0 ? stringOffset : ~stringOffset;
+        const int32_t prefixLength= stringOffset>=0 ? 1 : 2;
+        return strings.data()+prefixStart+prefixLength;
+    }
+
+    // Index of the length prefix in the shared strings buffer.
+    // Non-negative: the prefix is a single length byte.
+    // Negative: bit-invert to get the index; the prefix is two length bytes
+    // (big-endian). Compared with a stringLength field here, this saves
+    // 3 bytes per string for most strings.
+    int32_t stringOffset;
+    // Value associated with the string.
+    int32_t value;
+};
+
+void
+BytesTrieElement::setTo(StringPiece s, int32_t val,
+                        CharString &strings, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const int32_t byteCount=s.length();
+    if(byteCount>0xffff) {
+        // The length prefix is at most two bytes wide, so longer strings cannot be stored.
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    const UBool twoByteLength= byteCount>0xff;
+    const int32_t start=strings.length();
+    stringOffset= twoByteLength ? ~start : start;  // bit-inverted (negative) marks a two-byte length prefix
+    value=val;
+    if(twoByteLength) {
+        strings.append((char)(byteCount>>8), errorCode);  // high length byte first (big-endian)
+    }
+    strings.append((char)byteCount, errorCode);
+    strings.append(s, errorCode);
+}
+
+int32_t
+BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
+    // TODO: add StringPiece::compare(), see ticket #8187
+    const StringPiece mine=getString(strings);
+    const StringPiece theirs=other.getString(strings);
+    const int32_t myLength=mine.length();
+    const int32_t theirLength=theirs.length();
+    // Compare the shared prefix bytewise; if it is identical, the length difference decides.
+    const int32_t prefixLength= myLength<=theirLength ? myLength : theirLength;
+    const int32_t byteOrder=uprv_memcmp(mine.data(), theirs.data(), prefixLength);
+    if(byteOrder!=0) {
+        return byteOrder;
+    }
+    return myLength-theirLength;
+}
+
+BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
+        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
+          bytes(NULL), bytesCapacity(0), bytesLength(0) {
+    // Allocate the string storage unless the caller already failed (strings stays NULL then).
+    if(U_SUCCESS(errorCode)) {
+        strings=new CharString();
+        if(strings==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+BytesTrieBuilder::~BytesTrieBuilder() {
+    uprv_free(bytes);  // serialized trie array obtained from uprv_malloc()
+    delete[] elements;  // element array obtained from new[]
+    delete strings;  // shared string storage obtained from new
+}
+
+BytesTrieBuilder &
+BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(bytesLength>0) {
+        // The trie bytes have been built already; no more elements may be added.
+        errorCode=U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    if(elementsLength==elementsCapacity) {
+        // The array is full (or not yet allocated): start at 1024 slots, then quadruple.
+        const int32_t grownCapacity= elementsCapacity==0 ? 1024 : 4*elementsCapacity;
+        BytesTrieElement *grown=new BytesTrieElement[grownCapacity];
+        if(grown==NULL) {
+            // Report the failure rather than dereferencing a null array.
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        if(elementsLength>0) {
+            // Carry the existing elements over with a raw byte copy.
+            uprv_memcpy(grown, elements, (size_t)elementsLength*sizeof(BytesTrieElement));
+        }
+        delete[] elements;
+        elements=grown;
+        elementsCapacity=grownCapacity;
+    }
+    BytesTrieElement &slot=elements[elementsLength];
+    ++elementsLength;
+    slot.setTo(s, value, *strings, errorCode);
+    return *this;
+}
+
+U_CDECL_BEGIN
+
+static int32_t U_CALLCONV
+bytestriebuilder_compareElementStrings(const void *context, const void *left, const void *right) {
+    // Sort callback: context is the shared CharString, left/right are BytesTrieElement items.
+    const CharString &pool=*static_cast<const CharString *>(context);
+    const BytesTrieElement &lhs=*static_cast<const BytesTrieElement *>(left);
+    return lhs.compareStringTo(*static_cast<const BytesTrieElement *>(right), pool);
+}
+
+U_CDECL_END
+
+BytesTrie *
+BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    buildBytes(buildOption, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    BytesTrie *trie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));  // trie data = last bytesLength bytes of the array
+    if(trie==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bytes=NULL;  // The new trie now owns the array.
+    bytesCapacity=0;
+    return trie;
+}
+
+StringPiece
+BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
+    buildBytes(buildOption, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return StringPiece();  // default-constructed piece on failure
+    }
+    // A view (not a copy) of the last bytesLength bytes of the builder's array.
+    return StringPiece(bytes+(bytesCapacity-bytesLength), bytesLength);
+}
+
+void
+BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {  // Sorts and validates the elements, then serializes the trie into bytes.
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(bytes!=NULL && bytesLength>0) {
+        // Already built.
+        return;
+    }
+    if(bytesLength==0) {  // first build since construction or clear(): sort and validate the elements
+        if(elementsLength==0) {
+            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  // no elements were added
+            return;
+        }
+        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
+                      bytestriebuilder_compareElementStrings, strings,
+                      FALSE,  // need not be a stable sort
+                      &errorCode);
+        if(U_FAILURE(errorCode)) {
+            return;
+        }
+        // Duplicate strings are not allowed.
+        StringPiece prev=elements[0].getString(*strings);
+        for(int32_t i=1; i<elementsLength; ++i) {  // after sorting, equal strings are adjacent
+            StringPiece current=elements[i].getString(*strings);
+            if(prev==current) {
+                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            prev=current;
+        }
+    }
+    // Create and byte-serialize the trie for the elements.
+    bytesLength=0;
+    int32_t capacity=strings->length();  // initial capacity: total stored string bytes, but at least 1024
+    if(capacity<1024) {
+        capacity=1024;
+    }
+    if(bytesCapacity<capacity) {
+        uprv_free(bytes);
+        bytes=static_cast<char *>(uprv_malloc(capacity));
+        if(bytes==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            bytesCapacity=0;
+            return;
+        }
+        bytesCapacity=capacity;
+    }
+    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
+    if(bytes==NULL) {  // ensureCapacity() sets bytes to NULL when growing the array fails
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+BytesTrieBuilder &
+BytesTrieBuilder::clear() {
+    bytesLength=0;  // forget any built trie so that add() is allowed again
+    elementsLength=0;  // drop all elements; the elements array itself is kept
+    strings->clear();  // empty the shared string storage
+    return *this;
+}
+
+int32_t BytesTrieBuilder::getElementStringLength(int32_t i) const {
+    const BytesTrieElement &element=elements[i];  // no bounds check on i
+    return element.getStringLength(*strings);
+}
+
+UChar BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
+    const char unit=elements[i].charAt(byteIndex, *strings);
+    return (uint8_t)unit;  // zero-extend so that bytes 0x80..0xff do not turn into sign-extended units
+}
+
+int32_t BytesTrieBuilder::getElementValue(int32_t i) const {
+    const BytesTrieElement &element=elements[i];  // no bounds check on i
+    return element.getValue();
+}
+
+int32_t
+BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
+    const BytesTrieElement &lo=elements[first];
+    const BytesTrieElement &hi=elements[last];
+    const int32_t limit=lo.getStringLength(*strings);
+    for(++byteIndex; byteIndex<limit; ++byteIndex) {  // advance while first and last still share the byte
+        if(lo.charAt(byteIndex, *strings)!=hi.charAt(byteIndex, *strings)) { break; }
+    }
+    return byteIndex;
+}
+
+int32_t
+BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
+    int32_t distinct=0;  // Number of different bytes at byteIndex.
+    int32_t index=start;
+    // Each pass consumes one run of consecutive elements that share a byte.
+    do {
+        const char runByte=elements[index].charAt(byteIndex, *strings);
+        for(++index; index<limit && runByte==elements[index].charAt(byteIndex, *strings); ++index) {
+        }
+        ++distinct;
+    } while(index<limit);
+    return distinct;
+}
+
+int32_t
+BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
+    // Skips `count` runs of elements; a run shares one byte at byteIndex.
+    // No array bound is checked here: an element with a different byte must follow each run.
+    do {
+        const char runByte=elements[i].charAt(byteIndex, *strings);
+        do { ++i; } while(runByte==elements[i].charAt(byteIndex, *strings));
+    } while(--count>0);
+    return i;
+}
+
+int32_t
+BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
+    // Narrow the unit back to char so that it compares equal to what charAt() returns.
+    const char current=(char)byte;
+    for(; current==elements[i].charAt(byteIndex, *strings); ++i) {
+    }
+    return i;
+}
+
+BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
+        : LinearMatchNode(len, nextNode), s(bytes) {
+    const uint32_t bytesHash=static_cast<uint32_t>(ustr_hashCharsN(bytes, len));
+    hash=static_cast<int32_t>(static_cast<uint32_t>(hash)*37u + bytesHash);  // unsigned arithmetic wraps; no signed overflow
+}
+
+bool
+BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
+    // Identity implies equality.
+    if(&other==this) {
+        return TRUE;
+    }
+    // Once the base-class comparison succeeds, other is treated as a
+    // BTLinearMatchNode and the two byte sequences are compared.
+    return LinearMatchNode::operator==(other) &&
+            uprv_memcmp(s, ((const BTLinearMatchNode &)other).s, length)==0;
+}
+
+void
+BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {  // Serializes this node: next node, then the match bytes, then the lead byte.
+    BytesTrieBuilder &b=(BytesTrieBuilder &)builder;  // NOTE(review): assumes builder really is a BytesTrieBuilder — confirm against callers
+    next->write(builder);  // written first, so it ends up after this node in the back-to-front array
+    b.write(s, length);
+    offset=b.write(b.getMinLinearMatch()+length-1);  // lead byte encodes length-1; offset records the length returned by write()
+}
+
+StringTrieBuilder::Node *
+BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
+                                        Node *nextNode) const {
+    // The node keeps a pointer into the shared strings storage; the bytes are not copied.
+    const StringPiece whole=elements[i].getString(*strings);
+    const char *matchStart=whole.data()+byteIndex;
+    return new BTLinearMatchNode(matchStart, length, nextNode);
+}
+
+UBool
+BytesTrieBuilder::ensureCapacity(int32_t length) {
+    if(bytes==NULL) {
+        return FALSE;  // previous memory allocation had failed
+    }
+    if(length<=bytesCapacity) {
+        return TRUE;  // already large enough
+    }
+    int32_t grownCapacity=2*bytesCapacity;  // double until strictly greater than length
+    while(grownCapacity<=length) {
+        grownCapacity*=2;
+    }
+    char *grown=static_cast<char *>(uprv_malloc(grownCapacity));
+    if(grown==NULL) {
+        uprv_free(bytes);  // unable to allocate memory: drop the old array, later calls return FALSE
+        bytes=NULL;
+        bytesCapacity=0;
+        return FALSE;
+    }
+    // The used bytes sit at the end of the array; keep them at the end of the new one.
+    uprv_memcpy(grown+(grownCapacity-bytesLength), bytes+(bytesCapacity-bytesLength), bytesLength);
+    uprv_free(bytes);
+    bytes=grown;
+    bytesCapacity=grownCapacity;
+    return TRUE;
+}
+
+int32_t
+BytesTrieBuilder::write(int32_t byte) {
+    // Output grows back to front: the new byte goes just before the bytes written so far.
+    if(!ensureCapacity(bytesLength+1)) {
+        return bytesLength;  // allocation failed: length unchanged
+    }
+    bytes[bytesCapacity-(++bytesLength)]=(char)byte;
+    return bytesLength;
+}
+
+int32_t
+BytesTrieBuilder::write(const char *b, int32_t length) {
+    if(!ensureCapacity(bytesLength+length)) {
+        return bytesLength;  // allocation failed: length unchanged
+    }
+    bytesLength+=length;  // the run is placed in front of the bytes written so far
+    uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
+    return bytesLength;
+}
+
+int32_t BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
+    const char *units=elements[i].getString(*strings).data()+byteIndex;  // points into the shared strings storage
+    return write(units, length);
+}
+
+int32_t
+BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {  // Writes value i in 1..5 bytes; the lead byte's low bit carries isFinal.
+    if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
+        return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);  // small value: a single byte
+    }
+    char intBytes[5];
+    int32_t length=1;  // index of the next byte to fill; intBytes[0] is the lead byte
+    if(i<0 || i>0xffffff) {  // needs all 32 bits: lead byte plus four value bytes
+        intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
+        intBytes[1]=(char)((uint32_t)i>>24);
+        intBytes[2]=(char)((uint32_t)i>>16);
+        intBytes[3]=(char)((uint32_t)i>>8);
+        intBytes[4]=(char)i;
+        length=5;
+    // } else if(i<=BytesTrie::kMaxOneByteValue) {
+    //     intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
+    } else {
+        if(i<=BytesTrie::kMaxTwoByteValue) {
+            intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));  // the top bits of i ride in the lead byte
+        } else {
+            if(i<=BytesTrie::kMaxThreeByteValue) {
+                intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
+            } else {
+                intBytes[0]=(char)BytesTrie::kFourByteValueLead;
+                intBytes[1]=(char)(i>>16);
+                length=2;
+            }
+            intBytes[length++]=(char)(i>>8);
+        }
+        intBytes[length++]=(char)i;  // the lowest byte is always last (big-endian)
+    }
+    intBytes[0]=(char)((intBytes[0]<<1)|isFinal);  // shift the lead left and store the final flag in bit 0
+    return write(intBytes, length);
+}
+
+int32_t
+BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
+    // Output grows back to front: the node byte is written first, so in the
+    // finished array an intermediate value precedes its node byte.
+    // Returns the output length after the last write.
+    const int32_t afterNode=write(node);
+    return hasValue ? writeValueAndFinal(value, FALSE) : afterNode;
+}
+
+int32_t
+BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
+    // Writes the distance from the current output length back to jumpTarget
+    // in the 1..5-byte big-endian form that BytesTrie::jumpByDelta() decodes.
+    int32_t i=bytesLength-jumpTarget;
+    U_ASSERT(i>=0);
+    if(i<=BytesTrie::kMaxOneByteDelta) {
+        return write(i);
+    }
+    char intBytes[5];
+    int32_t length=1;  // intBytes[0] always holds the lead byte
+    // Every trailing byte is appended at the next free index, so that 4- and
+    // 5-byte deltas fill all of their bytes, in order, without overwriting.
+    if(i<=BytesTrie::kMaxTwoByteDelta) {
+        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
+    } else {
+        if(i<=BytesTrie::kMaxThreeByteDelta) {
+            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
+        } else {
+            if(i<=0xffffff) {
+                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
+            } else {
+                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
+                intBytes[length++]=(char)(i>>24);
+            }
+            intBytes[length++]=(char)(i>>16);
+        }
+        intBytes[length++]=(char)(i>>8);
+    }
+    intBytes[length++]=(char)i;
+    return write(intBytes, length);  // the output length after writing the delta
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/bytestrieiterator.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/bytestrieiterator.cpp
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytestrieiterator.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov03
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+// Creates an iterator positioned at the first byte of the serialized trie
+// at trieBytes. Does nothing beyond member initialization if errorCode
+// already indicates a failure.
+BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
+                              UErrorCode &errorCode)
+        : bytes_(static_cast<const uint8_t *>(trieBytes)),
+          pos_(bytes_), initialPos_(bytes_),
+          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
+          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    // str_ and stack_ are held by pointer so that bytestrie.h could become a
+    // public API header depending only on other public headers.
+    // The Iterator (unlike BytesTrie itself) allocates memory anyway through
+    // CharString and UVector32, so the extra allocations cost little.
+    str_=new CharString();
+    stack_=new UVector32(errorCode);
+    const UBool allocationFailed=(str_==NULL || stack_==NULL);
+    if(allocationFailed && U_SUCCESS(errorCode)) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+// Creates an iterator that starts from the current state of trie.
+// If the trie is in the middle of a linear-match node, the not-yet-matched
+// bytes (capped at maxStringLength when that is positive) are copied to str_.
+BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
+                              UErrorCode &errorCode)
+        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
+          remainingMatchLength_(trie.remainingMatchLength_),
+          initialRemainingMatchLength_(trie.remainingMatchLength_),
+          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    str_=new CharString();
+    stack_=new UVector32(errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    if(str_==NULL || stack_==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // remainingMatchLength_ is the actual remaining match length minus 1,
+    // so a negative value means there is no pending linear-match node.
+    if(remainingMatchLength_<0) {
+        return;
+    }
+    int32_t pending=remainingMatchLength_+1;
+    if(maxLength_>0 && pending>maxLength_) {
+        pending=maxLength_;  // Leaves remainingMatchLength_>=0 as a signal for next().
+    }
+    str_->append(reinterpret_cast<const char *>(pos_), pending, errorCode);
+    pos_+=pending;
+    remainingMatchLength_-=pending;
+}
+
+// Releases the heap-allocated state stack and string buffer.
+BytesTrie::Iterator::~Iterator() {
+    delete stack_;
+    delete str_;
+}
+
+// Restores the iterator to the state it had right after construction:
+// the string is cut back to the initially pending linear-match bytes
+// (capped at maxLength_ when that is positive) and the stack is emptied.
+BytesTrie::Iterator &
+BytesTrie::Iterator::reset() {
+    int32_t keep=initialRemainingMatchLength_+1;  // Remaining match length.
+    if(maxLength_>0 && keep>maxLength_) {
+        keep=maxLength_;
+    }
+    str_->truncate(keep);
+    pos_=initialPos_+keep;
+    remainingMatchLength_=initialRemainingMatchLength_-keep;
+    stack_->setSize(0);
+    return *this;
+}
+
+// TRUE while there is a current position or saved branch state left.
+UBool
+BytesTrie::Iterator::hasNext() const {
+    if(pos_!=NULL) {
+        return TRUE;
+    }
+    return !stack_->isEmpty();
+}
+
+/**
+ * Advances to the next byte sequence and its value.
+ *
+ * Returns FALSE if errorCode already indicates a failure, or if there is
+ * no current position and the stack is empty. Otherwise returns TRUE after
+ * updating str_ and value_. When the sequence is cut off because of
+ * maxLength_, truncateAndStop() sets value_ to -1.
+ *
+ * The stack holds pairs of int32_t entries pushed by branchNext():
+ * first an offset relative to bytes_, then a word whose lower 16 bits are the
+ * string length to restore and whose upper 16 bits are the remaining
+ * branch length.
+ */
+UBool
+BytesTrie::Iterator::next(UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        if(stack_->isEmpty()) {
+            return FALSE;
+        }
+        // Pop the state off the stack and continue with the next outbound edge of
+        // the branch node.
+        int32_t stackSize=stack_->size();
+        int32_t length=stack_->elementAti(stackSize-1);
+        pos=bytes_+stack_->elementAti(stackSize-2);
+        stack_->setSize(stackSize-2);
+        // Lower 16 bits: string length at the time the branch was entered.
+        str_->truncate(length&0xffff);
+        // Upper 16 bits: number of branch entries still to be visited.
+        length=(int32_t)((uint32_t)length>>16);
+        if(length>1) {
+            pos=branchNext(pos, length, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Last entry of the branch: append its byte and continue behind it.
+            str_->append((char)*pos++, errorCode);
+        }
+    }
+    if(remainingMatchLength_>=0) {
+        // We only get here if we started in a pending linear-match node
+        // with more than maxLength remaining bytes.
+        return truncateAndStop();
+    }
+    for(;;) {
+        int32_t node=*pos++;
+        if(node>=kMinValueLead) {
+            // Deliver value for the byte sequence so far.
+            UBool isFinal=(UBool)(node&kValueIsFinal);
+            value_=readValue(pos, node>>1);
+            if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
+                // Nothing further along this path; the next call pops the stack.
+                pos_=NULL;
+            } else {
+                pos_=skipValue(pos, node);
+            }
+            return TRUE;
+        }
+        if(maxLength_>0 && str_->length()==maxLength_) {
+            return truncateAndStop();
+        }
+        if(node<kMinLinearMatch) {
+            // Branch node; a node byte of 0 means the length is in the next byte.
+            if(node==0) {
+                node=*pos++;
+            }
+            pos=branchNext(pos, node+1, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Linear-match node, append length bytes to str_.
+            int32_t length=node-kMinLinearMatch+1;
+            if(maxLength_>0 && str_->length()+length>maxLength_) {
+                // Append only as many bytes as still fit, then stop.
+                str_->append(reinterpret_cast<const char *>(pos),
+                            maxLength_-str_->length(), errorCode);
+                return truncateAndStop();
+            }
+            str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
+            pos+=length;
+        }
+    }
+}
+
+// Returns the current byte sequence, or an empty StringPiece if the
+// string buffer was never allocated.
+StringPiece
+BytesTrie::Iterator::getString() const {
+    if(str_ == NULL) {
+        return StringPiece();
+    }
+    return str_->toStringPiece();
+}
+
+// Ends the current path: marks the string as having no real value (-1)
+// and clears the position so that the next call resumes from the stack.
+// Always returns TRUE.
+UBool
+BytesTrie::Iterator::truncateAndStop() {
+    value_=-1;  // no real value for str
+    pos_=NULL;
+    return TRUE;
+}
+
+// Branch node, needs to take the first outbound edge and push state for the rest.
+//
+// pos:    position inside the branch node
+// length: number of branch entries reachable from pos
+//         (callers pass node+1 or the count popped from the stack)
+// Returns the position to continue at, or NULL if the edge taken leads to a
+// final value; in that case pos_ is set to NULL and value_ to that value.
+//
+// Each pushed state is two stack entries: an offset relative to bytes_, and
+// (remaining branch length<<16)|current string length.
+const uint8_t *
+BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison byte
+        // Push state for the greater-or-equal edge.
+        stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
+        stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
+        // Follow the less-than edge.
+        length>>=1;
+        pos=jumpByDelta(pos);
+    }
+    // List of key-value pairs where values are either final values or jump deltas.
+    // Read the first (key, value) pair.
+    uint8_t trieByte=*pos++;
+    int32_t node=*pos++;
+    UBool isFinal=(UBool)(node&kValueIsFinal);
+    int32_t value=readValue(pos, node>>1);
+    pos=skipValue(pos, node);
+    // Remember where the remaining length-1 pairs start, together with the
+    // string length to restore before visiting them.
+    stack_->addElement((int32_t)(pos-bytes_), errorCode);
+    stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
+    str_->append((char)trieByte, errorCode);
+    if(isFinal) {
+        pos_=NULL;
+        value_=value;
+        return NULL;
+    } else {
+        // Not final: value is a jump delta relative to the position after it.
+        return pos+value;
+    }
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/caniter.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/caniter.cpp
@@ -0,0 +1,586 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *****************************************************************************
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *****************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/caniter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "hash.h"
+#include "normalizer2impl.h"
+
+/**
+ * This class allows one to iterate through all the strings that are canonically equivalent to a given
+ * string. For example, here are some sample results:
+Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+1: \u0041\u030A\u0064\u0307\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+2: \u0041\u030A\u0064\u0327\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+3: \u0041\u030A\u1E0B\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+4: \u0041\u030A\u1E11\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+5: \u00C5\u0064\u0307\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+6: \u00C5\u0064\u0327\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+7: \u00C5\u1E0B\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+8: \u00C5\u1E11\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+9: \u212B\u0064\u0307\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+10: \u212B\u0064\u0327\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+11: \u212B\u1E0B\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+12: \u212B\u1E11\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+ *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ *@author M. Davis
+ *@draft
+ */
+
+// public
+
+U_NAMESPACE_BEGIN
+
+// TODO: add boilerplate methods.
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
+
+/**
+ * Creates an iterator over the strings canonically equivalent to sourceStr.
+ *@param sourceStr string to get results for
+ *@param status error code; the source is only set while it reports success
+ *       and the canonical-iterator data is available
+ */
+CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
+    pieces(NULL),
+    pieces_length(0),
+    pieces_lengths(NULL),
+    current(NULL),
+    current_length(0),
+    nfd(*Normalizer2::getNFDInstance(status)),
+    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
+{
+    if(U_FAILURE(status)) {
+        return;
+    }
+    if(!nfcImpl.ensureCanonIterData(status)) {
+        return;
+    }
+    setSource(sourceStr, status);
+}
+
+// Frees the piece tables and the position array.
+CanonicalIterator::~CanonicalIterator() {
+    cleanPieces();
+}
+
+// Frees pieces (including every per-segment string array), pieces_lengths
+// and current, and resets the pointers and lengths to NULL/0.
+void CanonicalIterator::cleanPieces() {
+    if(pieces != NULL) {
+        for(int32_t idx = 0; idx < pieces_length; ++idx) {
+            if(pieces[idx] == NULL) {
+                continue;
+            }
+            delete[] pieces[idx];
+        }
+        uprv_free(pieces);
+        pieces = NULL;
+        pieces_length = 0;
+    }
+
+    if(pieces_lengths != NULL) {
+        uprv_free(pieces_lengths);
+        pieces_lengths = NULL;
+    }
+
+    if(current != NULL) {
+        uprv_free(current);
+        current = NULL;
+        current_length = 0;
+    }
+}
+
+/**
+ * Returns a copy of the stored source string.
+ * NOTE: this is the NFD form of the string passed to setSource().
+ *@return the source in NFD form
+ */
+UnicodeString CanonicalIterator::getSource() {
+    return source;
+}
+
+/**
+ * Rewinds the iterator: every per-segment position goes back to 0 and the
+ * done flag is cleared, so that iteration starts again from the beginning.
+ */
+void CanonicalIterator::reset() {
+    for (int32_t idx = current_length - 1; idx >= 0; --idx) {
+        current[idx] = 0;
+    }
+    done = FALSE;
+}
+
+/**
+ * Builds the current combination (one alternative per segment, chosen by
+ * current[]) and then advances current[] like an odometer, rightmost
+ * segment first.
+ *@return the next string that is canonically equivalent. A bogus string is
+ * returned when the iteration is done.
+ */
+UnicodeString CanonicalIterator::next() {
+    if (done) {
+        buffer.setToBogus();
+        return buffer;
+    }
+
+    // Start from an empty buffer and concatenate the selected alternatives.
+    buffer.remove();
+    for (int32_t seg = 0; seg < pieces_length; ++seg) {
+        buffer.append(pieces[seg][current[seg]]);
+    }
+
+    // Step to the following combination for the next call.
+    int32_t pos = current_length - 1;
+    while (pos >= 0) {
+        current[pos]++;
+        if (current[pos] < pieces_lengths[pos]) {
+            break; // got sequence
+        }
+        current[pos] = 0;  // wrapped around, carry into the segment to the left
+        --pos;
+    }
+    if (pos < 0) {
+        // Every position wrapped: all combinations have been delivered.
+        done = TRUE;
+    }
+    return buffer;
+}
+
+/**
+ * Sets a new source string and rebuilds the per-segment tables.
+ * This allows the same iterator to be used while changing the source
+ * string, saving object creation.
+ *
+ * The source is normalized to NFD, split into segments at code points for
+ * which isCanonSegmentStarter() is true, and for each segment the array of
+ * equivalent strings is computed with getEquivalents().
+ *
+ *@param newSource the source string to iterate against
+ *@param status error code; set to U_MEMORY_ALLOCATION_ERROR if an allocation
+ *       made here fails, in which case all tables are released again
+ */
+void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
+    int32_t list_length = 0;
+    UChar32 cp = 0;
+    int32_t start = 0;
+    int32_t i = 0;
+    UnicodeString *list = NULL;
+
+    nfd.normalize(newSource, source, status);
+    if(U_FAILURE(status)) {
+      return;
+    }
+    done = FALSE;
+
+    cleanPieces();
+
+    // catch degenerate case
+    if (newSource.length() == 0) {
+        pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
+        pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        pieces_length = 1;
+        current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        current_length = 1;
+        // uprv_malloc() does not clear memory. Null the slot before any error
+        // exit, because cleanPieces() calls delete[] on every non-NULL pieces[i].
+        if (pieces != NULL) {
+            pieces[0] = NULL;
+        }
+        if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto CleanPartialInitialization;
+        }
+        current[0] = 0;
+        pieces[0] = new UnicodeString[1];
+        pieces_lengths[0] = 1;
+        if (pieces[0] == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto CleanPartialInitialization;
+        }
+        return;
+    }
+
+
+    // One slot per code unit is an upper bound for the number of segments.
+    list = new UnicodeString[source.length()];
+    if (list == 0) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto CleanPartialInitialization;
+    }
+
+    // i should initially be the number of code units at the
+    // start of the string
+    i = U16_LENGTH(source.char32At(0));
+    //int32_t i = 1;
+    // find the segments
+    // This code iterates through the source string and
+    // extracts segments that end up on a codepoint that
+    // doesn't start any decompositions. (Analysis is done
+    // on the NFD form - see above).
+    for (; i < source.length(); i += U16_LENGTH(cp)) {
+        cp = source.char32At(i);
+        if (nfcImpl.isCanonSegmentStarter(cp)) {
+            source.extract(start, i-start, list[list_length++]); // add up to i
+            start = i;
+        }
+    }
+    source.extract(start, i-start, list[list_length++]); // add last one
+
+
+    // allocate the arrays, and find the strings that are CE to each segment
+    pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
+    pieces_length = list_length;
+    pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
+    current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
+    current_length = list_length;
+    // As above: make every slot safe for cleanPieces() before the first
+    // possible jump to the cleanup label.
+    if (pieces != NULL) {
+        for (i = 0; i < pieces_length; i++) {
+            pieces[i] = NULL;
+        }
+    }
+    if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto CleanPartialInitialization;
+    }
+
+    for (i = 0; i < current_length; i++) {
+        current[i] = 0;
+    }
+    // for each segment, get all the combinations that can produce
+    // it after NFD normalization
+    for (i = 0; i < pieces_length; ++i) {
+        //if (PROGRESS) printf("SEGMENT\n");
+        pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
+    }
+
+    delete[] list;
+    return;
+// Common section to cleanup all local variables and reset object variables.
+CleanPartialInitialization:
+    if (list != NULL) {
+        delete[] list;
+    }
+    cleanPieces();
+}
+
+/**
+ * Dumb recursive implementation of permutation.
+ * TODO: optimize
+ * @param source the string to find permutations for
+ * @param skipZeros if true, a code point with canonical combining class 0 is
+ *        not moved to the front unless it is already the first one
+ * @param result hashtable that receives the permutations; each one is put
+ *        both as the key and as a newly allocated UnicodeString value
+ * @param status error code; nothing is done if it already indicates a failure
+ */
+void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return;
+    }
+    //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
+    int32_t i = 0;
+
+    // optimization:
+    // if zero or one character, just return a set with it
+    // we check for length <= 2 first (one code point is at most two code units)
+    // to keep from counting code points all the time
+    if (source.length() <= 2 && source.countChar32() <= 1) {
+        UnicodeString *toPut = new UnicodeString(source);
+        /* test for NULL */
+        if (toPut == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        result->put(source, toPut, status);
+        return;
+    }
+
+    // otherwise iterate through the string, and recursively permute all the other characters
+    UChar32 cp;
+    Hashtable subpermute(status);
+    if(U_FAILURE(status)) {
+        return;
+    }
+    subpermute.setValueDeleter(uprv_deleteUObject);
+
+    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
+        cp = source.char32At(i);
+        const UHashElement *ne = NULL;
+        int32_t el = UHASH_FIRST;
+        // Work on a copy so that removing cp below leaves source untouched.
+        UnicodeString subPermuteString = source;
+
+        // optimization:
+        // if the character is canonical combining class zero,
+        // don't permute it
+        if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
+            //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
+            continue;
+        }
+
+        // Reuse the same table for every cp; drop the previous round's entries.
+        subpermute.removeAll();
+
+        // see what the permutations of the characters before and after this one are
+        //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
+        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
+        /* Test for buffer overflows */
+        if(U_FAILURE(status)) {
+            return;
+        }
+        // The remove() above is destructive, but it only modifies the
+        // subPermuteString copy, not source.
+
+        // prefix this character to all of them
+        ne = subpermute.nextElement(el);
+        while (ne != NULL) {
+            UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
+            UnicodeString *chStr = new UnicodeString(cp);
+            //test for  NULL
+            if (chStr == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
+            //if (PROGRESS) printf("  Piece: %s\n", UToS(*chStr));
+            result->put(*chStr, chStr, status);
+            ne = subpermute.nextElement(el);
+        }
+    }
+    //return result;
+}
+
+// privates
+
+// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+//
+// segment:    the segment to expand; it is copied into a 256-UChar stack
+//             buffer, so a longer segment makes extract() report an error
+//             through status
+// result_len: set to the number of strings in the returned array; it is
+//             NOT written on the error paths
+// status:     error code; set to U_ILLEGAL_ARGUMENT_ERROR if no equivalent
+//             string was found, U_MEMORY_ALLOCATION_ERROR if the result
+//             array cannot be allocated
+// Returns a new[]-allocated array of UnicodeString, or NULL/0 on failure.
+UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
+    Hashtable result(status);
+    Hashtable permutations(status);
+    Hashtable basic(status);
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    result.setValueDeleter(uprv_deleteUObject);
+    permutations.setValueDeleter(uprv_deleteUObject);
+    basic.setValueDeleter(uprv_deleteUObject);
+
+    UChar USeg[256];
+    int32_t segLen = segment.extract(USeg, 256, status);
+    // Fills basic with the segment and the variants found by getEquivalents2().
+    getEquivalents2(&basic, USeg, segLen, status);
+
+    // now get all the permutations
+    // add only the ones that are canonically equivalent
+    // TODO: optimize by not permuting any class zero.
+
+    const UHashElement *ne = NULL;
+    int32_t el = UHASH_FIRST;
+    //Iterator it = basic.iterator();
+    ne = basic.nextElement(el);
+    //while (it.hasNext())
+    while (ne != NULL) {
+        //String item = (String) it.next();
+        UnicodeString item = *((UnicodeString *)(ne->value.pointer));
+
+        permutations.removeAll();
+        permute(item, CANITER_SKIP_ZEROES, &permutations, status);
+        const UHashElement *ne2 = NULL;
+        int32_t el2 = UHASH_FIRST;
+        //Iterator it2 = permutations.iterator();
+        ne2 = permutations.nextElement(el2);
+        //while (it2.hasNext())
+        while (ne2 != NULL) {
+            //String possible = (String) it2.next();
+            //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
+            UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
+            UnicodeString attempt;
+            // A permutation is kept only if its NFD form equals the segment.
+            nfd.normalize(possible, attempt, status);
+
+            // TODO: check if operator == is semantically the same as attempt.equals(segment)
+            if (attempt==segment) {
+                //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
+                // TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
+                result.put(possible, new UnicodeString(possible), status); //add(possible);
+            } else {
+                //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
+            }
+
+            ne2 = permutations.nextElement(el2);
+        }
+        ne = basic.nextElement(el);
+    }
+
+    /* Test for buffer overflows */
+    if(U_FAILURE(status)) {
+        return 0;
+    }
+    // convert into a String[] to clean up storage
+    //String[] finalResult = new String[result.size()];
+    UnicodeString *finalResult = NULL;
+    int32_t resultCount;
+    if((resultCount = result.count()) != 0) {
+        finalResult = new UnicodeString[resultCount];
+        if (finalResult == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+    }
+    else {
+        // No equivalent string was collected at all.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    //result.toArray(finalResult);
+    // Copy the strings out of the hashtable; the table's own copies are
+    // released by its value deleter when result goes out of scope.
+    result_len = 0;
+    el = UHASH_FIRST;
+    ne = result.nextElement(el);
+    while(ne != NULL) {
+        finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
+        ne = result.nextElement(el);
+    }
+
+
+    return finalResult;
+}
+
+/**
+ * Adds the segment itself to fillinResult, plus every string obtained by
+ * replacing a part of the segment starting at some position with a code point
+ * from that position's canonical start set, combined with the equivalents of
+ * the remainder as computed by extract().
+ *
+ * @param fillinResult hashtable that receives the strings (as key and as
+ *        newly allocated UnicodeString value)
+ * @param segment the UTF-16 segment to expand
+ * @param segLen length of segment in code units
+ * @param status error code
+ * @return fillinResult, or NULL if status indicates a failure or an
+ *         allocation failed
+ */
+Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
+
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
+
+    UnicodeString toPut(segment, segLen);
+
+    fillinResult->put(toPut, new UnicodeString(toPut), status);
+
+    UnicodeSet starts;
+
+    // cycle through all the characters
+    UChar32 cp;
+    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
+        // see if any character is at the start of some decomposition
+        U16_GET(segment, 0, i, segLen, cp);
+        if (!nfcImpl.getCanonStartSet(cp, starts)) {
+            continue;
+        }
+        // if so, see which decompositions match
+        UnicodeSetIterator iter(starts);
+        while (iter.next()) {
+            UChar32 cp2 = iter.getCodepoint();
+            Hashtable remainder(status);
+            remainder.setValueDeleter(uprv_deleteUObject);
+            // extract() returns NULL when cp2's decomposition does not match here.
+            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
+                continue;
+            }
+
+            // there were some matches, so add all the possibilities to the set.
+            // prefix = the segment up to position i, followed by cp2
+            UnicodeString prefix(segment, i);
+            prefix += cp2;
+
+            int32_t el = UHASH_FIRST;
+            const UHashElement *ne = remainder.nextElement(el);
+            while (ne != NULL) {
+                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
+                UnicodeString *toAdd = new UnicodeString(prefix);
+                /* test for NULL */
+                if (toAdd == 0) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    return NULL;
+                }
+                *toAdd += item;
+                fillinResult->put(*toAdd, toAdd, status);
+
+                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
+
+                ne = remainder.nextElement(el);
+            }
+        }
+    }
+
+    /* Test for buffer overflows */
+    if(U_FAILURE(status)) {
+        return NULL;
+    }
+    return fillinResult;
+}
+
+/**
+ * See if the decomposition of comp is at segment starting at segmentPos
+ * (with canonical rearrangement!)
+ * If so, take the remainder, and return the equivalents
+ *
+ * @param fillinResult hashtable that receives the equivalents of the remainder
+ * @param comp the code point whose NFD decomposition is matched
+ * @param segment the UTF-16 segment
+ * @param segLen length of segment in code units
+ * @param segmentPos index in segment at which matching starts
+ * @param status error code
+ * @return fillinResult on success (via getEquivalents2() when there is a
+ *         remainder); NULL if there is no match, the trial string is not
+ *         canonically equivalent, or status indicates a failure
+ */
+Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
+//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
+    //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
+    //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
+
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    // temp starts out as comp; unmatched code points of the segment and the
+    // tail after the match are appended to it below.
+    UnicodeString temp(comp);
+    int32_t inputLen=temp.length();
+    UnicodeString decompString;
+    nfd.normalize(temp, decompString, status);
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (decompString.isBogus()) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    const UChar *decomp=decompString.getBuffer();
+    int32_t decompLen=decompString.length();
+
+    // See if it matches the start of segment (at segmentPos)
+    UBool ok = FALSE;
+    UChar32 cp;
+    int32_t decompPos = 0;
+    UChar32 decompCp;
+    U16_NEXT(decomp, decompPos, decompLen, decompCp);
+
+    int32_t i = segmentPos;
+    while(i < segLen) {
+        U16_NEXT(segment, i, segLen, cp);
+
+        if (cp == decompCp) { // if equal, eat another cp from decomp
+
+            //if (PROGRESS) printf("  matches: %s\n", UToS(Tr(UnicodeString(cp))));
+
+            if (decompPos == decompLen) { // done, have all decomp characters!
+                // Keep whatever follows the match as part of the remainder.
+                temp.append(segment+i, segLen-i);
+                ok = TRUE;
+                break;
+            }
+            U16_NEXT(decomp, decompPos, decompLen, decompCp);
+        } else {
+            //if (PROGRESS) printf("  buffer: %s\n", UToS(Tr(UnicodeString(cp))));
+
+            // brute force approach
+            temp.append(cp);
+
+            /* TODO: optimize
+            // since we know that the classes are monotonically increasing, after zero
+            // e.g. 0 5 7 9 0 3
+            // we can do an optimization
+            // there are only a few cases that work: zero, less, same, greater
+            // if both classes are the same, we fail
+            // if the decomp class < the segment class, we fail
+
+            segClass = getClass(cp);
+            if (decompClass <= segClass) return null;
+            */
+        }
+    }
+    if (!ok)
+        return NULL; // we failed, characters left over
+
+    //if (PROGRESS) printf("Matches\n");
+
+    // Nothing was appended to temp: the decomposition consumed everything.
+    if (inputLen == temp.length()) {
+        fillinResult->put(UnicodeString(), new UnicodeString(), status);
+        return fillinResult; // succeed, but no remainder
+    }
+
+    // brute force approach
+    // check to make sure result is canonically equivalent
+    UnicodeString trial;
+    nfd.normalize(temp, trial, status);
+    if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
+        return NULL;
+    }
+
+    // Recurse on the remainder, i.e. temp without the leading comp.
+    return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
--- a/external/duckdb/extension/icu/third_party/icu/common/capi_helper.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/capi_helper.h
@@ -0,0 +1,97 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __CAPI_HELPER_H__
+#define __CAPI_HELPER_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * An internal helper class to help convert between C and C++ APIs.
+ *
+ * The conversions cast between CPPType and this helper with static_cast, so
+ * CPPType is expected to derive from IcuCApiHelper<CType, CPPType, kMagic>.
+ * CType is the C-side handle type; pointers to it are reinterpreted as
+ * pointers to CPPType. kMagic is the tag value stored in each live object and
+ * checked by validate().
+ */
+template<typename CType, typename CPPType, int32_t kMagic>
+class IcuCApiHelper {
+  public:
+    /**
+     * Convert from the C type to the C++ type (const version).
+     * Returns nullptr if status already indicates a failure. Sets status to
+     * U_ILLEGAL_ARGUMENT_ERROR for a null input and to U_INVALID_FORMAT_ERROR
+     * if the magic number does not match, returning nullptr in both cases.
+     */
+    static const CPPType* validate(const CType* input, UErrorCode& status);
+
+    /**
+     * Convert from the C type to the C++ type (non-const version).
+     * Same checks and error codes as the const version.
+     */
+    static CPPType* validate(CType* input, UErrorCode& status);
+
+    /**
+     * Convert from the C++ type to the C type (const version).
+     */
+    const CType* exportConstForC() const;
+
+    /**
+     * Convert from the C++ type to the C type (non-const version).
+     */
+    CType* exportForC();
+
+    /**
+     * Invalidates the object by clearing its magic number.
+     */
+    ~IcuCApiHelper();
+
+  private:
+    /**
+     * While the object is valid, fMagic equals kMagic.
+     * The destructor sets it to 0.
+     */
+    int32_t fMagic = kMagic;
+};
+
+
+// Const conversion from the C handle to the C++ object.
+// Fails with nullptr when status is already a failure, when input is null
+// (U_ILLEGAL_ARGUMENT_ERROR), or when the stored magic number differs from
+// kMagic (U_INVALID_FORMAT_ERROR).
+template<typename CType, typename CPPType, int32_t kMagic>
+const CPPType*
+IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) {
+    using Helper = IcuCApiHelper<CType, CPPType, kMagic>;
+
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    if (input == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    const CPPType* candidate = reinterpret_cast<const CPPType*>(input);
+    const Helper* helper = static_cast<const Helper*>(candidate);
+    if (helper->fMagic == kMagic) {
+        return candidate;
+    }
+    status = U_INVALID_FORMAT_ERROR;
+    return nullptr;
+}
+
+// Non-const conversion: runs the const overload and strips the const
+// qualifier from its result.
+template<typename CType, typename CPPType, int32_t kMagic>
+CPPType*
+IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) {
+    const CType* readOnlyInput = input;
+    return const_cast<CPPType*>(validate(readOnlyInput, status));
+}
+
+// Returns this object, seen as its CPPType, reinterpreted as a const C handle.
+template<typename CType, typename CPPType, int32_t kMagic>
+const CType*
+IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const {
+    const CPPType* self = static_cast<const CPPType*>(this);
+    return reinterpret_cast<const CType*>(self);
+}
+
+// Returns this object, seen as its CPPType, reinterpreted as a C handle.
+template<typename CType, typename CPPType, int32_t kMagic>
+CType*
+IcuCApiHelper<CType, CPPType, kMagic>::exportForC() {
+    CPPType* self = static_cast<CPPType*>(this);
+    return reinterpret_cast<CType*>(self);
+}
+
+// Clears the magic number so that validate() rejects the object afterwards.
+template<typename CType, typename CPPType, int32_t kMagic>
+IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() {
+    // Head off application errors by preventing use of deleted objects.
+    fMagic = 0;
+}
+
+
+U_NAMESPACE_END
+
+#endif // __CAPI_HELPER_H__
--- a/external/duckdb/extension/icu/third_party/icu/common/characterproperties.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/characterproperties.cpp
@@ -0,0 +1,383 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// characterproperties.cpp
+// created: 2018sep03 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/uchar.h"
+#include "unicode/ucpmap.h"
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/uniset.h"
+#include "unicode/uscript.h"
+#include "unicode/uset.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ubidi_props.h"
+#include "ucase.h"
+#include "ucln_cmn.h"
+#include "umutex.h"
+#include "uprops.h"
+
+using icu::LocalPointer;
+#if !UCONFIG_NO_NORMALIZATION
+using icu::Normalizer2Factory;
+using icu::Normalizer2Impl;
+#endif
+using icu::UInitOnce;
+using icu::UnicodeSet;
+
+namespace {
+
+UBool U_CALLCONV characterproperties_cleanup();
+
+// Number of cache slots in gInclusions: slots [0, UPROPS_SRC_COUNT) are indexed by
+// property source, and slot (UPROPS_SRC_COUNT + prop - UCHAR_INT_START) belongs to
+// the integer property prop.
+constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
+
+// One lazily built inclusion set plus the once-flag that guards its construction.
+struct Inclusion {
+    UnicodeSet  *fSet = nullptr;
+    UInitOnce    fInitOnce = U_INITONCE_INITIALIZER;
+};
+Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
+
+// Cached binary-property sets, indexed by property; filled by u_getBinaryPropertySet().
+UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
+
+// Cached integer-property maps, indexed by (property - UCHAR_INT_START);
+// filled by u_getIntPropertyMap().
+UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
+
+// Held by u_getBinaryPropertySet() and u_getIntPropertyMap() while they read or fill
+// sets[] and maps[].
+icu::UMutex cpMutex;
+
+//----------------------------------------------------------------
+// Inclusions list
+//----------------------------------------------------------------
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+void U_CALLCONV
+_set_add(USet *set, UChar32 c) {
+    // USetAdder callback: add a single code point to the UnicodeSet behind the handle.
+    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
+    target->add(c);
+}
+
+void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) {
+    // USetAdder callback: add the range [start, end] to the UnicodeSet behind the handle.
+    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
+    target->add(start, end);
+}
+
+void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) {
+    // USetAdder callback: add a string to the UnicodeSet behind the handle.
+    // A negative length is passed on as the "is terminated" flag of the UnicodeString.
+    const UBool isTerminated = static_cast<UBool>(length < 0);
+    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
+    target->add(icu::UnicodeString(isTerminated, str, length));
+}
+
+// Library cleanup hook: releases every cached inclusion set, binary-property set and
+// integer-property map, and resets the once-flags so the caches can be rebuilt later.
+UBool U_CALLCONV characterproperties_cleanup() {
+    for (int32_t slot = 0; slot < UPRV_LENGTHOF(gInclusions); ++slot) {
+        Inclusion &entry = gInclusions[slot];
+        delete entry.fSet;
+        entry.fSet = nullptr;
+        entry.fInitOnce.reset();
+    }
+    for (UnicodeSet *&cachedSet : sets) {
+        delete cachedSet;
+        cachedSet = nullptr;
+    }
+    for (UCPMap *&cachedMap : maps) {
+        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
+        cachedMap = nullptr;
+    }
+    return TRUE;
+}
+
+// Builds the inclusion set for one property source and caches it in gInclusions[src].
+//
+// src:       the property source whose data provides the "property starts".
+// errorCode: set to U_INTERNAL_PROGRAM_ERROR for UPROPS_SRC_NONE or an unhandled
+//            source, and to U_MEMORY_ALLOCATION_ERROR when the set cannot be
+//            allocated or ends up bogus; failures reported by the
+//            *_addPropertyStarts() helpers are passed through unchanged.
+//
+// On any failure the function returns before storing, so gInclusions[src].fSet
+// keeps its previous (asserted null) value.
+void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
+    if (src == UPROPS_SRC_NONE) {
+        errorCode = U_INTERNAL_PROGRAM_ERROR;
+        return;
+    }
+    U_ASSERT(gInclusions[src].fSet == nullptr);
+
+    LocalPointer<UnicodeSet> incl(new UnicodeSet());
+    if (incl.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // Adapter handed to the *_addPropertyStarts() helpers: it carries incl as the
+    // target set together with the local _set_add* callbacks.
+    USetAdder sa = {
+        (USet *)incl.getAlias(),
+        _set_add,
+        _set_addRange,
+        _set_addString,
+        nullptr, // don't need remove()
+        nullptr // don't need removeRange()
+    };
+
+    // Dispatch to the data source(s) that back this property source.
+    switch(src) {
+    case UPROPS_SRC_CHAR:
+        uchar_addPropertyStarts(&sa, &errorCode);
+        break;
+    case UPROPS_SRC_PROPSVEC:
+        upropsvec_addPropertyStarts(&sa, &errorCode);
+        break;
+    case UPROPS_SRC_CHAR_AND_PROPSVEC:
+        uchar_addPropertyStarts(&sa, &errorCode);
+        upropsvec_addPropertyStarts(&sa, &errorCode);
+        break;
+#if !UCONFIG_NO_NORMALIZATION
+    case UPROPS_SRC_CASE_AND_NORM: {
+        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+        if(U_SUCCESS(errorCode)) {
+            impl->addPropertyStarts(&sa, errorCode);
+        }
+        ucase_addPropertyStarts(&sa, &errorCode);
+        break;
+    }
+    case UPROPS_SRC_NFC: {
+        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+        if(U_SUCCESS(errorCode)) {
+            impl->addPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
+    case UPROPS_SRC_NFKC: {
+        const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode);
+        if(U_SUCCESS(errorCode)) {
+            impl->addPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
+    case UPROPS_SRC_NFKC_CF: {
+        const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode);
+        if(U_SUCCESS(errorCode)) {
+            impl->addPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
+    case UPROPS_SRC_NFC_CANON_ITER: {
+        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+        if(U_SUCCESS(errorCode)) {
+            impl->addCanonIterPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
+#endif
+    case UPROPS_SRC_CASE:
+        ucase_addPropertyStarts(&sa, &errorCode);
+        break;
+    case UPROPS_SRC_BIDI:
+        ubidi_addPropertyStarts(&sa, &errorCode);
+        break;
+    case UPROPS_SRC_INPC:
+    case UPROPS_SRC_INSC:
+    case UPROPS_SRC_VO:
+        uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
+        break;
+    default:
+        // Also reached for the normalization sources when UCONFIG_NO_NORMALIZATION
+        // compiles their cases out.
+        errorCode = U_INTERNAL_PROGRAM_ERROR;
+        break;
+    }
+
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if (incl->isBogus()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // Compact for caching.
+    incl->compact();
+    // Hand ownership to the cache; characterproperties_cleanup() deletes it later.
+    gInclusions[src].fSet = incl.orphan();
+    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
+}
+
+// Returns the cached inclusion set for a property source, building it on first use.
+// An out-of-range src sets U_ILLEGAL_ARGUMENT_ERROR and returns nullptr.
+const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    const bool srcInRange = 0 <= src && src < UPROPS_SRC_COUNT;
+    if (!srcInRange) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    Inclusion &slot = gInclusions[src];
+    umtx_initOnce(slot.fInitOnce, &initInclusion, src, errorCode);
+    return slot.fSet;
+}
+
+// Builds the inclusion set for one integer property: starting from the inclusion set
+// of the property's source, keeps only the code points at which the property value
+// differs from the value of the previously visited code point.
+// The result is cached in gInclusions[UPROPS_SRC_COUNT + prop - UCHAR_INT_START].
+void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
+    // Reached only through umtx_initOnce().
+    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
+    const int32_t slot = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
+    U_ASSERT(gInclusions[slot].fSet == nullptr);
+    const UnicodeSet *sourceStarts = getInclusionsForSource(uprops_getSource(prop), errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+
+    LocalPointer<UnicodeSet> valueStarts(new UnicodeSet(0, 0));
+    if (valueStarts.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    int32_t lastValue = 0;
+    const int32_t rangeCount = sourceStarts->getRangeCount();
+    for (int32_t r = 0; r < rangeCount; ++r) {
+        const UChar32 last = sourceStarts->getRangeEnd(r);
+        for (UChar32 c = sourceStarts->getRangeStart(r); c <= last; ++c) {
+            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
+            const int32_t current = u_getIntPropertyValue(c, prop);
+            if (current == lastValue) {
+                continue;
+            }
+            valueStarts->add(c);
+            lastValue = current;
+        }
+    }
+
+    if (valueStarts->isBogus()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // Shrink the set before it is kept around in the cache.
+    valueStarts->compact();
+    gInclusions[slot].fSet = valueStarts.orphan();
+    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
+}
+
+}  // namespace
+
+U_NAMESPACE_BEGIN
+
+// Returns the inclusion set for a property. Integer properties get their own cached
+// set; every other property shares the set of its property source.
+const UnicodeSet *CharacterProperties::getInclusionsForProperty(
+        UProperty prop, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    const bool isIntProperty = UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT;
+    if (!isIntProperty) {
+        return getInclusionsForSource(uprops_getSource(prop), errorCode);
+    }
+    const int32_t slotIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
+    Inclusion &slot = gInclusions[slotIndex];
+    umtx_initOnce(slot.fInitOnce, &initIntPropInclusion, prop, errorCode);
+    return slot.fSet;
+}
+
+U_NAMESPACE_END
+
+namespace {
+
+// Creates a frozen UnicodeSet holding every code point that has the given binary
+// property. Only code points listed in the property's inclusion set are probed; a run
+// of "true" extends until the next probed code point that answers "false".
+// Returns nullptr on failure; the caller owns the returned set.
+UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    LocalPointer<UnicodeSet> result(new UnicodeSet());
+    if (result.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    const UnicodeSet *starts =
+        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
+    if (U_FAILURE(errorCode)) { return nullptr; }
+
+    UChar32 runStart = -1;  // negative while no "true" run is open
+    const int32_t rangeCount = starts->getRangeCount();
+    for (int32_t r = 0; r < rangeCount; ++r) {
+        const UChar32 last = starts->getRangeEnd(r);
+        for (UChar32 c = starts->getRangeStart(r); c <= last; ++c) {
+            const bool runOpen = runStart >= 0;
+            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
+            if (u_hasBinaryProperty(c, property)) {
+                if (!runOpen) {
+                    // false -> true: a new run begins here.
+                    runStart = c;
+                }
+            } else if (runOpen) {
+                // true -> false: the run ended just before c.
+                result->add(runStart, c - 1);
+                runStart = -1;
+            }
+        }
+    }
+    if (runStart >= 0) {
+        // A run still open at the end reaches the last code point.
+        result->add(runStart, 0x10FFFF);
+    }
+    result->freeze();
+    return result.orphan();
+}
+
+// Builds an immutable code point map (trie) holding the value of an integer property
+// for every code point.
+//
+// property:  the integer property to map.
+// errorCode: in/out error code; on failure nullptr is returned.
+// Returns the new map (a UCPTrie viewed as UCPMap), owned by the caller.
+//
+// The mutable trie starts out with nullValue everywhere (USCRIPT_UNKNOWN for
+// UCHAR_SCRIPT, 0 otherwise), so only ranges whose value differs from nullValue are
+// written. Only code points listed in the property's inclusion set are probed; the
+// value is taken to be constant between two probed code points.
+UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
+    icu::LocalUMutableCPTriePointer mutableTrie(
+        umutablecptrie_open(nullValue, nullValue, &errorCode));
+    const UnicodeSet *inclusions =
+        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    int32_t numRanges = inclusions->getRangeCount();
+    UChar32 start = 0;
+    uint32_t value = nullValue;
+
+    for (int32_t i = 0; i < numRanges; ++i) {
+        UChar32 rangeEnd = inclusions->getRangeEnd(i);
+        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
+            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
+            uint32_t nextValue = u_getIntPropertyValue(c, property);
+            if (value != nextValue) {
+                if (value != nullValue) {
+                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
+                }
+                start = c;
+                value = nextValue;
+            }
+        }
+    }
+    // Flush the trailing range. Compare against nullValue (not 0), consistent with the
+    // loop above: when nullValue is non-zero, a trailing value of 0 is real data that
+    // must be stored, while a trailing nullValue is already the trie's default.
+    if (value != nullValue) {
+        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
+    }
+
+    // These two properties get the FAST trie type; all others use the SMALL type.
+    UCPTrieType type;
+    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
+        type = UCPTRIE_TYPE_FAST;
+    } else {
+        type = UCPTRIE_TYPE_SMALL;
+    }
+    // Pick the narrowest value width that can hold the property's maximum value.
+    UCPTrieValueWidth valueWidth;
+    // TODO: UCharacterProperty.IntProperty
+    int32_t max = u_getIntPropertyMaxValue(property);
+    if (max <= 0xff) {
+        valueWidth = UCPTRIE_VALUE_BITS_8;
+    } else if (max <= 0xffff) {
+        valueWidth = UCPTRIE_VALUE_BITS_16;
+    } else {
+        valueWidth = UCPTRIE_VALUE_BITS_32;
+    }
+    return reinterpret_cast<UCPMap *>(
+        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
+}
+
+}  // namespace
+
+U_NAMESPACE_USE
+
+// C API: returns the cached set of all code points that have the given binary
+// property, creating it on first request. The cache is accessed under cpMutex.
+// An out-of-range property sets U_ILLEGAL_ARGUMENT_ERROR.
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    const bool propertyInRange = 0 <= property && property < UCHAR_BINARY_LIMIT;
+    if (!propertyInRange) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    Mutex lock(&cpMutex);
+    UnicodeSet *&cached = sets[property];
+    if (cached == nullptr) {
+        cached = makeSet(property, *pErrorCode);
+    }
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    return cached->toUSet();
+}
+
+// C API: returns the cached code point map for the given integer property, creating
+// it on first request. The cache is accessed under cpMutex.
+// An out-of-range property sets U_ILLEGAL_ARGUMENT_ERROR.
+U_CAPI const UCPMap * U_EXPORT2
+u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+    const bool propertyInRange = UCHAR_INT_START <= property && property < UCHAR_INT_LIMIT;
+    if (!propertyInRange) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    Mutex lock(&cpMutex);
+    UCPMap *&cached = maps[property - UCHAR_INT_START];
+    if (cached == nullptr) {
+        // makeMap() returns nullptr on failure, which simply leaves the slot empty.
+        cached = makeMap(property, *pErrorCode);
+    }
+    return cached;
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/chariter.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/chariter.cpp
@@ -0,0 +1,100 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+*   Copyright (C) 1999-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/chariter.h"
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line definitions of the ForwardCharacterIterator special members.
+// None of them does any work beyond constructing the UObject base.
+ForwardCharacterIterator::~ForwardCharacterIterator() {}
+ForwardCharacterIterator::ForwardCharacterIterator()
+: UObject()
+{}
+// Copy construction only forwards to the UObject copy constructor.
+ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
+: UObject(other)
+{}
+
+
+// Default constructor: an iterator over empty text, with every index set to 0.
+CharacterIterator::CharacterIterator()
+: textLength(0), pos(0), begin(0), end(0) {
+}
+
+// Iterator over text of the given length, covering [0, length) and positioned at 0.
+// A negative length is treated as 0.
+CharacterIterator::CharacterIterator(int32_t length)
+: textLength(length), pos(0), begin(0), end(length) {
+    if (textLength >= 0) {
+        return;
+    }
+    end = 0;
+    textLength = 0;
+}
+
+// Iterator over text of the given length, covering [0, length).
+// A negative length is treated as 0, and position is pinned into [0, end].
+CharacterIterator::CharacterIterator(int32_t length, int32_t position)
+: textLength(length), pos(position), begin(0), end(length) {
+    if (textLength < 0) {
+        end = 0;
+        textLength = 0;
+    }
+    pos = pos < 0 ? 0 : (pos > end ? end : pos);
+}
+
+// Iterator over the sub-range [textBegin, textEnd) of text with the given length.
+// Every argument is pinned into a consistent state:
+//   0 <= textLength, 0 <= begin <= textLength, begin <= end <= textLength,
+//   begin <= pos <= end.
+CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
+: textLength(length), pos(position), begin(textBegin), end(textEnd) {
+    // Each step relies on the bounds established by the steps before it.
+    textLength = textLength < 0 ? 0 : textLength;
+    begin = begin < 0 ? 0 : (begin > textLength ? textLength : begin);
+    end = end < begin ? begin : (end > textLength ? textLength : end);
+    pos = pos < begin ? begin : (pos > end ? end : pos);
+}
+
+// Nothing to release: the fields set by the constructors in this file are plain integers.
+CharacterIterator::~CharacterIterator() {}
+
+// Copy constructor: duplicates the four index fields; values are copied as-is,
+// without re-validating them.
+CharacterIterator::CharacterIterator(const CharacterIterator &that) :
+ForwardCharacterIterator(that),
+textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end)
+{
+}
+
+// Copy assignment: assigns the base-class part, then copies the four index fields.
+CharacterIterator &
+CharacterIterator::operator=(const CharacterIterator &that) {
+    ForwardCharacterIterator::operator=(that);
+    begin = that.begin;
+    end = that.end;
+    pos = that.pos;
+    textLength = that.textLength;
+    return *this;
+}
+
+// implementing first[32]PostInc() directly in a subclass should be faster
+// but these implementations make subclassing a little easier
+UChar
+CharacterIterator::firstPostInc(void) {
+    // Rewind, then let nextPostInc() fetch the first code unit and advance past it.
+    setToStart();
+    const UChar firstUnit = nextPostInc();
+    return firstUnit;
+}
+
+UChar32
+CharacterIterator::first32PostInc(void) {
+    // Rewind, then let next32PostInc() fetch the first code point and advance past it.
+    setToStart();
+    const UChar32 firstCodePoint = next32PostInc();
+    return firstCodePoint;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/charstr.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/charstr.cpp
@@ -0,0 +1,215 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  charstr.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010may19
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uinvchar.h"
+
+U_NAMESPACE_BEGIN
+
+// Move constructor: takes over src's buffer and length.
+// src.len is zeroed afterwards; no further guarantee is made about src's contents.
+CharString::CharString(CharString&& src) U_NOEXCEPT
+        : buffer(std::move(src.buffer)), len(src.len) {
+    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
+}
+
+// Move assignment: takes over src's buffer and length.
+// There is no self-assignment check here, so src must be a different object than *this.
+CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
+    buffer = std::move(src.buffer);
+    len = src.len;
+    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
+    return *this;
+}
+
+// Returns a uprv_malloc()ed copy of the string including its terminating NUL,
+// or nullptr on failure (U_MEMORY_ALLOCATION_ERROR if the allocation failed).
+char *CharString::cloneData(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    char *copy = static_cast<char *>(uprv_malloc(len + 1));
+    if (copy != nullptr) {
+        // len + 1 bytes: the contents plus the NUL terminator.
+        uprv_memcpy(copy, buffer.getAlias(), len + 1);
+        return copy;
+    }
+    errorCode = U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+}
+
+// Replaces this string's contents with a copy of s (including the NUL terminator).
+// Does nothing on a pre-existing failure, on self-copy, or when growing the buffer fails.
+CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || this == &s) {
+        return *this;
+    }
+    if (!ensureCapacity(s.len+1, 0, errorCode)) {
+        return *this;
+    }
+    len = s.len;
+    uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
+    return *this;
+}
+
+// Scans backwards from the end; returns the index of the last occurrence of c, or -1.
+int32_t CharString::lastIndexOf(char c) const {
+    for (int32_t index = len - 1; index >= 0; --index) {
+        if (buffer[index] == c) {
+            return index;
+        }
+    }
+    return -1;
+}
+
+// Returns true if s occurs somewhere in this string as a byte-wise substring.
+// An empty s is never considered contained.
+bool CharString::contains(StringPiece s) const {
+    if (s.empty()) { return false; }
+    const char *haystack = buffer.getAlias();
+    // Largest offset at which s still fits; negative when s is longer than this string.
+    const int32_t finalOffset = len - s.length();
+    int32_t offset = 0;
+    while (offset <= finalOffset) {
+        if (uprv_memcmp(haystack + offset, s.data(), s.length()) == 0) {
+            return true;
+        }
+        ++offset;
+    }
+    return false;
+}
+
+// Shortens the string to newLength and re-terminates it. A negative newLength is
+// treated as 0; a newLength that is not smaller than the current length changes nothing.
+CharString &CharString::truncate(int32_t newLength) {
+    const int32_t target = newLength < 0 ? 0 : newLength;
+    if (target < len) {
+        len = target;
+        buffer[len] = 0;
+    }
+    return *this;
+}
+
+// Appends a single char and keeps the string NUL-terminated.
+// ensureCapacity() also rejects a pre-existing failure in errorCode.
+CharString &CharString::append(char c, UErrorCode &errorCode) {
+    // Room is needed for the new char plus the terminator: len + 2.
+    if (!ensureCapacity(len+2, 0, errorCode)) {
+        return *this;
+    }
+    buffer[len] = c;
+    ++len;
+    buffer[len] = 0;
+    return *this;
+}
+
+// Appends sLength bytes starting at s and keeps the string NUL-terminated.
+//
+// s:         bytes to append; may be NULL only when sLength is 0.
+// sLength:   number of bytes, or -1 to use uprv_strlen(s); values below -1 are rejected.
+// errorCode: in/out; U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
+//            U_INTERNAL_PROGRAM_ERROR when the caller overran the getAppendBuffer() space,
+//            or whatever ensureCapacity() reports.
+// Returns *this.
+CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(sLength<-1 || (s==NULL && sLength!=0)) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if(sLength<0) {
+        sLength= static_cast<int32_t>(uprv_strlen(s));
+    }
+    if(sLength>0) {
+        if(s==(buffer.getAlias()+len)) {
+            // The caller wrote into the getAppendBuffer().
+            // The bytes are already in place; only the length and terminator need updating.
+            if(sLength>=(buffer.getCapacity()-len)) {
+                // The caller wrote too much.
+                errorCode=U_INTERNAL_PROGRAM_ERROR;
+            } else {
+                buffer[len+=sLength]=0;
+            }
+        } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) &&
+                  sLength>=(buffer.getCapacity()-len)
+        ) {
+            // (Part of) this string is appended to itself which requires reallocation,
+            // so we have to make a copy of the substring and append that.
+            return append(CharString(s, sLength, errorCode), errorCode);
+        } else if(ensureCapacity(len+sLength+1, 0, errorCode)) {
+            // Ordinary case: make room (including the NUL), then copy and terminate.
+            uprv_memcpy(buffer.getAlias()+len, s, sLength);
+            buffer[len+=sLength]=0;
+        }
+    }
+    return *this;
+}
+
+// Returns a pointer to the writable space right after the current contents and
+// reports its size (not counting the byte reserved for the NUL) in resultCapacity.
+// The buffer is grown if fewer than minCapacity bytes are free.
+// On failure returns NULL and sets resultCapacity to 0.
+char *CharString::getAppendBuffer(int32_t minCapacity,
+                                  int32_t desiredCapacityHint,
+                                  int32_t &resultCapacity,
+                                  UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        const int32_t spare = buffer.getCapacity() - len - 1;  // -1 for NUL
+        if (spare >= minCapacity) {
+            // Enough room already.
+            resultCapacity = spare;
+            return buffer.getAlias() + len;
+        }
+        if (ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
+            resultCapacity = buffer.getCapacity() - len - 1;
+            return buffer.getAlias() + len;
+        }
+    }
+    resultCapacity = 0;
+    return NULL;
+}
+
+// Convenience overload: forwards the UnicodeString's buffer and length to the
+// (const UChar *, int32_t) overload, which does the validation and conversion.
+CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
+    return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
+}
+
+// Appends ucharsLen UChars converted to chars via u_UCharsToChars().
+// Sets U_INVARIANT_CONVERSION_ERROR and appends nothing if the input does not pass
+// uprv_isInvariantUString().
+CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if (!uprv_isInvariantUString(uchars, ucharsLen)) {
+        errorCode = U_INVARIANT_CONVERSION_ERROR;
+        return *this;
+    }
+    if (!ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
+        return *this;
+    }
+    char *destination = buffer.getAlias() + len;
+    u_UCharsToChars(uchars, destination, ucharsLen);
+    len += ucharsLen;
+    buffer[len] = 0;
+    return *this;
+}
+
+// Makes sure the buffer can hold at least `capacity` chars (terminator included).
+//
+// capacity:            required capacity.
+// desiredCapacityHint: preferred capacity when growing; 0 means
+//                      "capacity + current capacity".
+// errorCode:           in/out; U_MEMORY_ALLOCATION_ERROR if the buffer cannot be grown.
+// Returns TRUE if the buffer is large enough afterwards, FALSE on any failure
+// (including a failure already present in errorCode).
+UBool CharString::ensureCapacity(int32_t capacity,
+                                 int32_t desiredCapacityHint,
+                                 UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    if(capacity>buffer.getCapacity()) {
+        if(desiredCapacityHint==0) {
+            desiredCapacityHint=capacity+buffer.getCapacity();
+        }
+        // First try the hinted size (only if it exceeds the required capacity); if that
+        // is skipped or fails, fall back to exactly the required capacity. The
+        // short-circuit evaluation guarantees at most one successful resize.
+        // len+1 is passed so the current contents and their NUL are preserved.
+        if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) &&
+            buffer.resize(capacity, len+1)==NULL
+        ) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+// Appends a path segment. A U_FILE_SEP_CHAR is inserted first when this string is
+// non-empty and does not already end with a (regular or alternate) separator.
+// An empty s or a pre-existing failure leaves the string untouched.
+CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || s.length() == 0) {
+        return *this;
+    }
+    if (len > 0) {
+        const char lastChar = buffer[len-1];
+        if (lastChar != U_FILE_SEP_CHAR && lastChar != U_FILE_ALT_SEP_CHAR) {
+            append(U_FILE_SEP_CHAR, errorCode);
+        }
+    }
+    append(s, errorCode);
+    return *this;
+}
+
+// Appends a U_FILE_SEP_CHAR unless the string is empty or already ends with a
+// (regular or alternate) separator. Does nothing on a pre-existing failure.
+CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || len <= 0) {
+        return *this;
+    }
+    const char lastChar = buffer[len-1];
+    if (lastChar != U_FILE_SEP_CHAR && lastChar != U_FILE_ALT_SEP_CHAR) {
+        append(U_FILE_SEP_CHAR, errorCode);
+    }
+    return *this;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/charstr.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/charstr.h
@@ -0,0 +1,168 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+*   Copyright (c) 2001-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/19/2001  aliu        Creation.
+*   05/19/2010  markus      Rewritten from scratch
+**********************************************************************
+*/
+
+#ifndef CHARSTRING_H
+#define CHARSTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+// Windows needs us to DLL-export the MaybeStackArray template specialization,
+// but MacOS X cannot handle it. Same as in digitlst.h.
+#if !U_PLATFORM_IS_DARWIN_BASED
+template class U_COMMON_API MaybeStackArray<char, 40>;
+#endif
+
+/**
+ * ICU-internal char * string class.
+ * This class does not assume or enforce any particular character encoding.
+ * Raw bytes can be stored. The string object owns its characters.
+ * A terminating NUL is stored, but the class does not prevent embedded NUL characters.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ * For example:
+ *   cs.data()[5]='a';  // no need for setCharAt(5, 'a')
+ */
+class U_COMMON_API CharString : public UMemory {
+public:
+    /** Constructs an empty, NUL-terminated string. */
+    CharString() : len(0) { buffer[0]=0; }
+    /** Constructs a string holding a copy of s; failures are reported in errorCode. */
+    CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, errorCode);
+    }
+    /** Explicit copy: constructs a string holding a copy of s; failures are reported in errorCode. */
+    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, errorCode);
+    }
+    /**
+     * Constructs a string from sLength chars at s.
+     * The arguments follow the rules of append(const char *, int32_t, UErrorCode &).
+     */
+    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
+        buffer[0]=0;
+        append(s, sLength, errorCode);
+    }
+    ~CharString() {}
+
+    /**
+     * Move constructor; might leave src in an undefined state.
+     * This string will have the same contents and state that the source string had.
+     */
+    CharString(CharString &&src) U_NOEXCEPT;
+    /**
+     * Move assignment operator; might leave src in an undefined state.
+     * This string will have the same contents and state that the source string had.
+     * The behavior is undefined if *this and src are the same object.
+     */
+    CharString &operator=(CharString &&src) U_NOEXCEPT;
+
+    /**
+     * Replaces this string's contents with the other string's contents.
+     * CharString does not support the standard copy constructor nor
+     * the assignment operator, to make copies explicit and to
+     * use a UErrorCode where memory allocations might be needed.
+     */
+    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+
+    /** @return true if the string has length 0 */
+    UBool isEmpty() const { return len==0; }
+    /** @return the number of chars, not counting the terminating NUL */
+    int32_t length() const { return len; }
+    /** @return the char at index; this accessor performs no range check of its own */
+    char operator[](int32_t index) const { return buffer[index]; }
+    /** @return a StringPiece that aliases (does not copy) this string's buffer */
+    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
+
+    /** @return pointer to the NUL-terminated contents */
+    const char *data() const { return buffer.getAlias(); }
+    /** @return writable pointer to the NUL-terminated contents */
+    char *data() { return buffer.getAlias(); }
+    /**
+     * Allocates length()+1 chars and copies the NUL-terminated data().
+     * The caller must uprv_free() the result.
+     */
+    char *cloneData(UErrorCode &errorCode) const;
+
+    /** Byte-wise equality: same length and same contents. */
+    bool operator==(StringPiece other) const {
+        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
+    }
+    /** Negation of operator==. */
+    bool operator!=(StringPiece other) const {
+        return !operator==(other);
+    }
+
+    /** @return last index of c, or -1 if c is not in this string */
+    int32_t lastIndexOf(char c) const;
+
+    /** @return true if s occurs in this string; an empty s yields false */
+    bool contains(StringPiece s) const;
+
+    /** Resets the string to empty without releasing the buffer here. */
+    CharString &clear() { len=0; buffer[0]=0; return *this; }
+    /**
+     * Shortens the string to newLength (negative values are treated as 0).
+     * Does nothing if newLength is not smaller than length().
+     */
+    CharString &truncate(int32_t newLength);
+
+    /** Appends one char. */
+    CharString &append(char c, UErrorCode &errorCode);
+    /** Appends the contents of s. */
+    CharString &append(StringPiece s, UErrorCode &errorCode) {
+        return append(s.data(), s.length(), errorCode);
+    }
+    /** Appends the contents of another CharString. */
+    CharString &append(const CharString &s, UErrorCode &errorCode) {
+        return append(s.data(), s.length(), errorCode);
+    }
+    /**
+     * Appends sLength chars starting at s; sLength==-1 means s is NUL-terminated.
+     * Also used to commit chars written into the getAppendBuffer().
+     */
+    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+    /**
+     * Returns a writable buffer for appending and writes the buffer's capacity to
+     * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
+     * There will additionally be space for a terminating NUL right at resultCapacity.
+     * (This function is similar to ByteSink.GetAppendBuffer().)
+     *
+     * The returned buffer is only valid until the next write operation
+     * on this string.
+     *
+     * After writing at most resultCapacity bytes, call append() with the
+     * pointer returned from this function and the number of bytes written.
+     *
+     * @param minCapacity required minimum capacity of the returned buffer;
+     *                    must be non-negative
+     * @param desiredCapacityHint desired capacity of the returned buffer;
+     *                            must be non-negative
+     * @param resultCapacity will be set to the capacity of the returned buffer
+     * @param errorCode in/out error code
+     * @return a buffer with resultCapacity>=min_capacity
+     */
+    char *getAppendBuffer(int32_t minCapacity,
+                          int32_t desiredCapacityHint,
+                          int32_t &resultCapacity,
+                          UErrorCode &errorCode);
+
+    /**
+     * Appends the invariant-character conversion of s.
+     * Sets U_INVARIANT_CONVERSION_ERROR if s contains other characters.
+     */
+    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+    /** Same as above, for a UChar array of length ucharsLen. */
+    CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);
+
+    /**
+     * Appends a filename/path part, e.g., a directory name.
+     * First appends a U_FILE_SEP_CHAR if necessary.
+     * Does nothing if s is empty.
+     */
+    CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
+
+    /**
+     * Appends a U_FILE_SEP_CHAR if this string is not empty
+     * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
+     */
+    CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
+
+private:
+    // Storage for the chars plus the terminating NUL.
+    MaybeStackArray<char, 40> buffer;
+    // Number of chars currently stored, not counting the terminating NUL.
+    int32_t len;
+
+    // Grows the buffer to at least `capacity` chars; returns FALSE and sets errorCode on failure.
+    UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
+
+    CharString(const CharString &other); // forbid copying of this class
+    CharString &operator=(const CharString &other); // forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+#endif
+//eof
--- a/external/duckdb/extension/icu/third_party/icu/common/cmemory.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/cmemory.cpp
@@ -0,0 +1,138 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File cmemory.c      ICU Heap allocation.
+*                     All ICU heap allocation, both for C and C++ new of ICU
+*                     class types, comes through these functions.
+*
+*                     If you have a need to replace ICU allocation, this is the
+*                     place to do it.
+*
+*                     Note that uprv_malloc(0) returns a non-NULL pointer, and
+*                     that a subsequent free of that pointer value is a NOP.
+*
+******************************************************************************
+*/
+#include "unicode/uclean.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include <stdlib.h>
+
+/* uprv_malloc(0) returns a pointer to this read-only data. */
+static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
+
+/* Function Pointers for user-supplied heap functions  */
+static const void     *pContext;
+static UMemAllocFn    *pAlloc;
+static UMemReallocFn  *pRealloc;
+static UMemFreeFn     *pFree;
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+static int n=0;
+static long b=0; 
+#endif
+
+/* Allocates s bytes through the user-supplied allocator if one is installed, otherwise
+ * through the default allocator. A request for 0 bytes returns the shared zeroMem
+ * block instead of allocating. */
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#if 1
+  putchar('>');
+  fflush(stdout);
+#else
+  fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr);
+#endif
+#endif
+    if (s == 0) {
+        return (void *)zeroMem;
+    }
+    if (pAlloc) {
+        return (*pAlloc)(pContext, s);
+    }
+    return uprv_default_malloc(s);
+}
+
+/* Resizes an allocation obtained from uprv_malloc()/uprv_realloc().
+ * - The zeroMem placeholder is never resized; a fresh block is allocated instead.
+ * - A size of 0 frees the block and returns the zeroMem placeholder.
+ * - Otherwise the user-supplied or default realloc does the work. */
+U_CAPI void * U_EXPORT2
+uprv_realloc(void * buffer, size_t size) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+  putchar('~');
+  fflush(stdout);
+#endif
+    if (buffer == zeroMem) {
+        return uprv_malloc(size);
+    }
+    if (size == 0) {
+        if (pFree) {
+            (*pFree)(pContext, buffer);
+        } else {
+            uprv_default_free(buffer);
+        }
+        return (void *)zeroMem;
+    }
+    if (pRealloc) {
+        return (*pRealloc)(pContext, buffer, size);
+    }
+    return uprv_default_realloc(buffer, size);
+}
+
+/* Releases a block through the user-supplied or default free function.
+ * The zeroMem placeholder handed out for zero-sized requests is never freed. */
+U_CAPI void U_EXPORT2
+uprv_free(void *buffer) {
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+  putchar('<');
+  fflush(stdout);
+#endif
+    if (buffer == zeroMem) {
+        return;
+    }
+    if (pFree) {
+        (*pFree)(pContext, buffer);
+    } else {
+        uprv_default_free(buffer);
+    }
+}
+
+/* Allocates zero-initialized storage for num elements of size bytes each.
+ *
+ * Returns NULL if the allocation fails or if num * size does not fit in size_t.
+ * Without the overflow check the product would wrap around and a block smaller
+ * than the caller expects would be returned. */
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) {
+    void *mem = NULL;
+    size_t total = 0;
+    /* (size_t)-1 is the largest size_t value; dividing avoids overflowing the check itself. */
+    if (num != 0 && size > ((size_t)-1) / num) {
+        return NULL;
+    }
+    total = num * size;
+    mem = uprv_malloc(total);
+    if (mem) {
+        uprv_memset(mem, 0, total);
+    }
+    return mem;
+}
+
+/* Installs user-supplied heap functions together with the context pointer that is
+ * passed back to them. All three functions must be provided; otherwise *status is set
+ * to U_ILLEGAL_ARGUMENT_ERROR and nothing changes. */
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f,  UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    if (!(a != NULL && r != NULL && f != NULL)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    pAlloc    = a;
+    pRealloc  = r;
+    pFree     = f;
+    pContext  = context;
+}
+
+
+/* Forgets any user-supplied heap functions and their context, so that later
+ * allocations go through the default functions again. */
+U_CFUNC UBool cmemory_cleanup(void) {
+    pAlloc     = NULL;
+    pRealloc   = NULL;
+    pFree      = NULL;
+    pContext   = NULL;
+    return TRUE;
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/cmemory.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/cmemory.h
@@ -0,0 +1,736 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CMEMORY.H
+*
+*  Contains stdlib.h/string.h memory functions
+*
+* @author       Bertrand A. Damiba
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/20/98     Bertrand    Created.
+*  05/03/99     stephen     Changed from functions to macros.
+*
+******************************************************************************
+*/
+
+#ifndef CMEMORY_H
+#define CMEMORY_H
+
+#include "unicode/utypes.h"
+
+#include <stddef.h>
+#include <string.h>
+#include "unicode/localpointer.h"
+
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+#include <stdio.h>
+#endif
+
+
+#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
+#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
+
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array (a real array, not a pointer to its first element)
+ * @return The length of the array, in elements, cast to int32_t
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
+#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
+#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num)
+
+U_CAPI void * U_EXPORT2
+uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
+
+U_CAPI void * U_EXPORT2
+uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);  /* size 0 frees mem and returns a placeholder pointer */
+
+U_CAPI void U_EXPORT2
+uprv_free(void *mem);  /* ignores the placeholder pointer that uprv_realloc() returns for size 0 */
+
+U_CAPI void * U_EXPORT2
+uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);  /* uprv_malloc(num*size), zero-filled */
+
+/**
+ * Get the least significant bits of a pointer (a memory address).
+ * For example, with a mask of 3, the macro gets the 2 least significant bits,
+ * which will be 0 if the pointer is 32-bit (4-byte) aligned.
+ *
+ * uintptr_t is the most appropriate integer type to cast to.
+ */
+#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask))
+
+/**
+ * Create & return an instance of "type" in statically allocated storage.
+ * e.g.
+ *    static std::mutex *myMutex = STATIC_NEW(std::mutex);
+ * To destroy an object created in this way, invoke the destructor explicitly, e.g.
+ *    myMutex->~mutex();
+ * DO NOT use delete.
+ * DO NOT use with class UMutex, which has specific support for static instances.
+ *
+ * STATIC_NEW is intended for use when
+ *   - We want a static (or global) object.
+ *   - We don't want it to ever be destructed, or to explicitly control destruction,
+ *     to avoid use-after-destruction problems.
+ *   - We want to avoid an ordinary heap allocated object,
+ *     to avoid the possibility of memory allocation failures, and
+ *     to avoid memory leak reports, from valgrind, for example.
+ * This is defined as a macro rather than a template function because each invocation
+ * must define distinct static storage for the object being returned.
+ */
+#define STATIC_NEW(type) [] () { \
+    alignas(type) static char storage[sizeof(type)]; \
+    return new(storage) type();} ()
+
+/**
+  *  Heap clean up function, called from u_cleanup()
+  *    Clears any user heap functions from u_setMemoryFunctions()
+  *    Does NOT deallocate any remaining allocated memory.
+  */
+U_CFUNC UBool 
+cmemory_cleanup(void);
+
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
+#ifdef __cplusplus
+
+#include <utility>
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" class, deletes memory via uprv_free().
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * @see LocalPointerBase
+ */
+template<typename T>
+class LocalMemory : public LocalPointerBase<T> {
+public:
+    using LocalPointerBase<T>::operator*;
+    using LocalPointerBase<T>::operator->;
+    /**
+     * Adopts p; the memory is later released with uprv_free().
+     * @param p plain pointer to an array of T items whose ownership is taken over
+     */
+    explicit LocalMemory(T *p=nullptr) : LocalPointerBase<T>(p) {}
+    /**
+     * Move constructor: takes over src's pointer and leaves src with a null pointer.
+     * @param src smart pointer to move from
+     */
+    LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
+        src.ptr=nullptr;
+    }
+    /**
+     * Destructor: passes the owned pointer to uprv_free().
+     */
+    ~LocalMemory() {
+        uprv_free(this->ptr);
+    }
+    /**
+     * Move assignment: frees the current memory, then takes over src's pointer
+     * and leaves src with a null pointer. Do not assign an object to itself.
+     * @param src smart pointer to move from
+     * @return *this
+     */
+    LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
+        uprv_free(this->ptr);
+        this->ptr=src.ptr;
+        src.ptr=nullptr;
+        return *this;
+    }
+    /**
+     * Exchanges the owned pointers of this object and other.
+     * @param other smart pointer to trade pointers with
+     */
+    void swap(LocalMemory<T> &other) U_NOEXCEPT {
+        T *mine=this->ptr;
+        this->ptr=other.ptr;
+        other.ptr=mine;
+    }
+    /**
+     * Free-function form of swap() for LocalMemory.
+     * @param p1 ends up with p2's pointer
+     * @param p2 ends up with p1's pointer
+     */
+    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
+        p1.swap(p2);
+    }
+    /**
+     * Frees the array currently owned,
+     * then takes ownership of p in its place.
+     * @param p plain pointer to an array of T items whose ownership is taken over
+     */
+    void adoptInstead(T *p) {
+        uprv_free(this->ptr);
+        this->ptr=p;
+    }
+    /**
+     * Replaces the owned array with a newly allocated one whose bytes are all 0
+     * and returns the new array pointer.
+     * If the allocation fails, the current array is kept and
+     * this method returns NULL.
+     * @param newCapacity number of T items to allocate; must be >0
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
+    /**
+     * Replaces the owned array with a newly allocated one after copying
+     * length T items from the old array into it; returns the new array pointer.
+     * If the allocation fails, the current array is kept and
+     * this method returns NULL.
+     * @param newCapacity number of T items to allocate; must be >0
+     * @param length number of T items to be copied from the old array to the new one;
+     *               must be no more than the capacity of the old array,
+     *               which the caller must track because the LocalMemory does not track it
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
+    /**
+     * Unchecked array indexing; the returned reference is writable.
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) const { return this->ptr[i]; }
+};
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
+    if(newCapacity<=0) {
+        return nullptr;  // illegal capacity: the current array is left alone
+    }
+    const size_t numBytes=newCapacity*sizeof(T);
+    T *fresh=(T *)uprv_malloc(numBytes);
+    if(fresh!=nullptr) {  // the old array is only given up once the new one exists
+        uprv_memset(fresh, 0, numBytes);
+        uprv_free(this->ptr);
+        this->ptr=fresh;
+    }
+    return fresh;
+}
+
+
+template<typename T>
+inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
+    if(newCapacity<=0) {
+        return nullptr;  // illegal capacity: the current array is left alone
+    }
+    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
+    if(fresh==nullptr) {
+        return nullptr;  // allocation failed: the current array is kept
+    }
+    if(length>newCapacity) {
+        length=newCapacity;  // never copy more items than the new array can hold
+    }
+    if(length>0) {
+        uprv_memcpy(fresh, this->ptr, (size_t)length*sizeof(T));
+    }
+    uprv_free(this->ptr);
+    this->ptr=fresh;
+    return fresh;
+}
+
+/**
+ * Simple array/buffer management class using uprv_malloc() and uprv_free().
+ * Provides an internal array with fixed capacity. Can alias another array
+ * or allocate one.
+ *
+ * The array address is properly aligned for type T. It might not be properly
+ * aligned for types larger than T (or larger than the largest subtype of T).
+ *
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another array.
+ *
+ * WARNING: MaybeStackArray only works with primitive (plain-old data) types.
+ * It does NOT know how to call a destructor! If you work with classes with
+ * destructors, consider LocalArray in localpointer.h or MemoryPool.
+ */
+template<typename T, int32_t stackCapacity>
+class MaybeStackArray {
+public:
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+    /**
+     * Default constructor initializes with internal T[stackCapacity] buffer.
+     */
+    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
+    /**
+     * Starts on the internal buffer and calls resize() if newCapacity exceeds stackCapacity.
+     * Intended for use when an approximate capacity is known at compile time but the true
+     * capacity is only known at runtime. A failed resize() is not reported: check getCapacity().
+     */
+    MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
+        if (capacity < newCapacity) { resize(newCapacity); }
+    }
+    /**
+     * Destructor deletes the array (if owned).
+     */
+    ~MaybeStackArray() { releaseArray(); }
+    /**
+     * Move constructor: transfers ownership or copies the stack array.
+     */
+    MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+    /**
+     * Move assignment: transfers ownership or copies the stack array.
+     */
+    MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+    /**
+     * Returns the array capacity (number of T items).
+     * @return array capacity
+     */
+    int32_t getCapacity() const { return capacity; }
+    /**
+     * Access without ownership change.
+     * @return the array pointer
+     */
+    T *getAlias() const { return ptr; }
+    /**
+     * Returns the array limit. Simple convenience method.
+     * @return getAlias()+getCapacity()
+     */
+    T *getArrayLimit() const { return getAlias()+capacity; }
+    // No "operator T *() const" because that can make
+    // expressions like mbs[index] ambiguous for some compilers.
+    /**
+     * Array item access (const).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    const T &operator[](ptrdiff_t i) const { return ptr[i]; }
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) { return ptr[i]; }
+    /**
+     * Deletes the array (if owned) and aliases another one, no transfer of ownership.
+     * If the arguments are illegal, then the current array is unchanged.
+     * @param otherArray must not be NULL
+     * @param otherCapacity must be >0
+     */
+    void aliasInstead(T *otherArray, int32_t otherCapacity) {
+        if(otherArray!=NULL && otherCapacity>0) {
+            releaseArray();
+            ptr=otherArray;
+            capacity=otherCapacity;
+            needToRelease=FALSE;
+        }
+    }
+    /**
+     * Deletes the array (if owned) and allocates a new one, copying length T items.
+     * Returns the new array pointer.
+     * If the allocation fails, then the current array is unchanged and
+     * this method returns NULL.
+     * @param newCapacity can be less than or greater than the current capacity;
+     *                    must be >0
+     * @param length number of T items to be copied from the old array to the new one
+     * @return the allocated array pointer, or NULL if the allocation failed
+     */
+    inline T *resize(int32_t newCapacity, int32_t length=0);
+    /**
+     * Gives up ownership of the array if owned, or else clones it,
+     * copying length T items; resets itself to the internal stack array.
+     * Returns NULL if the allocation failed.
+     * @param length number of T items to copy when cloning,
+     *        and capacity of the clone when cloning
+     * @param resultCapacity will be set to the returned array's capacity (output-only)
+     * @return the array pointer;
+     *         caller becomes responsible for deleting the array
+     */
+    inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+    T *ptr;
+    int32_t capacity;
+    UBool needToRelease;
+    T stackArray[stackCapacity];
+    void releaseArray() {
+        if(needToRelease) {
+            uprv_free(ptr);
+        }
+    }
+    void resetToStackArray() {
+        ptr=stackArray;
+        capacity=stackCapacity;
+        needToRelease=FALSE;
+    }
+    /* No comparison operators with other MaybeStackArray's (deleted: any use fails to compile). */
+    bool operator==(const MaybeStackArray & /*other*/) = delete;
+    bool operator!=(const MaybeStackArray & /*other*/) = delete;
+    /* No ownership transfer by copy: deleted, so no copy can be built with unset members. */
+    MaybeStackArray(const MaybeStackArray & /*other*/) = delete;
+    void operator=(const MaybeStackArray & /*other*/) = delete;
+};
+
+template<typename T, int32_t stackCapacity>
+icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
+        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
+        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
+    if (src.ptr != src.stackArray) {
+        src.resetToStackArray();  // src's external array now belongs to this object
+    } else {  // src was using its internal buffer: copy the contents instead
+        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
+        ptr = stackArray;
+    }
+}
+
+template<typename T, int32_t stackCapacity>
+inline MaybeStackArray <T, stackCapacity>&
+MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
+    releaseArray();  // free whatever heap array this instance currently owns
+    needToRelease = src.needToRelease;
+    capacity = src.capacity;
+    if (src.ptr != src.stackArray) {
+        ptr = src.ptr;
+        src.resetToStackArray();  // src must no longer refer to the transferred array
+    } else {
+        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
+        ptr = stackArray;
+    }
+    return *this;
+}
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
+    if(newCapacity<=0) {
+        return nullptr;  // illegal capacity: nothing is changed
+    }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
+#endif
+    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
+    if(fresh==nullptr) {
+        return nullptr;  // allocation failed: the current array stays in place
+    }
+    if(length>0) {
+        if(length>capacity) {
+            length=capacity;  // cannot copy more items than the old array holds
+        }
+        if(length>newCapacity) {
+            length=newCapacity;  // nor more than the new array can take
+        }
+        uprv_memcpy(fresh, ptr, (size_t)length*sizeof(T));
+    }
+    releaseArray();
+    ptr=fresh;
+    capacity=newCapacity;
+    needToRelease=TRUE;
+    return fresh;
+}
+
+template<typename T, int32_t stackCapacity>
+inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
+    T *result=ptr;  // an array flagged needToRelease is handed over as it is
+    if(!needToRelease) {
+        if(length<=0) {
+            return nullptr;
+        }
+        if(length>capacity) {
+            length=capacity;  // cannot clone more items than the array holds
+        }
+        result=(T *)uprv_malloc(length*sizeof(T));
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
+#endif
+        if(result==nullptr) {
+            return nullptr;
+        }
+        uprv_memcpy(result, ptr, (size_t)length*sizeof(T));
+    }
+    // In the hand-over case length is reported exactly as the caller passed it in.
+    resultCapacity=length;
+    resetToStackArray();
+    return result;
+}
+
+/**
+ * Variant of MaybeStackArray that allocates a header struct and an array
+ * in one contiguous memory block, using uprv_malloc() and uprv_free().
+ * Provides internal memory with fixed array capacity. Can alias another memory
+ * block or allocate one.
+ * The stackCapacity is the number of T items in the internal memory,
+ * not counting the H header.
+ * Unlike LocalMemory and LocalArray, this class never adopts
+ * (takes ownership of) another memory block.
+ */
+template<typename H, typename T, int32_t stackCapacity>
+class MaybeStackHeaderAndArray {
+public:
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+    /**
+     * Default constructor initializes with internal H+T[stackCapacity] buffer.
+     */
+    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
+    /**
+     * Destructor deletes the memory (if owned).
+     */
+    ~MaybeStackHeaderAndArray() { releaseMemory(); }
+    /**
+     * Returns the array capacity (number of T items).
+     * @return array capacity
+     */
+    int32_t getCapacity() const { return capacity; }
+    /**
+     * Access without ownership change.
+     * @return the header pointer
+     */
+    H *getAlias() const { return ptr; }
+    /**
+     * Returns the array start.
+     * @return array start, same address as getAlias()+1
+     */
+    T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
+    /**
+     * Returns the array limit.
+     * @return array limit
+     */
+    T *getArrayLimit() const { return getArrayStart()+capacity; }
+    /**
+     * Access without ownership change. Same as getAlias().
+     * A class instance can be used directly in expressions that take an H *.
+     * @return the header pointer
+     */
+    operator H *() const { return ptr; }
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return reference to the array item
+     */
+    T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
+    /**
+     * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
+     * If the arguments are illegal, then the current memory is unchanged.
+     * @param otherMemory must not be NULL
+     * @param otherCapacity must be >0
+     */
+    void aliasInstead(H *otherMemory, int32_t otherCapacity) {
+        if(otherMemory!=NULL && otherCapacity>0) {
+            releaseMemory();
+            ptr=otherMemory;
+            capacity=otherCapacity;
+            needToRelease=FALSE;
+        }
+    }
+    /**
+     * Deletes the memory block (if owned) and allocates a new one,
+     * copying the header and length T array items.
+     * Returns the new header pointer.
+     * If the allocation fails, then the current memory is unchanged and
+     * this method returns NULL.
+     * @param newCapacity can be less than or greater than the current capacity;
+     *                    must be >=0 (negative values make this method return NULL)
+     * @param length number of T items to be copied from the old array to the new one
+     * @return the allocated pointer, or NULL if the allocation failed
+     */
+    inline H *resize(int32_t newCapacity, int32_t length=0);
+    /**
+     * Gives up ownership of the memory if owned, or else clones it,
+     * copying the header and length T array items; resets itself to the internal memory.
+     * Returns NULL if the allocation failed.
+     * @param length number of T items to copy when cloning,
+     *        and array capacity of the clone when cloning
+     * @param resultCapacity will be set to the returned array's capacity (output-only)
+     * @return the header pointer;
+     *         caller becomes responsible for deleting the array
+     */
+    inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
+private:
+    H *ptr;
+    int32_t capacity;
+    UBool needToRelease;
+    // stackHeader must precede stackArray immediately.
+    H stackHeader;
+    T stackArray[stackCapacity];
+    void releaseMemory() {
+        if(needToRelease) {
+            uprv_free(ptr);
+        }
+    }
+    /* No comparison operators with other MaybeStackHeaderAndArray's (deleted: any use fails to compile). */
+    bool operator==(const MaybeStackHeaderAndArray & /*other*/) = delete;
+    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) = delete;
+    /* No ownership transfer by copy: deleted, so no copy can be built with unset members. */
+    MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) = delete;
+    void operator=(const MaybeStackHeaderAndArray & /*other*/) = delete;
+};
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
+                                                                int32_t length) {
+    if(newCapacity>=0) {  // 0 is accepted: the block then holds only the header
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),newCapacity,(unsigned long)sizeof(T));
+#endif
+        H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
+        if(p!=NULL) {
+            if(length<0) {
+                length=0;
+            } else if(length>0) {
+                if(length>capacity) {
+                    length=capacity;  // cannot copy more items than the old array holds
+                }
+                if(length>newCapacity) {
+                    length=newCapacity;  // nor more than the new array can take
+                }
+            }
+            uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));  // header plus length items
+            releaseMemory();
+            ptr=p;
+            capacity=newCapacity;
+            needToRelease=TRUE;
+        }
+        return p;  // NULL here means the allocation failed and nothing was changed
+    } else {
+        return NULL;
+    }
+}
+
+template<typename H, typename T, int32_t stackCapacity>
+inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
+                                                                       int32_t &resultCapacity) {
+    H *p;
+    if(needToRelease) {
+        p=ptr;  // owned block: hand it over as it is
+    } else {
+        if(length<0) {
+            length=0;
+        } else if(length>capacity) {
+            length=capacity;  // cannot clone more items than the array holds
+        }
+#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
+      ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %lu + %d * %lu\n", (unsigned long)sizeof(H),length,(unsigned long)sizeof(T));
+#endif
+        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
+        if(p==NULL) {
+            return NULL;
+        }
+        uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));  // header plus length items
+    }
+    resultCapacity=length;  // in the hand-over case this is length exactly as passed in
+    ptr=&stackHeader;
+    capacity=stackCapacity;
+    needToRelease=FALSE;
+    return p;
+}
+
+/**
+ * A simple memory management class that creates new heap allocated objects (of
+ * any class that has a public constructor), keeps track of them and eventually
+ * deletes them all in its own destructor.
+ *
+ * A typical use-case would be code like this:
+ *
+ *     MemoryPool<MyType> pool;
+ *
+ *     MyType* o1 = pool.create();
+ *     if (o1 != nullptr) {
+ *         foo(o1);
+ *     }
+ *
+ *     MyType* o2 = pool.create(1, 2, 3);
+ *     if (o2 != nullptr) {
+ *         bar(o2);
+ *     }
+ *
+ *     // MemoryPool will take care of deleting the MyType objects.
+ *
+ * It doesn't do anything more than that, and is intentionally kept minimalist.
+ */
+template<typename T, int32_t stackCapacity = 8>
+class MemoryPool : public UMemory {
+public:
+    MemoryPool() : count(0), pool() {}
+
+    ~MemoryPool() {
+        for (int32_t i = 0; i < count; ++i) {
+            delete pool[i];  // every entry below count was produced by create()
+        }
+    }
+
+    MemoryPool(const MemoryPool&) = delete;
+    MemoryPool& operator=(const MemoryPool&) = delete;
+
+    MemoryPool(MemoryPool&& other) U_NOEXCEPT : count(other.count),
+                                                pool(std::move(other.pool)) {
+        other.count = 0;  // other must not delete the objects it just gave away
+    }
+
+    MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT {
+        // Swap, don't overwrite: other's destructor then deletes what this pool held.
+        std::swap(count, other.count);
+        std::swap(pool, other.pool);
+        return *this;
+    }
+
+    /**
+     * Creates a new object of typename T, by forwarding any and all arguments
+     * to the typename T constructor.
+     *
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the newly created object, or nullptr on error.
+     */
+    template<typename... Args>
+    T* create(Args&&... args) {
+        int32_t capacity = pool.getCapacity();
+        if (count == capacity &&
+            pool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity,
+                        capacity) == nullptr) {
+            return nullptr;  // the pointer array is full and could not be grown
+        }
+        return pool[count++] = new T(std::forward<Args>(args)...);
+    }
+
+private:
+    int32_t count;
+    MaybeStackArray<T*, stackCapacity> pool;
+};
+
+U_NAMESPACE_END
+
+#endif  /* __cplusplus */
+#endif  /* CMEMORY_H */
--- a/external/duckdb/extension/icu/third_party/icu/common/cpputils.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/cpputils.h
@@ -0,0 +1,97 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  cpputils.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*/
+
+#ifndef CPPUTILS_H
+#define CPPUTILS_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "cmemory.h"
+
+/*==========================================================================*/
+/* Array copy utility functions */
+/*==========================================================================*/
+
+// static
+// inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
+// { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+// static
+// inline void uprv_arrayCopy(const double* src, int32_t srcStart,
+//               double* dst, int32_t dstStart, int32_t count)
+// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
+    { uprv_memcpy(dst, src, sizeof(int8_t) * (size_t)count); }  // count is in elements
+
+// static
+// inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
+//               int8_t* dst, int32_t dstStart, int32_t count)
+// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+// static
+// inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
+// { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); }
+
+// static
+// inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
+//               int16_t* dst, int32_t dstStart, int32_t count)
+// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+static
+inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
+{ uprv_memcpy(dst, src, sizeof(int32_t) * (size_t)count); }  // count is in elements, not bytes
+
+// static
+// inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
+//               int32_t* dst, int32_t dstStart, int32_t count)
+// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+// static
+// inline void
+// uprv_arrayCopy(const UChar *src, int32_t srcStart,
+//         UChar *dst, int32_t dstStart, int32_t count)
+// { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }
+
+/**
+ * Assigns count UnicodeString OBJECTS (not pointers) from src to dst, front to back.
+ * @internal
+ */
+static inline void
+uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count)
+{ for(int32_t i=0; i<count; ++i) { dst[i] = src[i]; } }
+
+/**
+ * Assigns count UnicodeString OBJECTS (not pointers), from src+srcStart to dst+dstStart.
+ * @internal
+ */
+static inline void
+uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
+               icu::UnicodeString *dst, int32_t dstStart, int32_t count)
+{ src+=srcStart; dst+=dstStart; while(count-- > 0) { *dst++ = *src++; } }
+
+/**
+ * Sets errorCode to U_ILLEGAL_ARGUMENT_ERROR if no error is pending and s.isBogus().
+ * NOTE(review): said to cover an open getBuffer() too; only isBogus() is tested - confirm.
+ */
+inline void
+uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
+    // Nothing to do when an error is already recorded or the string is usable.
+    if(U_FAILURE(errorCode) || !s.isBogus()) { return; }
+    errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+}
+
+#endif /* _CPPUTILS */
--- a/external/duckdb/extension/icu/third_party/icu/common/cstr.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/cstr.cpp
@@ -0,0 +1,54 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2015-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  cstr.cpp
+*/
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/unistr.h"
+
+#include "cstr.h"
+
+#include "charstr.h"
+#include "uinvchar.h"
+
+U_NAMESPACE_BEGIN
+
+CStr::CStr(const UnicodeString &in) {
+    UErrorCode status = U_ZERO_ERROR;  // local only: failures recorded here are not reported to the caller
+#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
+    int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));  // presumably a preflight that only yields the needed length - confirm against UnicodeString::extract()
+    int32_t resultCapacity = 0;
+    char *buf = s.getAppendBuffer(length, length, resultCapacity, status);  // presumably room for >= length chars at the end of s - verify against CharString
+    if (U_SUCCESS(status)) {
+        in.extract(0, in.length(), buf, resultCapacity);  // second extract() call writes into buf
+        s.append(buf, length, status);  // NOTE(review): buf came from s itself; append() presumably just commits those chars - confirm
+    }
+#else
+    // No conversion available. Convert any invariant characters; substitute '?' for the rest.
+    // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the 
+    //       whole string because they require that the entire input be invariant.
+    char buf[2];
+    for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
+        if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
+            u_UCharsToChars(in.getBuffer()+i, buf, 1);
+        } else {
+            buf[0] = '?';
+        }
+        s.append(buf, 1, status);  // one output char is appended per loop step
+    }
+#endif
+}
+
+CStr::~CStr() {  // intentionally empty: no explicit cleanup is performed here
+}
+
+const char * CStr::operator ()() const {
+    return this->s.data();  // pointer into member s, so it is only usable while this CStr exists
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/cstr.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/cstr.h
@@ -0,0 +1,60 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File: cstr.h
+*/
+
+#ifndef CSTR_H
+#define CSTR_H
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+
+#include "charstr.h"
+
+/**
+ * ICU-internal class CStr, a small helper class to facilitate passing UnicodeStrings
+ * to functions needing (const char *) strings, such as printf().
+ *
+ * It is intended primarily for use in debugging or in tests. Uses platform 
+ * default code page conversion, which will do the best job possible,
+ * but may be lossy, depending on the platform.
+ *
+ * If no other conversion is available, use invariant conversion and substitute
+ * '?' for non-invariant characters.
+ *
+ * Example Usage:
+ *   UnicodeString s = whatever;
+ *   printf("%s", CStr(s)());
+ *
+ *   The explicit call to the CStr() constructor creates a temporary object.
+ *   Operator () on the temporary object returns a (const char *) pointer.
+ *   The lifetime of the (const char *) data is that of the temporary object,
+ *   which works well when passing it as a parameter to another function, such as printf.
+ */
+
+U_NAMESPACE_BEGIN
+
+class U_COMMON_API CStr : public UMemory {
+  public:
+    CStr(const UnicodeString &in);      //  Converts in to char form and stores the result in s.
+    ~CStr();
+    const char * operator ()() const;   //  Returns the converted text; it lives only as long as this object.
+
+  private:
+    CharString s;                          //  Holds the converted characters.
+    CStr(const CStr &other);               //  Forbid copying of this class.
+    CStr &operator =(const CStr &other);   //  Forbid assignment.
+};
+
+U_NAMESPACE_END
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/cstring.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/cstring.cpp
@@ -0,0 +1,341 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.C
+*
+* @author       Helena Shih
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/18/98     hshih       Created
+*   09/08/98    stephen     Added include for ctype, for Mac Port
+*   11/15/99    helena      Integrated S/390 IEEE changes. 
+******************************************************************************
+*/
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+
+/*
+ * We hardcode case conversion for invariant characters to match our expectation
+ * and the compiler execution charset.
+ * This prevents problems on systems
+ * - with non-default casing behavior, like Turkish system locales where
+ *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
+ * - where there are no lowercase Latin characters at all, or using different
+ *   codes (some old EBCDIC codepages)
+ *
+ * This works because the compiler usually runs on a platform where the execution
+ * charset includes all of the invariant characters at their expected
+ * code positions, so that the char * string literals in ICU code match
+ * the char literals here.
+ *
+ * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
+ * and the set of uppercase Latin letters is discontiguous as well.
+ */
+
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    const UBool isLower = (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');  /* three runs per case in EBCDIC */
+    const UBool isUpper = (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
+    return isLower || isUpper;
+#else
+    return (c>='a' && c<='z') || (c>='A' && c<='Z');
+#endif
+}
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c) {
+    /* A lowercase letter becomes uppercase by adding the constant ('A'-'a'). */
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    const UBool isLower = (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');
+#else
+    const UBool isLower = (c>='a' && c<='z');
+#endif
+    if(!isLower) {
+        return c;
+    }
+    return (char)(c+('A'-'a'));
+}
+
+
+#if 0
+/*
+ * Compiled out (#if 0) because cstring.h defines uprv_tolower() to be
+ * the same as either uprv_asciitolower() or uprv_ebcdictolower()
+ * to reduce the amount of code to cover with tests.
+ *
+ * Note that this uprv_tolower() definition is likely to work for most
+ * charset families, not just ASCII and EBCDIC, because its #else branch
+ * is written generically.
+ */
+U_CAPI char U_EXPORT2
+uprv_tolower(char c) {
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
+        c=(char)(c+('a'-'A'));
+    }
+#else
+    if('A'<=c && c<='Z') {
+        c=(char)(c+('a'-'A'));
+    }
+#endif
+    return c;
+}
+#endif
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c) {
+    /*
+     * 0x41..0x5a are ASCII 'A'..'Z'; adding 0x20 gives the lowercase letter.
+     */
+    return (c>=0x41 && c<=0x5a) ? (char)(c+0x20) : c;
+}
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c) {
+    /* Byte ranges 0xc1..0xc9, 0xd1..0xd9 and 0xe2..0xe9 are lowered by */
+    /* subtracting 0x40; every other byte value is returned untouched.  */
+    const uint8_t b = (uint8_t)c;
+    if((b>=0xc1 && b<=0xc9) || (b>=0xd1 && b<=0xd9) || (b>=0xe2 && b<=0xe9)) {
+        return (char)(c-0x40);
+    }
+    return c;
+}
+
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str)
+{
+    /* Lowercases str in place and returns it; NULL is handed straight back. */
+    if (!str) {
+        return str;
+    }
+
+    for (char* cursor = str; *cursor != 0; ++cursor) {
+        *cursor = (char)uprv_tolower(*cursor);
+    }
+    return str;
+}
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str)
+{
+    /* Uppercases str in place and returns it; NULL is handed straight back. */
+    if (!str) {
+        return str;
+    }
+
+    for (char* cursor = str; *cursor != 0; ++cursor) {
+        *cursor = (char)uprv_toupper(*cursor);
+    }
+    return str;
+}
+
+/*
+ * Formats the int32_t v into buffer as a NUL-terminated string in base "radix" (2..16).
+ * Only base 10 is signed ('-' prefix); in other bases v is formatted as its uint32_t bit pattern.
+ * Writes at most 33 chars (32 binary digits plus NUL); in base 10 at most 12 ("-2147483648" plus NUL).
+ * Returns the length of the string (not including the NUL).
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
+{
+    char      tbuf[33];   /* Worst case is radix 2: 32 digits plus NUL. */
+    int32_t   tbx    = sizeof(tbuf)-1;
+    uint8_t   digit;
+    int32_t   length = 0;
+    uint32_t  uval;
+    
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint32_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we consider numbers to be signed. */
+        uval = 0 - uval;   /* Negate as unsigned; -v would overflow for INT32_MIN. */
+        buffer[length++] = '-';
+    }
+    
+    /* We are generating the digits backwards, so NUL-terminate the end first. */
+    tbuf[tbx] = 0;
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+    
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+
+/*
+ * Formats the int64_t v into buffer as a NUL-terminated string in base "radix" (2..16); only base 10 is signed.
+ * Writes at most 65 chars (64 binary digits plus NUL); in base 10 at most 21 ("-9223372036854775808" plus NUL).
+ * Returns the length of the string, not including the terminating NUL.
+ */
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
+{
+    char      tbuf[65];   /* Worst case is radix 2: 64 digits plus NUL. */
+    int32_t   tbx    = sizeof(tbuf)-1;
+    uint8_t   digit;
+    int32_t   length = 0;
+    uint64_t  uval;
+    
+    U_ASSERT(radix>=2 && radix<=16);
+    uval = (uint64_t) v;
+    if(v<0 && radix == 10) {
+        /* Only in base 10 do we consider numbers to be signed. */
+        uval = 0 - uval;   /* Negate as unsigned; -v would overflow for INT64_MIN. */
+        buffer[length++] = '-';
+    }
+    
+    /* We are generating the digits backwards, so NUL-terminate the end first. */
+    tbuf[tbx] = 0;
+    do {
+        digit = (uint8_t)(uval % radix);
+        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
+        uval  = uval / radix;
+    } while (uval != 0);
+    
+    /* copy converted number into user buffer  */
+    uprv_strcpy(buffer+length, tbuf+tbx);
+    length += sizeof(tbuf) - tbx -1;
+    return length;
+}
+
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix)
+{
+    char *parseEnd;   /* receives the end-of-parse position; it is not inspected */
+    const unsigned long parsed = uprv_strtoul(integerString, &parseEnd, radix);
+    return (int32_t)parsed;   /* same narrowing the direct return performed */
+}
+
+/*
+ * Compares two NUL-terminated strings, ignoring case as defined by
+ * uprv_tolower().
+ *
+ * A NULL pointer sorts before any non-NULL string; two NULL pointers
+ * compare equal.
+ *
+ * Returns 0 for a match, a negative value when str1 sorts first and a
+ * positive value when str2 sorts first.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2) {
+    if(str1==NULL || str2==NULL) {
+        if(str1==str2) {
+            return 0;
+        }
+        return str1==NULL ? -1 : 1;
+    }
+
+    /* compare non-NULL strings lexically with lowercase */
+    for(;; ++str1, ++str2) {
+        const unsigned char c1=(unsigned char)*str1;
+        const unsigned char c2=(unsigned char)*str2;
+        if(c1==0 || c2==0) {
+            /* at least one string ended; the one that ended sorts first */
+            if(c1==c2) {
+                return 0;
+            }
+            return c1==0 ? -1 : 1;
+        }
+
+        /* compare non-zero characters with lowercase */
+        const int rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+        if(rc!=0) {
+            return rc;
+        }
+    }
+}
+
+/*
+ * Like uprv_stricmp(), but examines at most n characters of each string.
+ *
+ * A NULL pointer sorts before any non-NULL string; two NULL pointers
+ * compare equal, whatever the value of n.
+ *
+ * Two non-NULL strings that agree over their first n characters compare
+ * equal, which includes every pair of non-NULL strings when n is 0.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
+    if(str1==NULL || str2==NULL) {
+        if(str1==str2) {
+            return 0;
+        }
+        return str1==NULL ? -1 : 1;
+    }
+
+    /* compare non-NULL strings lexically with lowercase */
+    for(; n!=0; --n, ++str1, ++str2) {
+        const unsigned char c1=(unsigned char)*str1;
+        const unsigned char c2=(unsigned char)*str2;
+        if(c1==0 || c2==0) {
+            /* at least one string ended; the one that ended sorts first */
+            if(c1==c2) {
+                return 0;
+            }
+            return c1==0 ? -1 : 1;
+        }
+
+        /* compare non-zero characters with lowercase */
+        const int rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
+        if(rc!=0) {
+            return rc;
+        }
+    }
+
+    /* the first n characters matched */
+    return 0;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src) {
+    /* The byte count includes the terminating NUL, so it is copied as well. */
+    const size_t byteCount = uprv_strlen(src) + 1;
+    char *copy = (char *) uprv_malloc(byteCount);
+    if (copy == NULL) {
+        return NULL;
+    }
+    uprv_memcpy(copy, src, byteCount);
+    return copy;
+}
+
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n) {
+    /* A negative n hands the whole job to uprv_strdup(). */
+    if(n < 0) {
+        return uprv_strdup(src);
+    }
+
+    /* Otherwise copy exactly n bytes and add the terminator ourselves. */
+    char *copy = (char*)uprv_malloc(n+1);
+    if (copy == NULL) {
+        return NULL;
+    }
+    uprv_memcpy(copy, src, n);
+    copy[n] = 0;
+    return copy;
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/cstring.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/cstring.h
@@ -0,0 +1,124 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File CSTRING.H
+*
+* Contains CString interface
+*
+* @author       Helena Shih
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   6/17/98     hshih       Created.
+*  05/03/99     stephen     Changed from functions to macros.
+*  06/14/99     stephen     Added icu_strncat, icu_strncmp, icu_tolower
+*
+******************************************************************************
+*/
+
+#ifndef CSTRING_H
+#define CSTRING_H 1
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE  strcpy(dst, src)
+#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
+#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
+#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
+#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
+#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
+#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
+#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
+#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
+#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
+
+/**
+ * Is c an ASCII-repertoire letter a-z or A-Z?
+ * Note: The implementation is specific to whether ICU is compiled for
+ * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isASCIILetter(char c);
+
+U_CAPI char U_EXPORT2
+uprv_toupper(char c);
+
+
+U_CAPI char U_EXPORT2
+uprv_asciitolower(char c);
+
+U_CAPI char U_EXPORT2
+uprv_ebcdictolower(char c);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_tolower uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_tolower uprv_ebcdictolower
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+
+#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
+#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
+#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
+
+/* Conversion from a digit to the character with radix base from 2-19 */
+/* May need to use U_UPPER_ORDINAL*/
+#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
+
+U_CAPI char* U_EXPORT2
+uprv_strdup(const char *src);
+
+/**
+ * uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
+ * Terminate with a null at offset n.   If n is negative, works like uprv_strdup
+ * @param src
+ * @param n length of the input string, not including null.
+ * @return new string (owned by caller, use uprv_free to free).
+ * @internal
+ */
+U_CAPI char* U_EXPORT2
+uprv_strndup(const char *src, int32_t n);
+
+U_CAPI char* U_EXPORT2
+T_CString_toLowerCase(char* str);   /* Lowercases str in place via uprv_tolower(); returns str (NULL is allowed). */
+
+U_CAPI char* U_EXPORT2
+T_CString_toUpperCase(char* str);   /* Uppercases str in place via uprv_toupper(); returns str (NULL is allowed). */
+
+U_CAPI int32_t U_EXPORT2
+T_CString_integerToString(char *buffer, int32_t n, int32_t radix);   /* radix 2..16; returns the length written, excluding the NUL. */
+
+U_CAPI int32_t U_EXPORT2
+T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);   /* radix 2..16; returns the length written, excluding the NUL. */
+
+U_CAPI int32_t U_EXPORT2
+T_CString_stringToInteger(const char *integerString, int32_t radix);   /* Parses with strtoul() and returns the value as int32_t. */
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_stricmp(const char *str1, const char *str2);
+
+/**
+ * Case-insensitive, language-independent string comparison
+ * limited to the ASCII character repertoire.
+ */
+U_CAPI int U_EXPORT2
+uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
+
+#endif /* ! CSTRING_H */
--- a/external/duckdb/extension/icu/third_party/icu/common/cwchar.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/cwchar.cpp
@@ -0,0 +1,55 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*  
+******************************************************************************
+*
+*   Copyright (C) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  cwchar.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !U_HAVE_WCSCPY
+
+#include "cwchar.h"
+
+U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
+    /* Find the terminator of dst, then copy src (terminator included) over it. */
+    wchar_t *tail=dst;
+    for(; *tail!=0; ++tail) {
+    }
+    for(;; ++tail, ++src) {
+        *tail=*src;
+        if(*tail==0) {
+            return dst;
+        }
+    }
+}
+
+U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
+    /* Copy up to and including the terminating zero; hand back dst. */
+    wchar_t *out=dst;
+    do {
+        *out=*src++;
+    } while(*out++!=0);
+    return dst;
+}
+
+U_CAPI size_t uprv_wcslen(const wchar_t *src) {
+    size_t count=0;   /* wide characters seen before the terminating zero */
+    while(src[count]!=0) {
+        ++count;
+    }
+    return count;
+}
+
+#endif
+
--- a/external/duckdb/extension/icu/third_party/icu/common/cwchar.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/cwchar.h
@@ -0,0 +1,58 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*  
+******************************************************************************
+*
+*   Copyright (C) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  cwchar.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001may25
+*   created by: Markus W. Scherer
+*
+*   This file contains ICU-internal definitions of wchar_t operations.
+*   These definitions were moved here from cstring.h so that fewer
+*   ICU implementation files include wchar.h.
+*/
+
+#ifndef __CWCHAR_H__
+#define __CWCHAR_H__
+
+#include <string.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+
+/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
+#if U_HAVE_WCHAR_H
+#   include <wchar.h>
+#endif
+
+/*===========================================================================*/
+/* Wide-character functions                                                  */
+/*===========================================================================*/
+
+/* The following are not available on all systems, defined in wchar.h or string.h. */
+#if U_HAVE_WCSCPY
+#   define uprv_wcscpy wcscpy
+#   define uprv_wcscat wcscat
+#   define uprv_wcslen wcslen
+#else
+U_CAPI wchar_t* U_EXPORT2 
+uprv_wcscpy(wchar_t *dst, const wchar_t *src);   /* Fallback used when U_HAVE_WCSCPY is 0; defined in cwchar.cpp. */
+U_CAPI wchar_t* U_EXPORT2 
+uprv_wcscat(wchar_t *dst, const wchar_t *src);   /* Fallback used when U_HAVE_WCSCPY is 0; defined in cwchar.cpp. */
+U_CAPI size_t U_EXPORT2 
+uprv_wcslen(const wchar_t *src);                 /* Fallback used when U_HAVE_WCSCPY is 0; defined in cwchar.cpp. */
+#endif
+
+/* The following are part of the ANSI C standard, defined in stdlib.h . */
+#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
+#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
+
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/dictbe.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/dictbe.cpp
--- a/external/duckdb/extension/icu/third_party/icu/common/dictbe.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/dictbe.h
@@ -0,0 +1,402 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2014, International Business Machines Corporation   *
+ * and others. All Rights Reserved.                                            *
+ *******************************************************************************
+ */
+
+#ifndef DICTBE_H
+#define DICTBE_H
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/utext.h"
+
+#include "brkeng.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+class DictionaryMatcher;
+class Normalizer2;
+
+/*******************************************************************
+ * DictionaryBreakEngine
+ */
+
+/**
+ * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
+ * dictionary to determine language-specific breaks.</p>
+ *
+ * <p>After it is constructed a DictionaryBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class DictionaryBreakEngine : public LanguageBreakEngine {
+ private:
+    /**
+     * The set of characters handled by this engine
+     * @internal
+     */
+
+  UnicodeSet    fSet;
+
+ public:
+
+  /**
+   * <p>Default constructor; takes no arguments.</p>
+   */
+  DictionaryBreakEngine();
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~DictionaryBreakEngine();
+
+  /**
+   * <p>Indicate whether this engine handles a particular character.
+   * The character is the only input.</p>
+   *
+   * @param c A character which begins a run that the engine might handle
+   * @return TRUE if this engine handles the particular character,
+   * FALSE otherwise.
+   */
+  virtual UBool handles(UChar32 c) const;
+
+  /**
+   * <p>Find any breaks within a run in the supplied text.</p>
+   *
+   * @param text A UText representing the text. The iterator is left at
+   * the end of the run of characters which the engine is capable of handling
+   * that starts from the first character in the range.
+   * @param startPos The start of the run within the supplied text.
+   * @param endPos The end of the run within the supplied text.
+   * @param foundBreaks A UVector32 to receive the break positions
+   * @return The number of breaks found.
+   */
+  virtual int32_t findBreaks( UText *text,
+                              int32_t startPos,
+                              int32_t endPos,
+                              UVector32 &foundBreaks ) const;
+
+ protected:
+
+ /**
+  * <p>Set the character set handled by this engine.</p>
+  *
+  * @param set A UnicodeSet of the set of characters handled by the engine
+  */
+  virtual void setCharacters( const UnicodeSet &set );
+
+ /**
+  * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks A UVector32 to receive the break positions
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks ) const = 0;  // pure virtual: implemented by each subclass
+
+};
+
+/*******************************************************************
+ * ThaiBreakEngine
+ */
+
+/**
+ * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary and heuristics to determine Thai-specific breaks.</p>
+ *
+ * <p>After it is constructed a ThaiBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class ThaiBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * The sets of characters used by this engine, and its dictionary
+     * @internal
+     */
+
+  UnicodeSet                fThaiWordSet;
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fSuffixSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;
+
+ public:
+
+  /**
+   * <p>Constructor.</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   */
+  ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~ThaiBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks A UVector32 to receive the break positions
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks ) const;
+
+};
+
+/*******************************************************************
+ * LaoBreakEngine
+ */
+
+/**
+ * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary and heuristics to determine Lao-specific breaks.</p>
+ *
+ * <p>After it is constructed a LaoBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class LaoBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * The sets of characters used by this engine, and its dictionary
+     * @internal
+     */
+
+  UnicodeSet                fLaoWordSet;
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;
+
+ public:
+
+  /**
+   * <p>Constructor.</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   */
+  LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~LaoBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks A UVector32 to receive the break positions
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks ) const;
+
+};
+
+/******************************************************************* 
+ * BurmeseBreakEngine 
+ */ 
+ 
+/** 
+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a 
+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> 
+ * 
+ * <p>After it is constructed a BurmeseBreakEngine may be shared between 
+ * threads without synchronization.</p> 
+ */ 
+class BurmeseBreakEngine : public DictionaryBreakEngine { 
+ private: 
+    /** 
+     * The sets of characters used by this engine, and its dictionary 
+     * @internal 
+     */ 
+ 
+  UnicodeSet                fBurmeseWordSet; 
+  UnicodeSet                fEndWordSet; 
+  UnicodeSet                fBeginWordSet; 
+  UnicodeSet                fMarkSet; 
+  DictionaryMatcher  *fDictionary; 
+ 
+ public: 
+ 
+  /** 
+   * <p>Constructor.</p> 
+   * 
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
+   * engine is deleted. 
+   */ 
+  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
+ 
+  /** 
+   * <p>Virtual destructor.</p> 
+   */ 
+  virtual ~BurmeseBreakEngine(); 
+ 
+ protected: 
+ /** 
+  * <p>Divide up a range of known dictionary characters.</p> 
+  * 
+  * @param text A UText representing the text 
+  * @param rangeStart The start of the range of dictionary characters 
+  * @param rangeEnd The end of the range of dictionary characters 
+  * @param foundBreaks A UVector32 to receive the break positions 
+  * @return The number of breaks found 
+  */ 
+  virtual int32_t divideUpDictionaryRange( UText *text, 
+                                           int32_t rangeStart, 
+                                           int32_t rangeEnd, 
+                                           UVector32 &foundBreaks ) const; 
+ 
+}; 
+ 
+/******************************************************************* 
+ * KhmerBreakEngine 
+ */ 
+ 
+/** 
+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a 
+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> 
+ * 
+ * <p>After it is constructed a KhmerBreakEngine may be shared between 
+ * threads without synchronization.</p> 
+ */ 
+class KhmerBreakEngine : public DictionaryBreakEngine { 
+ private: 
+    /** 
+     * The sets of characters used by this engine, and its dictionary 
+     * @internal 
+     */ 
+ 
+  UnicodeSet                fKhmerWordSet; 
+  UnicodeSet                fEndWordSet; 
+  UnicodeSet                fBeginWordSet; 
+  UnicodeSet                fMarkSet; 
+  DictionaryMatcher  *fDictionary; 
+ 
+ public: 
+ 
+  /** 
+   * <p>Constructor.</p> 
+   * 
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
+   * engine is deleted. 
+   */ 
+  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
+ 
+  /** 
+   * <p>Virtual destructor.</p> 
+   */ 
+  virtual ~KhmerBreakEngine(); 
+ 
+ protected: 
+ /** 
+  * <p>Divide up a range of known dictionary characters.</p> 
+  * 
+  * @param text A UText representing the text 
+  * @param rangeStart The start of the range of dictionary characters 
+  * @param rangeEnd The end of the range of dictionary characters 
+  * @param foundBreaks A UVector32 to receive the break positions 
+  * @return The number of breaks found 
+  */ 
+  virtual int32_t divideUpDictionaryRange( UText *text, 
+                                           int32_t rangeStart, 
+                                           int32_t rangeEnd, 
+                                           UVector32 &foundBreaks ) const; 
+ 
+}; 
+ 
+#if !UCONFIG_NO_NORMALIZATION
+
+/*******************************************************************
+ * CjkBreakEngine
+ */
+
+// Indicates the language/script that the CjkBreakEngine will handle.
+enum LanguageType {
+    kKorean,
+    kChineseJapanese
+};
+
+/**
+ * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * dictionary with costs associated with each word and
+ * Viterbi decoding to determine CJK-specific breaks.</p>
+ */
+class CjkBreakEngine : public DictionaryBreakEngine {
+ protected:
+    /**
+     * The sets of characters used by this engine
+     * @internal
+     */
+  UnicodeSet                fHangulWordSet;
+  UnicodeSet                fHanWordSet;
+  UnicodeSet                fKatakanaWordSet;
+  UnicodeSet                fHiraganaWordSet;
+
+  DictionaryMatcher        *fDictionary;
+  const Normalizer2        *nfkcNorm2;
+
+ public:
+
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+     * engine is deleted. The DictionaryMatcher must contain costs for each word
+     * in order for the dictionary to work properly.
+     */
+  CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+  virtual ~CjkBreakEngine();
+
+ protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     *
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks A UVector32 to receive the break positions
+     * @return The number of breaks found
+     */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+          int32_t rangeStart,
+          int32_t rangeEnd,
+          UVector32 &foundBreaks ) const;
+
+};
+
+#endif
+
+U_NAMESPACE_END
+
+    /* DICTBE_H */
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/dictionarydata.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/dictionarydata.cpp
@@ -0,0 +1,242 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+* dictionarydata.cpp
+*
+* created on: 2012may31
+* created by: Markus W. Scherer & Maxime Serrano
+*/
+
+// #include "dictionarydata.h"
+// #include "unicode/ucharstrie.h"
+// #include "unicode/bytestrie.h"
+// #include "unicode/udata.h"
+// #include "cmemory.h"
+
+// #if !UCONFIG_NO_BREAK_ITERATION
+
+// U_NAMESPACE_BEGIN
+
+// const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;
+// const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;
+// const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;
+// const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;
+
+// const int32_t  DictionaryData::TRANSFORM_NONE = 0;
+// const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
+// const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
+// const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
+
+// DictionaryMatcher::~DictionaryMatcher() {
+// }
+
+// UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
+//     udata_close(file);
+// }
+
+// int32_t UCharsDictionaryMatcher::getType() const {
+//     return DictionaryData::TRIE_TYPE_UCHARS;
+// }
+
+// int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+//                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+//                             int32_t *prefix) const {
+
+//     UCharsTrie uct(characters);
+//     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+//     int32_t wordCount = 0;
+//     int32_t codePointsMatched = 0;
+
+//     for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
+//         UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
+//         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+//         codePointsMatched += 1;
+//         if (USTRINGTRIE_HAS_VALUE(result)) {
+//             if (wordCount < limit) {
+//                 if (values != NULL) {
+//                     values[wordCount] = uct.getValue();
+//                 }
+//                 if (lengths != NULL) {
+//                     lengths[wordCount] = lengthMatched;
+//                 }
+//                 if (cpLengths != NULL) {
+//                     cpLengths[wordCount] = codePointsMatched;
+//                 }
+//                 ++wordCount;
+//             }
+//             if (result == USTRINGTRIE_FINAL_VALUE) {
+//                 break;
+//             }
+//         }
+//         else if (result == USTRINGTRIE_NO_MATCH) {
+//             break;
+//         }
+//         if (lengthMatched >= maxLength) {
+//             break;
+//         }
+//     }
+
+//     if (prefix != NULL) {
+//         *prefix = codePointsMatched;
+//     }
+//     return wordCount;
+// }
+
+// BytesDictionaryMatcher::~BytesDictionaryMatcher() {
+//     udata_close(file);
+// }
+
+// UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
+//     if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
+//         if (c == 0x200D) {
+//             return 0xFF;
+//         } else if (c == 0x200C) {
+//             return 0xFE;
+//         }
+//         int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
+//         if (delta < 0 || 0xFD < delta) {
+//             return U_SENTINEL;
+//         }
+//         return (UChar32)delta;
+//     }
+//     return c;
+// }
+
+// int32_t BytesDictionaryMatcher::getType() const {
+//     return DictionaryData::TRIE_TYPE_BYTES;
+// }
+
+// int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+//                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+//                             int32_t *prefix) const {
+//     BytesTrie bt(characters);
+//     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+//     int32_t wordCount = 0;
+//     int32_t codePointsMatched = 0;
+
+//     for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
+//         UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
+//         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+//         codePointsMatched += 1;
+//         if (USTRINGTRIE_HAS_VALUE(result)) {
+//             if (wordCount < limit) {
+//                 if (values != NULL) {
+//                     values[wordCount] = bt.getValue();
+//                 }
+//                 if (lengths != NULL) {
+//                     lengths[wordCount] = lengthMatched;
+//                 }
+//                 if (cpLengths != NULL) {
+//                     cpLengths[wordCount] = codePointsMatched;
+//                 }
+//                 ++wordCount;
+//             }
+//             if (result == USTRINGTRIE_FINAL_VALUE) {
+//                 break;
+//             }
+//         }
+//         else if (result == USTRINGTRIE_NO_MATCH) {
+//             break;
+//         }
+//         if (lengthMatched >= maxLength) {
+//             break;
+//         }
+//     }
+
+//     if (prefix != NULL) {
+//         *prefix = codePointsMatched;
+//     }
+//     return wordCount;
+// }
+
+
+// U_NAMESPACE_END
+
+// U_NAMESPACE_USE
+
+// U_CAPI int32_t U_EXPORT2
+// udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
+//            void *outData, UErrorCode *pErrorCode) {
+//     const UDataInfo *pInfo;
+//     int32_t headerSize;
+//     const uint8_t *inBytes;
+//     uint8_t *outBytes;
+//     const int32_t *inIndexes;
+//     int32_t indexes[DictionaryData::IX_COUNT];
+//     int32_t i, offset, size;
+
+//     headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+//     if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
+//     pInfo = (const UDataInfo *)((const char *)inData + 4);
+//     if (!(pInfo->dataFormat[0] == 0x44 &&
+//           pInfo->dataFormat[1] == 0x69 &&
+//           pInfo->dataFormat[2] == 0x63 &&
+//           pInfo->dataFormat[3] == 0x74 &&
+//           pInfo->formatVersion[0] == 1)) {
+//         udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
+//                          pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
+//         *pErrorCode = U_UNSUPPORTED_ERROR;
+//         return 0;
+//     }
+
+//     inBytes = (const uint8_t *)inData + headerSize;
+//     outBytes = (uint8_t *)outData + headerSize;
+
+//     inIndexes = (const int32_t *)inBytes;
+//     if (length >= 0) {
+//         length -= headerSize;
+//         if (length < (int32_t)(sizeof(indexes))) {
+//             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
+//             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+//             return 0;
+//         }
+//     }
+
+//     for (i = 0; i < DictionaryData::IX_COUNT; i++) {
+//         indexes[i] = udata_readInt32(ds, inIndexes[i]);
+//     }
+
+//     size = indexes[DictionaryData::IX_TOTAL_SIZE];
+
+//     if (length >= 0) {
+//         if (length < size) {
+//             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
+//             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+//             return 0;
+//         }
+
+//         if (inBytes != outBytes) {
+//             uprv_memcpy(outBytes, inBytes, size);
+//         }
+
+//         offset = 0;
+//         ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
+//         offset = (int32_t)sizeof(indexes);
+//         int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
+//         int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
+
+//         if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
+//             ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
+//         } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
+//             // nothing to do
+//         } else {
+//             udata_printError(ds, "udict_swap(): unknown trie type!\n");
+//             *pErrorCode = U_UNSUPPORTED_ERROR;
+//             return 0;
+//         }
+
+//         // these next two sections are empty in the current format,
+//         // but may be used later.
+//         offset = nextOffset;
+//         nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
+//         offset = nextOffset;
+//         nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
+//         offset = nextOffset;
+//     }
+//     return headerSize + size;
+// }
+// #endif
--- a/external/duckdb/extension/icu/third_party/icu/common/dictionarydata.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/dictionarydata.h
@@ -0,0 +1,191 @@
+// // © 2016 and later: Unicode, Inc. and others.
+// // License & terms of use: http://www.unicode.org/copyright.html
+// /*
+// *******************************************************************************
+// * Copyright (C) 2014, International Business Machines
+// * Corporation and others.  All Rights Reserved.
+// *******************************************************************************
+// * dictionarydata.h
+// *
+// * created on: 2012may31
+// * created by: Markus W. Scherer & Maxime Serrano
+// */
+
+// #ifndef __DICTIONARYDATA_H__
+// #define __DICTIONARYDATA_H__
+
+// #include "unicode/utypes.h"
+
+// // #if !UCONFIG_NO_BREAK_ITERATION
+
+// // #include "unicode/utext.h"
+// // #include "unicode/udata.h"
+// // #include "udataswp.h"
+// // #include "unicode/uobject.h"
+// // #include "unicode/ustringtrie.h"
+
+// // U_NAMESPACE_BEGIN
+
+// // class UCharsTrie;
+// // class BytesTrie;
+
+// // class U_COMMON_API DictionaryData : public UMemory {
+// // public:
+// //     static const int32_t TRIE_TYPE_BYTES; // = 0;
+// //     static const int32_t TRIE_TYPE_UCHARS; // = 1;
+// //     static const int32_t TRIE_TYPE_MASK; // = 7;
+// //     static const int32_t TRIE_HAS_VALUES; // = 8;
+
+// //     static const int32_t TRANSFORM_NONE; // = 0;
+// //     static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
+// //     static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000;
+// //     static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;
+
+// //     enum {
+// //         // Byte offsets from the start of the data, after the generic header.
+// //         IX_STRING_TRIE_OFFSET,
+// //         IX_RESERVED1_OFFSET,
+// //         IX_RESERVED2_OFFSET,
+// //         IX_TOTAL_SIZE,
+
+// //         // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
+// //         IX_TRIE_TYPE,
+// //         // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
+// //         IX_TRANSFORM,
+
+// //         IX_RESERVED6,
+// //         IX_RESERVED7,
+// //         IX_COUNT
+// //     };
+// // };
+
+// // /**
+// //  * Wrapper class around generic dictionaries, implementing matches().
+// //  * getType() should return a TRIE_TYPE_??? constant from DictionaryData.
+// //  *
+// //  * All implementations of this interface must be thread-safe if they are to be used inside of the
+// //  * dictionary-based break iteration code.
+// //  */
+// // class U_COMMON_API DictionaryMatcher : public UMemory {
+// // public:
+// //     DictionaryMatcher() {}
+// //     virtual ~DictionaryMatcher();
+// //     // this should emulate CompactTrieDictionary::matches()
+// //     /*  @param text      The text in which to look for matching words. Matching begins
+// //      *                   at the current position of the UText.
+// //      *  @param maxLength The max length of match to consider. Units are the native indexing
+// //      *                   units of the UText.
+// //      *  @param limit     Capacity of output arrays, which is also the maximum number of
+// //      *                   matching words to be found.
+// //      *  @param lengths   output array, filled with the lengths of the matches, in order,
+// //      *                   from shortest to longest. Lengths are in native indexing units
+// //      *                   of the UText. May be NULL.
+// //      *  @param cpLengths output array, filled with the lengths of the matches, in order,
+// //      *                   from shortest to longest. Lengths are the number of Unicode code points.
+// //      *                   May be NULL.
+// //      *  @param values    Output array, filled with the values associated with the words found.
+// //      *                   May be NULL.
+// //      *  @param prefix    Output parameter, the code point length of the prefix match, even if that
+// //      *                   prefix didn't lead to a complete word. Will always be >= the cpLength
+// //      *                   of the longest complete word matched. May be NULL.
+// //      *  @return          Number of matching words found.
+// //      */
+// //     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+// //                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+// //                             int32_t *prefix) const = 0;
+
+// //     /** @return DictionaryData::TRIE_TYPE_XYZ */
+// //     virtual int32_t getType() const = 0;
+// // };
+
+// // // Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary
+// // class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
+// // public:
+// //     // constructs a new UCharsDictionaryMatcher.
+// //     // The UDataMemory * will be closed on this object's destruction.
+// //     UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
+// //     virtual ~UCharsDictionaryMatcher();
+// //     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+// //                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+// //                             int32_t *prefix) const;
+// //     virtual int32_t getType() const;
+// // private:
+// //     const UChar *characters;
+// //     UDataMemory *file;
+// // };
+
+// // // Implementation of the DictionaryMatcher interface for a BytesTrie dictionary
+// // class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
+// // public:
+// //     // constructs a new BytesTrieDictionaryMatcher
+// //     // the transform constant should be the constant read from the file, not a masked version!
+// //     // the UDataMemory * fed in here will be closed on this object's destruction
+// //     BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
+// //             : characters(c), transformConstant(t), file(f) { }
+// //     virtual ~BytesDictionaryMatcher();
+// //     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+// //                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+// //                             int32_t *prefix) const;
+// //     virtual int32_t getType() const;
+// // private:
+// //     UChar32 transform(UChar32 c) const;
+
+// //     const char *characters;
+// //     int32_t transformConstant;
+// //     UDataMemory *file;
+// // };
+
+// // U_NAMESPACE_END
+
+// // U_CAPI int32_t U_EXPORT2
+// // udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
+
+// // /**
+// //  * Format of dictionary .dict data files.
+// //  * Format version 1.0.
+// //  *
+// //  * A dictionary .dict data file contains a byte-serialized BytesTrie or
+// //  * a UChars-serialized UCharsTrie.
+// //  * Such files are used in dictionary-based break iteration (DBBI).
+// //  *
+// //  * For a BytesTrie, a transformation type is specified for
+// //  * transforming Unicode strings into byte sequences.
+// //  *
+// //  * A .dict file begins with a standard ICU data file header
+// //  * (DataHeader, see ucmndata.h and unicode/udata.h).
+// //  * The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
+// //  *
+// //  * After the header, the file contains the following parts.
+// //  * Constants are defined in the DictionaryData class.
+// //  *
+// //  * For the data structure of BytesTrie & UCharsTrie see
+// //  * http://site.icu-project.org/design/struct/tries
+// //  * and the bytestrie.h and ucharstrie.h header files.
+// //  *
+// //  * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
+// //  *
+// //  *      The first four indexes are byte offsets in ascending order.
+// //  *      Each byte offset marks the start of the next part in the data file,
+// //  *      and the end of the previous one.
+// //  *      When two consecutive byte offsets are the same, then the corresponding part is empty.
+// //  *      Byte offsets are offsets from after the header,
+// //  *      that is, from the beginning of the indexes[].
+// //  *      Each part starts at an offset with proper alignment for its data.
+// //  *      If necessary, the previous part may include padding bytes to achieve this alignment.
+// //  *
+// //  *      trieType=indexes[IX_TRIE_TYPE] defines the trie type.
+// //  *      transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
+// //  *          If the transformation type is TRANSFORM_TYPE_OFFSET,
+// //  *          then the lower 21 bits contain the offset code point.
+// //  *          Each code point c is mapped to byte b = (c - offset).
+// //  *          Code points outside the range offset..(offset+0xff) cannot be mapped
+// //  *          and do not occur in the dictionary.
+// //  *
+// //  * stringTrie; -- a serialized BytesTrie or UCharsTrie
+// //  *
+// //  *      The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
+// //  *      or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
+// //  */
+
+// // #endif  /* !UCONFIG_NO_BREAK_ITERATION */
+// // #endif  /* __DICTIONARYDATA_H__ */
--- a/external/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp
@@ -0,0 +1,63 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.CPP 
+*
+*******************************************************************************
+*/
+
+
+
+#include "unicode/dtintrv.h"
+
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
+
+//DateInterval::DateInterval(){}
+
+
+// Builds an interval from the given pair of dates; both values are stored
+// as passed.
+DateInterval::DateInterval(UDate from, UDate to)
+    : fromDate(from), toDate(to) {
+}
+
+
+// Destructor; performs no explicit work.
+DateInterval::~DateInterval() {
+}
+
+
+// Copy constructor: copies the UObject base, then delegates the member copy
+// to the assignment operator.
+DateInterval::DateInterval(const DateInterval& other) : UObject(other) {
+    operator=(other);
+}
+
+
+// Copies both endpoints from `other`; self-assignment is a no-op.
+DateInterval& DateInterval::operator=(const DateInterval& other) {
+    if (this == &other) {
+        return *this;
+    }
+    fromDate = other.fromDate;
+    toDate = other.toDate;
+    return *this;
+}
+
+
+// Returns a newly allocated copy of this interval, created with the copy
+// constructor.
+DateInterval* DateInterval::clone() const {
+    DateInterval* copy = new DateInterval(*this);
+    return copy;
+}
+
+
+// Two intervals are equal when both their from-dates and their to-dates
+// compare equal.
+bool DateInterval::operator==(const DateInterval& other) const {
+    if (fromDate != other.fromDate) {
+        return false;
+    }
+    return toDate == other.toDate;
+}
+
+
+U_NAMESPACE_END
+
--- a/external/duckdb/extension/icu/third_party/icu/common/edits.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/edits.cpp
@@ -0,0 +1,803 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// edits.cpp
+// created: 2017feb08 Markus W. Scherer
+
+#include "unicode/edits.h"
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "uassert.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+// File-local constants describing how edits are packed into 16-bit units of
+// the Edits array. Three record shapes are used: unchanged spans, short
+// (compact) changes, and long changes with optional trailing length units.
+namespace {
+
+// 0000uuuuuuuuuuuu records u+1 unchanged text units.
+// Largest unchanged length a single unit can represent (u+1 with 12-bit u).
+const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
+// Largest unit value that still denotes an unchanged-text record.
+const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
+
+// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
+// Upper bounds on the old (m) and new (n) lengths usable in the short form.
+const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
+const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
+// Low 9 bits of a short-change unit: the repeat count minus one.
+const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
+// Largest short-change unit value (m=6, n=7, count field all ones).
+const int32_t MAX_SHORT_CHANGE = 0x6fff;
+
+// 0111mmmmmmnnnnnn records a replacement of m text units with n.
+// m or n = 61: actual length follows in the next edits array unit.
+// m or n = 62..63: actual length follows in the next two edits array units.
+// Bit 30 of the actual length is in the head unit.
+// Trailing units have bit 15 set.
+const int32_t LENGTH_IN_1TRAIL = 61;
+const int32_t LENGTH_IN_2TRAIL = 62;
+
+}  // namespace
+
+// Frees the current edits buffer unless it is the object's own stackArray.
+void Edits::releaseArray() U_NOEXCEPT {
+    if (array == stackArray) {
+        return;  // stackArray is never passed to uprv_free()
+    }
+    uprv_free(array);
+}
+
+// Copies `length` edit units from `other` into this object's buffer, growing
+// the buffer first when it is too small. The scalar fields (length, delta,
+// numChanges, errorCode_) are expected to have been set by the caller, as the
+// copy-assignment operator does. If this object is in an error state, or the
+// allocation fails, the edit list is emptied instead.
+Edits &Edits::copyArray(const Edits &other) {
+    if (U_FAILURE(errorCode_)) {
+        numChanges = 0;
+        delta = 0;
+        length = 0;
+        return *this;
+    }
+    if (length > capacity) {
+        uint16_t *grown = (uint16_t *)uprv_malloc((size_t)length * 2);
+        if (grown == nullptr) {
+            numChanges = 0;
+            delta = 0;
+            length = 0;
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        // Swap in the exactly-sized replacement buffer.
+        releaseArray();
+        capacity = length;
+        array = grown;
+    }
+    if (length > 0) {
+        uprv_memcpy(array, other.array, (size_t)length * 2);
+    }
+    return *this;
+}
+
+// Takes over the edit units of `src`. The scalar fields (length, delta,
+// numChanges, errorCode_) are expected to have been set by the caller, as the
+// move-assignment operator does. When the data fits into STACK_CAPACITY units
+// it is copied into stackArray; otherwise src's buffer pointer is taken over
+// and src is pointed back at its own stackArray and reset.
+Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
+    if (U_FAILURE(errorCode_)) {
+        numChanges = 0;
+        delta = 0;
+        length = 0;
+        return *this;
+    }
+    releaseArray();
+    if (length <= STACK_CAPACITY) {
+        // Fits into the built-in buffer: copy the units over.
+        array = stackArray;
+        capacity = STACK_CAPACITY;
+        if (length > 0) {
+            uprv_memcpy(array, src.array, (size_t)length * 2);
+        }
+        return *this;
+    }
+    // Too large for stackArray: take src's buffer and leave src empty.
+    array = src.array;
+    capacity = src.capacity;
+    src.array = src.stackArray;
+    src.capacity = STACK_CAPACITY;
+    src.reset();
+    return *this;
+}
+
+// Copy assignment: copies the scalar state first, then lets copyArray()
+// duplicate the edit units.
+Edits &Edits::operator=(const Edits &other) {
+    errorCode_ = other.errorCode_;
+    numChanges = other.numChanges;
+    delta = other.delta;
+    length = other.length;
+    return copyArray(other);
+}
+
+// Move assignment: copies the scalar state first, then lets moveArray()
+// take over the edit units.
+Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
+    errorCode_ = src.errorCode_;
+    numChanges = src.numChanges;
+    delta = src.delta;
+    length = src.length;
+    return moveArray(src);
+}
+
+// Destructor: gives back the edits buffer via releaseArray().
+Edits::~Edits()
+{
+    releaseArray();
+}
+
+void Edits::reset() U_NOEXCEPT {
+    length = delta = numChanges = 0;
+    errorCode_ = U_ZERO_ERROR;
+}
+
+// Records a span of `unchangedLength` unchanged text units.
+// Does nothing when an error is already recorded or the length is zero;
+// a negative length sets U_ILLEGAL_ARGUMENT_ERROR.
+void Edits::addUnchanged(int32_t unchangedLength) {
+    if (U_FAILURE(errorCode_)) {
+        return;
+    }
+    if (unchangedLength == 0) {
+        return;
+    }
+    if (unchangedLength < 0) {
+        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // If the last unit is an unchanged-text record with room left, extend it.
+    const int32_t previous = lastUnit();
+    if (previous < MAX_UNCHANGED) {
+        const int32_t room = MAX_UNCHANGED - previous;
+        if (unchangedLength <= room) {
+            setLastUnit(previous + unchangedLength);
+            return;
+        }
+        // Fill the previous record up and carry the rest forward.
+        setLastUnit(MAX_UNCHANGED);
+        unchangedLength -= room;
+    }
+    // Emit full-size records while at least a full record's worth remains.
+    for (; unchangedLength >= MAX_UNCHANGED_LENGTH; unchangedLength -= MAX_UNCHANGED_LENGTH) {
+        append(MAX_UNCHANGED);
+    }
+    // Emit the leftover; a unit value u stands for u+1 text units.
+    if (unchangedLength > 0) {
+        append(unchangedLength - 1);
+    }
+}
+
+// Records a replacement of `oldLength` text units by `newLength` text units.
+//
+// - Does nothing if an error is already recorded, or if both lengths are 0.
+// - Negative lengths set U_ILLEGAL_ARGUMENT_ERROR.
+// - Updates numChanges and the accumulated length delta; if the delta would
+//   overflow int32_t, sets U_INDEX_OUTOFBOUNDS_ERROR (numChanges has already
+//   been incremented at that point).
+// - Encodes the change in the short form when the lengths allow it, merging
+//   with an identical preceding short record when possible; otherwise writes
+//   a head unit followed by up to four trailing length units.
+void Edits::addReplace(int32_t oldLength, int32_t newLength) {
+    if(U_FAILURE(errorCode_)) { return; }
+    if(oldLength < 0 || newLength < 0) {
+        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if (oldLength == 0 && newLength == 0) {
+        return;
+    }
+    ++numChanges;
+    int32_t newDelta = newLength - oldLength;
+    if (newDelta != 0) {
+        // Check before adding so that the signed addition itself never overflows.
+        if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
+                (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
+            // Integer overflow or underflow.
+            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+            return;
+        }
+        delta += newDelta;
+    }
+
+    // Short form: old length 1..6 and new length 0..7 fit into one unit
+    // together with a 9-bit repeat count.
+    if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
+            newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
+        // Merge into previous same-lengths short-replacement record, if any.
+        int32_t u = (oldLength << 12) | (newLength << 9);
+        int32_t last = lastUnit();
+        // Merge only if the last unit is a short change with the same m:n
+        // lengths and its count field is not yet at its maximum.
+        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
+                (last & ~SHORT_CHANGE_NUM_MASK) == u &&
+                (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
+            setLastUnit(last + 1);
+            return;
+        }
+        append(u);
+        return;
+    }
+
+    // Long form: head unit 0111mmmmmmnnnnnn, optionally followed by trail units.
+    int32_t head = 0x7000;
+    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
+        // Both lengths fit directly into the 6-bit fields of the head unit.
+        head |= oldLength << 6;
+        head |= newLength;
+        append(head);
+    } else if ((capacity - length) >= 5 || growArray()) {
+        // Worst case is 5 units: the head plus two trail units per length.
+        // `limit` starts past the head slot; the head is written last, once
+        // its length-marker bits are known.
+        int32_t limit = length + 1;
+        if(oldLength < LENGTH_IN_1TRAIL) {
+            head |= oldLength << 6;
+        } else if(oldLength <= 0x7fff) {
+            // 15-bit length: one trail unit.
+            head |= LENGTH_IN_1TRAIL << 6;
+            array[limit++] = (uint16_t)(0x8000 | oldLength);
+        } else {
+            // Up to 31-bit length: bit 30 goes into the head marker (62 or 63),
+            // bits 29..15 and 14..0 go into two trail units.
+            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
+            array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
+            array[limit++] = (uint16_t)(0x8000 | oldLength);
+        }
+        // Same encoding for the new length, in the low 6 bits of the head.
+        if(newLength < LENGTH_IN_1TRAIL) {
+            head |= newLength;
+        } else if(newLength <= 0x7fff) {
+            head |= LENGTH_IN_1TRAIL;
+            array[limit++] = (uint16_t)(0x8000 | newLength);
+        } else {
+            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
+            array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
+            array[limit++] = (uint16_t)(0x8000 | newLength);
+        }
+        array[length] = (uint16_t)head;
+        length = limit;
+    }
+}
+
+// Appends one 16-bit unit to the edits array, growing the array first when
+// it is full. If growing fails the unit is dropped; growArray() has already
+// recorded the error in errorCode_.
+void Edits::append(int32_t r) {
+    if (length >= capacity && !growArray()) {
+        return;
+    }
+    array[length] = (uint16_t)r;
+    ++length;
+}
+
+// Replaces the edits buffer with a larger heap buffer and copies the existing
+// units over. Moving off stackArray jumps to 2000 units; after that the
+// capacity doubles, capped at INT32_MAX.
+// Returns TRUE on success. On failure returns FALSE and sets errorCode_ to
+// U_INDEX_OUTOFBOUNDS_ERROR (cannot grow further) or
+// U_MEMORY_ALLOCATION_ERROR (allocation failed).
+UBool Edits::growArray() {
+    int32_t newCapacity;
+    if (array == stackArray) {
+        newCapacity = 2000;
+    } else {
+        if (capacity == INT32_MAX) {
+            // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
+            // with a result-string-buffer overflow.
+            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+            return FALSE;
+        }
+        newCapacity = (capacity >= (INT32_MAX / 2)) ? INT32_MAX : 2 * capacity;
+    }
+    // A maximal change record needs 5 units, so require at least that much growth.
+    if ((newCapacity - capacity) < 5) {
+        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
+        return FALSE;
+    }
+    uint16_t *grown = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
+    if (grown == NULL) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    uprv_memcpy(grown, array, (size_t)length * 2);
+    releaseArray();
+    capacity = newCapacity;
+    array = grown;
+    return TRUE;
+}
+
+UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_FAILURE(outErrorCode)) { return TRUE; }
+    if (U_SUCCESS(errorCode_)) { return FALSE; }
+    outErrorCode = errorCode_;
+    return TRUE;
+}
+
+Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
+    if (copyErrorTo(errorCode)) { return *this; }
+    // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
+    // Parallel iteration over both Edits.
+    Iterator abIter = ab.getFineIterator();
+    Iterator bcIter = bc.getFineIterator();
+    UBool abHasNext = TRUE, bcHasNext = TRUE;
+    // Copy iterator state into local variables, so that we can modify and subdivide spans.
+    // ab old & new length, bc old & new length
+    int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
+    // When we have different-intermediate-length changes, we accumulate a larger change.
+    int32_t pending_aLength = 0, pending_cLength = 0;
+    for (;;) {
+        // At this point, for each of the two iterators:
+        // Either we are done with the locally cached current edit,
+        // and its intermediate-string length has been reset,
+        // or we will continue to work with a truncated remainder of this edit.
+        //
+        // If the current edit is done, and the iterator has not yet reached the end,
+        // then we fetch the next edit. This is true for at least one of the iterators.
+        //
+        // Normally it does not matter whether we fetch from ab and then bc or vice versa.
+        // However, the result is observably different when
+        // ab deletions meet bc insertions at the same intermediate-string index.
+        // Some users expect the bc insertions to come first, so we fetch from bc first.
+        if (bc_bLength == 0) {
+            if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
+                bc_bLength = bcIter.oldLength();
+                cLength = bcIter.newLength();
+                if (bc_bLength == 0) {
+                    // insertion
+                    if (ab_bLength == 0 || !abIter.hasChange()) {
+                        addReplace(pending_aLength, pending_cLength + cLength);
+                        pending_aLength = pending_cLength = 0;
+                    } else {
+                        pending_cLength += cLength;
+                    }
+                    continue;
+                }
+            }
+            // else see if the other iterator is done, too.
+        }
+        if (ab_bLength == 0) {
+            if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
+                aLength = abIter.oldLength();
+                ab_bLength = abIter.newLength();
+                if (ab_bLength == 0) {
+                    // deletion
+                    if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
+                        addReplace(pending_aLength + aLength, pending_cLength);
+                        pending_aLength = pending_cLength = 0;
+                    } else {
+                        pending_aLength += aLength;
+                    }
+                    continue;
+                }
+            } else if (bc_bLength == 0) {
+                // Both iterators are done at the same time:
+                // The intermediate-string lengths match.
+                break;
+            } else {
+                // The ab output string is shorter than the bc input string.
+                if (!copyErrorTo(errorCode)) {
+                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+                }
+                return *this;
+            }
+        }
+        if (bc_bLength == 0) {
+            // The bc input string is shorter than the ab output string.
+            if (!copyErrorTo(errorCode)) {
+                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            }
+            return *this;
+        }
+        //  Done fetching: ab_bLength > 0 && bc_bLength > 0
+
+        // The current state has two parts:
+        // - Past: We accumulate a longer ac edit in the "pending" variables.
+        // - Current: We have copies of the current ab/bc edits in local variables.
+        //   At least one side is newly fetched.
+        //   One side might be a truncated remainder of an edit we fetched earlier.
+
+        if (!abIter.hasChange() && !bcIter.hasChange()) {
+            // An unchanged span all the way from string a to string c.
+            if (pending_aLength != 0 || pending_cLength != 0) {
+                addReplace(pending_aLength, pending_cLength);
+                pending_aLength = pending_cLength = 0;
+            }
+            int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
+            addUnchanged(unchangedLength);
+            ab_bLength = aLength -= unchangedLength;
+            bc_bLength = cLength -= unchangedLength;
+            // At least one of the unchanged spans is now empty.
+            continue;
+        }
+        if (!abIter.hasChange() && bcIter.hasChange()) {
+            // Unchanged a->b but changed b->c.
+            if (ab_bLength >= bc_bLength) {
+                // Split the longer unchanged span into change + remainder.
+                addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
+                pending_aLength = pending_cLength = 0;
+                aLength = ab_bLength -= bc_bLength;
+                bc_bLength = 0;
+                continue;
+            }
+            // Handle the shorter unchanged span below like a change.
+        } else if (abIter.hasChange() && !bcIter.hasChange()) {
+            // Changed a->b and then unchanged b->c.
+            if (ab_bLength <= bc_bLength) {
+                // Split the longer unchanged span into change + remainder.
+                addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
+                pending_aLength = pending_cLength = 0;
+                cLength = bc_bLength -= ab_bLength;
+                ab_bLength = 0;
+                continue;
+            }
+            // Handle the shorter unchanged span below like a change.
+        } else {  // both abIter.hasChange() && bcIter.hasChange()
+            if (ab_bLength == bc_bLength) {
+                // Changes on both sides up to the same position. Emit & reset.
+                addReplace(pending_aLength + aLength, pending_cLength + cLength);
+                pending_aLength = pending_cLength = 0;
+                ab_bLength = bc_bLength = 0;
+                continue;
+            }
+        }
+        // Accumulate the a->c change, reset the shorter side,
+        // keep a remainder of the longer one.
+        pending_aLength += aLength;
+        pending_cLength += cLength;
+        if (ab_bLength < bc_bLength) {
+            bc_bLength -= ab_bLength;
+            cLength = ab_bLength = 0;
+        } else {  // ab_bLength > bc_bLength
+            ab_bLength -= bc_bLength;
+            aLength = bc_bLength = 0;
+        }
+    }
+    if (pending_aLength != 0 || pending_cLength != 0) {
+        addReplace(pending_aLength, pending_cLength);
+    }
+    copyErrorTo(errorCode);
+    return *this;
+}
+
+Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
+        array(a), index(0), length(len), remaining(0),  // start at unit 0 of the len units at a; no compressed sequence in progress
+        onlyChanges_(oc), coarse(crs),  // iteration-mode flags, stored as given
+        dir(0), changed(FALSE), oldLength_(0), newLength_(0),  // no direction yet, empty current span
+        srcIndex(0), replIndex(0), destIndex(0) {}  // all string positions start at 0
+
+// Decodes one length from its head value, consuming trail units from array as needed.
+// Advances index past every trail unit that was read.
+int32_t Edits::Iterator::readLength(int32_t head) {
+    if (head < LENGTH_IN_1TRAIL) { return head; }  // the head value itself is the length
+    if (head < LENGTH_IN_2TRAIL) {
+        U_ASSERT(index < length);
+        U_ASSERT(array[index] >= 0x8000);
+        return array[index++] & 0x7fff;  // one trail unit: its low 15 bits are the length
+    }
+    // Two trail units: bit 0 of head supplies bit 30, each trail unit supplies 15 bits.
+    U_ASSERT((index + 2) <= length);
+    U_ASSERT(array[index] >= 0x8000);
+    U_ASSERT(array[index + 1] >= 0x8000);
+    const int32_t upper = (int32_t)(array[index] & 0x7fff) << 15;
+    const int32_t lower = array[index + 1] & 0x7fff;
+    index += 2;
+    return ((head & 1) << 30) | upper | lower;
+}
+
+// Moves the string indexes forward, past the span given by oldLength_/newLength_.
+void Edits::Iterator::updateNextIndexes() {
+    const int32_t replDelta = changed ? newLength_ : 0;  // replIndex moves only for changes
+    srcIndex += oldLength_;
+    replIndex += replDelta;
+    destIndex += newLength_;
+}
+
+// Moves the string indexes backward, to the start of the span given by oldLength_/newLength_.
+void Edits::Iterator::updatePreviousIndexes() {
+    const int32_t replDelta = changed ? newLength_ : 0;  // replIndex moves only for changes
+    srcIndex -= oldLength_;
+    replIndex -= replDelta;
+    destIndex -= newLength_;
+}
+
+// Clears the current span (there is no change before or beyond the string); returns FALSE.
+UBool Edits::Iterator::noNext() {
+    changed = FALSE;
+    newLength_ = oldLength_ = 0;
+    dir = 0;  // no iteration direction
+    return FALSE;
+}
+
+UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {  // TRUE if a span was fetched, else FALSE
+    // Forward iteration: Update the string indexes to the limit of the current span,
+    // and post-increment-read array units to assemble a new span (in changed/oldLength_/newLength_).
+    // Leaves the array index one after the last unit of that span.
+    if (U_FAILURE(errorCode)) { return FALSE; }
+    // We have an errorCode in case we need to start guarding against integer overflows.
+    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
+    if (dir > 0) {
+        updateNextIndexes();  // continuing forward: step the indexes past the current span
+    } else {
+        if (dir < 0) {
+            // Turn around from previous() to next().
+            // Post-increment-read the same span again.
+            if (remaining > 0) {
+                // Fine-grained iterator:
+                // Stay on the current one of a sequence of compressed changes.
+                ++index;  // next() rests on the index after the sequence unit.
+                dir = 1;
+                return TRUE;
+            }
+        }
+        dir = 1;
+    }
+    if (remaining >= 1) {
+        // Fine-grained iterator: Continue a sequence of compressed changes.
+        if (remaining > 1) {
+            --remaining;
+            return TRUE;  // oldLength_/newLength_ are left as they are for this next change
+        }
+        remaining = 0;  // remaining was 1: the sequence is used up, read the next unit
+    }
+    if (index >= length) {
+        return noNext();  // no more units
+    }
+    int32_t u = array[index++];
+    if (u <= MAX_UNCHANGED) {
+        // Combine adjacent unchanged ranges.
+        changed = FALSE;
+        oldLength_ = u + 1;  // the unit stores the unchanged length minus one
+        while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
+            ++index;
+            oldLength_ += u + 1;
+        }
+        newLength_ = oldLength_;
+        if (onlyChanges) {
+            updateNextIndexes();  // skip the unchanged span but still advance the string indexes
+            if (index >= length) {
+                return noNext();
+            }
+            // already fetched u > MAX_UNCHANGED at index
+            ++index;
+        } else {
+            return TRUE;
+        }
+    }
+    changed = TRUE;
+    if (u <= MAX_SHORT_CHANGE) {
+        int32_t oldLen = u >> 12;  // short change unit: old length in bits 12 and up
+        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;  // new length in the bits from 9 up
+        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;  // number of identical changes packed in this unit
+        if (coarse) {
+            oldLength_ = num * oldLen;
+            newLength_ = num * newLen;
+        } else {
+            // Split a sequence of changes that was compressed into one unit.
+            oldLength_ = oldLen;
+            newLength_ = newLen;
+            if (num > 1) {
+                remaining = num;  // This is the first of two or more changes.
+            }
+            return TRUE;
+        }
+    } else {
+        U_ASSERT(u <= 0x7fff);  // must be a head unit, not a trail unit
+        oldLength_ = readLength((u >> 6) & 0x3f);  // head bits 6..11 describe the old length
+        newLength_ = readLength(u & 0x3f);  // head bits 0..5 describe the new length
+        if (!coarse) {
+            return TRUE;
+        }
+    }
+    // Combine adjacent changes. (Only reached by the coarse iterator.)
+    while (index < length && (u = array[index]) > MAX_UNCHANGED) {
+        ++index;
+        if (u <= MAX_SHORT_CHANGE) {
+            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+            oldLength_ += (u >> 12) * num;
+            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
+        } else {
+            U_ASSERT(u <= 0x7fff);
+            oldLength_ += readLength((u >> 6) & 0x3f);
+            newLength_ += readLength(u & 0x3f);
+        }
+    }
+    return TRUE;
+}
+
+UBool Edits::Iterator::previous(UErrorCode &errorCode) {  // TRUE if a span was fetched, else FALSE
+    // Backward iteration: Pre-decrement-read array units to assemble a new span,
+    // then update the string indexes to the start of that span.
+    // Leaves the array index on the head unit of that span.
+    if (U_FAILURE(errorCode)) { return FALSE; }
+    // We have an errorCode in case we need to start guarding against integer overflows.
+    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
+    if (dir >= 0) {
+        if (dir > 0) {
+            // Turn around from next() to previous().
+            // Set the string indexes to the span limit and
+            // pre-decrement-read the same span again.
+            if (remaining > 0) {
+                // Fine-grained iterator:
+                // Stay on the current one of a sequence of compressed changes.
+                --index;  // previous() rests on the sequence unit.
+                dir = -1;
+                return TRUE;
+            }
+            updateNextIndexes();
+        }
+        dir = -1;
+    }
+    if (remaining > 0) {
+        // Fine-grained iterator: Continue a sequence of compressed changes.
+        int32_t u = array[index];
+        U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+        if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {  // masked value is the change count minus one
+            ++remaining;
+            updatePreviousIndexes();  // same lengths as before: step back over one more change
+            return TRUE;
+        }
+        remaining = 0;  // all changes of this unit have been visited
+    }
+    if (index <= 0) {
+        return noNext();  // nothing before the first unit
+    }
+    int32_t u = array[--index];
+    if (u <= MAX_UNCHANGED) {
+        // Combine adjacent unchanged ranges.
+        changed = FALSE;
+        oldLength_ = u + 1;  // the unit stores the unchanged length minus one
+        while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
+            --index;
+            oldLength_ += u + 1;
+        }
+        newLength_ = oldLength_;
+        // No need to handle onlyChanges as long as previous() is called only from findIndex().
+        updatePreviousIndexes();
+        return TRUE;
+    }
+    changed = TRUE;
+    if (u <= MAX_SHORT_CHANGE) {
+        int32_t oldLen = u >> 12;  // short change unit: old length in bits 12 and up
+        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;  // new length in the bits from 9 up
+        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;  // number of identical changes packed in this unit
+        if (coarse) {
+            oldLength_ = num * oldLen;
+            newLength_ = num * newLen;
+        } else {
+            // Split a sequence of changes that was compressed into one unit.
+            oldLength_ = oldLen;
+            newLength_ = newLen;
+            if (num > 1) {
+                remaining = 1;  // This is the last of two or more changes.
+            }
+            updatePreviousIndexes();
+            return TRUE;
+        }
+    } else {
+        if (u <= 0x7fff) {
+            // The change is encoded in u alone.
+            oldLength_ = readLength((u >> 6) & 0x3f);
+            newLength_ = readLength(u & 0x3f);
+        } else {
+            // Back up to the head of the change, read the lengths,
+            // and reset the index to the head again.
+            U_ASSERT(index > 0);
+            while ((u = array[--index]) > 0x7fff) {}  // skip trail units until the head unit is found
+            U_ASSERT(u > MAX_SHORT_CHANGE);
+            int32_t headIndex = index++;  // readLength() reads forward from the unit after the head
+            oldLength_ = readLength((u >> 6) & 0x3f);
+            newLength_ = readLength(u & 0x3f);
+            index = headIndex;
+        }
+        if (!coarse) {
+            updatePreviousIndexes();
+            return TRUE;
+        }
+    }
+    // Combine adjacent changes. (Only reached by the coarse iterator.)
+    while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
+        --index;
+        if (u <= MAX_SHORT_CHANGE) {
+            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
+            oldLength_ += (u >> 12) * num;
+            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
+        } else if (u <= 0x7fff) {
+            // Read the lengths, and reset the index to the head again.
+            int32_t headIndex = index++;
+            oldLength_ += readLength((u >> 6) & 0x3f);
+            newLength_ += readLength(u & 0x3f);
+            index = headIndex;
+        }  // units above 0x7fff are stepped over here; only head units add lengths
+    }
+    updatePreviousIndexes();
+    return TRUE;
+}
+
+int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {  // moves to the span containing i
+    if (U_FAILURE(errorCode) || i < 0) { return -1; }  // -1: error, or index before the string
+    int32_t spanStart, spanLength;
+    if (findSource) {  // find source index
+        spanStart = srcIndex;
+        spanLength = oldLength_;
+    } else {  // find destination index
+        spanStart = destIndex;
+        spanLength = newLength_;
+    }
+    if (i < spanStart) {
+        if (i >= (spanStart / 2)) {  // i is in the upper half of [0, spanStart): walk back from here
+            // Search backwards.
+            for (;;) {
+                UBool hasPrevious = previous(errorCode);
+                U_ASSERT(hasPrevious);  // because i>=0 and the first span starts at 0
+                (void)hasPrevious;  // avoid unused-variable warning
+                spanStart = findSource ? srcIndex : destIndex;
+                if (i >= spanStart) {
+                    // The index is in the current span.
+                    return 0;
+                }
+                if (remaining > 0) {
+                    // Is the index in one of the remaining compressed edits?
+                    // spanStart is the start of the current span, first of the remaining ones.
+                    spanLength = findSource ? oldLength_ : newLength_;
+                    int32_t u = array[index];
+                    U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
+                    int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;  // changes of this unit not yet visited
+                    int32_t len = num * spanLength;  // their total length in the searched string
+                    if (i >= (spanStart - len)) {
+                        int32_t n = ((spanStart - i - 1) / spanLength) + 1;  // how many changes to step back
+                        // 1 <= n <= num
+                        srcIndex -= n * oldLength_;
+                        replIndex -= n * newLength_;
+                        destIndex -= n * newLength_;
+                        remaining += n;
+                        return 0;
+                    }
+                    // Skip all of these edits at once.
+                    srcIndex -= num * oldLength_;
+                    replIndex -= num * newLength_;
+                    destIndex -= num * newLength_;
+                    remaining = 0;
+                }
+            }
+        }
+        // Reset the iterator to the start. (Then search forward in the loop below.)
+        dir = 0;
+        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
+    } else if (i < (spanStart + spanLength)) {
+        // The index is in the current span.
+        return 0;
+    }
+    while (next(FALSE, errorCode)) {  // onlyChanges=FALSE: unchanged spans are visited too
+        if (findSource) {
+            spanStart = srcIndex;
+            spanLength = oldLength_;
+        } else {
+            spanStart = destIndex;
+            spanLength = newLength_;
+        }
+        if (i < (spanStart + spanLength)) {
+            // The index is in the current span.
+            return 0;
+        }
+        if (remaining > 1) {
+            // Is the index in one of the remaining compressed edits?
+            // spanStart is the start of the current span, first of the remaining ones.
+            int32_t len = remaining * spanLength;  // total length of the current and following changes
+            if (i < (spanStart + len)) {
+                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
+                srcIndex += n * oldLength_;
+                replIndex += n * newLength_;
+                destIndex += n * newLength_;
+                remaining -= n;
+                return 0;
+            }
+            // Make next() skip all of these edits at once.
+            oldLength_ *= remaining;  // next() advances the indexes by these scaled lengths
+            newLength_ *= remaining;
+            remaining = 0;
+        }
+    }
+    return 1;  // next() found no further span: i is at or beyond the end of the string
+}
+
+// Maps source-string index i to the matching destination-string index.
+// Repositions the iterator via findIndex(); returns 0 on error or for a negative i.
+int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
+    const int32_t position = findIndex(i, TRUE, errorCode);
+    if (position < 0) {
+        return 0;  // failure, or i lies before the string
+    }
+    if (position > 0 || i == srcIndex) {
+        // i is at or past the end of the string,
+        // or exactly at the start of the span that was found.
+        return destIndex;
+    }
+    // i lies strictly inside the current span:
+    // - a change span maps to the end of its replacement,
+    // - an unchanged span maps 1:1 by offset.
+    const int32_t offset = changed ? newLength_ : (i - srcIndex);
+    return destIndex + offset;
+}
+
+// Maps destination-string index i to the matching source-string index.
+// Repositions the iterator via findIndex(); returns 0 on error or for a negative i.
+int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
+    const int32_t position = findIndex(i, FALSE, errorCode);
+    if (position < 0) {
+        return 0;  // failure, or i lies before the string
+    }
+    if (position > 0 || i == destIndex) {
+        // i is at or past the end of the string,
+        // or exactly at the start of the span that was found.
+        return srcIndex;
+    }
+    // i lies strictly inside the current span:
+    // - a change span maps to the end of the replaced source text,
+    // - an unchanged span maps by offset.
+    const int32_t offset = changed ? oldLength_ : (i - destIndex);
+    return srcIndex + offset;
+}
+
+// Appends a description of the current span to sb and returns sb: the source range,
+// the destination range and, for a change, the replacement-text range.
+UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
+    sb.append(u"{ src[", -1);
+    ICU_Utility::appendNumber(sb, srcIndex);
+    sb.append(u"..", -1);
+    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);
+    // Relation marker between the two ranges, then the destination range.
+    sb.append(changed ? u"] ⇝ dest[" : u"] ≡ dest[", -1);
+    ICU_Utility::appendNumber(sb, destIndex);
+    sb.append(u"..", -1);
+    ICU_Utility::appendNumber(sb, destIndex + newLength_);
+    if (!changed) {
+        sb.append(u"] (no-change) }", -1);
+        return sb;
+    }
+    // Only a change has a replacement-text range.
+    sb.append(u"], repl[", -1);
+    ICU_Utility::appendNumber(sb, replIndex);
+    sb.append(u"..", -1);
+    ICU_Utility::appendNumber(sb, replIndex + newLength_);
+    sb.append(u"] }", -1);
+    return sb;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/errorcode.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/errorcode.cpp
@@ -0,0 +1,42 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  errorcode.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009mar10
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/errorcode.h"
+
+U_NAMESPACE_BEGIN
+
+ErrorCode::~ErrorCode() {}  // empty destructor, defined out of line in this file
+
+UErrorCode ErrorCode::reset() {  // returns the code that was stored before the reset
+    const UErrorCode previous = errorCode;
+    errorCode = U_ZERO_ERROR;
+    return previous;
+}
+
+// Calls handleFailure() when the stored code is a failure; otherwise does nothing.
+void ErrorCode::assertSuccess() const {
+    if (!isFailure()) { return; }
+    handleFailure();
+}
+
+const char* ErrorCode::errorName() const {
+  return u_errorName(errorCode);
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp
@@ -0,0 +1,710 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014-2015, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+// #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+// #include "cmemory.h"
+
+// #include "unicode/filteredbrk.h"
+// #include "unicode/ucharstriebuilder.h"
+// #include "unicode/ures.h"
+
+// #include "uresimp.h" // ures_getByKeyWithFallback
+// #include "ubrkimpl.h" // U_ICUDATA_BRKITR
+// #include "uvector.h"
+// #include "cmemory.h"
+
+// U_NAMESPACE_BEGIN
+
+// #ifndef FB_DEBUG
+// #define FB_DEBUG 0
+// #endif
+
+// #if FB_DEBUG
+// #include <stdio.h>
+// static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
+//   char buf[2048];
+//   if(s) {
+//     s->extract(0,s->length(),buf,2048);
+//   } else {
+//     strcpy(buf,"NULL");
+//   }
+//   fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
+//           f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
+// }
+
+// #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
+// #else
+// #define FB_TRACE(m,s,b,d)
+// #endif
+
+// /**
+//  * Used with sortedInsert()
+//  */
+// static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
+//     const UnicodeString &a = *(const UnicodeString*)t1.pointer;
+//     const UnicodeString &b = *(const UnicodeString*)t2.pointer;
+//     return a.compare(b);
+// }
+
+// /**
+//  * A UVector which implements a set of strings.
+//  */
+// class U_COMMON_API UStringSet : public UVector {
+//  public:
+//   UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
+//                                            uhash_compareUnicodeString,
+//                                            1,
+//                                            status) {}
+//   virtual ~UStringSet();
+//   /**
+//    * Is this UnicodeString contained?
+//    */
+//   inline UBool contains(const UnicodeString& s) {
+//     return contains((void*) &s);
+//   }
+//   using UVector::contains;
+//   /**
+//    * Return the ith UnicodeString alias
+//    */
+//   inline const UnicodeString* getStringAt(int32_t i) const {
+//     return (const UnicodeString*)elementAt(i);
+//   }
+//   /**
+//    * Adopt the UnicodeString if not already contained.
+//    * Caller no longer owns the pointer in any case.
+//    * @return true if adopted successfully, false otherwise (error, or else duplicate)
+//    */
+//   inline UBool adopt(UnicodeString *str, UErrorCode &status) {
+//     if(U_FAILURE(status) || contains(*str)) {
+//       delete str;
+//       return false;
+//     } else {
+//       sortedInsert(str, compareUnicodeString, status);
+//       if(U_FAILURE(status)) {
+//         delete str;
+//         return false;
+//       }
+//       return true;
+//     }
+//   }
+//   /**
+//    * Add by value.
+//    * @return true if successfully adopted.
+//    */
+//   inline UBool add(const UnicodeString& str, UErrorCode &status) {
+//     if(U_FAILURE(status)) return false;
+//     UnicodeString *t = new UnicodeString(str);
+//     if(t==NULL) {
+//       status = U_MEMORY_ALLOCATION_ERROR; return false;
+//     }
+//     return adopt(t, status);
+//   }
+//   /**
+//    * Remove this string.
+//    * @return true if successfully removed, false otherwise (error, or else it wasn't there)
+//    */
+//   inline UBool remove(const UnicodeString &s, UErrorCode &status) {
+//     if(U_FAILURE(status)) return false;
+//     return removeElement((void*) &s);
+//   }
+// };
+
+// /**
+//  * Virtual, won't be inlined
+//  */
+// UStringSet::~UStringSet() {}
+
+// /* ----------------------------------------------------------- */
+
+
+// /* Filtered Break constants */
+// static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
+// static const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
+// static const int32_t kSuppressInReverse = (1<<0);
+// static const int32_t kAddToForward = (1<<1);
+// static const UChar   kFULLSTOP = 0x002E; // '.'
+
+// /**
+//  * Shared data for SimpleFilteredSentenceBreakIterator
+//  */
+// class SimpleFilteredSentenceBreakData : public UMemory {
+// public:
+//   SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
+//       : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
+//   SimpleFilteredSentenceBreakData *incr() { refcount++;  return this; }
+//   SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
+//   virtual ~SimpleFilteredSentenceBreakData();
+
+//   LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
+//   LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
+//   int32_t                     refcount;
+// };
+
+// SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
+
+// /**
+//  * Concrete implementation
+//  */
+// class SimpleFilteredSentenceBreakIterator : public BreakIterator {
+// public:
+//   SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
+//   SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
+//   virtual ~SimpleFilteredSentenceBreakIterator();
+// private:
+//   SimpleFilteredSentenceBreakData *fData;
+//   LocalPointer<BreakIterator> fDelegate;
+//   LocalUTextPointer           fText;
+
+//   /* -- subclass interface -- */
+// public:
+//   /* -- cloning and other subclass stuff -- */
+//   virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
+//                                              int32_t &/*BufferSize*/,
+//                                              UErrorCode &status) {
+//     // for now - always deep clone
+//     status = U_SAFECLONE_ALLOCATED_WARNING;
+//     return clone();
+//   }
+//   virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
+//   virtual UClassID getDynamicClassID(void) const { return NULL; }
+//   virtual bool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
+
+//   /* -- text modifying -- */
+//   virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
+//   virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
+//   virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
+//   virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
+
+//   /* -- other functions that are just delegated -- */
+//   virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
+//   virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
+
+//   /* -- ITERATION -- */
+//   virtual int32_t first(void);
+//   virtual int32_t preceding(int32_t offset);
+//   virtual int32_t previous(void);
+//   virtual UBool isBoundary(int32_t offset);
+//   virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
+
+//   virtual int32_t next(void);
+
+//   virtual int32_t next(int32_t n);
+//   virtual int32_t following(int32_t offset);
+//   virtual int32_t last(void);
+
+// private:
+//     /**
+//      * Given that the fDelegate has already given its "initial" answer,
+//      * find the NEXT actual (non-excepted) break.
+//      * @param n initial position from delegate
+//      * @return new break position or UBRK_DONE
+//      */
+//     int32_t internalNext(int32_t n);
+//     /**
+//      * Given that the fDelegate has already given its "initial" answer,
+//      * find the PREV actual (non-excepted) break.
+//      * @param n initial position from delegate
+//      * @return new break position or UBRK_DONE
+//      */
+//     int32_t internalPrev(int32_t n);
+//     /**
+//      * set up the UText with the value of the fDelegate.
+//      * Call this before calling breakExceptionAt.
+//      * May be able to avoid excess calls
+//      */
+//     void resetState(UErrorCode &status);
+//     /**
+//      * Is there a match  (exception) at this spot?
+//      */
+//     enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
+//     /**
+//      * Determine if there is an exception at this spot
+//      * @param n spot to check
+//      * @return kNoExceptionHere or kExceptionHere
+//      **/
+//     enum EFBMatchResult breakExceptionAt(int32_t n);
+// };
+
+// SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
+//   : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
+// {
+// }
+
+
+// SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
+//   BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
+//   fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
+//   fDelegate(adopt)
+// {
+//   // all set..
+// }
+
+// SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
+//     fData = fData->decr();
+// }
+
+// void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
+//   fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
+// }
+
+// SimpleFilteredSentenceBreakIterator::EFBMatchResult
+// SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
+//     int64_t bestPosn = -1;
+//     int32_t bestValue = -1;
+//     // loops while 'n' points to an exception.
+//     utext_setNativeIndex(fText.getAlias(), n); // from n..
+//     fData->fBackwardsTrie->reset();
+//     UChar32 uch;
+
+//     //if(debug2) u_printf(" n@ %d\n", n);
+//     // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
+//     if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) {  // TODO: skip a class of chars here??
+//       // TODO only do this the 1st time?
+//       //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
+//     } else {
+//       //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
+//       uch = utext_next32(fText.getAlias());
+//       //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
+//     }
+
+//     UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
+
+//     while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&   // more to consume backwards and..
+//           USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
+//       if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
+//         bestPosn = utext_getNativeIndex(fText.getAlias());
+//         bestValue = fData->fBackwardsTrie->getValue();
+//       }
+//       //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
+//     }
+
+//     if(USTRINGTRIE_MATCHES(r)) { // exact match?
+//       //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+//       bestValue = fData->fBackwardsTrie->getValue();
+//       bestPosn = utext_getNativeIndex(fText.getAlias());
+//       //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+//     }
+
+//     if(bestPosn>=0) {
+//       //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+
+//       //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
+//       //int32_t bestValue = fBackwardsTrie->getValue();
+//       ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (UChar)uch, r, bestValue);
+
+//       if(bestValue == kMATCH) { // exact match!
+//         //if(debug2) u_printf(" exact backward match\n");
+//         return kExceptionHere; // See if the next is another exception.
+//       } else if(bestValue == kPARTIAL
+//                 && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
+//         //if(debug2) u_printf(" partial backward match\n");
+//         // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
+//         // to see if it matches something going forward.
+//         fData->fForwardsPartialTrie->reset();
+//         UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
+//         utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
+//         //if(debug2) u_printf("Retrying at %d\n", bestPosn);
+//         while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
+//               USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
+//           //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
+//         }
+//         if(USTRINGTRIE_MATCHES(rfwd)) {
+//           //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
+//           // only full matches here, nothing to check
+//           // skip the next:
+//             return kExceptionHere;
+//         } else {
+//           //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
+//           // no match (no exception) -return the 'underlying' break
+//           return kNoExceptionHere;
+//         }
+//       } else {
+//         return kNoExceptionHere; // internal error and/or no forwards trie
+//       }
+//     } else {
+//       //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r);  // no best match
+//       return kNoExceptionHere; // No match - so exit. Not an exception.
+//     }
+// }
+
+// // the workhorse single next.
+// int32_t
+// SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
+//   if(n == UBRK_DONE || // at end  or
+//     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+//       return n;
+//   }
+//   // OK, do we need to break here?
+//   UErrorCode status = U_ZERO_ERROR;
+//   // refresh text
+//   resetState(status);
+//   if(U_FAILURE(status)) return UBRK_DONE; // bail out
+//   int64_t utextLen = utext_nativeLength(fText.getAlias());
+
+//   //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
+//   while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
+//     SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
+
+//     switch(m) {
+//     case kExceptionHere:
+//       n = fDelegate->next(); // skip this one. Find the next lower-level break.
+//       continue;
+
+//     default:
+//     case kNoExceptionHere:
+//       return n;
+//     }
+//   }
+//   return n;
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
+//   if(n == 0 || n == UBRK_DONE || // at end  or
+//     fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+//       return n;
+//   }
+//   // OK, do we need to break here?
+//   UErrorCode status = U_ZERO_ERROR;
+//   // refresh text
+//   resetState(status);
+//   if(U_FAILURE(status)) return UBRK_DONE; // bail out
+
+//   //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
+//   while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
+//     SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
+
+//     switch(m) {
+//     case kExceptionHere:
+//       n = fDelegate->previous(); // skip this one. Find the previous lower-level break.
+//       continue;
+
+//     default:
+//     case kNoExceptionHere:
+//       return n;
+//     }
+//   }
+//   return n;
+// }
+
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::next() {
+//   return internalNext(fDelegate->next());
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::first(void) {
+//   // Don't suppress a break opportunity at the beginning of text.
+//   return fDelegate->first();
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
+//   return internalPrev(fDelegate->preceding(offset));
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::previous(void) {
+//   return internalPrev(fDelegate->previous());
+// }
+
+// UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
+//   if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
+
+//   if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
+
+//   UErrorCode status = U_ZERO_ERROR;
+//   resetState(status);
+
+//   SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
+
+//   switch(m) {
+//   case kExceptionHere:
+//     return false;
+//   default:
+//   case kNoExceptionHere:
+//     return true;
+//   }
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
+//   return internalNext(fDelegate->next(offset));
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
+//   return internalNext(fDelegate->following(offset));
+// }
+
+// int32_t
+// SimpleFilteredSentenceBreakIterator::last(void) {
+//   // Don't suppress a break opportunity at the end of text.
+//   return fDelegate->last();
+// }
+
+
+// /**
+//  * Concrete implementation of builder class.
+//  */
+// class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
+// public:
+//   virtual ~SimpleFilteredBreakIteratorBuilder();
+//   SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
+//   SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
+//   virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
+//   virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
+//   virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
+// private:
+//   UStringSet fSet;
+// };
+
+// SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
+// {
+// }
+
+// SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
+//   : fSet(status)
+// {
+// }
+
+// SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
+//   : fSet(status)
+// {
+//   if(U_SUCCESS(status)) {
+//     UErrorCode subStatus = U_ZERO_ERROR;
+//     LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
+//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+//       status = subStatus; // copy the failing status
+// #if FB_DEBUG
+//       fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+// #endif
+//       return;  // leaves the builder empty, if you try to use it.
+//     }
+//     LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
+//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+//       status = subStatus; // copy the failing status
+// #if FB_DEBUG
+//       fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+// #endif
+//       return;  // leaves the builder empty, if you try to use it.
+//     }
+//     LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));
+
+// #if FB_DEBUG
+//     {
+//       UErrorCode subsub = subStatus;
+//       fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
+//     }
+// #endif
+
+//     if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
+//       status = subStatus; // copy the failing status
+// #if FB_DEBUG
+//       fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
+// #endif
+//       return;  // leaves the builder empty, if you try to use it.
+//     }
+
+//     LocalUResourceBundlePointer strs;
+//     subStatus = status; // Pick up inherited warning status now
+//     do {
+//       strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
+//       if(strs.isValid() && U_SUCCESS(subStatus)) {
+//         UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
+//         suppressBreakAfter(str, status); // load the string
+//       }
+//     } while (strs.isValid() && U_SUCCESS(subStatus));
+//     if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
+//       status = subStatus;
+//     }
+//   }
+// }
+
+// UBool
+// SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
+// {
+//   UBool r = fSet.add(exception, status);
+//   FB_TRACE("suppressBreakAfter",&exception,r,0);
+//   return r;
+// }
+
+// UBool
+// SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
+// {
+//   UBool r = fSet.remove(exception, status);
+//   FB_TRACE("unsuppressBreakAfter",&exception,r,0);
+//   return r;
+// }
+
+// /**
+//  * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
+//  * Work around this.
+//  *
+//  * Note: "new UnicodeString[subCount]" ends up calling global operator new
+//  * on MSVC2012 for some reason.
+//  */
+// static inline UnicodeString* newUnicodeStringArray(size_t count) {
+//     return new UnicodeString[count ? count : 1];
+// }
+
+// BreakIterator *
+// SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+//   LocalPointer<BreakIterator> adopt(adoptBreakIterator);
+
+//   LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
+//   LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
+//   if(U_FAILURE(status)) {
+//     return NULL;
+//   }
+
+//   int32_t revCount = 0;
+//   int32_t fwdCount = 0;
+
+//   int32_t subCount = fSet.size();
+
+//   UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
+
+//   LocalArray<UnicodeString> ustrs(ustrs_ptr);
+
+//   LocalMemory<int> partials;
+//   partials.allocateInsteadAndReset(subCount);
+
+//   LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
+//   LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."
+
+//   int n=0;
+//   for ( int32_t i = 0;
+//         i<fSet.size();
+//         i++) {
+//     const UnicodeString *abbr = fSet.getStringAt(i);
+//     if(abbr) {
+//       FB_TRACE("build",abbr,TRUE,i);
+//       ustrs[n] = *abbr; // copy by value
+//       FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
+//     } else {
+//       FB_TRACE("build",abbr,FALSE,i);
+//       status = U_MEMORY_ALLOCATION_ERROR;
+//       return NULL;
+//     }
+//     partials[n] = 0; // default: not partial
+//     n++;
+//   }
+//   // first pass - find partials.
+//   for(int i=0;i<subCount;i++) {
+//     int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
+//     if(nn>-1 && (nn+1)!=ustrs[i].length()) {
+//       FB_TRACE("partial",&ustrs[i],FALSE,i);
+//       // is partial.
+//       // is it unique?
+//       int sameAs = -1;
+//       for(int j=0;j<subCount;j++) {
+//         if(j==i) continue;
+//         if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
+//           FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
+//           //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
+//           if(partials[j]==0) { // hasn't been processed yet
+//             partials[j] = kSuppressInReverse | kAddToForward;
+//             FB_TRACE("suppressing",&ustrs[j],FALSE,j);
+//           } else if(partials[j] & kSuppressInReverse) {
+//             sameAs = j; // the other entry is already in the reverse table.
+//           }
+//         }
+//       }
+//       FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
+//       FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
+//       UnicodeString prefix(ustrs[i], 0, nn+1);
+//       if(sameAs == -1 && partials[i] == 0) {
+//         // first one - add the prefix to the reverse table.
+//         prefix.reverse();
+//         builder->add(prefix, kPARTIAL, status);
+//         revCount++;
+//         FB_TRACE("Added partial",&prefix,FALSE, i);
+//         FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+//         partials[i] = kSuppressInReverse | kAddToForward;
+//       } else {
+//         FB_TRACE("NOT adding partial",&prefix,FALSE, i);
+//         FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+//       }
+//     }
+//   }
+//   for(int i=0;i<subCount;i++) {
+//     if(partials[i]==0) {
+//       ustrs[i].reverse();
+//       builder->add(ustrs[i], kMATCH, status);
+//       revCount++;
+//       FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
+//     } else {
+//       FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
+
+//       // an optimization would be to only add the portion after the '.'
+//       // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
+//       // instead of "Ph.D." since we already know the "Ph." part is a match.
+//       // would need the trie to be able to hold 0-length strings, though.
+//       builder2->add(ustrs[i], kMATCH, status); // forward
+//       fwdCount++;
+//       //ustrs[i].reverse();
+//       ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
+//     }
+//   }
+//   FB_TRACE("AbbrCount",NULL,FALSE, subCount);
+
+//   if(revCount>0) {
+//     backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
+//     if(U_FAILURE(status)) {
+//       FB_TRACE(u_errorName(status),NULL,FALSE, -1);
+//       return NULL;
+//     }
+//   }
+
+//   if(fwdCount>0) {
+//     forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
+//     if(U_FAILURE(status)) {
+//       FB_TRACE(u_errorName(status),NULL,FALSE, -1);
+//       return NULL;
+//     }
+//   }
+
+//   return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
+// }
+
+
+// // ----------- Base class implementation
+
+// FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
+// }
+
+// FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
+// }
+
+// FilteredBreakIteratorBuilder *
+// FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
+//   if(U_FAILURE(status)) return NULL;
+//   LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
+//   return (U_SUCCESS(status))? ret.orphan(): NULL;
+// }
+
+// FilteredBreakIteratorBuilder *
+// FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
+//   return createEmptyInstance(status);
+// }
+
+// FilteredBreakIteratorBuilder *
+// FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
+//   if(U_FAILURE(status)) return NULL;
+//   LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
+//   return (U_SUCCESS(status))? ret.orphan(): NULL;
+// }
+
+// U_NAMESPACE_END
+
+// #endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
--- a/external/duckdb/extension/icu/third_party/icu/common/filterednormalizer2.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/filterednormalizer2.cpp
@@ -0,0 +1,363 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  filterednormalizer2.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009dec10
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cpputils.h"
+
+U_NAMESPACE_BEGIN
+
+// Out-of-line destructor definition; there is nothing to release explicitly.
+FilteredNormalizer2::~FilteredNormalizer2() = default;
+
+// Public entry point: replaces the contents of dest with the filtered
+// normalization of src and returns dest.
+//  - If src fails uprv_checkCanGetBuffer() (or errorCode already indicates a
+//    failure), dest is set to bogus.
+//  - If src and dest are the same object, errorCode is set to
+//    U_ILLEGAL_ARGUMENT_ERROR and dest is left untouched.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(src, errorCode);
+    if(U_FAILURE(errorCode)) {
+        dest.setToBogus();
+    } else if(&dest==&src) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    } else {
+        // Start from an empty destination; the internal overload only appends.
+        dest.remove();
+        normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
+    }
+    return dest;
+}
+
+// Internal worker: performs no argument checking and appends to dest
+// (it never clears it).
+//
+// src is walked as alternating runs: a run spanned with USET_SPAN_SIMPLE is
+// normalized through norm2 and appended, a run spanned with
+// USET_SPAN_NOT_CONTAINED is appended verbatim.
+//
+// spanCondition selects the kind of the first run. Callers should pass the
+// condition that is likely to produce a non-empty run at the start of src:
+// e.g. for set=[:age=3.2:], where almost all common characters are in the
+// set, pass USET_SPAN_SIMPLE for the start of a string and
+// USET_SPAN_NOT_CONTAINED when continuing after an in-filter prefix.
+UnicodeString &
+FilteredNormalizer2::normalize(const UnicodeString &src,
+                               UnicodeString &dest,
+                               USetSpanCondition spanCondition,
+                               UErrorCode &errorCode) const {
+    // Scratch output for norm2; declared outside the loop so its buffer is
+    // not thrown away between iterations.
+    UnicodeString scratch;
+    int32_t runStart=0;
+    while(runStart<src.length()) {
+        const int32_t runLimit=set.span(src, runStart, spanCondition);
+        const int32_t runLength=runLimit-runStart;
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            if(runLength!=0) {
+                // Deliberately not norm2.normalizeSecondAndAppend(): that could
+                // modify the non-filter text already sitting at the end of dest.
+                dest.append(norm2.normalize(src.tempSubStringBetween(runStart, runLimit),
+                                            scratch, errorCode));
+                if(U_FAILURE(errorCode)) {
+                    break;
+                }
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            if(runLength!=0) {
+                dest.append(src, runStart, runLength);
+            }
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        runStart=runLimit;
+    }
+    return dest;
+}
+
+// Public UTF-8 entry point: writes the filtered normalization of src to sink
+// and, when edits is non-null, records the changes there.
+// Does nothing if errorCode already indicates a failure.
+void
+FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                                   Edits *edits, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    // Reset the edits once up front, unless the caller asked us not to.
+    const bool resetRequested = (options & U_EDITS_NO_RESET) == 0;
+    if (resetRequested && edits != nullptr) {
+        edits->reset();
+    }
+    // From here on U_EDITS_NO_RESET is forced so the per-span calls do not
+    // wipe what earlier spans recorded.
+    normalizeUTF8(options | U_EDITS_NO_RESET, src.data(), src.length(), sink, edits,
+                  USET_SPAN_SIMPLE, errorCode);
+}
+
+// Internal UTF-8 worker: consumes src (length bytes) as alternating runs.
+// Runs spanned with USET_SPAN_SIMPLE are passed to norm2.normalizeUTF8();
+// runs spanned with USET_SPAN_NOT_CONTAINED are recorded as unchanged in
+// edits (if non-null) and copied to sink unless U_OMIT_UNCHANGED_TEXT is set.
+// spanCondition selects the kind of the first run. Stops at the first failure
+// reported through errorCode.
+void
+FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
+                                   ByteSink &sink, Edits *edits,
+                                   USetSpanCondition spanCondition,
+                                   UErrorCode &errorCode) const {
+    while (length > 0) {
+        const int32_t runLength = set.spanUTF8(src, length, spanCondition);
+        if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+            if (runLength != 0) {
+                // Deliberately not norm2.normalizeSecondAndAppend(): the
+                // non-filter output already written must not be modified.
+                norm2.normalizeUTF8(options, StringPiece(src, runLength), sink, edits, errorCode);
+                if (U_FAILURE(errorCode)) {
+                    return;
+                }
+            }
+            spanCondition = USET_SPAN_NOT_CONTAINED;
+        } else {
+            if (runLength != 0) {
+                if (edits != nullptr) {
+                    edits->addUnchanged(runLength);
+                }
+                const bool emitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) == 0;
+                if (emitUnchanged) {
+                    sink.Append(src, runLength);
+                }
+            }
+            spanCondition = USET_SPAN_SIMPLE;
+        }
+        // Advance past the run just handled.
+        src += runLength;
+        length -= runLength;
+    }
+}
+
+// Appends second to first, normalizing second in the process.
+// Thin wrapper over the four-argument overload with doNormalize=TRUE.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UErrorCode &errorCode) const {
+    const UBool doNormalize=TRUE;
+    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
+}
+
+// Appends second to first without normalizing second.
+// Thin wrapper over the four-argument normalizeSecondAndAppend() with
+// doNormalize=FALSE.
+UnicodeString &
+FilteredNormalizer2::append(UnicodeString &first,
+                            const UnicodeString &second,
+                            UErrorCode &errorCode) const {
+    const UBool doNormalize=FALSE;
+    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
+}
+
+// Shared implementation of normalizeSecondAndAppend() and append().
+//
+// Appends second to first and returns first. doNormalize chooses between the
+// norm2.normalizeSecondAndAppend()/normalize() path (TRUE) and the
+// norm2.append()/plain append path (FALSE).
+//
+// Errors: returns first unchanged if either string fails
+// uprv_checkCanGetBuffer() or errorCode is already a failure; sets
+// U_ILLEGAL_ARGUMENT_ERROR if first and second are the same object.
+UnicodeString &
+FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
+                                              const UnicodeString &second,
+                                              UBool doNormalize,
+                                              UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(first, errorCode);
+    uprv_checkCanGetBuffer(second, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return first;
+    }
+    if(&first==&second) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return first;
+    }
+    // Nothing to merge with: the result is just (normalized) second.
+    if(first.isEmpty()) {
+        if(!doNormalize) {
+            return first=second;
+        }
+        return normalize(second, first, errorCode);
+    }
+    // Merge the in-filter suffix of first with the in-filter prefix of second.
+    const int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
+    if(prefixLimit!=0) {
+        UnicodeString prefix(second.tempSubString(0, prefixLimit));
+        const int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
+        if(suffixStart!=0) {
+            // Only the tail of first is in the filter: work on a copy of that
+            // tail and splice the merged result back in.
+            UnicodeString middle(first, suffixStart, INT32_MAX);
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
+            } else {
+                norm2.append(middle, prefix, errorCode);
+            }
+            first.replace(suffixStart, INT32_MAX, middle);
+        } else {
+            // All of first is in the filter: let norm2 operate on it directly.
+            if(doNormalize) {
+                norm2.normalizeSecondAndAppend(first, prefix, errorCode);
+            } else {
+                norm2.append(first, prefix, errorCode);
+            }
+        }
+    }
+    // Whatever follows the in-filter prefix of second starts with a
+    // not-contained run.
+    if(prefixLimit<second.length()) {
+        UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
+        if(doNormalize) {
+            normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
+        } else {
+            first.append(rest);
+        }
+    }
+    return first;
+}
+
+// Returns norm2's decomposition of c, but only when c is in the filter set;
+// for any other c, norm2 is not consulted and the result is false.
+UBool
+FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    const bool inFilter=set.contains(c);
+    return inFilter && norm2.getDecomposition(c, decomposition);
+}
+
+// Returns norm2's raw decomposition of c, but only when c is in the filter
+// set; for any other c, norm2 is not consulted and the result is false.
+UBool
+FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
+    const bool inFilter=set.contains(c);
+    return inFilter && norm2.getRawDecomposition(c, decomposition);
+}
+
+// Composes a and b through norm2 when both code points are in the filter
+// set; otherwise returns U_SENTINEL without consulting norm2.
+UChar32
+FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
+    if(set.contains(a) && set.contains(b)) {
+        return norm2.composePair(a, b);
+    }
+    return U_SENTINEL;
+}
+
+// Returns norm2's combining class for c when c is in the filter set,
+// and 0 for every code point outside it.
+uint8_t
+FilteredNormalizer2::getCombiningClass(UChar32 c) const {
+    if(set.contains(c)) {
+        return norm2.getCombiningClass(c);
+    }
+    return 0;
+}
+
+// Returns TRUE when every in-filter run of s is normalized according to
+// norm2; runs outside the filter set are skipped without inspection.
+// Returns FALSE on any failure reported through errorCode.
+UBool
+FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t runStart=0;
+    while(runStart<s.length()) {
+        const int32_t runLimit=set.span(s, runStart, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            // In-filter run: delegate the check to norm2.
+            if(!norm2.isNormalized(s.tempSubStringBetween(runStart, runLimit), errorCode)) {
+                return FALSE;
+            }
+            if(U_FAILURE(errorCode)) {
+                return FALSE;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            // Out-of-filter run: nothing to check.
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        runStart=runLimit;
+    }
+    return TRUE;
+}
+
+// UTF-8 counterpart of isNormalized(): returns TRUE when every in-filter run
+// of sp is normalized according to norm2; out-of-filter runs are skipped.
+// Returns FALSE if errorCode indicates a failure on entry or afterwards.
+UBool
+FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const char *cursor = sp.data();
+    int32_t remaining = sp.length();
+    USetSpanCondition spanCondition = USET_SPAN_SIMPLE;
+    while (remaining > 0) {
+        const int32_t runLength = set.spanUTF8(cursor, remaining, spanCondition);
+        if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+            // In-filter run: delegate the check to norm2.
+            if (!norm2.isNormalizedUTF8(StringPiece(cursor, runLength), errorCode)) {
+                return FALSE;
+            }
+            if (U_FAILURE(errorCode)) {
+                return FALSE;
+            }
+            spanCondition = USET_SPAN_NOT_CONTAINED;
+        } else {
+            // Out-of-filter run: nothing to check.
+            spanCondition = USET_SPAN_SIMPLE;
+        }
+        cursor += runLength;
+        remaining -= runLength;
+    }
+    return TRUE;
+}
+
+// Quick-checks s by running norm2.quickCheck() on each in-filter run.
+// Returns:
+//  - UNORM_MAYBE if s fails uprv_checkCanGetBuffer() or errorCode is already
+//    a failure;
+//  - the per-run result as soon as a run yields UNORM_NO or a failure;
+//  - otherwise UNORM_MAYBE if any run yielded UNORM_MAYBE, else UNORM_YES.
+UNormalizationCheckResult
+FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return UNORM_MAYBE;
+    }
+    UNormalizationCheckResult overall=UNORM_YES;
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t runStart=0;
+    while(runStart<s.length()) {
+        const int32_t runLimit=set.span(s, runStart, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            const UNormalizationCheckResult runResult=
+                norm2.quickCheck(s.tempSubStringBetween(runStart, runLimit), errorCode);
+            if(U_FAILURE(errorCode) || runResult==UNORM_NO) {
+                return runResult;
+            }
+            if(runResult==UNORM_MAYBE) {
+                // Remember the weaker answer but keep scanning for a definite NO.
+                overall=UNORM_MAYBE;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        runStart=runLimit;
+    }
+    return overall;
+}
+
+// Returns the end index of the leading part of s for which every in-filter
+// run passes norm2.spanQuickCheckYes() in full; out-of-filter runs are
+// accepted as-is. Returns 0 if s fails uprv_checkCanGetBuffer() or errorCode
+// is already a failure, and s.length() when the whole string passes.
+int32_t
+FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+    uprv_checkCanGetBuffer(s, errorCode);
+    if(U_FAILURE(errorCode)) {
+        return 0;
+    }
+    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
+    int32_t runStart=0;
+    while(runStart<s.length()) {
+        const int32_t runLimit=set.span(s, runStart, spanCondition);
+        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
+            // norm2 reports a length relative to the substring; rebase it
+            // onto s by adding the run's start offset.
+            const int32_t yesLength=
+                norm2.spanQuickCheckYes(s.tempSubStringBetween(runStart, runLimit), errorCode);
+            const int32_t yesLimit=runStart+yesLength;
+            if(U_FAILURE(errorCode) || yesLimit<runLimit) {
+                return yesLimit;
+            }
+            spanCondition=USET_SPAN_NOT_CONTAINED;
+        } else {
+            spanCondition=USET_SPAN_SIMPLE;
+        }
+        runStart=runLimit;
+    }
+    return s.length();
+}
+
+// True for every code point outside the filter set; for code points inside
+// it, defers to norm2.hasBoundaryBefore().
+UBool
+FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
+    const bool outsideFilter=!set.contains(c);
+    return outsideFilter || norm2.hasBoundaryBefore(c);
+}
+
+// True for every code point outside the filter set; for code points inside
+// it, defers to norm2.hasBoundaryAfter().
+UBool
+FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
+    const bool outsideFilter=!set.contains(c);
+    return outsideFilter || norm2.hasBoundaryAfter(c);
+}
+
+// True for every code point outside the filter set; for code points inside
+// it, defers to norm2.isInert().
+UBool
+FilteredNormalizer2::isInert(UChar32 c) const {
+    const bool outsideFilter=!set.contains(c);
+    return outsideFilter || norm2.isInert(c);
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+// C API wrapper: creates a FilteredNormalizer2 over norm2 and filterSet and
+// returns it as an opaque UNormalizer2 pointer.
+//
+// @param norm2       Underlying normalizer; must not be NULL.
+// @param filterSet   Filter set; must not be NULL.
+// @param pErrorCode  In/out ICU error code. Nothing is done if it is NULL or
+//                    already indicates a failure.
+// @return The new instance, or NULL on failure, with *pErrorCode set to:
+//         - U_ILLEGAL_ARGUMENT_ERROR if norm2 or filterSet is NULL,
+//         - U_MEMORY_ALLOCATION_ERROR if the allocation fails.
+//
+// NOTE(review): the instance is allocated with new here; per the ICU C API it
+// is presumably released with unorm2_close() - confirm. norm2 and filterSet
+// are passed by dereference, so they presumably must outlive the returned
+// object - confirm against the FilteredNormalizer2 constructor.
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return NULL;
+    }
+    // Both inputs are dereferenced below, so reject NULL for either of them
+    // (previously only filterSet was checked and a NULL norm2 was undefined
+    // behaviour).
+    if(norm2==NULL || filterSet==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
+                                             *UnicodeSet::fromUSet(filterSet));
+    if(fn2==NULL) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (UNormalizer2 *)fn2;
+}
+
+#endif  // !UCONFIG_NO_NORMALIZATION
--- a/external/duckdb/extension/icu/third_party/icu/common/hash.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/hash.h
@@ -0,0 +1,248 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*   Copyright (C) 1997-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+******************************************************************************
+*   Date        Name        Description
+*   03/28/00    aliu        Creation.
+******************************************************************************
+*/
+
+#ifndef HASH_H
+#define HASH_H
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "cmemory.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
+ * hashtable implemented in C.  Hashtable is designed to be idiomatic and
+ * easy-to-use in C++.
+ *
+ * Hashtable is an INTERNAL CLASS.
+ */
+class U_COMMON_API Hashtable : public UMemory {
+    // Handle used by every operation: 0 until init()/initSize() succeeds,
+    // then the address of hashObj. The accessors below pass it to uhash_*
+    // without checking whether construction succeeded.
+    UHashtable* hash;
+    // In-object storage for the underlying C hashtable.
+    UHashtable hashObj;
+
+    // Initializes hashObj with the given callbacks. On success, points `hash`
+    // at it and installs uprv_deleteUObject as the key deleter. Does nothing
+    // if status already holds a failure.
+    inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+
+    // Same as init(), but forwards `size` to uhash_initSize().
+    inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
+
+public:
+    /**
+     * Construct a hashtable
+     * @param ignoreKeyCase If true, keys are case insensitive.
+     * @param status Error code
+    */
+    inline Hashtable(UBool ignoreKeyCase, UErrorCode& status);
+
+    /**
+     * Construct a hashtable
+     * @param ignoreKeyCase If true, keys are case insensitive.
+     * @param size initial size allocation
+     * @param status Error code
+    */
+    inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+
+    /**
+     * Construct a hashtable
+     * @param keyComp Comparator for comparing the keys
+     * @param valueComp Comparator for comparing the values
+     * @param status Error code
+    */
+    inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+
+    /**
+     * Construct a hashtable
+     * @param status Error code
+    */
+    inline Hashtable(UErrorCode& status);
+
+    /**
+     * Construct a hashtable, _disregarding any error_.  Use this constructor
+     * with caution.
+     */
+    inline Hashtable();
+
+    /**
+     * Non-virtual destructor; make this virtual if Hashtable is subclassed
+     * in the future.
+     */
+    inline ~Hashtable();
+
+    /** Forwards to uhash_setValueDeleter() and returns its result. */
+    inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
+
+    /** Forwards to uhash_count(). */
+    inline int32_t count() const;
+
+    /**
+     * Stores a pointer value under a newly allocated copy of key
+     * (forwards to uhash_put()).
+     * @param status Error code, passed through to uhash_put()
+     */
+    inline void* put(const UnicodeString& key, void* value, UErrorCode& status);
+
+    /**
+     * Stores an integer value under a newly allocated copy of key
+     * (forwards to uhash_puti()).
+     * @param status Error code, passed through to uhash_puti()
+     */
+    inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
+
+    /** Looks up key via uhash_get(). */
+    inline void* get(const UnicodeString& key) const;
+
+    /** Looks up key via uhash_geti(). */
+    inline int32_t geti(const UnicodeString& key) const;
+
+    /** Removes key via uhash_remove() and returns its result. */
+    inline void* remove(const UnicodeString& key);
+
+    /** Removes key via uhash_removei() and returns its result. */
+    inline int32_t removei(const UnicodeString& key);
+
+    /** Forwards to uhash_removeAll(). */
+    inline void removeAll(void);
+
+    /** Forwards to uhash_find(). */
+    inline const UHashElement* find(const UnicodeString& key) const;
+
+    /**
+     * @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
+     * @see uhash_nextElement
+     */
+    inline const UHashElement* nextElement(int32_t& pos) const;
+
+    /** Forwards to uhash_setKeyComparator() and returns its result. */
+    inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
+
+    /** Forwards to uhash_setValueComparator() and returns its result. */
+    inline UValueComparator* setValueComparator(UValueComparator* valueComp);
+
+    /** Compares the two underlying tables with uhash_equals(). */
+    inline UBool equals(const Hashtable& that) const;
+private:
+    Hashtable(const Hashtable &other); // forbid copying of this class
+    Hashtable &operator=(const Hashtable &other); // forbid copying of this class
+};
+
+/*********************************************************************
+ * Implementation
+ ********************************************************************/
+
+// Sets up the embedded table with the supplied callbacks. `hash` is only
+// pointed at the embedded storage once uhash_init() has reported success.
+inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
+                            UValueComparator *valueComp, UErrorCode& status) {
+    if (U_SUCCESS(status)) {
+        uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+        hash = &hashObj;
+        // Keys are heap-allocated UObjects, so let the table delete them.
+        uhash_setKeyDeleter(hash, uprv_deleteUObject);
+    }
+}
+
+// Like init(), but passes an initial size on to uhash_initSize(). `hash` is
+// only pointed at the embedded storage once that call has reported success.
+inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
+                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
+    if (U_SUCCESS(status)) {
+        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+        hash = &hashObj;
+        // Keys are heap-allocated UObjects, so let the table delete them.
+        uhash_setKeyDeleter(hash, uprv_deleteUObject);
+    }
+}
+
+// Caller-supplied comparators; keys are always hashed as UnicodeStrings.
+inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
+                 UErrorCode& status)
+ : hash(NULL)
+{
+    UHashFunction *keyHash = uhash_hashUnicodeString;
+    init(keyHash, keyComp, valueComp, status);
+}
+
+// Picks the caseless or the exact hash/compare pair depending on ignoreKeyCase.
+inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
+ : hash(NULL)
+{
+    if (ignoreKeyCase) {
+        init(uhash_hashCaselessUnicodeString, uhash_compareCaselessUnicodeString, NULL, status);
+    } else {
+        init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
+    }
+}
+
+// Picks the caseless or the exact hash/compare pair depending on
+// ignoreKeyCase, and forwards the requested initial size.
+inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
+ : hash(NULL)
+{
+    if (ignoreKeyCase) {
+        initSize(uhash_hashCaselessUnicodeString, uhash_compareCaselessUnicodeString,
+                 NULL, size, status);
+    } else {
+        initSize(uhash_hashUnicodeString, uhash_compareUnicodeString,
+                 NULL, size, status);
+    }
+}
+
+// Case-sensitive UnicodeString keys, no value comparator.
+inline Hashtable::Hashtable(UErrorCode& status)
+ : hash(NULL)
+{
+    UHashFunction *keyHash = uhash_hashUnicodeString;
+    UKeyComparator *keyCompare = uhash_compareUnicodeString;
+    init(keyHash, keyCompare, NULL, status);
+}
+
+// Case-sensitive UnicodeString keys. Any initialization error is dropped:
+// on failure `hash` simply stays NULL.
+inline Hashtable::Hashtable()
+ : hash(NULL)
+{
+    UErrorCode discarded = U_ZERO_ERROR;
+    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, discarded);
+}
+
+// Closes the table only if initialization ever succeeded.
+inline Hashtable::~Hashtable() {
+    if (hash == NULL) {
+        return;
+    }
+    uhash_close(hash);
+}
+
+// Forwards to uhash_setValueDeleter() and hands back whatever it returns.
+inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
+    UObjectDeleter *result = uhash_setValueDeleter(hash, fn);
+    return result;
+}
+
+// Forwards to uhash_count().
+inline int32_t Hashtable::count() const {
+    const int32_t entries = uhash_count(hash);
+    return entries;
+}
+
+// The table stores its own heap copy of the key; the caller keeps theirs.
+inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
+    UnicodeString *ownedKey = new UnicodeString(key);
+    return uhash_put(hash, ownedKey, value, &status);
+}
+
+// The table stores its own heap copy of the key; the caller keeps theirs.
+inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
+    UnicodeString *ownedKey = new UnicodeString(key);
+    return uhash_puti(hash, ownedKey, value, &status);
+}
+
+// Looks the key up by address; no copy of the key is made.
+inline void* Hashtable::get(const UnicodeString& key) const {
+    const UnicodeString *lookupKey = &key;
+    return uhash_get(hash, lookupKey);
+}
+
+// Looks the key up by address; no copy of the key is made.
+inline int32_t Hashtable::geti(const UnicodeString& key) const {
+    const UnicodeString *lookupKey = &key;
+    return uhash_geti(hash, lookupKey);
+}
+
+// Forwards to uhash_remove() and hands back whatever it returns.
+inline void* Hashtable::remove(const UnicodeString& key) {
+    const UnicodeString *lookupKey = &key;
+    return uhash_remove(hash, lookupKey);
+}
+
+// Forwards to uhash_removei() and hands back whatever it returns.
+inline int32_t Hashtable::removei(const UnicodeString& key) {
+    const UnicodeString *lookupKey = &key;
+    return uhash_removei(hash, lookupKey);
+}
+
+// Forwards to uhash_find().
+inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
+    const UnicodeString *lookupKey = &key;
+    return uhash_find(hash, lookupKey);
+}
+
+// Forwards to uhash_nextElement(); `pos` is passed by address so the
+// callee can update the caller's cursor.
+inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
+    int32_t *cursor = &pos;
+    return uhash_nextElement(hash, cursor);
+}
+
+// Forwards to uhash_removeAll() on the underlying table.
+inline void Hashtable::removeAll(void) {
+    uhash_removeAll(hash);
+}
+
+// Forwards to uhash_setKeyComparator() and hands back whatever it returns.
+inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
+    UKeyComparator *result = uhash_setKeyComparator(hash, keyComp);
+    return result;
+}
+
+// Forwards to uhash_setValueComparator() and hands back whatever it returns.
+inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
+    UValueComparator *result = uhash_setValueComparator(hash, valueComp);
+    return result;
+}
+
+// Compares the two underlying tables with uhash_equals().
+inline UBool Hashtable::equals(const Hashtable& that)const{
+   const UHashtable *other = that.hash;
+   return uhash_equals(hash, other);
+}
+U_NAMESPACE_END
+
+#endif
+
--- a/external/duckdb/extension/icu/third_party/icu/common/icudataver.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/icudataver.cpp
@@ -0,0 +1,31 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2009-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/icudataver.h"
+#include "unicode/ures.h"
+#include "uresimp.h" /* for ures_getVersionByKey */
+
+/**
+ * Reads the entry U_ICU_DATA_KEY from the bundle U_ICU_VERSION_BUNDLE into
+ * dataVersionFillin.
+ *
+ * @param dataVersionFillin receives the version; if NULL nothing is done
+ * @param status in/out error code; nothing is done if it already holds a
+ *               failure
+ */
+U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
+    if (U_FAILURE(*status) || dataVersionFillin == NULL) {
+        return;
+    }
+
+    UResourceBundle *versionBundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
+    if (U_SUCCESS(*status)) {
+        ures_getVersionByKey(versionBundle, U_ICU_DATA_KEY, dataVersionFillin, status);
+    }
+    /* closed on both paths, exactly as the open result came back */
+    ures_close(versionBundle);
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/icuplug.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/icuplug.cpp
@@ -0,0 +1,884 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2009-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : icuplug.c
+*
+*   Date         Name        Description
+*   10/29/2009   sl          New.
+******************************************************************************
+*/
+
+#include "unicode/icuplug.h"
+
+
+#if UCONFIG_ENABLE_PLUGINS
+
+
+#include "icuplugimp.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "putilimp.h"
+#include "ucln.h"
+#include <stdio.h>
+#ifdef __MVS__  /* defined by z/OS compiler */
+#define _POSIX_SOURCE
+#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
+#endif
+#include "charstr.h"
+
+using namespace icu;
+
+#ifndef UPLUG_TRACE
+#define UPLUG_TRACE 0
+#endif
+
+#if UPLUG_TRACE
+#include <stdio.h>
+/* Trace helper, used as DBG((stderr, "fmt", args...));
+   Wrapped in do/while(0) so the two fprintf calls act as a single statement,
+   e.g. under an unbraced if/else. */
+#define DBG(x) do { fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x; } while(0)
+#endif
+
+/**
+ * Internal structure of an ICU plugin.
+ * Instances live in the static pluginList table.
+ */
+
+struct UPlugData {
+  UPlugEntrypoint  *entrypoint; /**< plugin entrypoint, or NULL if none */
+  uint32_t structSize;    /**< initialized to the size of this structure */
+  uint32_t token;         /**< set to UPLUG_TOKEN when the slot is allocated */
+  void *lib;              /**< plugin library, or NULL */
+  char libName[UPLUG_NAME_MAX];   /**< library name; empty unless set by uplug_initErrorPlug() */
+  char sym[UPLUG_NAME_MAX];        /**< plugin symbol name, or empty string */
+  char config[UPLUG_NAME_MAX];     /**< configuration data, or empty string */
+  void *context;          /**< user context data (uplug_setContext / uplug_getContext) */
+  char name[UPLUG_NAME_MAX];   /**< name of plugin (or an error message for error placeholders) */
+  UPlugLevel  level; /**< level of plugin */
+  UBool   awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
+  UBool   dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
+  UErrorCode pluginStatus; /**< status code of plugin */
+};
+
+
+
+/* Capacity of the statically allocated library table (staticLibraryList). */
+#define UPLUG_LIBRARY_INITIAL_COUNT 8
+/* Capacity of the statically allocated plugin table (pluginList). */
+#define UPLUG_PLUGIN_INITIAL_COUNT 12
+
+/**
+ * Remove one item from a packed array by sliding every following entry down
+ * by one slot.
+ * @param list the full list
+ * @param listSize the number of entries in the list
+ * @param memberSize the size of one member, in bytes
+ * @param itemToRemove the item number of the member
+ * @return the new listsize; unchanged if the list is empty or itemToRemove
+ *         is out of range
+ */
+static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
+  uint8_t *bytePtr = (uint8_t *)list;
+
+  /* get rid of the bad cases first: empty list or an index outside of it */
+  if(listSize<1 || itemToRemove<0 || itemToRemove>=listSize) {
+    return listSize;
+  }
+
+  /* is there anything to move? If so, move the WHOLE tail, not just one entry,
+     otherwise every entry after itemToRemove+1 would be lost. */
+  int32_t trailing = listSize - itemToRemove - 1;
+  if(trailing > 0) {
+    memmove(bytePtr+((size_t)itemToRemove*(size_t)memberSize),
+            bytePtr+((size_t)(itemToRemove+1)*(size_t)memberSize),
+            (size_t)trailing*(size_t)memberSize);
+  }
+
+  return listSize-1;
+}
+
+
+
+
+#if U_ENABLE_DYLOAD
+/**
+ * Library management. Internal.
+ * Forward declaration of the library table entry.
+ * @internal
+ */
+struct UPlugLibrary;
+
+/**
+ * Library management. Internal.
+ * One entry of the library table: an open library handle, the name it was
+ * opened under, and how many users currently hold it.
+ * @internal
+ */
+typedef struct UPlugLibrary {
+  void *lib;                           /**< library ptr */
+  char name[UPLUG_NAME_MAX]; /**< library name */
+  uint32_t ref;                        /**< reference count */
+} UPlugLibrary;
+
+/* Fixed-size backing storage for the library table. */
+static UPlugLibrary   staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
+/* Table actually used by the functions below; initially the static storage. */
+static UPlugLibrary * libraryList = staticLibraryList;
+/* Number of entries of libraryList currently in use. */
+static int32_t libraryCount = 0;
+/* Capacity of libraryList. */
+static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
+
+/**
+ * Search the library table by name. Doesn't lock.
+ * @param libName libname to search for
+ * @return index of the first entry with that name, or -1 if there is none
+ */
+static int32_t searchForLibraryName(const char *libName) {
+  for(int32_t slot=0; slot<libraryCount; ++slot) {
+    if(uprv_strcmp(libName, libraryList[slot].name) == 0) {
+      return slot;
+    }
+  }
+  return -1;
+}
+
+/**
+ * Search the library table by handle.
+ * @param lib library pointer to search for
+ * @return index of the first entry holding that pointer, or -1 if there is none
+ */
+static int32_t searchForLibrary(void *lib) {
+  for(int32_t slot=0; slot<libraryCount; ++slot) {
+    if(libraryList[slot].lib == lib) {
+      return slot;
+    }
+  }
+  return -1;
+}
+
+/**
+ * Look up the name a library handle is registered under.
+ * @param lib library pointer to look up
+ * @param status in/out error code; set to U_MISSING_RESOURCE_ERROR if lib is
+ *               not in the library table
+ * @return the stored name, or NULL on error
+ */
+U_INTERNAL char * U_EXPORT2
+uplug_findLibrary(void *lib, UErrorCode *status) {
+  if(U_FAILURE(*status)) {
+    return NULL;
+  }
+  const int32_t slot = searchForLibrary(lib);
+  if(slot == -1) {
+    *status = U_MISSING_RESOURCE_ERROR;
+    return NULL;
+  }
+  return libraryList[slot].name;
+}
+
+/**
+ * Open a library by name, or add a reference if a library of that name is
+ * already in the library table.
+ * @param libName library name to load
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when the
+ *               library table is full
+ * @return the library pointer, or NULL on failure
+ */
+U_INTERNAL void * U_EXPORT2
+uplug_openLibrary(const char *libName, UErrorCode *status) {
+  int32_t libEntry = -1;
+  void *lib = NULL;
+
+  if(U_FAILURE(*status)) return NULL;
+
+  libEntry = searchForLibraryName(libName);
+  if(libEntry != -1) {
+    /* already open: hand out the stored handle and count the new user */
+    libraryList[libEntry].ref++;
+    return libraryList[libEntry].lib;
+  }
+
+  /* Check for a free slot BEFORE claiming one. Claiming first would leave
+     libraryCount one past the table after a failure, and would also make the
+     last slot unusable. */
+  if(libraryCount >= libraryMax) {
+    /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
+    *status = U_MEMORY_ALLOCATION_ERROR;
+#if UPLUG_TRACE
+    DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
+#endif
+    return NULL;
+  }
+  libEntry = libraryCount;
+
+  /* Some operating systems don't want
+     DL operations from multiple threads. */
+  lib = uprv_dl_open(libName, status);
+#if UPLUG_TRACE
+  DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
+#endif
+
+  if(lib == NULL || U_FAILURE(*status)) {
+    /* failure with open: the slot was never counted, just leave it blank */
+    libraryList[libEntry].lib = NULL;
+    libraryList[libEntry].name[0] = 0;
+    return NULL;
+  }
+
+  /* link it in; strncpy does not terminate on truncation, so do it by hand */
+  uprv_strncpy(libraryList[libEntry].name, libName, UPLUG_NAME_MAX-1);
+  libraryList[libEntry].name[UPLUG_NAME_MAX-1] = 0;
+  libraryList[libEntry].lib = lib;
+  libraryList[libEntry].ref = 1;
+  libraryCount++;
+  return lib;
+}
+
+/**
+ * Drop one reference to a library. When the reference count reaches zero the
+ * library is closed and its entry is removed from the library table.
+ * @param lib the library to release
+ * @param status in/out error code; set to U_INTERNAL_PROGRAM_ERROR if lib is
+ *               not in the library table
+ */
+U_INTERNAL void U_EXPORT2
+uplug_closeLibrary(void *lib, UErrorCode *status) {
+#if UPLUG_TRACE
+  DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
+#endif
+  if(U_FAILURE(*status)) return;
+
+  const int32_t slot = searchForLibrary(lib);
+  if(slot == -1) {
+    *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
+    return;
+  }
+
+  UPlugLibrary *entry = &libraryList[slot];
+  entry->ref -= 1;
+  if(entry->ref == 0) {
+    uprv_dl_close(entry->lib, status);
+    libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), slot);
+  }
+}
+
+#endif
+
+/* Fixed-size table of all plugins; entries [0, pluginCount) are in use. */
+static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
+/* Number of entries of pluginList currently in use. */
+static int32_t pluginCount = 0;
+
+
+
+  
+/**
+ * Convert a pointer into pluginList to its index.
+ * @param d pointer to a plugin entry
+ * @return the index of d; 0 if d is at or before the start of the table,
+ *         pluginCount if d is at or past the end of the used entries
+ */
+static int32_t uplug_pluginNumber(UPlugData* d) {
+  UPlugData *pastPlug = &pluginList[pluginCount];
+  if(d<=pluginList) {
+    return 0;
+  } else if(d>=pastPlug) {
+    return pluginCount;
+  } else {
+    /* Pointer subtraction already yields a count of elements, not bytes, so
+       it must not be divided by the element size again. */
+    return (int32_t)(d-pluginList);
+  }
+}
+
+
+/**
+ * Iterate over the plugin table.
+ * @param prior the entry returned by the previous call, or NULL to start
+ * @return the entry after prior (the first entry if prior is NULL), or NULL
+ *         when there are no more entries
+ */
+U_CAPI UPlugData * U_EXPORT2
+uplug_nextPlug(UPlugData *prior) {
+  UPlugData *pastPlug = &pluginList[pluginCount];
+  /* starting an iteration means "one before the first entry" */
+  UPlugData *nextPlug = (prior==NULL) ? pluginList : &prior[1];
+
+  /* This also covers an empty table: slot 0 is then not a live plugin and
+     must not be handed out. */
+  if(nextPlug>=pastPlug) {
+    return NULL;
+  }
+  return nextPlug;
+}
+
+
+
+/**
+ * Call the plugin with some params
+ */
+/* Invokes the plugin's entrypoint for the given reason. A missing entrypoint,
+   or an entrypoint that does not answer with UPLUG_TOKEN, is reported as
+   U_INTERNAL_PROGRAM_ERROR. Does nothing if plug is NULL or status already
+   holds a failure. */
+static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
+  UPlugTokenReturn token;
+  if(plug==NULL||U_FAILURE(*status)) {
+    return;
+  }
+  /* entrypoint is NULL for error placeholders and for plugs whose removal
+     failed; there is nothing to call in that case */
+  if(plug->entrypoint==NULL) {
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return;
+  }
+  token = (*(plug->entrypoint))(plug, reason, status);
+  if(token!=UPLUG_TOKEN) {
+    *status = U_INTERNAL_PROGRAM_ERROR;
+  }
+}
+
+
+/* Sends UPLUG_REASON_UNLOAD to a plugin, unless its recorded plugin status
+   is a failure. */
+static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
+  if(plug->awaitingLoad) {
+    /* shouldn't happen: the plugin has not been loaded yet */
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return;
+  }
+  if(U_FAILURE(plug->pluginStatus)) {
+    /* Don't unload a plug which has a failing load status - means it didn't actually load. */
+    return;
+  }
+  uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
+}
+
+/* Sends UPLUG_REASON_QUERY to a plug so that it can report its level. The
+   level is preset to UPLUG_LEVEL_INVALID; if it still has that value after a
+   successful call, the plugin status becomes U_PLUGIN_DIDNT_SET_LEVEL. */
+static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
+  /* only a plug that is still awaiting load and has no level yet may be queried */
+  const UBool queryable = plug->awaitingLoad && (plug->level == UPLUG_LEVEL_UNKNOWN);
+  if(!queryable) {
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return;
+  }
+
+  plug->level = UPLUG_LEVEL_INVALID;
+  uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
+
+  if(U_FAILURE(*status)) {
+    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
+    plug->awaitingLoad = FALSE;
+    return;
+  }
+  if(plug->level == UPLUG_LEVEL_INVALID) {
+    plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
+    plug->awaitingLoad = FALSE;
+  }
+}
+
+
+/* Sends UPLUG_REASON_LOAD to a plug that has been queried and is still
+   awaiting load. A NULL plug, a plug that is not awaiting load, or one whose
+   level is below UPLUG_LEVEL_LOW is reported as U_INTERNAL_PROGRAM_ERROR. */
+static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
+  if(U_FAILURE(*status)) {
+    return;
+  }
+  /* callers pass the result of the init helpers straight through, and those
+     can hand back NULL */
+  if(plug == NULL) {
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return;
+  }
+  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen: already handled, or no usable level */
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return;
+  }
+  uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
+  plug->awaitingLoad = FALSE;
+  if(U_FAILURE(*status)) {
+    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
+  }
+}
+
+/* Claims the next free slot of pluginList and resets every field of it.
+   Sets U_MEMORY_ALLOCATION_ERROR and returns NULL when the table is full. */
+static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
+{
+  UPlugData *plug = NULL;
+
+  if(U_FAILURE(*status)) {
+    return NULL;
+  }
+
+  if(pluginCount >= UPLUG_PLUGIN_INITIAL_COUNT) {
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return NULL;
+  }
+
+  plug = &pluginList[pluginCount++];
+
+  plug->token = UPLUG_TOKEN;
+  plug->structSize = sizeof(UPlugData);
+  plug->name[0]=0;
+  plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
+  plug->awaitingLoad = TRUE;
+  plug->dontUnload = FALSE;
+  plug->pluginStatus = U_ZERO_ERROR;
+  plug->libName[0] = 0;
+  plug->config[0]=0;
+  plug->sym[0]=0;
+  plug->lib=NULL;
+  plug->entrypoint=NULL;
+  /* slots are reused after removal, so a previous occupant's context must
+     not leak into the new plug */
+  plug->context=NULL;
+
+
+  return plug;
+}
+
+/* Allocates a plug, records entrypoint, config, lib and symbol name on it
+   and sends it the initial query. config and symName may be NULL; over-long
+   strings are truncated to fit. Returns NULL if no slot could be allocated. */
+static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
+                                     UErrorCode *status) {
+  UPlugData *plug = uplug_allocateEmptyPlug(status);
+  if(U_FAILURE(*status)) {
+    return NULL;
+  }
+
+  /* strncpy does not terminate on truncation, so terminate by hand */
+  if(config!=NULL) {
+    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX-1);
+    plug->config[UPLUG_NAME_MAX-1] = 0;
+  } else {
+    plug->config[0] = 0;
+  }
+
+  if(symName!=NULL) {
+    uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX-1);
+    plug->sym[UPLUG_NAME_MAX-1] = 0;
+  } else {
+    plug->sym[0] = 0;
+  }
+
+  plug->entrypoint = entrypoint;
+  plug->lib = lib;
+  uplug_queryPlug(plug, status);
+
+  return plug;
+}
+
+/* Releases a plug's library reference and, if everything is OK, removes the
+   plug from pluginList. If status holds (or receives) a failure, the entry is
+   kept in the table as an inert record instead. A NULL plug is ignored. */
+static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
+  UErrorCode subStatus = U_ZERO_ERROR;
+  if(plug == NULL) {
+    return;
+  }
+#if U_ENABLE_DYLOAD
+  /* A plug without a library (lib==NULL) has nothing to release; asking
+     uplug_closeLibrary() for a handle it does not know is reported as an
+     error and would keep the plug from ever being removed. */
+  if(!plug->dontUnload && plug->lib != NULL) {
+    uplug_closeLibrary(plug->lib, &subStatus);
+  }
+#endif
+  plug->lib = NULL;
+  if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
+    *status = subStatus;
+  }
+  /* shift plugins up and decrement count. */
+  if(U_SUCCESS(*status)) {
+    /* all ok- remove. */
+    pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
+  } else {
+    /* not ok- leave as a message. */
+    plug->awaitingLoad=FALSE;
+    plug->entrypoint=0;
+    plug->dontUnload=TRUE;
+  }
+}
+
+/* Unloads a plug and then releases its table entry; a NULL plug is ignored. */
+static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
+  if(plugToRemove == NULL) {
+    return;
+  }
+  uplug_unloadPlug(plugToRemove, status);
+  uplug_deallocatePlug(plugToRemove, status);
+}
+
+/**
+ * Unload and remove a plugin, but only if the pointer really is an entry of
+ * the plugin table; any other pointer is silently ignored.
+ * @param plug the plugin to remove
+ * @param status in/out error code
+ */
+U_CAPI void U_EXPORT2
+uplug_removePlug(UPlugData *plug, UErrorCode *status)  {
+  if(U_FAILURE(*status)) return;
+
+  /* walk the table until we either hit the requested entry or run off the end */
+  UPlugData *cursor = pluginList;
+  while(cursor!=NULL && cursor!=plug) {
+    cursor = uplug_nextPlug(cursor);
+  }
+
+  /* cursor is now either plug itself or NULL (not found) */
+  uplug_doUnloadPlug(cursor, status);
+}
+
+
+
+
+/* Sets the plug's dontUnload flag; while it is set, uplug_deallocatePlug()
+   does not close the plug's library. */
+U_CAPI void U_EXPORT2 
+uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
+{
+  data->dontUnload = dontUnload;
+}
+
+
+/* Stores the plug's level. */
+U_CAPI void U_EXPORT2
+uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
+  data->level = level;
+}
+
+
+/* Returns the plug's stored level. */
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getPlugLevel(UPlugData *data) {
+  return data->level;
+}
+
+
+/* Copies name into the plug's fixed-size name buffer. Names that do not fit
+   are truncated; the stored string is always NUL-terminated. */
+U_CAPI void U_EXPORT2
+uplug_setPlugName(UPlugData *data, const char *name) {
+  /* strncpy does not terminate on truncation, so terminate by hand */
+  uprv_strncpy(data->name, name, UPLUG_NAME_MAX-1);
+  data->name[UPLUG_NAME_MAX-1] = 0;
+}
+
+
+/* Returns a pointer to the plug's own name buffer (not a copy). */
+U_CAPI const char * U_EXPORT2
+uplug_getPlugName(UPlugData *data) {
+  return data->name;
+}
+
+
+/* Returns a pointer to the plug's own symbol-name buffer (not a copy). */
+U_CAPI const char * U_EXPORT2
+uplug_getSymbolName(UPlugData *data) {
+  return data->sym;
+}
+
+/* Returns the library name of a plug: the name stored on the plug itself if
+   there is one, otherwise (when dynamic loading is enabled) the name the
+   plug's library handle is registered under. */
+U_CAPI const char * U_EXPORT2
+uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
+  /* a name stored on the plug itself takes precedence */
+  if(data->libName[0] != 0) {
+    return data->libName;
+  }
+#if U_ENABLE_DYLOAD
+  return uplug_findLibrary(data->lib, status);
+#else
+  return NULL;
+#endif
+}
+
+/* Returns the plug's library handle, which may be NULL. */
+U_CAPI void * U_EXPORT2
+uplug_getLibrary(UPlugData *data) {
+  return data->lib;
+}
+
+/* Returns the context pointer last stored with uplug_setContext(). */
+U_CAPI void * U_EXPORT2
+uplug_getContext(UPlugData *data) {
+  return data->context;
+}
+
+
+/* Stores an opaque context pointer on the plug; the pointer is only stored
+   here, not interpreted. */
+U_CAPI void U_EXPORT2
+uplug_setContext(UPlugData *data, void *context) {
+  data->context = context;
+}
+
+/* Returns a pointer to the plug's own configuration buffer (not a copy). */
+U_CAPI const char* U_EXPORT2
+uplug_getConfiguration(UPlugData *data) {
+  return data->config;
+}
+
+/* Returns entry n of the plugin table, or NULL if n is not the index of an
+   entry currently in use. */
+U_INTERNAL UPlugData* U_EXPORT2
+uplug_getPlugInternal(int32_t n) {
+  const UBool inRange = (n >= 0) && (n < pluginCount);
+  return inRange ? &(pluginList[n]) : NULL;
+}
+
+
+/* Returns the status code recorded on the plug (pluginStatus). */
+U_CAPI UErrorCode U_EXPORT2
+uplug_getPlugLoadStatus(UPlugData *plug) {
+  return plug->pluginStatus;
+}
+
+
+
+
+/**
+ * Initialize a plugin from an entrypoint and library - but don't load it.
+ * @return the new plug, or NULL if allocation or the initial query failed
+ */
+static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
+                                                         UErrorCode *status) {
+  UPlugData *plug = NULL;
+
+  plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);
+
+  if(U_SUCCESS(*status)) {
+    return plug;
+  }
+  /* uplug_allocatePlug() returns NULL when no slot could be allocated; in
+     that case there is nothing to deallocate (and nothing to dereference). */
+  if(plug != NULL) {
+    uplug_deallocatePlug(plug, status);
+  }
+  return NULL;
+}
+
+/* Creates a plug from an in-process entrypoint (no library, no symbol name)
+   and then sends it the load call. Returns the plug, or NULL if
+   initialization failed; in that case status holds a failure and the load
+   step returns without doing anything. */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
+  UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
+  uplug_loadPlug(plug, status);
+  return plug;
+}
+
+#if U_ENABLE_DYLOAD
+
+/* Creates a placeholder plug that records a failed load: it carries the
+   library/symbol/config strings and an error message as its name, is marked
+   as not awaiting load and as not unloadable, and has loadStatus as its
+   plugin status. Any of the string arguments may be NULL. Over-long strings
+   are truncated; stored strings are always NUL-terminated. */
+static UPlugData* 
+uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
+{
+  UPlugData *plug = uplug_allocateEmptyPlug(status);
+  if(U_FAILURE(*status)) return NULL;
+
+  plug->pluginStatus = loadStatus;
+  plug->awaitingLoad = FALSE; /* Won't load. */
+  plug->dontUnload = TRUE; /* cannot unload. */
+
+  /* strncpy does not terminate on truncation, so each copy is terminated by hand */
+  if(sym!=NULL) {
+    uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX-1);
+    plug->sym[UPLUG_NAME_MAX-1] = 0;
+  }
+
+  if(libName!=NULL) {
+    uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX-1);
+    plug->libName[UPLUG_NAME_MAX-1] = 0;
+  }
+
+  if(nameOrError!=NULL) {
+    uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX-1);
+    plug->name[UPLUG_NAME_MAX-1] = 0;
+  }
+
+  if(config!=NULL) {
+    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX-1);
+    plug->config[UPLUG_NAME_MAX-1] = 0;
+  }
+
+  return plug;
+}
+
+/**
+ * Fetch a plugin from DLL, and then initialize it from a library- but don't load it.
+ *
+ * Opens libName, resolves sym in it and creates a plug from that entrypoint.
+ * If the library cannot be opened or the symbol cannot be resolved, an error
+ * placeholder plug is created instead (its own allocation status is kept in
+ * a local and not reported through status).
+ *
+ * @param libName library to open
+ * @param sym name of the entrypoint symbol
+ * @param config configuration string handed to the plug
+ * @param status in/out error code
+ * @return the plug or error placeholder; may be NULL
+ */
+static UPlugData* 
+uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
+  void *lib = NULL;
+  UPlugData *plug = NULL;
+  if(U_FAILURE(*status)) { return NULL; }
+  lib = uplug_openLibrary(libName, status);
+  if(lib!=NULL && U_SUCCESS(*status)) {
+    UPlugEntrypoint *entrypoint = NULL;
+    entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
+
+    if(entrypoint!=NULL&&U_SUCCESS(*status)) {
+      plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
+      if(plug!=NULL&&U_SUCCESS(*status)) {
+        plug->lib = lib; /* plug takes ownership of library */
+        lib = NULL; /* library is now owned by plugin. */
+      }
+      /* NOTE(review): on failure the init helper may already have released
+         the library through uplug_deallocatePlug(); the close below would
+         then drop a second reference - confirm. */
+    } else {
+      UErrorCode subStatus = U_ZERO_ERROR;
+      plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+    }
+    if(lib!=NULL) { /* still need to close the lib */
+      UErrorCode subStatus = U_ZERO_ERROR;
+      uplug_closeLibrary(lib, &subStatus); /* don't care here */
+    }
+  } else {
+    UErrorCode subStatus = U_ZERO_ERROR;
+    plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+  }
+  return plug;
+}
+
+/* Creates a plug from a symbol in a dynamic library and then sends it the
+   load call. Returns whatever the initialization step produced (a plug, an
+   error placeholder, or NULL). */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
+  if(U_FAILURE(*status)) {
+    return NULL;
+  }
+  UPlugData *loaded = uplug_initPlugFromLibrary(libName, sym, config, status);
+  uplug_loadPlug(loaded, status);
+  return loaded;
+}
+
+#endif
+
+/* Current plugin level: starts at UPLUG_LEVEL_LOW, is raised to
+   UPLUG_LEVEL_HIGH at the end of uplug_init() and reset by uplug_cleanup(). */
+static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW;
+
+/* Returns the current plugin level. */
+U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
+  return gCurrentLevel;
+}
+
+/* Cleanup callback: unloads and deallocates every plugin, then resets the
+   current level to UPLUG_LEVEL_LOW. Always returns TRUE. */
+static UBool U_CALLCONV uplug_cleanup(void)
+{
+  int32_t i;
+
+  /* cleanup plugs.
+     Walk from the back: a successful removal shrinks pluginCount and shifts
+     the later entries down, so a forward loop would step over entries. Going
+     backwards visits each entry once whether or not it gets removed. */
+  for(i=pluginCount-1;i>=0;i--) {
+    UErrorCode subStatus = U_ZERO_ERROR;
+    /* unload and deallocate */
+    uplug_doUnloadPlug(&pluginList[i], &subStatus);
+  }
+  /* close other held libs? */
+  gCurrentLevel = UPLUG_LEVEL_LOW;
+  return TRUE;
+}
+
+#if U_ENABLE_DYLOAD
+
+/* Sends the load call to every plug that is still awaiting load, in two
+   passes over the plugin table: first the UPLUG_LEVEL_LOW plugs, then
+   everything that is still waiting. Per-plug errors are kept in a local
+   subStatus and are not reported through status. */
+static void uplug_loadWaitingPlugs(UErrorCode *status) {
+  int32_t i;
+  UPlugLevel currentLevel = uplug_getCurrentLevel();
+    
+  if(U_FAILURE(*status)) {
+    return;
+  }
+#if UPLUG_TRACE
+  DBG((stderr,  "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
+#endif
+  /* pass #1: low level plugs */
+  for(i=0;i<pluginCount;i++) {
+    UErrorCode subStatus = U_ZERO_ERROR;
+    UPlugData *pluginToLoad = &pluginList[i];
+    if(pluginToLoad->awaitingLoad) {
+      if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
+        if(currentLevel > UPLUG_LEVEL_LOW) {
+          /* the current level is already above LOW: flag the plug instead of loading it */
+          pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
+        } else {
+          UPlugLevel newLevel;
+          uplug_loadPlug(pluginToLoad, &subStatus);
+          /* re-read the level after the load call and flag the plug if it went up */
+          newLevel = uplug_getCurrentLevel();
+          if(newLevel > currentLevel) {
+            pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
+            currentLevel = newLevel;
+          }
+        }
+        pluginToLoad->awaitingLoad = FALSE;
+      } 
+    }
+  }    
+  /* pass #2: everything that is still waiting */
+  for(i=0;i<pluginCount;i++) {
+    UErrorCode subStatus = U_ZERO_ERROR;
+    UPlugData *pluginToLoad = &pluginList[i];
+        
+    if(pluginToLoad->awaitingLoad) {
+      if(pluginToLoad->level == UPLUG_LEVEL_INVALID) { 
+        pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
+      } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
+        pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
+      } else {
+        uplug_loadPlug(pluginToLoad, &subStatus);
+      }
+      pluginToLoad->awaitingLoad = FALSE;
+    }
+  }
+    
+#if UPLUG_TRACE
+  DBG((stderr,  " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
+#endif
+}
+
+/* Name of the plugin config file; filled in by uplug_init() and returned by
+   uplug_getPluginFile(). */
+static char plugin_file[2048] = "";
+#endif
+
+/* Returns the plugin config file name recorded by uplug_init() (an empty
+   string if none was recorded), or NULL when dynamic loading or file I/O is
+   compiled out. */
+U_INTERNAL const char* U_EXPORT2
+uplug_getPluginFile() {
+#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
+  return plugin_file;
+#else
+  return NULL;
+#endif
+}
+
+
+//  uplug_init()  is called first thing from u_init().
+
+/**
+ * Initialize the plugin system.
+ *
+ * With dynamic loading and file I/O enabled, the plugin directory is taken
+ * from the ICU_PLUGINS environment variable (or DEFAULT_ICU_PLUGINS, if that
+ * is defined and the variable is empty). The file
+ * "icuplugins" U_ICU_VERSION_SHORT ".txt" in that directory is read line by
+ * line; each line that is neither blank nor a '#' comment has the form
+ *
+ *     libName  symName  [config...]
+ *
+ * and creates one plug. All waiting plugs are then loaded. In every
+ * configuration the current level is finally raised to UPLUG_LEVEL_HIGH and
+ * the cleanup function is registered.
+ *
+ * @param status in/out error code; the first per-line failure is reported
+ */
+U_CAPI void U_EXPORT2
+uplug_init(UErrorCode *status) {
+#if !U_ENABLE_DYLOAD
+  (void)status; /* unused */
+#elif !UCONFIG_NO_FILE_IO
+  CharString plugin_dir;
+  const char *env = getenv("ICU_PLUGINS");
+
+  if(U_FAILURE(*status)) return;
+  if(env != NULL) {
+    plugin_dir.append(env, -1, *status);
+  }
+  if(U_FAILURE(*status)) return;
+
+#if defined(DEFAULT_ICU_PLUGINS) 
+  if(plugin_dir.isEmpty()) {
+    plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status);
+  }
+#endif
+
+#if UPLUG_TRACE
+  DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data()));
+#endif
+
+  if(!plugin_dir.isEmpty()) {
+    FILE *f;
+
+    CharString pluginFile;
+#ifdef OS390BATCH
+/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS  */
+/* Keeping in mind that unauthorized file access is logged, monitored, and enforced  */
+/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX    */
+/* System Services.  Alternative techniques might be allocating a member in          */
+/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?).  The       */
+/* DDNAME can be connected to a file in the HFS if need be.                          */
+
+    pluginFile.append("//DD:ICUPLUG", -1, *status);        /* JAM 20 Oct 2011 */
+#else
+    pluginFile.append(plugin_dir, *status);
+    pluginFile.append(U_FILE_SEP_STRING, -1, *status);
+    pluginFile.append("icuplugins", -1, *status);
+    pluginFile.append(U_ICU_VERSION_SHORT, -1, *status);
+    pluginFile.append(".txt", -1, *status);
+#endif
+
+#if UPLUG_TRACE
+    DBG((stderr, "status=%s\n", u_errorName(*status)));
+#endif
+
+    if(U_FAILURE(*status)) {
+      return;
+    }
+    if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) {
+      *status = U_BUFFER_OVERFLOW_ERROR;
+#if UPLUG_TRACE
+      DBG((stderr, "status=%s\n", u_errorName(*status)));
+#endif
+      return;
+    }
+
+    /* plugin_file is not used for processing - it is only used 
+       so that uplug_getPluginFile() works (i.e. icuinfo)
+    */
+    uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
+
+#if UPLUG_TRACE
+    DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
+#endif
+
+#ifdef __MVS__
+    if (iscics()) /* 12 Nov 2011 JAM */
+    {
+        f = NULL;
+    }
+    else
+#endif
+    {
+        f = fopen(pluginFile.data(), "r");
+    }
+
+    if(f != NULL) {
+      char linebuf[1024];
+      char *p, *libName=NULL, *symName=NULL, *config=NULL;
+      int32_t line = 0;
+
+
+      while(fgets(linebuf,1023,f)) {
+        line++;
+
+        /* Every line starts with a clean slate. Without this, a line that
+           has no config would reuse the config pointer of an earlier line,
+           which by now points into overwritten buffer contents. */
+        libName = NULL;
+        symName = NULL;
+        config = NULL;
+
+        if(!*linebuf || *linebuf=='#') {
+          continue;
+        } else {
+          /* isspace() requires a value representable as unsigned char (or
+             EOF), so plain char is converted before every call */
+          p = linebuf;
+          while(*p&&isspace((unsigned char)*p))
+            p++;
+          if(!*p || *p=='#') continue;
+          libName = p;
+          while(*p&&!isspace((unsigned char)*p)) {
+            p++;
+          }
+          if(!*p || *p=='#') continue; /* no tab after libname */
+          *p=0; /* end of libname */
+          p++;
+          while(*p&&isspace((unsigned char)*p)) {
+            p++;
+          }
+          if(!*p||*p=='#') continue; /* no symname after libname +tab */
+          symName = p;
+          while(*p&&!isspace((unsigned char)*p)) {
+            p++;
+          }
+
+          if(*p) { /* has config */
+            *p=0;
+            ++p;
+            while(*p&&isspace((unsigned char)*p)) {
+              p++;
+            }
+            if(*p) {
+              config = p;
+            }
+          }
+
+          /* chop whitespace at the end of the config */
+          if(config!=NULL&&*config!=0) {
+            p = config+strlen(config);
+            while(p>config&&isspace((unsigned char)*(--p))) {
+              *p=0;
+            }
+          }
+
+          /* OK, we're good. */
+          { 
+            UErrorCode subStatus = U_ZERO_ERROR;
+            UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
+            if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
+              *status = subStatus;
+            }
+#if UPLUG_TRACE
+            DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
+            DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
+#else
+            (void)plug; /* unused */
+#endif
+          }
+        }
+      }
+      fclose(f);
+    } else {
+#if UPLUG_TRACE
+      DBG((stderr, "Can't open plugin file %s\n", plugin_file));
+#endif
+    }
+  }
+  uplug_loadWaitingPlugs(status);
+#endif /* U_ENABLE_DYLOAD */
+  gCurrentLevel = UPLUG_LEVEL_HIGH;
+  ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
+}
+
+#endif
+
+
--- a/external/duckdb/extension/icu/third_party/icu/common/icuplugimp.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/icuplugimp.h
@@ -0,0 +1,93 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2009-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : icuplugimp.h
+* 
+*  Internal functions for the ICU plugin system
+*
+*   Date         Name        Description
+*   10/29/2009   sl          New.
+******************************************************************************
+*/
+
+
+#ifndef ICUPLUGIMP_H
+#define ICUPLUGIMP_H
+
+#include "unicode/icuplug.h"
+
+#if UCONFIG_ENABLE_PLUGINS
+
+/*========================*/
+/** @{ Library Manipulation  
+ */
+
+/**
+ * Open a library, adding a reference count if needed.
+ * @param libName library name to load
+ * @param status error code
+ * @return the library pointer, or NULL
+ * @internal internal use only
+ */
+U_INTERNAL void * U_EXPORT2
+uplug_openLibrary(const char *libName, UErrorCode *status);
+
+/**
+ * Close a library, if its reference count is 0
+ * @param lib the library to close
+ * @param status error code
+ * @internal internal use only
+ */
+U_INTERNAL void U_EXPORT2
+uplug_closeLibrary(void *lib, UErrorCode *status);
+
+/**
+ * Get a library's name, or NULL if not found.
+ * @param lib the library pointer to look up
+ * @param status error code
+ * @return the library name, or NULL if not found.
+ * @internal internal use only
+ */
+U_INTERNAL  char * U_EXPORT2
+uplug_findLibrary(void *lib, UErrorCode *status);
+
+/** @} */
+
+/*========================*/
+/** @{ ICU Plugin internal interfaces
+ */
+
+/**
+ * Initialize the plugins 
+ * @param status error result
+ * @internal - Internal use only.
+ */
+U_INTERNAL void U_EXPORT2
+uplug_init(UErrorCode *status);
+
+/**
+ * Get raw plug N
+ * @internal - Internal use only
+ */ 
+U_INTERNAL UPlugData* U_EXPORT2
+uplug_getPlugInternal(int32_t n);
+
+/**
+ * Get the name of the plugin file. 
+ * @internal - Internal use only.
+ */
+U_INTERNAL const char* U_EXPORT2
+uplug_getPluginFile(void);
+
+/** @} */
+
+#endif
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/loadednormalizer2impl.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/loadednormalizer2impl.cpp
@@ -0,0 +1,418 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+* loadednormalizer2impl.cpp
+*
+* created on: 2014sep03
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/udata.h"
+#include "unicode/localpointer.h"
+#include "unicode/normalizer2.h"
+#include "unicode/ucptrie.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "norm2allmodes.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+class LoadedNormalizer2Impl : public Normalizer2Impl {  // Normalizer2Impl whose tables are read from a data item
+public:
+    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}  // nothing loaded yet
+    virtual ~LoadedNormalizer2Impl();  // closes memory and ownedTrie
+
+    void load(const char *packageName, const char *name, UErrorCode &errorCode);  // opens the "nrm" item called name and calls init()
+
+private:
+    static UBool U_CALLCONV
+    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);  // filter passed to udata_openChoice()
+
+    UDataMemory *memory;  // result of udata_openChoice(), NULL until load()
+    UCPTrie *ownedTrie;  // trie opened by load() from the loaded bytes, NULL until then
+};
+
+LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
+    udata_close(memory);  // still NULL if load() never ran or could not open the data
+    ucptrie_close(ownedTrie);  // still NULL unless load() got as far as opening the trie
+}
+
+UBool U_CALLCONV
+LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
+                                    const char * /* type */, const char * /*name*/,
+                                    const UDataInfo *pInfo) {
+    // udata_openChoice() callback: TRUE only for data this loader can use.
+    // First the platform properties: info size, byte order, charset family.
+    if(pInfo->size<20 ||
+        pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
+        pInfo->charsetFamily!=U_CHARSET_FAMILY
+    ) {
+        return FALSE;
+    }
+    // dataFormat must spell "Nrm2" (0x4e 0x72 0x6d 0x32).
+    const uint8_t *format=pInfo->dataFormat;
+    if(format[0]!=0x4e || format[1]!=0x72 || format[2]!=0x6d || format[3]!=0x32) {
+        return FALSE;
+    }
+    // Only formatVersion 4 is accepted; the context pointer is not used and
+    // the item's dataVersion is not recorded.
+    return pInfo->formatVersion[0]==4 ? TRUE : FALSE;
+}
+
+void
+LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
+    // Opens the "nrm" data item called name and hands its sections to init().
+    // init() is only reached when every step before it succeeded.
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const uint8_t *bytes=(const uint8_t *)udata_getMemory(memory);
+    const int32_t *indexes=(const int32_t *)bytes;
+
+    // The trie's byte offset divided by 4 is used as the number of int32_t
+    // indexes available; it must reach beyond IX_MIN_LCCC_CP.
+    const int32_t trieOffset=indexes[IX_NORM_TRIE_OFFSET];
+    if(trieOffset/4<=IX_MIN_LCCC_CP) {
+        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
+        return;
+    }
+
+    const int32_t extraDataOffset=indexes[IX_EXTRA_DATA_OFFSET];
+    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
+                                     bytes+trieOffset, extraDataOffset-trieOffset, NULL,
+                                     &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+
+    // smallFCD: new in formatVersion 2
+    const int32_t smallFCDOffset=indexes[IX_SMALL_FCD_OFFSET];
+    const uint16_t *extraData=(const uint16_t *)(bytes+extraDataOffset);
+    const uint8_t *smallFCD=bytes+smallFCDOffset;
+    init(indexes, ownedTrie, extraData, smallFCD);
+}
+
+// instance cache ---------------------------------------------------------- ***
+
+Norm2AllModes *
+Norm2AllModes::createInstance(const char *packageName,
+                              const char *name,
+                              UErrorCode &errorCode) {
+    // Loads name into a fresh impl, then defers to the impl-taking overload.
+    if(U_SUCCESS(errorCode)) {
+        LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
+        if(loaded!=NULL) {
+            loaded->load(packageName, name, errorCode);
+            return createInstance(loaded, errorCode);
+        }
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return NULL;
+}
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();  // defined further down; declared early for initSingletons()
+U_CDECL_END
+
+#if !NORM2_HARDCODE_NFC_DATA
+static Norm2AllModes *nfcSingleton;  // assigned by initSingletons("nfc")
+static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
+#endif
+
+static Norm2AllModes *nfkcSingleton;  // assigned by initSingletons("nfkc")
+static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
+
+static Norm2AllModes *nfkc_cfSingleton;  // assigned by initSingletons("nfkc_cf")
+static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
+
+static UHashtable    *cache=NULL;  // name -> Norm2AllModes, created on demand by Normalizer2::getInstance()
+
+// UInitOnce singleton initialization function
+static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {  // what: "nfc", "nfkc" or "nfkc_cf"
+#if !NORM2_HARDCODE_NFC_DATA
+    if (uprv_strcmp(what, "nfc") == 0) {
+        nfcSingleton    = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
+    } else  // the else-chain continues after the #endif
+#endif
+    if (uprv_strcmp(what, "nfkc") == 0) {
+        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
+    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
+        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
+    } else {
+        UPRV_UNREACHABLE;   // Unknown singleton
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);  // done on every call, whether or not the load succeeded
+}
+
+U_CDECL_BEGIN
+
+static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
+    delete static_cast<Norm2AllModes *>(allModes);  // installed as the cache's value deleter
+}
+
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {  // callback handed to ucln_common_registerCleanup()
+#if !NORM2_HARDCODE_NFC_DATA
+    delete nfcSingleton;
+    nfcSingleton = NULL;
+    nfcInitOnce.reset();  // the once-flag is reset together with its pointer
+#endif
+
+    delete nfkcSingleton;
+    nfkcSingleton = NULL;
+    nfkcInitOnce.reset();
+
+    delete nfkc_cfSingleton;
+    nfkc_cfSingleton = NULL;
+    nfkc_cfInitOnce.reset();
+
+    uhash_close(cache);  // cache may still be NULL if Normalizer2::getInstance() never created it
+    cache=NULL;
+    return TRUE;
+}
+
+U_CDECL_END
+
+#if !NORM2_HARDCODE_NFC_DATA
+const Norm2AllModes *
+Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {  // shared "nfc" modes, created on first use
+    if(U_FAILURE(errorCode)) { return NULL; }  // incoming failure: do not trigger the load
+    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);  // initSingletons("nfc") assigns nfcSingleton
+    return nfcSingleton;
+}
+#endif
+
+const Norm2AllModes *
+Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {  // shared "nfkc" modes, created on first use
+    if(U_FAILURE(errorCode)) { return NULL; }  // incoming failure: do not trigger the load
+    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);  // initSingletons("nfkc") assigns nfkcSingleton
+    return nfkcSingleton;
+}
+
+const Norm2AllModes *
+Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {  // shared "nfkc_cf" modes, created on first use
+    if(U_FAILURE(errorCode)) { return NULL; }  // incoming failure: do not trigger the load
+    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);  // initSingletons("nfkc_cf") assigns nfkc_cfSingleton
+    return nfkc_cfSingleton;
+}
+
+#if !NORM2_HARDCODE_NFC_DATA
+const Normalizer2 *
+Normalizer2::getNFCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
+    return nfc==NULL ? NULL : &nfc->comp;  // the comp member of the "nfc" modes
+}
+
+const Normalizer2 *
+Normalizer2::getNFDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
+    return nfc==NULL ? NULL : &nfc->decomp;  // the decomp member of the "nfc" modes
+}
+
+const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
+    return nfc==NULL ? NULL : &nfc->fcd;  // the fcd member of the "nfc" modes
+}
+
+const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
+    return nfc==NULL ? NULL : &nfc->fcc;  // the fcc member of the "nfc" modes
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
+    return nfc==NULL ? NULL : nfc->impl;  // the impl pointer of the "nfc" modes
+}
+#endif
+
+const Normalizer2 *
+Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
+    return nfkc==NULL ? NULL : &nfkc->comp;  // the comp member of the "nfkc" modes
+}
+
+const Normalizer2 *
+Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
+    return nfkc==NULL ? NULL : &nfkc->decomp;  // the decomp member of the "nfkc" modes
+}
+
+const Normalizer2 *
+Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkc_cf=Norm2AllModes::getNFKC_CFInstance(errorCode);
+    return nfkc_cf==NULL ? NULL : &nfkc_cf->comp;  // the comp member of the "nfkc_cf" modes
+}
+
+const Normalizer2 *
+Normalizer2::getInstance(const char *packageName,
+                         const char *name,
+                         UNormalization2Mode mode,
+                         UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    if(name==NULL || *name==0) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    const Norm2AllModes *allModes=NULL;
+    if(packageName==NULL) {  // no package given: the three standard names map to the singletons
+        if(0==uprv_strcmp(name, "nfc")) {
+            allModes=Norm2AllModes::getNFCInstance(errorCode);
+        } else if(0==uprv_strcmp(name, "nfkc")) {
+            allModes=Norm2AllModes::getNFKCInstance(errorCode);
+        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
+            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+        }
+    }
+    if(allModes==NULL && U_SUCCESS(errorCode)) {  // not served by a singleton: consult the cache
+        {
+            Mutex lock;  // scoped to this inner block, i.e. just the lookup
+            if(cache!=NULL) {
+                allModes=(Norm2AllModes *)uhash_get(cache, name);  // keyed by name only; packageName is not part of the key
+            }
+        }
+        if(allModes==NULL) {
+            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
+            LocalPointer<Norm2AllModes> localAllModes(  // created before the second Mutex is taken
+                Norm2AllModes::createInstance(packageName, name, errorCode));
+            if(U_SUCCESS(errorCode)) {
+                Mutex lock;
+                if(cache==NULL) {
+                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
+                    if(U_FAILURE(errorCode)) {
+                        return NULL;
+                    }
+                    uhash_setKeyDeleter(cache, uprv_free);  // keys are the uprv_malloc'ed name copies made below
+                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
+                }
+                void *temp=uhash_get(cache, name);  // look again now that the Mutex is held
+                if(temp==NULL) {
+                    int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);  // includes the terminating NUL
+                    char *nameCopy=(char *)uprv_malloc(keyLength);
+                    if(nameCopy==NULL) {
+                        errorCode=U_MEMORY_ALLOCATION_ERROR;
+                        return NULL;
+                    }
+                    uprv_memcpy(nameCopy, name, keyLength);
+                    allModes=localAllModes.getAlias();
+                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);  // orphan(): the LocalPointer gives up the new instance
+                } else {
+                    // race condition
+                    allModes=(Norm2AllModes *)temp;  // use the entry already in the cache; localAllModes keeps its own instance
+                }
+            }
+        }
+    }
+    if(allModes!=NULL && U_SUCCESS(errorCode)) {
+        switch(mode) {
+        case UNORM2_COMPOSE:
+            return &allModes->comp;
+        case UNORM2_DECOMPOSE:
+            return &allModes->decomp;
+        case UNORM2_FCD:
+            return &allModes->fcd;
+        case UNORM2_COMPOSE_CONTIGUOUS:
+            return &allModes->fcc;
+        default:
+            break;  // do nothing
+        }
+    }
+    return NULL;  // failure, or a mode value not handled above
+}
+
+const Normalizer2 *
+Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
+    // Maps a UNormalizationMode constant to the corresponding Normalizer2;
+    // UNORM_NONE and any value not listed below get the no-op instance.
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    if(mode==UNORM_NFD) {
+        return Normalizer2::getNFDInstance(errorCode);
+    } else if(mode==UNORM_NFKD) {
+        return Normalizer2::getNFKDInstance(errorCode);
+    } else if(mode==UNORM_NFC) {
+        return Normalizer2::getNFCInstance(errorCode);
+    } else if(mode==UNORM_NFKC) {
+        return Normalizer2::getNFKCInstance(errorCode);
+    } else if(mode==UNORM_FCD) {
+        return getFCDInstance(errorCode);
+    }
+    return getNoopInstance(errorCode);
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
+    return nfkc==NULL ? NULL : nfkc->impl;  // the impl pointer of the "nfkc" modes
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
+    const Norm2AllModes *nfkc_cf=Norm2AllModes::getNFKC_CFInstance(errorCode);
+    return nfkc_cf==NULL ? NULL : nfkc_cf->impl;  // the impl pointer of the "nfkc_cf" modes
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode) {  // C wrapper around Normalizer2::getNFKCInstance()
+    return reinterpret_cast<const UNormalizer2 *>(Normalizer2::getNFKCInstance(*pErrorCode));
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode) {  // C wrapper around Normalizer2::getNFKDInstance()
+    return reinterpret_cast<const UNormalizer2 *>(Normalizer2::getNFKDInstance(*pErrorCode));
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {  // C wrapper around Normalizer2::getNFKCCasefoldInstance()
+    return reinterpret_cast<const UNormalizer2 *>(Normalizer2::getNFKCCasefoldInstance(*pErrorCode));
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+                   const char *name,
+                   UNormalization2Mode mode,
+                   UErrorCode *pErrorCode) {  // C wrapper around Normalizer2::getInstance()
+    return reinterpret_cast<const UNormalizer2 *>(Normalizer2::getInstance(packageName, name, mode, *pErrorCode));
+}
+
+U_CFUNC UNormalizationCheckResult
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
+    // mode<=UNORM_NONE or mode>=UNORM_FCD: answer UNORM_YES without any lookup.
+    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
+        return UNORM_YES;
+    }
+    UErrorCode status=U_ZERO_ERROR;
+    const Normalizer2 *normalizer=Normalizer2Factory::getInstance(mode, status);
+    if(U_FAILURE(status)) {
+        return UNORM_MAYBE;  // no normalizer available, so no definite answer
+    }
+    return ((const Normalizer2WithImpl *)normalizer)->getQuickCheck(c);
+}
+
+#endif  // !UCONFIG_NO_NORMALIZATION
--- a/external/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp
@@ -0,0 +1,468 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <utility>
+
+#include "bytesinkutil.h"  // CharStringByteSink
+#include "charstr.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "unicode/localebuilder.h"
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))  // true for '0'..'9'; evaluates c twice
+#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )  // evaluates c more than once
+
+const char* kAttributeKey = "attribute";  // keyword name under which Unicode locale attributes are kept
+
+static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+    // Dispatch on the lowercased extension singleton to the matching validator.
+    const char singleton = uprv_tolower(key);
+    if (singleton == 'u') {
+        return ultag_isUnicodeExtensionSubtags(s, len);
+    } else if (singleton == 't') {
+        return ultag_isTransformedExtensionSubtags(s, len);
+    } else if (singleton == 'x') {
+        return ultag_isPrivateuseValueSubtags(s, len);
+    }
+    return ultag_isExtensionSubtags(s, len);
+}
+
+LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
+    script_(), region_(), variant_(nullptr), extensions_(nullptr)
+{
+    language_[0] = 0;  // the three subtag buffers start out as empty C strings
+    script_[0] = 0;
+    region_[0] = 0;
+}
+
+LocaleBuilder::~LocaleBuilder()
+{
+    delete variant_;  // heap CharString from setVariant(), or nullptr
+    delete extensions_;  // heap Locale holding the extension keywords, or nullptr
+}
+
+LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
+{
+    clear();  // also resets status_, so an earlier builder error is forgotten here
+    setLanguage(locale.getLanguage());
+    setScript(locale.getScript());
+    setRegion(locale.getCountry());
+    setVariant(locale.getVariant());
+    extensions_ = locale.clone();  // a full copy of locale serves as the extension store; clear() freed the previous one
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
+{
+    // setLocale() begins with clear(), which resets status_. A parse error
+    // from forLanguageTag() must therefore skip setLocale() so that the
+    // error stays recorded in status_.
+    Locale parsed = Locale::forLanguageTag(tag, status_);
+    if (U_SUCCESS(status_)) { setLocale(parsed); }
+    return *this;
+}
+
+static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+                     UBool (*test)(const char*, int32_t)) {
+    // Copies a subtag accepted by test into dest (NUL-terminated); empty input clears dest.
+    if (U_FAILURE(errorCode)) { return; }
+    const int32_t length = input.length();
+    if (length != 0 && !test(input.data(), length)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;  // dest is left untouched
+        return;
+    }
+    if (length != 0) { uprv_memcpy(dest, input.data(), length); }
+    dest[length] = '\0';
+}
+
+LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
+{
+    setField(language, language_, status_, &ultag_isLanguageSubtag);  // empty clears; invalid input sets status_
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
+{
+    setField(script, script_, status_, &ultag_isScriptSubtag);  // empty clears; invalid input sets status_
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
+{
+    setField(region, region_, status_, &ultag_isRegionSubtag);  // empty clears; invalid input sets status_
+    return *this;
+}
+
+static void transform(char* data, int32_t len) {
+    // Rewrites the first len chars in place:
+    // '_' becomes '-', anything else goes through uprv_tolower().
+    int32_t i = 0;
+    while (i < len) {
+        data[i] = (data[i] == '_') ? '-' : uprv_tolower(data[i]);
+        ++i;
+    }
+}
+
+LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
+{  // Stores the variant subtags (lowercased, '_' -> '-') after validating them.
+    if (U_FAILURE(status_)) { return *this; }
+    if (variant.empty()) {  // an empty variant removes any variant set earlier
+        delete variant_;
+        variant_ = nullptr;
+        return *this;
+    }
+    CharString* new_variant = new CharString(variant, status_);
+    if (U_FAILURE(status_)) { delete new_variant; return *this; }  // copy failed: free it instead of leaking
+    if (new_variant == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    transform(new_variant->data(), new_variant->length());
+    if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
+        delete new_variant;
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    delete variant_;  // replace the previous variant only once the new one is valid
+    variant_ = new_variant;
+    return *this;
+}
+
+static bool
+_isKeywordValue(const char* key, const char* value, int32_t value_len)
+{
+    // Single-character keys are extension singletons ('t', 'x', ...).
+    const bool is_singleton = (key[1] == '\0');
+    if (is_singleton) {
+        const char folded = uprv_tolower(key[0]);
+        if (!UPRV_ISALPHANUM(folded)) { return false; }
+        return _isExtensionSubtags(key[0], value, value_len);
+    }
+    // The special "attribute" key carries the Unicode locale attributes.
+    if (uprv_strcmp(key, kAttributeKey) == 0) {
+        return ultag_isUnicodeLocaleAttributes(value, value_len);
+    }
+    // Anything else is a legacy keyword: convert key and value to their
+    // Unicode forms and validate those.
+    const char* bcp_key = uloc_toUnicodeLocaleKey(key);
+    const char* bcp_type = uloc_toUnicodeLocaleType(key, value);
+    if (bcp_key == nullptr || bcp_type == nullptr) { return false; }
+    return ultag_isUnicodeLocaleKey(bcp_key, -1) && ultag_isUnicodeLocaleType(bcp_type, -1);
+}
+
+static void
+_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
+                Locale& to, bool validate, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    // Without a caller-supplied enumeration, create (and own) one over from's keywords.
+    LocalPointer<icu::StringEnumeration> ownedKeywords;
+    if (keywords == nullptr) {
+        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
+        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
+        keywords = ownedKeywords.getAlias();
+    }
+    for (const char* key = keywords->next(nullptr, errorCode);
+         key != nullptr;
+         key = keywords->next(nullptr, errorCode)) {
+        CharString value;
+        CharStringByteSink sink(&value);
+        from.getKeywordValue(key, sink, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        const bool isAttribute = (uprv_strcmp(key, kAttributeKey) == 0);
+        if (isAttribute) { transform(value.data(), value.length()); }
+        if (validate && !_isKeywordValue(key, value.data(), value.length())) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        to.setKeywordValue(key, value.data(), errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+}
+
+static void
+_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
+{
+    // Drop the Unicode attributes by setting the "attribute" keyword to "".
+    locale.setKeywordValue(kAttributeKey, "", errorCode);
+
+    // Then null out every Unicode keyword that the locale still reports.
+    LocalPointer<icu::StringEnumeration> unicodeKeys(locale.createUnicodeKeywords(errorCode));
+    if (U_FAILURE(errorCode) || unicodeKeys.isNull()) { return; }
+    for (const char* key = unicodeKeys->next(nullptr, errorCode); key != nullptr;
+         key = unicodeKeys->next(nullptr, errorCode)) {
+        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
+    }
+}
+
+static void
+_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
+{
+    // Parse "und-u-<value>" as a language tag and copy the resulting
+    // keywords into locale (validate=false).
+    CharString tag("und-u-", errorCode);
+    tag.append(value, errorCode);
+    const Locale parsed = Locale::forLanguageTag(tag.data(), errorCode);
+    _copyExtensions(parsed, nullptr, locale, false, errorCode);
+}
+
+LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!UPRV_ISALPHANUM(key)) {  // the extension key must be an ASCII letter or digit
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());  // lowercase, '_' -> '-'
+    if (!value_str.isEmpty() &&
+            !_isExtensionSubtags(key, value_str.data(), value_str.length())) {  // an empty value is not validated
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    if (uprv_tolower(key) != 'u') {
+        // for t, x and others extension.
+        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
+                                     status_);
+        return *this;
+    }
+    _clearUAttributesAndKeyType(*extensions_, status_);  // 'u': existing attributes and Unicode keywords are replaced, not merged
+    if (U_FAILURE(status_)) { return *this; }
+    if (!value.empty()) {
+        _setUnicodeExtensions(*extensions_, value_str, status_);
+    }
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
+      StringPiece key, StringPiece type)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    // The key must always be well-formed; an empty type skips the type check.
+    if (!(ultag_isUnicodeLocaleKey(key.data(), key.length()) &&
+          (type.empty() || ultag_isUnicodeLocaleType(type.data(), type.length())))) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+    }
+    extensions_->setUnicodeKeywordValue(key, type, status_);
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
+    StringPiece value)
+{  // Adds one attribute to the "attribute" keyword; an attribute already present is left alone.
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+    // Otherwise merge the new attribute into the list that is already stored.
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        CharString new_attributes(value_str.data(), status_);
+        // No attributes, set the attribute.
+        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+        return *this;
+    }
+
+    // Split into NUL-terminated lowercase tokens so the walk below compares one attribute at a time.
+    char* p = attributes.data();
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;
+    bool inserted = false;
+    while (start < limit) {
+        if (!inserted) {
+            int cmp = uprv_strcmp(start, value_str.data());
+            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
+            if (cmp > 0) {  // first existing token that sorts after the new one: insert before it
+                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
+                new_attributes.append(value_str.data(), status_);
+                inserted = true;
+            }
+        }
+        if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); }
+        new_attributes.append(start, status_);
+        start += uprv_strlen(start) + 1;  // step over this token and its NUL
+    }
+    if (!inserted) {
+        if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); }
+        new_attributes.append(value_str.data(), status_);
+    }
+    // Not yet in the attributes, set the attribute.
+    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) { return *this; }
+    UErrorCode localErrorCode = U_ZERO_ERROR;  // a lookup failure here is not recorded in status_
+    CharString attributes;
+    CharStringByteSink sink(&attributes);
+    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    // get failure, just return
+    if (U_FAILURE(localErrorCode)) { return *this; }
+    // Do not have any attributes, just return.
+    if (attributes.isEmpty()) { return *this; }
+
+    char* p = attributes.data();
+    // Replace each '_' and '-' with a null terminator in place so that
+    // uprv_strcmp can later compare one attribute at a time.
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+
+    const char* start = attributes.data();
+    const char* limit = attributes.data() + attributes.length();
+    CharString new_attributes;  // every token except the one being removed, joined with '_'
+    bool found = false;
+    while (start < limit) {
+        if (uprv_strcmp(start, value_str.data()) == 0) {
+            found = true;
+        } else {
+            if (!new_attributes.isEmpty()) {
+                new_attributes.append('_', status_);
+            }
+            new_attributes.append(start, status_);
+        }
+        start += uprv_strlen(start) + 1;  // step over this token and its NUL
+    }
+    // Found the value in attributes, set the attribute.
+    if (found) {
+        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    }
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::clear()
+{
+    status_ = U_ZERO_ERROR;  // any error recorded so far is discarded as well
+    language_[0] = 0;
+    script_[0] = 0;
+    region_[0] = 0;
+    delete variant_;
+    variant_ = nullptr;
+    clearExtensions();  // deletes extensions_ and sets it to nullptr
+    return *this;
+}
+
+LocaleBuilder& LocaleBuilder::clearExtensions()
+{
+    delete extensions_;  // status_ and the language/script/region/variant fields are not touched
+    extensions_ = nullptr;
+    return *this;
+}
+
+Locale makeBogusLocale() {  // result handed back by build() on every failure path
+  Locale bogus;
+  bogus.setToBogus();
+  return bogus;
+}
+
+void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
+{  // Copies src's keywords into extensions_, creating extensions_ on demand.
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
+        // Error, or no extensions to copy.
+        return;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            status_ = errorCode = U_MEMORY_ALLOCATION_ERROR;  // report through the out-parameter too, not only status_
+            return;
+        }
+    }
+    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);  // validate=false
+}
+
+Locale LocaleBuilder::build(UErrorCode& errorCode)
+{
+    // Any failure (incoming, recorded in status_, or hit below) yields a bogus Locale.
+    if (U_FAILURE(errorCode)) { return makeBogusLocale(); }
+    if (U_FAILURE(status_)) {
+        errorCode = status_;
+        return makeBogusLocale();
+    }
+    // Assemble language[-script][-region][-variant] first.
+    CharString tag(language_, errorCode);
+    if (script_[0] != '\0') {
+        tag.append('-', errorCode).append(StringPiece(script_), errorCode);
+    }
+    if (region_[0] != '\0') {
+        tag.append('-', errorCode).append(StringPiece(region_), errorCode);
+    }
+    if (variant_ != nullptr) {
+        tag.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    Locale product(tag.data());
+    if (extensions_ != nullptr) {
+        _copyExtensions(*extensions_, nullptr, product, true, errorCode);  // validate=true
+    }
+    if (U_SUCCESS(errorCode)) {
+        return product;
+    }
+    return makeBogusLocale();
+}
+
+UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
+    // An error already in outErrorCode is kept; otherwise status_ is copied out.
+    if (U_SUCCESS(outErrorCode)) {
+        outErrorCode = status_;
+        return U_FAILURE(outErrorCode);
+    }
+    return TRUE;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/localematcher.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/localematcher.cpp
@@ -0,0 +1,720 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematcher.cpp
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCMATCHER_H__
+#define __LOCMATCHER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/localebuilder.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+#include "localeprioritylist.h"
+#include "loclikelysubtags.h"
+#include "locdistance.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "uvector.h"
+
+#define UND_LSR LSR("und", "", "")
+
+/**
+ * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
+ *
+ * @draft ICU 65
+ */
+enum ULocMatchLifetime {
+    /**
+     * Locale objects are temporary.
+     * The matcher will make a copy of a locale that will be used beyond one function call.
+     *
+     * In this file, LocaleLsrIterator::rememberCurrent() copies the current locale with
+     * new Locale(*current) in this mode, and the iterator's destructor deletes the copy
+     * unless it was handed out via orphanRemembered().
+     *
+     * @draft ICU 65
+     */
+    ULOCMATCH_TEMPORARY_LOCALES,
+    /**
+     * Locale objects are stored at least as long as the matcher is used.
+     * The matcher will keep only a pointer to a locale that will be used beyond one function call,
+     * avoiding a copy.
+     *
+     * In this file, LocaleLsrIterator::rememberCurrent() stores only the pointer in this mode,
+     * and the iterator's destructor does not delete it.
+     *
+     * @draft ICU 65
+     */
+    ULOCMATCH_STORED_LOCALES  // TODO: permanent? cached? clone?
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchLifetime ULocMatchLifetime;
+#endif
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Move constructor: takes over all fields of src.
+ * If src owned its desired locale, ownership moves here and src is reset so that
+ * its destructor does not delete the locale a second time.
+ */
+LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
+        desiredLocale(src.desiredLocale),
+        supportedLocale(src.supportedLocale),
+        desiredIndex(src.desiredIndex),
+        supportedIndex(src.supportedIndex),
+        desiredIsOwned(src.desiredIsOwned) {
+    // A non-owning result only aliases the desired locale; src can keep its pointer.
+    if (!desiredIsOwned) { return; }
+    src.desiredIsOwned = FALSE;
+    src.desiredIndex = -1;
+    src.desiredLocale = nullptr;
+}
+
+/** Destructor: deletes the desired locale only when this result owns it. */
+LocaleMatcher::Result::~Result() {
+    if (!desiredIsOwned) { return; }
+    delete desiredLocale;
+}
+
+/**
+ * Move assignment: releases what this result currently owns, then takes over
+ * all fields of src.
+ *
+ * Self-move is a no-op. The owned desired locale is released directly rather
+ * than by invoking the destructor on an object that keeps being used afterwards.
+ *
+ * @param src The result to move from; if it owned its desired locale, it is reset
+ *            so that its destructor does not delete the locale again.
+ * @return *this
+ */
+LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
+    if (this == &src) { return *this; }
+
+    // Same cleanup as ~Result(), without ending this object's lifetime.
+    if (desiredIsOwned) {
+        delete desiredLocale;
+    }
+
+    desiredLocale = src.desiredLocale;
+    supportedLocale = src.supportedLocale;
+    desiredIndex = src.desiredIndex;
+    supportedIndex = src.supportedIndex;
+    desiredIsOwned = src.desiredIsOwned;
+
+    if (desiredIsOwned) {
+        // Ownership has moved here; detach it from src.
+        src.desiredLocale = nullptr;
+        src.desiredIndex = -1;
+        src.desiredIsOwned = FALSE;
+    }
+    return *this;
+}
+
+/**
+ * Combines the supported locale of this result with the region, variants and
+ * extensions of the best desired locale.
+ *
+ * @param errorCode In/out error code.
+ * @return The root locale on failure or when there is no supported locale;
+ *         a copy of the supported locale when there is no desired locale or the two
+ *         are equal; otherwise the locale produced by a LocaleBuilder.
+ */
+Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
+        return Locale::getRoot();
+    }
+    const Locale *desired = getDesiredLocale();
+    if (desired == nullptr || *supportedLocale == *desired) {
+        return *supportedLocale;
+    }
+
+    // Start from the supported locale and overlay pieces of the desired one.
+    LocaleBuilder builder;
+    builder.setLocale(*supportedLocale);
+
+    // Region: taken from the desired locale when it has one.
+    const char *desiredRegion = desired->getCountry();
+    if (desiredRegion[0] != 0) {
+        builder.setRegion(desiredRegion);
+    }
+
+    // Variants: taken from the desired locale when it has any.
+    // This replaces any variants of the supported locale.
+    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+    const char *desiredVariants = desired->getVariant();
+    if (desiredVariants[0] != 0) {
+        builder.setVariant(desiredVariants);
+    }
+
+    // Extensions: copied from the desired locale when it has any.
+    // C++ note: The following note, copied from Java, may not be true,
+    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
+    // Note that this will override any supportedLocale extensions.
+    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+    // (replacing calendar).
+    builder.copyExtensionsFrom(*desired, errorCode);
+    return builder.build(errorCode);
+}
+
+/**
+ * Move constructor: copies the settings of src and takes over its two heap
+ * objects (the supported-locales vector and the default locale).
+ */
+LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
+        errorCode_(src.errorCode_),
+        supportedLocales_(src.supportedLocales_),
+        thresholdDistance_(src.thresholdDistance_),
+        demotion_(src.demotion_),
+        defaultLocale_(src.defaultLocale_),
+        favor_(src.favor_) {
+    // Detach the moved pointers from src so that ~Builder() on src deletes nothing.
+    src.defaultLocale_ = nullptr;
+    src.supportedLocales_ = nullptr;
+}
+
+/** Destructor: releases the default locale and the supported-locales vector. */
+LocaleMatcher::Builder::~Builder() {
+    delete defaultLocale_;
+    delete supportedLocales_;
+}
+
+/**
+ * Move assignment: releases this builder's heap objects, then takes over the
+ * settings and heap objects of src.
+ *
+ * Self-move is a no-op. The owned objects are released directly rather than by
+ * invoking the destructor on an object that keeps being used afterwards.
+ *
+ * @param src The builder to move from; its owned pointers are reset to nullptr.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
+    if (this == &src) { return *this; }
+
+    // Same cleanup as ~Builder(), without ending this object's lifetime.
+    delete supportedLocales_;
+    delete defaultLocale_;
+
+    errorCode_ = src.errorCode_;
+    supportedLocales_ = src.supportedLocales_;
+    thresholdDistance_ = src.thresholdDistance_;
+    demotion_ = src.demotion_;
+    defaultLocale_ = src.defaultLocale_;
+    favor_ = src.favor_;
+
+    // Ownership has moved here; detach it from src.
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    return *this;
+}
+
+/** Empties the supported-locales vector, if one has been created. */
+void LocaleMatcher::Builder::clearSupportedLocales() {
+    if (supportedLocales_ == nullptr) { return; }
+    supportedLocales_->removeAllElements();
+}
+
+/**
+ * Creates the supported-locales vector on first use.
+ *
+ * @return true if the vector exists and the builder holds no error;
+ *         false otherwise (errorCode_ is set to U_MEMORY_ALLOCATION_ERROR
+ *         when the allocation itself returned nullptr).
+ */
+bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
+    if (U_FAILURE(errorCode_)) { return false; }
+    if (supportedLocales_ == nullptr) {
+        supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
+        // A failure reported by the UVector constructor takes precedence.
+        if (U_FAILURE(errorCode_)) { return false; }
+        if (supportedLocales_ == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Replaces the supported locales with those parsed from a locale list string.
+ * The string is parsed by LocalePriorityList; on a parse error the previous
+ * supported locales are left in place and the error is kept in errorCode_.
+ *
+ * @param locales The list string to parse.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
+        StringPiece locales) {
+    LocalePriorityList parsed(locales, errorCode_);
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    const int32_t count = parsed.getLengthIncludingRemoved();
+    for (int32_t i = 0; i < count; ++i) {
+        // Take ownership of each remaining locale; removed slots yield nullptr.
+        Locale *owned = parsed.orphanLocaleAt(i);
+        if (owned != nullptr) {
+            supportedLocales_->addElement(owned, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                delete owned;
+                break;
+            }
+        }
+    }
+    return *this;
+}
+
+/**
+ * Replaces the supported locales with clones of the locales produced by the iterator.
+ * Does nothing if the builder already holds an error.
+ *
+ * @param locales Iterator over the new supported locales.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    while (locales.hasNext()) {
+        Locale *copy = locales.next().clone();
+        if (copy == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            // The clone was not added, so it is released here.
+            delete copy;
+            break;
+        }
+    }
+    return *this;
+}
+
+/**
+ * Appends a clone of the given locale to the supported locales.
+ *
+ * @param locale The locale to add.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    Locale *copy = locale.clone();
+    if (copy != nullptr) {
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            // The clone was not added, so it is released here.
+            delete copy;
+        }
+    } else {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+/**
+ * Sets the default locale to a clone of the argument, or clears it when the
+ * argument is nullptr. Does nothing if the builder already holds an error.
+ *
+ * @param defaultLocale The new default locale, or nullptr for none.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    if (defaultLocale == nullptr) {
+        delete defaultLocale_;
+        defaultLocale_ = nullptr;
+        return *this;
+    }
+    // Clone before deleting the old value; the old value stays if cloning fails.
+    Locale *copy = defaultLocale->clone();
+    if (copy == nullptr) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    delete defaultLocale_;
+    defaultLocale_ = copy;
+    return *this;
+}
+
+/**
+ * Stores the favor-subtag setting; ignored once the builder holds an error.
+ *
+ * @param subtag The new setting.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
+    if (U_SUCCESS(errorCode_)) {
+        favor_ = subtag;
+    }
+    return *this;
+}
+
+/**
+ * Stores the demotion setting; ignored once the builder holds an error.
+ *
+ * @param demotion The new setting.
+ * @return *this
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
+    if (U_SUCCESS(errorCode_)) {
+        demotion_ = demotion;
+    }
+    return *this;
+}
+
+#if 0
+// NOTE(review): everything up to the matching #endif is excluded from compilation.
+// The "@Deprecated" line below is not valid C++ (it looks like a Java annotation),
+// so it has to be removed or commented out before this block can be enabled.
+/**
+ * <i>Internal only!</i>
+ *
+ * Values above 100 are clamped to 100.
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    if (thresholdDistance > 100) {
+        thresholdDistance = 100;
+    }
+    thresholdDistance_ = thresholdDistance;
+    return *this;
+}
+#endif
+
+/**
+ * Copies the builder's stored failure, if any, into outErrorCode.
+ * A failure already in outErrorCode is kept, and a successful builder status
+ * never changes outErrorCode.
+ *
+ * @param outErrorCode In/out error code.
+ * @return TRUE if outErrorCode holds a failure on return.
+ */
+UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_SUCCESS(outErrorCode) && U_FAILURE(errorCode_)) {
+        outErrorCode = errorCode_;
+    }
+    return U_FAILURE(outErrorCode);
+}
+
+/**
+ * Creates a LocaleMatcher from the current builder settings.
+ *
+ * @param errorCode In/out error code; receives the builder's stored failure
+ *                  when it does not already hold one.
+ * @return The new matcher, constructed even when errorCode holds a failure.
+ */
+LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
+    // copyErrorTo() propagates errorCode_ only when errorCode is still a success
+    // and errorCode_ is a failure.
+    copyErrorTo(errorCode);
+    return LocaleMatcher(*this, errorCode);
+}
+
+namespace {
+
+/**
+ * Returns the maximized LSR for the locale, or UND_LSR when errorCode already
+ * holds a failure, the locale is bogus, or its name is empty.
+ */
+LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
+                       UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode) && !locale.isBogus() && *locale.getName() != 0 /* not "und" */) {
+        return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
+    }
+    return UND_LSR;
+}
+
+/** Hash callback for LSR keys: returns the hashCode field stored in the LSR. */
+int32_t hashLSR(const UHashTok token) {
+    return static_cast<const LSR *>(token.pointer)->hashCode;
+}
+
+/** Key-comparison callback for LSR keys: compares the two LSRs with operator==. */
+UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
+    const LSR &left = *static_cast<const LSR *>(t1.pointer);
+    const LSR &right = *static_cast<const LSR *>(t2.pointer);
+    return left == right;
+}
+
+/**
+ * Maps lsr to i unless the table already has a nonzero value for it.
+ *
+ * @param i The value to store; asserted to be positive because 0 is what
+ *          the lookup returns for "not found".
+ * @return true if the entry was inserted successfully; false if errorCode held a
+ *         failure, the key was already present, or the insertion failed.
+ */
+bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    U_ASSERT(i > 0);
+    if (uhash_geti(lsrToIndex, &lsr) != 0) {
+        return false;
+    }
+    uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
+    return U_SUCCESS(errorCode);
+}
+
+}  // namespace
+
+/**
+ * Builds a matcher from the builder's settings.
+ *
+ * Clones the builder's supported locales, computes a maximized LSR for each,
+ * fills the LSR-to-index hash table plus the ordered supportedLSRs/supportedIndexes
+ * arrays, and resolves the default locale and its index.
+ * All pointer members start out as nullptr and all lengths as 0 in the initializer list.
+ *
+ * @param builder   Source of the supported locales, default locale and options.
+ * @param errorCode In/out error code. On failure it is set and the constructor returns
+ *                  early, leaving the remaining members at their current values.
+ */
+LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
+        likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
+        localeDistance(*LocaleDistance::getSingleton(errorCode)),
+        thresholdDistance(builder.thresholdDistance_),
+        demotionPerDesiredLocale(0),
+        favorSubtag(builder.favor_),
+        supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
+        supportedLsrToIndex(nullptr),
+        supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
+        ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
+    if (U_FAILURE(errorCode)) { return; }
+    // A negative builder threshold selects the default script distance.
+    if (thresholdDistance < 0) {
+        thresholdDistance = localeDistance.getDefaultScriptDistance();
+    }
+    // Note: the length is stored before the arrays are allocated.
+    supportedLocalesLength = builder.supportedLocales_ != nullptr ?
+        builder.supportedLocales_->size() : 0;
+    const Locale *def = builder.defaultLocale_;
+    int32_t idef = -1;
+    if (supportedLocalesLength > 0) {
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., language tag strings)
+        // we can return those by parallel index.
+        supportedLocales = static_cast<const Locale **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
+        // Supported LSRs in input order.
+        // In C++, we store these permanently to simplify ownership management
+        // in the hash tables. Duplicate LSRs (if any) are unused overhead.
+        lsrs = new LSR[supportedLocalesLength];
+        if (supportedLocales == nullptr || lsrs == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // If the constructor fails partway, we need null pointers for destructibility.
+        uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
+        // Also find the first supported locale whose LSR is
+        // the same as that for the default locale.
+        LSR builderDefaultLSR;
+        const LSR *defLSR = nullptr;
+        if (def != nullptr) {
+            builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
+            if (U_FAILURE(errorCode)) { return; }
+            defLSR = &builderDefaultLSR;
+        }
+        // First pass: clone each supported locale and compute its LSR and hash code.
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
+            supportedLocales[i] = locale.clone();
+            if (supportedLocales[i] == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            const Locale &supportedLocale = *supportedLocales[i];
+            LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
+            lsr.setHashCode();
+            if (U_FAILURE(errorCode)) { return; }
+            if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
+                idef = i;
+                defLSR = &lsr;  // owned pointer to put into supportedLsrToIndex
+                if (*def == supportedLocale) {
+                    def = &supportedLocale;  // owned pointer to keep
+                }
+            }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, supported index).
+        // We insert the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales (aka "paradigm locales") in builder order.
+        // 3. Remaining locales in builder order.
+        // In Java, we use a LinkedHashMap for both map & ordered lists.
+        // In C++, we use separate structures.
+        // We over-allocate arrays of LSRs and indexes for simplicity.
+        // We reserve slots at the array starts for the default and paradigm locales,
+        // plus enough for all supported locales.
+        // If there are few paradigm locales and few duplicate supported LSRs,
+        // then the amount of wasted space is small.
+        supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
+                                             supportedLocalesLength, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        // Slots [0, paradigmLimit) are reserved for the default and paradigm locales;
+        // all other locales are written starting at paradigmLimit.
+        int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
+        int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
+        supportedLSRs = static_cast<const LSR **>(
+            uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
+        supportedIndexes = static_cast<int32_t *>(
+            uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
+        if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        int32_t paradigmIndex = 0;
+        int32_t otherIndex = paradigmLimit;
+        if (idef >= 0) {
+            // Hash table values are supported index + 1, so that 0 can mean "not found".
+            uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
+            supportedLSRs[0] = defLSR;
+            supportedIndexes[0] = idef;
+            paradigmIndex = 1;
+        }
+        // Second pass: register every other locale in the hash table and ordered arrays.
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            if (i == idef) { continue; }
+            const Locale &locale = *supportedLocales[i];
+            const LSR &lsr = lsrs[i];
+            if (defLSR == nullptr) {
+                // No explicit default locale: the first supported locale becomes the default.
+                U_ASSERT(i == 0);
+                def = &locale;
+                defLSR = &lsr;
+                idef = 0;
+                uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
+                supportedLSRs[0] = &lsr;
+                supportedIndexes[0] = 0;
+                paradigmIndex = 1;
+            } else if (idef >= 0 && lsr == *defLSR) {
+                // lsr == *defLSR means that this supported locale is
+                // a duplicate of the default locale.
+                // Either an explicit default locale is supported, and we added it before the loop,
+                // or there is no explicit default locale, and this is
+                // a duplicate of the first supported locale.
+                // In both cases, idef >= 0 now, so otherwise we can skip the comparison.
+                // For a duplicate, putIfAbsent() is a no-op, so nothing to do.
+            } else {
+                if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
+                    if (localeDistance.isParadigmLSR(lsr)) {
+                        supportedLSRs[paradigmIndex] = &lsr;
+                        supportedIndexes[paradigmIndex++] = i;
+                    } else {
+                        supportedLSRs[otherIndex] = &lsr;
+                        supportedIndexes[otherIndex++] = i;
+                    }
+                }
+            }
+            if (U_FAILURE(errorCode)) { return; }
+        }
+        // Squeeze out unused array slots.
+        if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
+            uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(const LSR *));
+            uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(int32_t));
+        }
+        supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
+    }
+
+    // Clone the default locale unless it already points at one of the
+    // supportedLocales entries owned by this matcher.
+    if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
+        ownedDefaultLocale = def->clone();
+        if (ownedDefaultLocale == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        def = ownedDefaultLocale;
+    }
+    defaultLocale = def;
+    defaultLocaleIndex = idef;
+
+    // demotionPerDesiredLocale stays 0 unless region demotion was requested.
+    if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
+        demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
+    }
+}
+
+/**
+ * Move constructor: takes over every array, table and locale owned by src
+ * and leaves src empty (null pointers, zero lengths, default index -1).
+ */
+LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
+        likelySubtags(src.likelySubtags),
+        localeDistance(src.localeDistance),
+        thresholdDistance(src.thresholdDistance),
+        demotionPerDesiredLocale(src.demotionPerDesiredLocale),
+        favorSubtag(src.favorSubtag),
+        supportedLocales(src.supportedLocales),
+        lsrs(src.lsrs),
+        supportedLocalesLength(src.supportedLocalesLength),
+        supportedLsrToIndex(src.supportedLsrToIndex),
+        supportedLSRs(src.supportedLSRs),
+        supportedIndexes(src.supportedIndexes),
+        supportedLSRsLength(src.supportedLSRsLength),
+        ownedDefaultLocale(src.ownedDefaultLocale),
+        defaultLocale(src.defaultLocale),
+        defaultLocaleIndex(src.defaultLocaleIndex) {
+    // Supported locales and their LSRs.
+    src.supportedLocalesLength = 0;
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    // Lookup table and ordered LSR/index arrays.
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRsLength = 0;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    // Default locale.
+    src.defaultLocaleIndex = -1;
+    src.defaultLocale = nullptr;
+    src.ownedDefaultLocale = nullptr;
+}
+
+/**
+ * Destructor: releases the cloned supported locales, the LSR array, the hash
+ * table, the ordered LSR/index arrays and the owned default locale.
+ */
+LocaleMatcher::~LocaleMatcher() {
+    // The Builder-based constructor stores supportedLocalesLength before it allocates
+    // supportedLocales and returns early when an allocation fails, so the array
+    // can be nullptr while the length is positive. Do not index it in that case.
+    if (supportedLocales != nullptr) {
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            delete supportedLocales[i];
+        }
+    }
+    uprv_free(supportedLocales);
+    delete[] lsrs;
+    uhash_close(supportedLsrToIndex);
+    uprv_free(supportedLSRs);
+    uprv_free(supportedIndexes);
+    delete ownedDefaultLocale;
+}
+
+/**
+ * Move assignment: releases everything this matcher owns, then takes over the
+ * settings and owned data of src and leaves src empty.
+ *
+ * Self-move is a no-op; without the guard, the matcher would free its own data
+ * and end up empty.
+ *
+ * likelySubtags and localeDistance are not reassigned here.
+ * NOTE(review): presumably they are reference members bound to the shared
+ * singletons by the Builder-based constructor - confirm in the class declaration.
+ *
+ * @param src The matcher to move from.
+ * @return *this
+ */
+LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
+    if (this == &src) { return *this; }
+
+    // Release the currently owned arrays, table and locales.
+    this->~LocaleMatcher();
+
+    thresholdDistance = src.thresholdDistance;
+    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
+    favorSubtag = src.favorSubtag;
+    supportedLocales = src.supportedLocales;
+    lsrs = src.lsrs;
+    supportedLocalesLength = src.supportedLocalesLength;
+    supportedLsrToIndex = src.supportedLsrToIndex;
+    supportedLSRs = src.supportedLSRs;
+    supportedIndexes = src.supportedIndexes;
+    supportedLSRsLength = src.supportedLSRsLength;
+    ownedDefaultLocale = src.ownedDefaultLocale;
+    defaultLocale = src.defaultLocale;
+    defaultLocaleIndex = src.defaultLocaleIndex;
+
+    // Ownership has moved here; leave src empty so its destructor frees nothing.
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLocalesLength = 0;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.supportedLSRsLength = 0;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+    src.defaultLocaleIndex = -1;
+    return *this;
+}
+
+/**
+ * Wraps a Locale::Iterator of desired locales and yields the maximized LSR of
+ * each one, while remembering the best desired locale seen so far.
+ *
+ * With ULOCMATCH_TEMPORARY_LOCALES the remembered locale is a heap copy owned by
+ * this iterator until orphanRemembered() hands it out; with ULOCMATCH_STORED_LOCALES
+ * only the caller's pointer is kept and never deleted here.
+ */
+class LocaleLsrIterator {
+public:
+    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
+                      ULocMatchLifetime lifetime) :
+            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
+
+    // This class may own `remembered` through a raw pointer, so a copy would
+    // delete the same Locale twice. Copying is therefore disabled.
+    LocaleLsrIterator(const LocaleLsrIterator &) = delete;
+    LocaleLsrIterator &operator=(const LocaleLsrIterator &) = delete;
+
+    /** Deletes the remembered locale only when this iterator made the copy. */
+    ~LocaleLsrIterator() {
+        if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
+            delete remembered;
+        }
+    }
+
+    /** @return Whether the underlying locale iterator has another element. */
+    bool hasNext() const {
+        return locales.hasNext();
+    }
+
+    /**
+     * Advances to the next desired locale and returns its maximized LSR
+     * (or UND_LSR, see getMaximalLsrOrUnd()).
+     */
+    LSR next(UErrorCode &errorCode) {
+        current = &locales.next();
+        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
+    }
+
+    /**
+     * Records the current locale and its index as the best desired locale so far.
+     * Does nothing if errorCode already holds a failure; sets
+     * U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
+     */
+    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
+        bestDesiredIndex = desiredIndex;
+        if (lifetime == ULOCMATCH_STORED_LOCALES) {
+            remembered = current;
+        } else {
+            // ULOCMATCH_TEMPORARY_LOCALES: replace the previous copy with a new one.
+            delete remembered;
+            remembered = new Locale(*current);
+            if (remembered == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    /**
+     * Returns the remembered locale and forgets it, so that the destructor
+     * no longer deletes it.
+     */
+    const Locale *orphanRemembered() {
+        const Locale *rem = remembered;
+        remembered = nullptr;
+        return rem;
+    }
+
+    /** @return The index passed to the last successful rememberCurrent(), or -1. */
+    int32_t getBestDesiredIndex() const {
+        return bestDesiredIndex;
+    }
+
+private:
+    const XLikelySubtags &likelySubtags;
+    Locale::Iterator &locales;
+    ULocMatchLifetime lifetime;
+    const Locale *current = nullptr, *remembered = nullptr;
+    int32_t bestDesiredIndex = -1;
+};
+
+/**
+ * Finds the supported locale that best matches one desired locale.
+ *
+ * @return nullptr if errorCode held a failure on entry; otherwise the best
+ *         supported locale, or defaultLocale when matching fails or finds nothing.
+ */
+const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    const int32_t bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_FAILURE(errorCode) || bestIndex < 0) {
+        return defaultLocale;
+    }
+    return supportedLocales[bestIndex];
+}
+
+/**
+ * Finds the supported locale that best matches a sequence of desired locales.
+ *
+ * @return nullptr if errorCode held a failure on entry; otherwise the best
+ *         supported locale, or defaultLocale when the sequence is empty,
+ *         matching fails, or nothing matches.
+ */
+const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
+                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    if (!desiredLocales.hasNext()) {
+        return defaultLocale;
+    }
+    LocaleLsrIterator desiredLsrs(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+    const int32_t bestIndex =
+        getBestSuppIndex(desiredLsrs.next(errorCode), &desiredLsrs, errorCode);
+    if (U_FAILURE(errorCode) || bestIndex < 0) {
+        return defaultLocale;
+    }
+    return supportedLocales[bestIndex];
+}
+
+/**
+ * Parses the list string with LocalePriorityList and delegates to the
+ * iterator-based getBestMatch().
+ */
+const Locale *LocaleMatcher::getBestMatchForListString(
+        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    LocalePriorityList parsed(desiredLocaleList, errorCode);
+    LocalePriorityList::Iterator desiredIter = parsed.iterator();
+    return getBestMatch(desiredIter, errorCode);
+}
+
+/**
+ * Like getBestMatch(const Locale &, UErrorCode &), but returns a Result.
+ *
+ * On a match the Result points at desiredLocale without owning it (desired index 0).
+ * On failure or no match it holds a null desired locale with index -1,
+ * plus the default locale and its index.
+ */
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+    }
+    const int32_t bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+        return Result(&desiredLocale, supportedLocales[bestIndex], 0, bestIndex, FALSE);
+    }
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+/**
+ * Like getBestMatch(Locale::Iterator &, UErrorCode &), but returns a Result.
+ *
+ * On a match the Result owns the copy of the best desired locale taken from the
+ * LSR iterator. On failure, an empty sequence, or no match it holds a null
+ * desired locale with index -1, plus the default locale and its index.
+ */
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
+        return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+    }
+    LocaleLsrIterator desiredLsrs(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+    const int32_t bestIndex =
+        getBestSuppIndex(desiredLsrs.next(errorCode), &desiredLsrs, errorCode);
+    if (U_SUCCESS(errorCode) && bestIndex >= 0) {
+        return Result(desiredLsrs.orphanRemembered(), supportedLocales[bestIndex],
+                      desiredLsrs.getBestDesiredIndex(), bestIndex, TRUE);
+    }
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+/**
+ * Core matching loop over one or more desired LSRs.
+ *
+ * @param desiredLSR    The first desired LSR.
+ * @param remainingIter Source of further desired LSRs, or nullptr when there is only one.
+ *                      When non-null, it is told to remember each new best desired locale.
+ * @param errorCode     In/out error code.
+ * @return The index into supportedLocales of the best match, or -1 on failure
+ *         or when nothing is within the threshold distance.
+ */
+int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
+                                        UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return -1; }
+    int32_t bestDistance = thresholdDistance;
+    int32_t bestSupportedLsrIndex = -1;
+    for (int32_t desiredIndex = 0;; ++desiredIndex) {
+        // Quick check for exact maximized LSR.
+        // The table stores suppIndex+1 where 0 means not found.
+        if (supportedLsrToIndex != nullptr) {
+            desiredLSR.setHashCode();
+            const int32_t indexPlusOne = uhash_geti(supportedLsrToIndex, &desiredLSR);
+            if (indexPlusOne != 0) {
+                if (remainingIter != nullptr) {
+                    remainingIter->rememberCurrent(desiredIndex, errorCode);
+                }
+                return indexPlusOne - 1;
+            }
+        }
+        // The result packs the supported-LSR index above the low 8 distance bits;
+        // a negative value means nothing beat bestDistance.
+        const int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+                desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
+        if (indexAndDistance >= 0) {
+            bestDistance = indexAndDistance & 0xff;
+            if (remainingIter != nullptr) {
+                remainingIter->rememberCurrent(desiredIndex, errorCode);
+                if (U_FAILURE(errorCode)) { return -1; }
+            }
+            bestSupportedLsrIndex = indexAndDistance >> 8;
+        }
+        // Later desired locales must beat a distance reduced by the demotion.
+        bestDistance -= demotionPerDesiredLocale;
+        if (bestDistance <= 0) {
+            break;
+        }
+        if (remainingIter == nullptr || !remainingIter->hasNext()) {
+            break;
+        }
+        desiredLSR = remainingIter->next(errorCode);
+        if (U_FAILURE(errorCode)) { return -1; }
+    }
+    // A negative index means there was no good match.
+    return bestSupportedLsrIndex < 0 ? -1 : supportedIndexes[bestSupportedLsrIndex];
+}
+
+/**
+ * Returns the inverse of the distance between two locales as a fraction:
+ * (100 - distance) / 100, where distance is the low 8 bits of the
+ * getBestIndexAndDistance() result for the single supported locale.
+ *
+ * @return 0 if maximizing the supported locale fails.
+ */
+double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    LSR supportedLsr = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    // getBestIndexAndDistance() takes an array of LSR pointers; pass a one-element array.
+    const LSR *supportedLsrPtr = &supportedLsr;
+    const int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+            &supportedLsrPtr, 1,
+            thresholdDistance, favorSubtag);
+    const int32_t distance = indexAndDistance & 0xff;
+    return (100 - distance) / 100.0;
+}
+
+U_NAMESPACE_END
+
+#endif  // __LOCMATCHER_H__
--- a/external/duckdb/extension/icu/third_party/icu/common/localeprioritylist.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/localeprioritylist.cpp
@@ -0,0 +1,239 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.cpp
+// created: 2019jul11 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "localeprioritylist.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/** One list entry: a locale, its weight, and its original position. */
+struct LocaleAndWeight {
+    Locale *locale;
+    int32_t weight;  // 0..1000 = 0.0..1.0
+    int32_t index;  // force stable sort
+
+    /**
+     * Sort order: higher weight first; equal weights keep ascending index order.
+     * @return negative, zero or positive, like a three-way comparison.
+     */
+    int32_t compare(const LocaleAndWeight &other) const {
+        if (weight != other.weight) {
+            return other.weight - weight;  // descending: other-this
+        }
+        return index - other.index;
+    }
+};
+
+/**
+ * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
+ *
+ * This wrapper exists (and is not in an anonymous namespace)
+ * so that we can forward-declare it in the header file and
+ * don't have to expose the MaybeStackArray specialization and
+ * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
+ * Also, otherwise we would have to do a platform-specific
+ * template export declaration of some kind for the MaybeStackArray specialization
+ * to be properly exported from the common DLL.
+ */
+struct LocaleAndWeightArray : public UMemory {
+    // NOTE(review): the second template argument (20) is presumably the number of
+    // elements stored inline before heap allocation - confirm in cmemory.h.
+    MaybeStackArray<LocaleAndWeight, 20> array;
+};
+
+namespace {
+
+/** Hash callback for Locale keys: delegates to Locale::hashCode(). */
+int32_t hashLocale(const UHashTok token) {
+    const Locale *key = static_cast<const Locale *>(token.pointer);
+    return key->hashCode();
+}
+
+/** Key-comparison callback for Locale keys: compares the two locales with operator==. */
+UBool compareLocales(const UHashTok t1, const UHashTok t2) {
+    const Locale &left = *static_cast<const Locale *>(t1.pointer);
+    const Locale &right = *static_cast<const Locale *>(t2.pointer);
+    return left == right;
+}
+
+constexpr int32_t WEIGHT_ONE = 1000;
+
+/** Sort comparator over LocaleAndWeight elements; delegates to LocaleAndWeight::compare(). */
+int32_t U_CALLCONV
+compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
+    const LocaleAndWeight &lhs = *static_cast<const LocaleAndWeight *>(left);
+    const LocaleAndWeight &rhs = *static_cast<const LocaleAndWeight *>(right);
+    return lhs.compare(rhs);
+}
+
+/**
+ * Skips ASCII space characters.
+ * @return Pointer to the first character in [p, limit) that is not ' ', or limit.
+ */
+const char *skipSpaces(const char *p, const char *limit) {
+    for (; p < limit; ++p) {
+        if (*p != ' ') { break; }
+    }
+    return p;
+}
+
+/**
+ * Measures the tag starting at p.
+ * @return Number of characters before the first ' ', ',' or ';', or before limit.
+ */
+int32_t findTagLength(const char *p, const char *limit) {
+    // Look for accept-language delimiters.
+    // Leave other validation up to the Locale constructor.
+    const char *end = p;
+    while (end < limit && *end != ' ' && *end != ',' && *end != ';') {
+        ++end;
+    }
+    return static_cast<int32_t>(end - p);
+}
+
+/**
+ * Parses and returns a qvalue weight in millis.
+ * Accepts optional leading spaces, then '0' or '1', then optionally '.' and digits.
+ * The first three fraction digits are used, the fourth one rounds, the rest are ignored.
+ * Advances p to after the parsed substring.
+ * Returns a negative value if parsing fails or the value is greater than 1.0.
+ */
+int32_t parseWeight(const char *&p, const char *limit) {
+    p = skipSpaces(p, limit);
+    if (p == limit) { return -1; }
+    const char first = *p;
+    if (first != '0' && first != '1') { return -1; }
+    int32_t weight = (first - '0') * 1000;
+    if (++p == limit || *p != '.') { return weight; }
+    // multiplier: 100, 10, 1 for the first three digits; 0 for the rounding digit;
+    // -1 once rounding has been done.
+    int32_t multiplier = 100;
+    for (++p; p != limit; ++p) {
+        char digit = *p;
+        if (digit < '0' || digit > '9') { break; }
+        digit -= '0';
+        if (multiplier > 0) {
+            weight += digit * multiplier;
+            multiplier /= 10;
+        } else if (multiplier == 0) {
+            // round up
+            if (digit >= 5) { ++weight; }
+            multiplier = -1;
+        }  // else ignore further fraction digits
+    }
+    if (weight > WEIGHT_ONE) { return -1; }  // bad if > 1.0
+    return weight;
+}
+
+}  // namespace
+
+/**
+ * Parses a comma-separated list of language tags, each optionally followed by
+ * ";q=weight", adds every entry via add(), and finally calls sort().
+ *
+ * @param s         The list string.
+ * @param errorCode In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for an empty tag,
+ *                  a malformed ";q=" part or trailing junk after an entry, and to
+ *                  U_MEMORY_ALLOCATION_ERROR when the list cannot be allocated or a
+ *                  tag yields a bogus Locale. Parsing stops at the first error.
+ */
+LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    list = new LocaleAndWeightArray();
+    if (list == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    const char *p = s.data();
+    const char *limit = p + s.length();
+    while ((p = skipSpaces(p, limit)) != limit) {
+        if (*p == ',') {  // empty range field
+            ++p;
+            continue;
+        }
+        int32_t tagLength = findTagLength(p, limit);
+        if (tagLength == 0) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        // Copy the tag into a CharString before handing tag.data() to the Locale constructor.
+        CharString tag(p, tagLength, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        Locale locale = Locale(tag.data());
+        // NOTE(review): a bogus Locale is always reported as an allocation failure;
+        // confirm whether it can also result from malformed input.
+        if (locale.isBogus()) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Entries without ";q=" get the full weight.
+        int32_t weight = WEIGHT_ONE;
+        if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
+            // Expect 'q', '=', then a weight, with optional spaces before 'q' and '='.
+            // The last operand steps past '=' and then parses the weight, advancing p.
+            if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
+                    (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
+                    (++p, (weight = parseWeight(p, limit)) < 0)) {
+                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            p = skipSpaces(p, limit);
+        }
+        if (p != limit && *p != ',') {  // trailing junk
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        add(locale, weight, errorCode);
+        if (p == limit) { break; }
+        ++p;  // skip the ',' that ends this entry
+    }
+    sort(errorCode);
+}
+
+/**
+ * Deletes every Locale still held by the array, then the array itself,
+ * and closes the hash map.
+ */
+LocalePriorityList::~LocalePriorityList() {
+    if (list != nullptr) {
+        // Slots whose locale was orphaned or removed hold nullptr;
+        // deleting a null pointer has no effect.
+        int32_t i = 0;
+        while (i < listLength) {
+            delete list->array[i].locale;
+            ++i;
+        }
+        delete list;
+    }
+    uhash_close(map);
+}
+
+/**
+ * Returns the Locale stored in slot i, which may be nullptr if that entry
+ * was removed or orphaned. Neither the list pointer nor the index is checked.
+ */
+const Locale *LocalePriorityList::localeAt(int32_t i) const {
+    const LocaleAndWeight &entry = list->array[i];
+    return entry.locale;
+}
+
+/**
+ * Hands ownership of the Locale in slot i to the caller and clears the slot.
+ * @return the Locale previously stored there, or nullptr if there is no list.
+ *         The index itself is not range-checked.
+ */
+Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
+    if (list == nullptr) { return nullptr; }
+    LocaleAndWeight &entry = list->array[i];
+    Locale *orphan = entry.locale;
+    entry.locale = nullptr;
+    return orphan;
+}
+
+/**
+ * Appends a locale with the given weight, replacing any earlier equal locale.
+ * A weight <= 0 (q=0) adds nothing, but still removes an earlier duplicate.
+ *
+ * @param locale    the locale to add; it is cloned, not adopted
+ * @param weight    the weight in the same units as WEIGHT_ONE
+ * @param errorCode in/out error code
+ * @return false if errorCode indicated failure on entry or a step failed;
+ *         true otherwise (including when a q=0 entry is ignored)
+ */
+bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    // The map is created lazily, on the first entry that is actually stored.
+    if (map == nullptr) {
+        if (weight <= 0) { return true; }  // do not add q=0
+        map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
+        if (U_FAILURE(errorCode)) { return false; }
+    }
+    LocalPointer<Locale> clone;
+    // Map values are array indexes plus one, so that 0 can mean "not present".
+    int32_t index = uhash_geti(map, &locale);
+    if (index != 0) {
+        // Duplicate: Remove the old item and append it anew.
+        // The existing Locale object is taken over by `clone` for reuse.
+        LocaleAndWeight &lw = list->array[index - 1];
+        clone.adoptInstead(lw.locale);
+        lw.locale = nullptr;
+        lw.weight = 0;
+        ++numRemoved;
+    }
+    if (weight <= 0) {  // do not add q=0
+        if (index != 0) {
+            // Not strictly necessary but cleaner.
+            uhash_removei(map, &locale);
+        }
+        // `clone` (if it adopted the old Locale) deletes it on scope exit.
+        return true;
+    }
+    if (clone.isNull()) {
+        clone.adoptInstead(locale.clone());
+        if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    // Grow the array when full: to 100 slots while small, then fourfold.
+    if (listLength == list->array.getCapacity()) {
+        int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
+        if (list->array.resize(newCapacity, listLength) == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    // The map key aliases the Locale object that the array slot will own.
+    uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
+    if (U_FAILURE(errorCode)) { return false; }
+    LocaleAndWeight &lw = list->array[listLength];
+    lw.locale = clone.orphan();
+    lw.weight = weight;
+    lw.index = listLength++;  // original position, used by the sort comparator
+    if (weight < WEIGHT_ONE) { hasWeights = true; }
+    U_ASSERT(uhash_count(map) == getLength());
+    return true;
+}
+
+/**
+ * Sorts the entries by descending weight when there is a mix of weights.
+ * The comparator forces a stable sort via the item index.
+ * Does nothing on a prior failure, for fewer than two live entries,
+ * or when every entry has the default weight.
+ */
+void LocalePriorityList::sort(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (getLength() <= 1) { return; }
+    if (!hasWeights) { return; }
+    uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
+                   compareLocaleAndWeight, nullptr, FALSE, &errorCode);
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/localeprioritylist.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/localeprioritylist.h
@@ -0,0 +1,115 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.h
+// created: 2019jul11 Markus W. Scherer
+
+#ifndef __LOCALEPRIORITYLIST_H__
+#define __LOCALEPRIORITYLIST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LocaleAndWeightArray;
+
+/**
+ * Parses a list of locales from an accept-language string.
+ * We are a bit more lenient than the spec:
+ * We accept extra whitespace in more places, empty range fields,
+ * and any number of qvalue fraction digits.
+ *
+ * https://tools.ietf.org/html/rfc2616#section-14.4
+ * 14.4 Accept-Language
+ *
+ *        Accept-Language = "Accept-Language" ":"
+ *                          1#( language-range [ ";" "q" "=" qvalue ] )
+ *        language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
+ *
+ *    Each language-range MAY be given an associated quality value which
+ *    represents an estimate of the user's preference for the languages
+ *    specified by that range. The quality value defaults to "q=1". For
+ *    example,
+ *
+ *        Accept-Language: da, en-gb;q=0.8, en;q=0.7
+ *
+ * https://tools.ietf.org/html/rfc2616#section-3.9
+ * 3.9 Quality Values
+ *
+ *    HTTP content negotiation (section 12) uses short "floating point"
+ *    numbers to indicate the relative importance ("weight") of various
+ *    negotiable parameters.  A weight is normalized to a real number in
+ *    the range 0 through 1, where 0 is the minimum and 1 the maximum
+ *    value. If a parameter has a quality value of 0, then content with
+ *    this parameter is `not acceptable' for the client. HTTP/1.1
+ *    applications MUST NOT generate more than three digits after the
+ *    decimal point. User configuration of these values SHOULD also be
+ *    limited in this fashion.
+ *
+ *        qvalue         = ( "0" [ "." 0*3DIGIT ] )
+ *                       | ( "1" [ "." 0*3("0") ] )
+ */
+class U_COMMON_API LocalePriorityList : public UMemory {
+public:
+    /**
+     * Iterates over the locales that are still present in the list,
+     * skipping slots whose locale pointer is null.
+     * The number of locales to return is captured at construction time.
+     */
+    class Iterator : public Locale::Iterator {
+    public:
+        /** @return true while fewer locales were returned than the list held at construction */
+        UBool hasNext() const override { return count < length; }
+
+        /**
+         * Returns the next non-null locale.
+         * The loop has no bound of its own, so this must only be called
+         * while hasNext() is true.
+         */
+        const Locale &next() override {
+            for(;;) {
+                const Locale *locale = list.localeAt(index++);
+                if (locale != nullptr) {
+                    ++count;
+                    return *locale;
+                }
+            }
+        }
+
+    private:
+        friend class LocalePriorityList;
+
+        Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
+
+        const LocalePriorityList &list;  // not owned; must outlive the iterator
+        int32_t index = 0;               // next array slot to inspect
+        int32_t count = 0;               // locales returned so far
+        const int32_t length;            // list.getLength() at construction
+    };
+
+    /** Parses s; failures are reported through errorCode. */
+    LocalePriorityList(StringPiece s, UErrorCode &errorCode);
+
+    ~LocalePriorityList();
+
+    /** @return the number of locales currently in the list (removed slots excluded) */
+    int32_t getLength() const { return listLength - numRemoved; }
+
+    /** @return the number of array slots in use, including removed ones */
+    int32_t getLengthIncludingRemoved() const { return listLength; }
+
+    Iterator iterator() const { return Iterator(*this); }
+
+    /** @return the locale in slot i; may be null for a removed or orphaned slot */
+    const Locale *localeAt(int32_t i) const;
+
+    /** Transfers ownership of the locale in slot i to the caller and nulls the slot. */
+    Locale *orphanLocaleAt(int32_t i);
+
+private:
+    LocalePriorityList(const LocalePriorityList &) = delete;
+    LocalePriorityList &operator=(const LocalePriorityList &) = delete;
+
+    bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
+
+    void sort(UErrorCode &errorCode);
+
+    LocaleAndWeightArray *list = nullptr;  // owned; allocated by the constructor
+    int32_t listLength = 0;                // slots used, including removed ones
+    int32_t numRemoved = 0;                // slots nulled out by duplicate handling
+    bool hasWeights = false;  // other than 1.0
+    UHashtable *map = nullptr;             // created on first stored entry; closed in the destructor
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCALEPRIORITYLIST_H__
--- a/external/duckdb/extension/icu/third_party/icu/common/localsvc.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/localsvc.h
@@ -0,0 +1,27 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+*   Copyright (C) 2006 International Business Machines Corporation        *
+*   and others. All rights reserved.                                      *
+***************************************************************************
+*/
+
+#ifndef LOCALSVC_H
+#define LOCALSVC_H
+
+#include "unicode/utypes.h"
+
+#if defined(U_LOCAL_SERVICE_HOOK) && U_LOCAL_SERVICE_HOOK
+/**
+ * Prototype for user-supplied service hook. This function is expected to return
+ * a type of factory object specific to the requested service.
+ * 
+ * @param what service-specific string identifying the specific user hook
+ * @param status error status
+ * @return a service-specific hook, or NULL on failure.
+ */
+U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
+#endif
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/locavailable.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locavailable.cpp
@@ -0,0 +1,270 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1997-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  locavailable.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010feb25
+*   created by: Markus W. Scherer
+*
+*   Code for available locales, separated out from other .cpp files
+*   that then do not depend on resource bundle code and res_index bundles.
+*/
+
+#include "unicode/errorcode.h"
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ucln_cmn.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "uresimp.h"
+
+// C++ API ----------------------------------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+// Cache behind Locale::getAvailableLocales(): the Locale array, its length,
+// and the init-once guard. Filled by locale_available_init() and released
+// by locale_available_cleanup().
+static icu::Locale*  gLocAvailable_availableLocaleList = NULL;
+static int32_t  gLocAvailable_availableLocaleListCount;
+static icu::UInitOnce gLocAvailableInitOnceLocale = U_INITONCE_INITIALIZER;
+
+U_NAMESPACE_END
+
+U_CDECL_BEGIN
+
+/**
+ * Cleanup callback registered by locale_available_init():
+ * frees the cached Locale array and resets the count and init-once guard.
+ * @return TRUE always
+ */
+static UBool U_CALLCONV locale_available_cleanup(void)
+{
+    U_NAMESPACE_USE
+
+    if (gLocAvailable_availableLocaleList != NULL) {
+        delete [] gLocAvailable_availableLocaleList;
+        gLocAvailable_availableLocaleList = NULL;
+    }
+    gLocAvailableInitOnceLocale.reset();
+    gLocAvailable_availableLocaleListCount = 0;
+
+    return TRUE;
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Builds the cached array of available Locale objects from
+ * uloc_countAvailable() / uloc_getAvailable() and registers the cleanup hook.
+ */
+void U_CALLCONV locale_available_init() {
+    // This function is a friend of class Locale.
+    // It is only invoked via umtx_initOnce().
+
+    // For now there is a hardcoded list, so just walk through it and set it up.
+    gLocAvailable_availableLocaleListCount = uloc_countAvailable();
+    if (gLocAvailable_availableLocaleListCount != 0) {
+        gLocAvailable_availableLocaleList = new Locale[gLocAvailable_availableLocaleListCount];
+    }
+    if (gLocAvailable_availableLocaleList == NULL) {
+        // No array was obtained: report an empty list.
+        gLocAvailable_availableLocaleListCount = 0;
+    }
+    // Fill from the last slot down to the first.
+    int32_t i = gLocAvailable_availableLocaleListCount;
+    while (--i >= 0) {
+        gLocAvailable_availableLocaleList[i].setFromPOSIXID(uloc_getAvailable(i));
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
+}
+
+/**
+ * Returns the cached array of available locales, building it on first use.
+ * @param count receives the number of elements in the returned array
+ * @return the array, or NULL when the list is empty
+ */
+const Locale* U_EXPORT2
+Locale::getAvailableLocales(int32_t& count)
+{
+    umtx_initOnce(gLocAvailableInitOnceLocale, &locale_available_init);
+    const Locale *locales = gLocAvailable_availableLocaleList;
+    count = gLocAvailable_availableLocaleListCount;
+    return locales;
+}
+
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+/* ### Constants **************************************************/
+
+namespace {
+
+// Enough capacity for the two lists in the res_index.res file
+// Both arrays are indexed by ULocAvailableType: ULOC_AVAILABLE_DEFAULT for
+// "InstalledLocales" and ULOC_AVAILABLE_ONLY_LEGACY_ALIASES for "AliasLocales".
+// They are filled by AvailableLocalesSink and freed by uloc_cleanup().
+const char** gAvailableLocaleNames[2] = {};
+int32_t gAvailableLocaleCounts[2] = {};
+icu::UInitOnce ginstalledLocalesInitOnce = U_INITONCE_INITIALIZER;
+
+/**
+ * Resource sink that walks the res_index table and records the keys of its
+ * "InstalledLocales" and "AliasLocales" sub-tables in gAvailableLocaleNames /
+ * gAvailableLocaleCounts. All other entries are ignored.
+ */
+class AvailableLocalesSink : public ResourceSink {
+  public:
+    /**
+     * @param key    reused as scratch for the keys of the tables being walked
+     * @param value  on entry the res_index table; reused as scratch afterwards
+     * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if a
+     *               name array cannot be allocated
+     */
+    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
+        ResourceTable resIndexTable = value.getTable(status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+        for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) {
+            ULocAvailableType type;
+            if (uprv_strcmp(key, "InstalledLocales") == 0) {
+                type = ULOC_AVAILABLE_DEFAULT;
+            } else if (uprv_strcmp(key, "AliasLocales") == 0) {
+                type = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+            } else {
+                // CLDRVersion, etc.
+                continue;
+            }
+            ResourceTable availableLocalesTable = value.getTable(status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            // NOTE(review): an array already stored for this type would be
+            // overwritten without being freed -- presumably each key occurs
+            // at most once and put() runs once; confirm.
+            gAvailableLocaleCounts[type] = availableLocalesTable.getSize();
+            gAvailableLocaleNames[type] = static_cast<const char**>(
+                uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*)));
+            if (gAvailableLocaleNames[type] == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            // Only the key pointers are stored, not copies of the strings.
+            // The inner loop overwrites key/value; the outer loop re-fetches
+            // them by index on its next iteration.
+            for (int32_t j = 0; availableLocalesTable.getKeyAndValue(j, key, value); ++j) {
+                gAvailableLocaleNames[type][j] = key;
+            }
+        }
+    }
+};
+
+/**
+ * StringEnumeration over the locale names recorded in gAvailableLocaleNames.
+ * For ULOC_AVAILABLE_WITH_LEGACY_ALIASES the default list is enumerated
+ * first, followed by the legacy-alias list.
+ */
+class AvailableLocalesStringEnumeration : public StringEnumeration {
+  public:
+    AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {
+    }
+
+    /**
+     * Returns the next name, or nullptr once the enumeration is exhausted.
+     * If resultLength is non-null it receives the name's length (0 at the end).
+     */
+    const char* next(int32_t *resultLength, UErrorCode&) override {
+        int32_t index = fIndex++;
+        ULocAvailableType listType = fType;
+
+        // The "combined" type maps onto one of the two real lists.
+        if (listType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
+            const int32_t numDefault = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT];
+            if (index >= numDefault) {
+                index -= numDefault;
+                listType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+            } else {
+                listType = ULOC_AVAILABLE_DEFAULT;
+            }
+        }
+
+        if (index >= gAvailableLocaleCounts[listType]) {
+            // Past the end of the selected list.
+            if (resultLength != nullptr) {
+                *resultLength = 0;
+            }
+            return nullptr;
+        }
+        const char* name = gAvailableLocaleNames[listType][index];
+        if (resultLength != nullptr) {
+            *resultLength = static_cast<int32_t>(uprv_strlen(name));
+        }
+        return name;
+    }
+
+    /** Restarts the enumeration from the first name. */
+    void reset(UErrorCode&) override {
+        fIndex = 0;
+    }
+
+    /** @return the total number of names this enumeration covers */
+    int32_t count(UErrorCode&) const override {
+        if (fType != ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
+            return gAvailableLocaleCounts[fType];
+        }
+        return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
+            + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES];
+    }
+
+  private:
+    ULocAvailableType fType;
+    int32_t fIndex = 0;
+};
+
+/* ### Get available **************************************************/
+
+/**
+ * Cleanup callback: frees both name arrays, zeroes their counts and
+ * resets the init-once guard.
+ * @return TRUE always
+ */
+static UBool U_CALLCONV uloc_cleanup(void) {
+    int32_t type = 0;
+    while (type < UPRV_LENGTHOF(gAvailableLocaleNames)) {
+        uprv_free(gAvailableLocaleNames[type]);
+        gAvailableLocaleNames[type] = nullptr;
+        gAvailableLocaleCounts[type] = 0;
+        ++type;
+    }
+    ginstalledLocalesInitOnce.reset();
+    return TRUE;
+}
+
+// Loads the installed locales from the "res_index" bundle.
+// Called exactly once via the initOnce mechanism.
+// The cleanup hook is registered before anything is loaded.
+
+static void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
+    ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
+
+    icu::LocalUResourceBundlePointer resIndex(ures_openDirect(NULL, "res_index", &status));
+    AvailableLocalesSink localesSink;
+    ures_getAllItemsWithFallback(resIndex.getAlias(), "", localesSink, status);
+}
+
+// Runs loadInstalledLocales() through the init-once guard, so the name lists
+// are loaded at most once until uloc_cleanup() resets the guard.
+// `status` is passed to umtx_initOnce as the in/out error code.
+void _load_installedLocales(UErrorCode& status) {
+    umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status);
+}
+
+} // namespace
+
+/**
+ * Returns the name of the installed locale at the given position.
+ * @param offset zero-based index; valid values are 0 .. uloc_countAvailable()-1
+ * @return the locale name, or nullptr if loading the list failed or the
+ *         offset is out of range
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getAvailable(int32_t offset) {
+    icu::ErrorCode status;
+    _load_installedLocales(status);
+    if (status.isFailure()) {
+        return nullptr;
+    }
+    // Reject negative offsets and offset == count as well: the array holds
+    // exactly `count` entries, so index `count` is already one past the end
+    // (and the array pointer is null when no list was loaded).
+    if (offset < 0 || offset >= gAvailableLocaleCounts[0]) {
+        // *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    return gAvailableLocaleNames[0][offset];
+}
+
+/**
+ * @return the number of installed locales, or 0 if the list could not be loaded
+ */
+U_CAPI int32_t  U_EXPORT2
+uloc_countAvailable() {
+    icu::ErrorCode status;
+    _load_installedLocales(status);
+    return status.isFailure() ? 0 : gAvailableLocaleCounts[0];
+}
+
+/**
+ * Opens an enumeration over the available locale names of the given type.
+ * @param type   which list to enumerate; must be >= 0 and below ULOC_AVAILABLE_COUNT
+ * @param status in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a bad type
+ * @return the enumeration, or nullptr on any failure
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    const bool typeInRange = type >= 0 && type < ULOC_AVAILABLE_COUNT;
+    if (!typeInRange) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    _load_installedLocales(*status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    LocalPointer<AvailableLocalesStringEnumeration> names(
+        new AvailableLocalesStringEnumeration(type), *status);
+    if (U_SUCCESS(*status)) {
+        // Ownership of the enumeration passes to the UEnumeration wrapper.
+        return uenum_openFromStringEnumeration(names.orphan(), status);
+    }
+    return nullptr;
+}
+
--- a/external/duckdb/extension/icu/third_party/icu/common/locbased.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locbased.cpp
@@ -0,0 +1,55 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2004-2014, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: January 16 2004
+* Since: ICU 2.8
+**********************************************************************
+*/
+#include "locbased.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Returns the valid or actual locale as a Locale object.
+ * When getLocaleID() yields no ID, the Locale is built from an empty string.
+ */
+Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
+    const char* id = getLocaleID(type, status);
+    if (id == NULL) {
+        id = "";
+    }
+    return Locale(id);
+}
+
+/**
+ * Returns the aliased valid or actual locale ID.
+ * @param type   ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
+ * @param status in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any other type
+ * @return the requested ID, or NULL on failure
+ */
+const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (type == ULOC_VALID_LOCALE) {
+        return valid;
+    }
+    if (type == ULOC_ACTUAL_LOCALE) {
+        return actual;
+    }
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+}
+
+/**
+ * Copies the given IDs into the aliased buffers. A null ID leaves the
+ * corresponding buffer untouched. Each copy is limited to
+ * ULOC_FULLNAME_CAPACITY chars and is always NUL-terminated.
+ */
+void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
+    auto copyID = [](char* dest, const char* src) {
+        if (src == NULL) {
+            return;
+        }
+        uprv_strncpy(dest, src, ULOC_FULLNAME_CAPACITY);
+        dest[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+    };
+    copyID(valid, validID);
+    copyID(actual, actualID);
+}
+
+/**
+ * Copies the names of the given locales into the aliased buffers.
+ * The buffers are documented (see U_LOCALE_BASED) as ULOC_FULLNAME_CAPACITY
+ * chars, so the copy is bounded and always NUL-terminated, matching the
+ * const char* overload; an unbounded strcpy would overrun the buffer for
+ * any name of ULOC_FULLNAME_CAPACITY chars or more.
+ * @param validID  locale whose name becomes the valid locale ID
+ * @param actualID locale whose name becomes the actual locale ID
+ */
+void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
+  uprv_strncpy(valid, validID.getName(), ULOC_FULLNAME_CAPACITY);
+  valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+  uprv_strncpy(actual, actualID.getName(), ULOC_FULLNAME_CAPACITY);
+  actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/locbased.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/locbased.h
@@ -0,0 +1,107 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2004-2014, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: January 16 2004
+* Since: ICU 2.8
+**********************************************************************
+*/
+#ifndef LOCBASED_H
+#define LOCBASED_H
+
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+
+/**
+ * Macro to declare a locale LocaleBased wrapper object for the given
+ * object, which must have two members named `validLocale' and
+ * `actualLocale' of size ULOC_FULLNAME_CAPACITY
+ */
+#define U_LOCALE_BASED(varname, objname) \
+  LocaleBased varname((objname).validLocale, (objname).actualLocale)
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A utility class that unifies the implementation of getLocale() by
+ * various ICU services.  This class is likely to be removed in the
+ * ICU 3.0 time frame in favor of an integrated approach with the
+ * services framework.
+ * @since ICU 2.8
+ */
+class U_COMMON_API LocaleBased : public UMemory {
+
+ public:
+
+    /**
+     * Construct a LocaleBased wrapper around the two pointers.  These
+     * will be aliased for the lifetime of this object.
+     */
+    inline LocaleBased(char* validAlias, char* actualAlias);
+
+    /**
+     * Construct a LocaleBased wrapper around the two const pointers.
+     * These will be aliased for the lifetime of this object.
+     * Constness is cast away internally, so the setLocaleIDs() methods
+     * must not be used on a wrapper built this way if the buffers are
+     * really read-only.
+     */
+    inline LocaleBased(const char* validAlias, const char* actualAlias);
+
+    /**
+     * Return locale meta-data for the service object wrapped by this
+     * object.  Either the valid or the actual locale may be
+     * retrieved.
+     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
+     * @param status input-output error code
+     * @return the indicated locale
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * Return the locale ID for the service object wrapped by this
+     * object.  Either the valid or the actual locale may be
+     * retrieved.
+     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
+     * @param status input-output error code
+     * @return the indicated locale ID
+     */
+    const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * Set the locale meta-data for the service object wrapped by this
+     * object.  If either parameter is zero, it is ignored.
+     * @param valid the ID of the valid locale
+     * @param actual the ID of the actual locale
+     */
+    void setLocaleIDs(const char* valid, const char* actual);
+
+    /**
+     * Set the locale meta-data for the service object wrapped by this
+     * object.
+     * @param valid the ID of the valid locale
+     * @param actual the ID of the actual locale
+     */
+    void setLocaleIDs(const Locale& valid, const Locale& actual);
+
+ private:
+
+    // Aliases of the wrapped object's buffers; not owned by this class.
+    char* valid;
+    
+    char* actual;
+};
+
+// Stores the two buffer pointers as aliases; nothing is copied or allocated.
+inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
+    valid(validAlias), actual(actualAlias) {
+}
+
+// Const flavor: stores the two pointers as aliases. The members are
+// non-const, so constness has to be cast away here.
+inline LocaleBased::LocaleBased(const char* validAlias,
+                                const char* actualAlias) :
+    valid(const_cast<char*>(validAlias)),
+    actual(const_cast<char*>(actualAlias)) {
+}
+
+U_NAMESPACE_END
+
+#endif
--- a/external/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp
@@ -0,0 +1,885 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1997-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  locdispnames.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010feb25
+*   created by: Markus W. Scherer
+*
+*   Code for locale display names, separated out from other .cpp files
+*   that then do not depend on resource bundle code and display name data.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "ulocimp.h"
+#include "uresimp.h"
+#include "ureslocs.h"
+#include "ustr_imp.h"
+
+// C++ API ----------------------------------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+// Convenience overload: language display name localized for the default locale.
+UnicodeString&
+Locale::getDisplayLanguage(UnicodeString& dispLang) const
+{
+    const Locale &inLocale = getDefault();
+    return getDisplayLanguage(inLocale, dispLang);
+}
+
+/* We cannot make any assumptions about the size of the output display strings.
+* Yet, since we are calling through to a C API, we need to set limits on the
+* buffer size. For all the following getDisplay functions we first attempt
+* to fill a buffer of ULOC_FULLNAME_CAPACITY UChars. If it is too small, we
+* request a buffer of exactly the required length and call the C API again. */
+
+// Writes this locale's language display name, localized for displayLocale,
+// into result and returns result. On any failure result is left empty.
+UnicodeString&
+Locale::getDisplayLanguage(const Locale &displayLocale,
+                           UnicodeString &result) const {
+    // First attempt uses ULOC_FULLNAME_CAPACITY; on overflow there is exactly
+    // one retry with the length the C API reported.
+    int32_t wanted = ULOC_FULLNAME_CAPACITY;
+    for (int32_t attempt = 0; attempt < 2; ++attempt) {
+        UChar *dest = result.getBuffer(wanted);
+        if (dest == NULL) {
+            result.truncate(0);
+            return result;
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const int32_t length = uloc_getDisplayLanguage(fullName, displayLocale.fullName,
+                                                       dest, result.getCapacity(),
+                                                       &errorCode);
+        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        if (errorCode != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        wanted = length;
+    }
+    return result;
+}
+
+// Convenience overload: script display name localized for the default locale.
+UnicodeString&
+Locale::getDisplayScript(UnicodeString& dispScript) const
+{
+    const Locale &inLocale = getDefault();
+    return getDisplayScript(inLocale, dispScript);
+}
+
+// Writes this locale's script display name, localized for displayLocale,
+// into result and returns result. On any failure result is left empty.
+UnicodeString&
+Locale::getDisplayScript(const Locale &displayLocale,
+                          UnicodeString &result) const {
+    // First attempt uses ULOC_FULLNAME_CAPACITY; on overflow there is exactly
+    // one retry with the length the C API reported.
+    int32_t wanted = ULOC_FULLNAME_CAPACITY;
+    for (int32_t attempt = 0; attempt < 2; ++attempt) {
+        UChar *dest = result.getBuffer(wanted);
+        if (dest == NULL) {
+            result.truncate(0);
+            return result;
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const int32_t length = uloc_getDisplayScript(fullName, displayLocale.fullName,
+                                                     dest, result.getCapacity(),
+                                                     &errorCode);
+        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        if (errorCode != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        wanted = length;
+    }
+    return result;
+}
+
+// Convenience overload: country display name localized for the default locale.
+UnicodeString&
+Locale::getDisplayCountry(UnicodeString& dispCntry) const
+{
+    const Locale &inLocale = getDefault();
+    return getDisplayCountry(inLocale, dispCntry);
+}
+
+// Writes this locale's country display name, localized for displayLocale,
+// into result and returns result. On any failure result is left empty.
+UnicodeString&
+Locale::getDisplayCountry(const Locale &displayLocale,
+                          UnicodeString &result) const {
+    // First attempt uses ULOC_FULLNAME_CAPACITY; on overflow there is exactly
+    // one retry with the length the C API reported.
+    int32_t wanted = ULOC_FULLNAME_CAPACITY;
+    for (int32_t attempt = 0; attempt < 2; ++attempt) {
+        UChar *dest = result.getBuffer(wanted);
+        if (dest == NULL) {
+            result.truncate(0);
+            return result;
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const int32_t length = uloc_getDisplayCountry(fullName, displayLocale.fullName,
+                                                      dest, result.getCapacity(),
+                                                      &errorCode);
+        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        if (errorCode != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        wanted = length;
+    }
+    return result;
+}
+
+// Convenience overload: variant display name localized for the default locale.
+UnicodeString&
+Locale::getDisplayVariant(UnicodeString& dispVar) const
+{
+    const Locale &inLocale = getDefault();
+    return getDisplayVariant(inLocale, dispVar);
+}
+
+// Writes this locale's variant display name, localized for displayLocale,
+// into result and returns result. On any failure result is left empty.
+UnicodeString&
+Locale::getDisplayVariant(const Locale &displayLocale,
+                          UnicodeString &result) const {
+    // First attempt uses ULOC_FULLNAME_CAPACITY; on overflow there is exactly
+    // one retry with the length the C API reported.
+    int32_t wanted = ULOC_FULLNAME_CAPACITY;
+    for (int32_t attempt = 0; attempt < 2; ++attempt) {
+        UChar *dest = result.getBuffer(wanted);
+        if (dest == NULL) {
+            result.truncate(0);
+            return result;
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const int32_t length = uloc_getDisplayVariant(fullName, displayLocale.fullName,
+                                                      dest, result.getCapacity(),
+                                                      &errorCode);
+        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        if (errorCode != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        wanted = length;
+    }
+    return result;
+}
+
+// Convenience overload: full display name localized for the default locale.
+UnicodeString&
+Locale::getDisplayName( UnicodeString& name ) const
+{
+    const Locale &inLocale = getDefault();
+    return getDisplayName(inLocale, name);
+}
+
+// Writes this locale's full display name, localized for displayLocale,
+// into result and returns result. On any failure result is left empty.
+UnicodeString&
+Locale::getDisplayName(const Locale &displayLocale,
+                       UnicodeString &result) const {
+    // First attempt uses ULOC_FULLNAME_CAPACITY; on overflow there is exactly
+    // one retry with the length the C API reported.
+    int32_t wanted = ULOC_FULLNAME_CAPACITY;
+    for (int32_t attempt = 0; attempt < 2; ++attempt) {
+        UChar *dest = result.getBuffer(wanted);
+        if (dest == NULL) {
+            result.truncate(0);
+            return result;
+        }
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const int32_t length = uloc_getDisplayName(fullName, displayLocale.fullName,
+                                                   dest, result.getCapacity(),
+                                                   &errorCode);
+        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
+        if (errorCode != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        wanted = length;
+    }
+    return result;
+}
+
+#if ! UCONFIG_NO_BREAK_ITERATION
+
+// -------------------------------------
+// Gets the display name of objectLocale, written into `name`.
+// Forwards to the one-argument Locale::getDisplayName().
+UnicodeString& U_EXPORT2
+BreakIterator::getDisplayName(const Locale& objectLocale,
+                             UnicodeString& name)
+{
+    UnicodeString& displayName = objectLocale.getDisplayName(name);
+    return displayName;
+}
+
+// -------------------------------------
+// Gets the display name of objectLocale in the language of displayLocale,
+// written into `name`. Forwards to the two-argument Locale::getDisplayName().
+UnicodeString& U_EXPORT2
+BreakIterator::getDisplayName(const Locale& objectLocale,
+                             const Locale& displayLocale,
+                             UnicodeString& name)
+{
+    UnicodeString& displayName = objectLocale.getDisplayName(displayLocale, name);
+    return displayName;
+}
+
+#endif
+
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+/* ### Constants **************************************************/
+
+/* These strings describe the resources we attempt to load from
+ the locale ResourceBundle data file.
+ They are passed as table/item keys to the resource lookups in this file.
+ Note that _getDisplayNameForComponent() compares its tag argument against
+ _kCountries by pointer (not by content) to choose the region data bundle,
+ so callers must pass these very arrays rather than equal string literals. */
+static const char _kLanguages[]       = "Languages";
+static const char _kScripts[]         = "Scripts";
+static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
+static const char _kCountries[]       = "Countries";
+static const char _kVariants[]        = "Variants";
+static const char _kKeys[]            = "Keys";
+static const char _kTypes[]           = "Types";
+//static const char _kRootName[]        = "root";
+/* keyword name that gets special handling in uloc_getDisplayKeywordValue() */
+static const char _kCurrency[]        = "currency";
+static const char _kCurrencies[]      = "Currencies";
+/* table holding the "pattern" and "separator" strings used by uloc_getDisplayName() */
+static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
+static const char _kPattern[]         = "pattern";
+static const char _kSeparator[]       = "separator";
+
+/* ### Display name **************************************************/
+
+/*
+ * Fetches a display string from the resource bundle `path` for `locale` and
+ * copies as much of it as fits into dest.
+ *
+ *  - itemKey == NULL: tableKey names a top-level string in the bundle.
+ *  - itemKey != NULL: the string is looked up as tableKey/subTableKey/itemKey
+ *    through uloc_getTableStringWithFallback(). A "Languages" lookup whose
+ *    item key parses as a non-zero decimal number is rejected up front.
+ *
+ * When the lookup fails for any reason, `substitute` is converted to UChars
+ * and used instead, and *pErrorCode is set to U_USING_DEFAULT_WARNING.
+ *
+ * Returns the full (untruncated) length via u_terminateUChars(), which also
+ * applies the usual termination / overflow status to *pErrorCode.
+ */
+static int32_t
+_getStringOrCopyKey(const char *path, const char *locale,
+                    const char *tableKey,
+                    const char* subTableKey,
+                    const char *itemKey,
+                    const char *substitute,
+                    UChar *dest, int32_t destCapacity,
+                    UErrorCode *pErrorCode) {
+    const UChar *text = NULL;
+    int32_t textLength = 0;
+
+    if(itemKey!=NULL) {
+        /* Language code should not be a number. If it is, set the error code. */
+        const UBool numericLanguage =
+            uprv_strncmp(tableKey, "Languages", 9)==0 && uprv_strtol(itemKey, NULL, 10)!=0;
+        if(numericLanguage) {
+            *pErrorCode = U_MISSING_RESOURCE_ERROR;
+        } else {
+            /* second-level item, use special fallback */
+            text=uloc_getTableStringWithFallback(path, locale,
+                                                  tableKey,
+                                                  subTableKey,
+                                                  itemKey,
+                                                  &textLength,
+                                                  pErrorCode);
+        }
+    } else {
+        /* top-level item: normal resource bundle access */
+        icu::LocalUResourceBundlePointer bundle(ures_open(path, locale, pErrorCode));
+
+        if(U_SUCCESS(*pErrorCode)) {
+            text=ures_getStringByKey(bundle.getAlias(), tableKey, &textLength, pErrorCode);
+            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
+        }
+    }
+
+    if(U_FAILURE(*pErrorCode)) {
+        /* no string from a resource bundle: convert the substitute */
+        textLength=(int32_t)uprv_strlen(substitute);
+        u_charsToUChars(substitute, dest, uprv_min(textLength, destCapacity));
+        *pErrorCode=U_USING_DEFAULT_WARNING;
+    } else {
+        /* copy only what fits; the full length is still reported below */
+        const int32_t fitLength=uprv_min(textLength, destCapacity);
+        if(text!=NULL && fitLength>0) {
+            u_memcpy(dest, text, fitLength);
+        }
+    }
+
+    return u_terminateUChars(dest, destCapacity, textLength, pErrorCode);
+}
+
+typedef  int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
+
+/*
+ * Shared implementation of the uloc_getDisplay<Component>() functions.
+ *
+ * `getter` extracts one component (language, script, country, variant) of
+ * `locale` as a char string; that string is then used both as the item key in
+ * the resource table named by `tag` and as the substitute text if no
+ * resource is found. An empty language is looked up as "und"; any other
+ * empty component yields an empty, terminated result.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity pair, or when the
+ * getter fails or cannot NUL-terminate its output in the local buffer.
+ */
+static int32_t
+_getDisplayNameForComponent(const char *locale,
+                            const char *displayLocale,
+                            UChar *dest, int32_t destCapacity,
+                            UDisplayNameGetter *getter,
+                            const char *tag,
+                            UErrorCode *pErrorCode) {
+    /* argument checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* extract the requested component of the locale ID */
+    char component[ULOC_FULLNAME_CAPACITY*4];
+    UErrorCode getterStatus = U_ZERO_ERROR;
+    const int32_t componentLength=(*getter)(locale, component, sizeof(component), &getterStatus);
+    if(getterStatus==U_STRING_NOT_TERMINATED_WARNING || U_FAILURE(getterStatus)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(componentLength==0) {
+        if (getter != uloc_getLanguage) {
+            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+        }
+        // For the display name, we treat this as unknown language (ICU-20273).
+        uprv_strcpy(component, "und");
+    }
+
+    /* country names live in the region bundle, everything else in the language bundle */
+    const char *bundlePath = (tag == _kCountries) ? U_ICUDATA_REGION : U_ICUDATA_LANG;
+
+    return _getStringOrCopyKey(bundlePath, displayLocale,
+                               tag, NULL, component,
+                               component,
+                               dest, destCapacity,
+                               pErrorCode);
+}
+
+/*
+ * Display name of the language part of `locale`: the subtag is extracted
+ * with uloc_getLanguage and looked up in the "Languages" table through
+ * _getDisplayNameForComponent(), whose result is returned unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char *locale,
+                        const char *displayLocale,
+                        UChar *dest, int32_t destCapacity,
+                        UErrorCode *pErrorCode) {
+    const int32_t nameLength = _getDisplayNameForComponent(
+            locale, displayLocale, dest, destCapacity,
+            uloc_getLanguage, _kLanguages, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * Display name of the script part of `locale`.
+ *
+ * The stand-alone form ("Scripts%stand-alone") is tried first; if that lookup
+ * had to fall back to the raw script code (U_USING_DEFAULT_WARNING), the
+ * regular "Scripts" table is consulted instead.
+ *
+ * @param locale        locale ID whose script is wanted
+ * @param displayLocale locale to localize the name for
+ * @param dest          output buffer, may be NULL only if destCapacity is 0
+ * @param destCapacity  capacity of dest in UChars
+ * @param pErrorCode    in/out ICU error code; nothing is done if it is NULL
+ *                      or already indicates a failure
+ * @return the length of the display name (0 if nothing was done)
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+                      const char* displayLocale,
+                      UChar *dest, int32_t destCapacity,
+                      UErrorCode *pErrorCode)
+{
+    /* Same contract as the sibling entry points: never dereference a NULL
+       error code, and never run on (or overwrite) an incoming failure. */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* Use a private status for the first lookup so that its
+       U_USING_DEFAULT_WARNING can be detected reliably. */
+    UErrorCode err = U_ZERO_ERROR;
+    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                uloc_getScript, _kScriptsStandAlone, &err);
+
+    if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) {
+        /* Preflight: the overflow status hides whether the stand-alone lookup
+           fell back to the raw code, so also measure the "Scripts" entry and
+           report the larger length; a buffer of that size fits either result. */
+        int32_t fallbackRes = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                    uloc_getScript, _kScripts, pErrorCode);
+        return fallbackRes > res ? fallbackRes : res;
+    }
+
+    if ( err == U_USING_DEFAULT_WARNING ) {
+        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                    uloc_getScript, _kScripts, pErrorCode);
+    }
+
+    *pErrorCode = err;
+    return res;
+}
+
+/*
+ * Display name of the script part of `locale`, taken directly from the
+ * "Scripts" table (no stand-alone lookup is attempted here).
+ */
+U_INTERNAL int32_t U_EXPORT2
+uloc_getDisplayScriptInContext(const char* locale,
+                      const char* displayLocale,
+                      UChar *dest, int32_t destCapacity,
+                      UErrorCode *pErrorCode)
+{
+    const int32_t nameLength = _getDisplayNameForComponent(
+            locale, displayLocale, dest, destCapacity,
+            uloc_getScript, _kScripts, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * Display name of the country/region part of `locale`: the subtag is
+ * extracted with uloc_getCountry and looked up in the "Countries" table
+ * through _getDisplayNameForComponent().
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayCountry(const char *locale,
+                       const char *displayLocale,
+                       UChar *dest, int32_t destCapacity,
+                       UErrorCode *pErrorCode) {
+    const int32_t nameLength = _getDisplayNameForComponent(
+            locale, displayLocale, dest, destCapacity,
+            uloc_getCountry, _kCountries, pErrorCode);
+    return nameLength;
+}
+
+/*
+ * TODO separate variant1_variant2_variant3...
+ * by getting each tag's display string and concatenating them with ", "
+ * in between - similar to uloc_getDisplayName()
+ */
+/*
+ * Display name of the variant part of `locale`: the whole variant string
+ * returned by uloc_getVariant is looked up as a single key in the
+ * "Variants" table through _getDisplayNameForComponent().
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayVariant(const char *locale,
+                       const char *displayLocale,
+                       UChar *dest, int32_t destCapacity,
+                       UErrorCode *pErrorCode) {
+    const int32_t nameLength = _getDisplayNameForComponent(
+            locale, displayLocale, dest, destCapacity,
+            uloc_getVariant, _kVariants, pErrorCode);
+    return nameLength;
+}
+
+/* Instead of having a separate pass for 'special' patterns, reintegrate the two
+ * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
+ * without two separate code paths, this code isn't that performance-critical.
+ *
+ * This code is general enough to deal with patterns that have a prefix or swap the
+ * language and remainder components, since we gave developers enough rope to do such
+ * things if they futz with the pattern data.  But since we don't give them a way to
+ * specify a pattern for arbitrary combinations of components, there's not much use in
+ * that.  I don't think our data includes such patterns, the only variable I know of is
+ * whether there is a space before the open paren, or not.  Oh, and zh uses different
+ * chars than the standard open/close paren (which ja and ko use, btw).
+ */
+/*
+ * Builds the full display name of `locale`, localized for `displayLocale`,
+ * directly in dest.
+ *
+ * The "pattern" and "separator" strings of the display locale's
+ * "localeDisplayPattern" table are used when available, otherwise the
+ * built-in defaults "{0} ({1})" and "{0}, {1}". One placeholder receives the
+ * display language; the other receives the script, country, variant and
+ * keyword display strings joined by the separator text.
+ *
+ * Returns the length of the formatted name via u_terminateUChars(); the
+ * length is tracked even for output that does not fit into dest.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for a bad dest/destCapacity pair or when the
+ * pattern/separator lacks the expected {0}/{1} placeholders.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayName(const char *locale,
+                    const char *displayLocale,
+                    UChar *dest, int32_t destCapacity,
+                    UErrorCode *pErrorCode)
+{
+    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
+    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
+    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
+    static const int32_t subLen = 3;
+    static const UChar defaultPattern[10] = {
+        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
+    }; /* {0} ({1}) */
+    static const int32_t defaultPatLen = 9;
+    static const int32_t defaultSub0Pos = 0;
+    static const int32_t defaultSub1Pos = 5;
+
+    int32_t length; /* of formatted result */
+
+    const UChar *separator;
+    int32_t sepLen = 0;
+    const UChar *pattern;
+    int32_t patLen = 0;
+    int32_t sub0Pos, sub1Pos;
+    
+    UChar formatOpenParen         = 0x0028; // (
+    UChar formatReplaceOpenParen  = 0x005B; // [
+    UChar formatCloseParen        = 0x0029; // )
+    UChar formatReplaceCloseParen = 0x005D; // ]
+
+    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
+                              a lang component in the locale */
+    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
+                              any other component in the locale */
+    UBool retry = FALSE; /* set true if we need to retry, see below */
+
+    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    {
+        /* Local status only: lookup failures are not reported to the caller;
+           sepLen/patLen are checked below to fall back to the defaults. */
+        UErrorCode status = U_ZERO_ERROR;
+
+        icu::LocalUResourceBundlePointer locbundle(
+                ures_open(U_ICUDATA_LANG, displayLocale, &status));
+        icu::LocalUResourceBundlePointer dspbundle(
+                ures_getByKeyWithFallback(locbundle.getAlias(), _kLocaleDisplayPattern, NULL, &status));
+
+        separator=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kSeparator, &sepLen, &status);
+        pattern=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kPattern, &patLen, &status);
+    }
+
+    /* If we couldn't find any data, then use the defaults */
+    if(sepLen == 0) {
+       separator = defaultSeparator;
+    }
+    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
+     * here since we are trying to build the display string in place in the dest buffer,
+     * and to handle it as a pattern would entail having separate storage for the
+     * substrings that need to be combined (the first of which may be the result of
+     * previous such combinations). So for now we continue to treat the portion between
+     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
+     * that is before {0} or after {1} (no existing separator pattern has any such thing).
+     * This is similar to how pattern is handled below.
+     */
+    {
+        UChar *p0=u_strstr(separator, sub0);
+        UChar *p1=u_strstr(separator, sub1);
+        if (p0==NULL || p1==NULL || p1<p0) {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        /* from here on, separator/sepLen describe only the text between {0} and {1} */
+        separator = (const UChar *)p0 + subLen;
+        sepLen = static_cast<int32_t>(p1 - separator);
+    }
+
+    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
+        pattern=defaultPattern;
+        patLen=defaultPatLen;
+        sub0Pos=defaultSub0Pos;
+        sub1Pos=defaultSub1Pos;
+        // use default formatOpenParen etc. set above
+    } else { /* non-default pattern */
+        UChar *p0=u_strstr(pattern, sub0);
+        UChar *p1=u_strstr(pattern, sub1);
+        if (p0==NULL || p1==NULL) {
+            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        sub0Pos = static_cast<int32_t>(p0-pattern);
+        sub1Pos = static_cast<int32_t>(p1-pattern);
+        if (sub1Pos < sub0Pos) { /* a very odd pattern */
+            /* keep sub0Pos as the earlier position; langi records that the
+               language now belongs to the second substitution */
+            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
+            langi=1;
+        }
+        if (u_strchr(pattern, 0xFF08) != NULL) {
+            formatOpenParen         = 0xFF08; // fullwidth (
+            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
+            formatCloseParen        = 0xFF09; // fullwidth )
+            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
+        }
+    }
+
+    /* We loop here because there is one case in which after the first pass we could need to
+     * reextract the data.  If there's initial padding before the first element, we put in
+     * the padding and then write that element.  If it turns out there's no second element,
+     * we didn't need the padding.  If we do need the data (no preflight), and the first element
+     * would have fit but for the padding, we need to reextract.  In this case (only) we
+     * adjust the parameters so padding is not added, and repeat.
+     */
+    do {
+        UChar* p=dest;
+        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
+        int32_t langLen=0; /* length of language substitution */
+        int32_t langPos=0; /* position in output of language substitution */
+        int32_t restLen=0; /* length of 'everything else' substitution */
+        int32_t restPos=0; /* position in output of 'everything else' substitution */
+        icu::LocalUEnumerationPointer kenum; /* keyword enumeration */
+
+        /* prefix of pattern, extremely likely to be empty */
+        if(sub0Pos) {
+            if(destCapacity >= sub0Pos) {
+                while (patPos < sub0Pos) {
+                    *p++ = pattern[patPos++];
+                }
+            } else {
+                patPos=sub0Pos;
+            }
+            length=sub0Pos;
+        } else {
+            length=0;
+        }
+
+        /* subi only advances once a substitution is complete (subdone); the
+           "rest" substitution takes several iterations, one per component,
+           with resti selecting script, country, variant, then keywords. */
+        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
+            UBool subdone = FALSE; /* set true when ready to move to next substitution */
+
+            /* prep p and cap for calls to get display components, pin cap to 0 since
+               they complain if cap is negative */
+            int32_t cap=destCapacity-length;
+            if (cap <= 0) {
+                cap=0;
+            } else {
+                p=dest+length;
+            }
+
+            if (subi == langi) { /* {0}*/
+                if(haveLang) {
+                    langPos=length;
+                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
+                    length+=langLen;
+                    haveLang=langLen>0;
+                }
+                subdone=TRUE;
+            } else { /* {1} */
+                if(!haveRest) {
+                    subdone=TRUE;
+                } else {
+                    int32_t len; /* length of component (plus other stuff) we just fetched */
+                    switch(resti++) {
+                        case 0:
+                            restPos=length;
+                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
+                            break;
+                        case 1:
+                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
+                            break;
+                        case 2:
+                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
+                            break;
+                        case 3:
+                            /* first keyword pass: open the enumeration, then handle
+                               this and every later pass in the default case */
+                            kenum.adoptInstead(uloc_openKeywords(locale, pErrorCode));
+                            U_FALLTHROUGH;
+                        default: {
+                            const char* kw=uenum_next(kenum.getAlias(), &len, pErrorCode);
+                            if (kw == NULL) {
+                                len=0; /* mark that we didn't add a component */
+                                subdone=TRUE;
+                            } else {
+                                /* incorporating this behavior into the loop made it even more complex,
+                                   so just special case it here */
+                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
+                                if(len) {
+                                    if(len < cap) {
+                                        p[len]=0x3d; /* '=', assume we'll need it */
+                                    }
+                                    len+=1;
+
+                                    /* adjust for call to get keyword */
+                                    cap-=len;
+                                    if(cap <= 0) {
+                                        cap=0;
+                                    } else {
+                                        p+=len;
+                                    }
+                                }
+                                /* reset for call below */
+                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
+                                    *pErrorCode=U_ZERO_ERROR;
+                                }
+                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
+                                                                           p, cap, pErrorCode);
+                                if(len) {
+                                    if(vlen==0) {
+                                        --len; /* remove unneeded '=' */
+                                    }
+                                    /* restore cap and p to what they were at start */
+                                    cap=destCapacity-length;
+                                    if(cap <= 0) {
+                                        cap=0;
+                                    } else {
+                                        p=dest+length;
+                                    }
+                                }
+                                len+=vlen; /* total we added for key + '=' + value */
+                            }
+                        } break;
+                    } /* end switch */
+
+                    if (len>0) {
+                        /* we added a component, so add separator and write it if there's room. */
+                        if(len+sepLen<=cap) {
+                            /* swap the pattern's parenthesis characters found in the
+                               component for their bracket replacements -- presumably
+                               so they cannot be confused with the pattern's own */
+                            const UChar * plimit = p + len;
+                            for (; p < plimit; p++) {
+                                if (*p == formatOpenParen) {
+                                    *p = formatReplaceOpenParen;
+                                } else if (*p == formatCloseParen) {
+                                    *p = formatReplaceCloseParen;
+                                }
+                            }
+                            for(int32_t i=0;i<sepLen;++i) {
+                                *p++=separator[i];
+                            }
+                        }
+                        /* length is advanced even when nothing was written, so the
+                           total keeps counting output that did not fit */
+                        length+=len+sepLen;
+                    } else if(subdone) {
+                        /* remove separator if we added it */
+                        if (length!=restPos) {
+                            length-=sepLen;
+                        }
+                        restLen=length-restPos;
+                        haveRest=restLen>0;
+                    }
+                }
+            }
+
+            /* overflow from a component call is not fatal here; the final
+               u_terminateUChars() call sets the status for the whole result */
+            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
+                *pErrorCode=U_ZERO_ERROR;
+            }
+
+            if(subdone) {
+                if(haveLang && haveRest) {
+                    /* append internal portion of pattern, the first time,
+                       or last portion of pattern the second time */
+                    int32_t padLen;
+                    patPos+=subLen;
+                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
+                    if(length+padLen < destCapacity) {
+                        p=dest+length;
+                        for(int32_t i=0;i<padLen;++i) {
+                            *p++=pattern[patPos++];
+                        }
+                    } else {
+                        patPos+=padLen;
+                    }
+                    length+=padLen;
+                } else if(subi==0) {
+                    /* don't have first component, reset for second component */
+                    sub0Pos=0;
+                    length=0;
+                } else if(length>0) {
+                    /* true length is the length of just the component we got. */
+                    length=haveLang?langLen:restLen;
+                    if(dest && sub0Pos!=0) {
+                        if (sub0Pos+length<=destCapacity) {
+                            /* first component not at start of result,
+                               but we have full component in buffer. */
+                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
+                        } else {
+                            /* would have fit, but didn't because of pattern prefix. */
+                            sub0Pos=0; /* stops initial padding (and a second retry,
+                                          so we won't end up here again) */
+                            retry=TRUE;
+                        }
+                    }
+                }
+
+                ++subi; /* move on to next substitution */
+            }
+        }
+    } while(retry);
+
+    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
+}
+
+/*
+ * Display name of a locale keyword (e.g. the key part of "key=value"),
+ * looked up in the "Keys" table of the language data for displayLocale.
+ * When no resource is found, _getStringOrCopyKey() substitutes the keyword
+ * text itself.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+                       const char* displayLocale,
+                       UChar* dest,
+                       int32_t destCapacity,
+                       UErrorCode* status){
+    /* argument checking */
+    if(status==NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+
+    const UBool badBuffer = destCapacity<0 || (dest==NULL && destCapacity>0);
+    if(badBuffer) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* The keyword is passed twice: once as the item key inside "Keys"
+       (with no sub-table), and once as the substitute text. */
+    const int32_t nameLength = _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
+                                                   _kKeys, NULL,
+                                                   keyword,
+                                                   keyword,
+                                                   dest, destCapacity,
+                                                   status);
+    return nameLength;
+}
+
+
+/* index of the display name inside a currency's resource array */
+#define UCURRENCY_DISPLAY_NAME_INDEX 1
+
+/*
+ * Display name of the value that `keyword` has in `locale`, localized for
+ * displayLocale.
+ *
+ * The raw value is read with uloc_getKeywordValue(). For the "currency"
+ * keyword (compared case-insensitively) the name comes from the
+ * "Currencies" table of the currency data; if that resource is missing, the
+ * raw value is copied and U_USING_DEFAULT_WARNING is set. In this branch,
+ * output that does not fit is not copied at all: U_BUFFER_OVERFLOW_ERROR is
+ * set and the required length is returned. All other keywords are looked up
+ * as Types/<keyword>/<value> via _getStringOrCopyKey().
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue(   const char* locale,
+                               const char* keyword,
+                               const char* displayLocale,
+                               UChar* dest,
+                               int32_t destCapacity,
+                               UErrorCode* status){
+    /* argument checking */
+    if(status==NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
+        *status=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* get the keyword value */
+    char rawValue[ULOC_FULLNAME_CAPACITY*4];
+    rawValue[0]=0;
+    const int32_t rawValueLen =
+        uloc_getKeywordValue(locale, keyword, rawValue, ULOC_FULLNAME_CAPACITY*4, status);
+    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
+        /* a value that cannot be NUL-terminated in rawValue is treated as an overflow */
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    /* every keyword except "currency" uses the generic Types lookup */
+    if(uprv_stricmp(keyword, _kCurrency)!=0) {
+        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
+                                   _kTypes, keyword,
+                                   rawValue,
+                                   rawValue,
+                                   dest, destCapacity,
+                                   status);
+    }
+
+    /*
+     * the keyword is equal to currency .. to get the display name
+     * we need to do the fallback ourselves
+     */
+    icu::LocalUResourceBundlePointer bundle(
+            ures_open(U_ICUDATA_CURR, displayLocale, status));
+    icu::LocalUResourceBundlePointer currencies(
+            ures_getByKey(bundle.getAlias(), _kCurrencies, NULL, status));
+    icu::LocalUResourceBundlePointer currency(
+            ures_getByKeyWithFallback(currencies.getAlias(), rawValue, NULL, status));
+
+    int32_t dispNameLen = 0;
+    const UChar *dispName =
+        ures_getStringByIndex(currency.getAlias(), UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
+
+    if(U_FAILURE(*status)) {
+        if(*status != U_MISSING_RESOURCE_ERROR) {
+            return 0;
+        }
+        /* we just want to write the value over if nothing is available */
+        *status = U_USING_DEFAULT_WARNING;
+    }
+
+    /* use the display name if we found one, otherwise the raw keyword value */
+    const UBool haveName = dispName != NULL;
+    const int32_t outLen = haveName ? dispNameLen : rawValueLen;
+    if(outLen > destCapacity) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return outLen;
+    }
+
+    if(haveName) {
+        u_memcpy(dest, dispName, outLen);
+    } else {
+        u_charsToUChars(rawValue, dest, outLen);
+    }
+    return u_terminateUChars(dest, destCapacity, outLen, status);
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/locdistance.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locdistance.cpp
@@ -0,0 +1,364 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "locdistance.h"
+#include "loclikelysubtags.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uinvchar.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Marks the final character of a subtag inside the trie.
+ * The data builder and the lookup code must agree on this bit.
+ */
+constexpr int32_t END_OF_SUBTAG = 1 << 7;
+/** Flag bit inside a distance value; the builder sets it. */
+constexpr int32_t DISTANCE_SKIP_SCRIPT = 1 << 7;
+/** Flag bit inside a distance value; trieNext() sets it. */
+constexpr int32_t DISTANCE_IS_FINAL = 1 << 8;
+/** Both distance flag bits combined, for masking. */
+constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_SKIP_SCRIPT | DISTANCE_IS_FINAL;
+
+constexpr int32_t ABOVE_THRESHOLD = 100;
+
+// Positions of the individual values inside the distances array.
+enum {
+    IX_DEF_LANG_DISTANCE = 0,
+    IX_DEF_SCRIPT_DISTANCE = 1,
+    IX_DEF_REGION_DISTANCE = 2,
+    IX_MIN_REGION_DISTANCE = 3,
+    IX_LIMIT = 4
+};
+
+// Singleton instance and the init-once guard that protects its creation.
+LocaleDistance *gLocaleDistance = nullptr;
+UInitOnce locdistance_gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup hook: drops the singleton and re-arms the init-once guard.
+UBool U_CALLCONV locdistance_cleanup() {
+    LocaleDistance *stale = gLocaleDistance;
+    gLocaleDistance = nullptr;
+    delete stale;
+    locdistance_gInitOnce.reset();
+    return TRUE;
+}
+
+}  // namespace
+
+/**
+ * Creates the LocaleDistance singleton from the distance data held by the
+ * XLikelySubtags singleton, and registers the cleanup hook.
+ *
+ * On failure, errorCode is set and gLocaleDistance stays nullptr:
+ * - any error reported while obtaining the XLikelySubtags singleton,
+ * - U_MISSING_RESOURCE_ERROR if that singleton or required distance data is absent,
+ * - U_MEMORY_ALLOCATION_ERROR if the LocaleDistance object cannot be allocated.
+ */
+void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLocaleDistance == nullptr);
+    // Hold the singleton as a pointer until it has been checked:
+    // binding a reference through a null pointer would be undefined behavior.
+    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    if (likely == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    const LocaleDistanceData &data = likely->getDistanceData();
+    if (data.distanceTrieBytes == nullptr ||
+            data.regionToPartitions == nullptr || data.partitions == nullptr ||
+            // ok if no paradigms
+            data.distances == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    gLocaleDistance = new LocaleDistance(data);
+    if (gLocaleDistance == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, locdistance_cleanup);
+}
+
+// Returns the lazily created singleton, or nullptr if errorCode already
+// indicates a failure on entry. Initialization errors are reported via errorCode.
+const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(locdistance_gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
+        return gLocaleDistance;
+    }
+    return nullptr;
+}
+
+// Wraps the offline-built distance data and derives the default demotion value.
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data) :
+        trie(data.distanceTrieBytes),
+        regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
+        paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
+        defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
+        defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
+        defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
+        minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
+    // The default demotion value is the region distance between two
+    // unrelated Englishes (en-Latn-US vs. en-Latn-GB).
+    // So, unless demotion is switched off, a desired locale that differs
+    // only by region ranks the same as a perfect match for the desired
+    // locale that follows it.
+    // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
+    LSR desiredEnUS("en", "Latn", "US");
+    LSR supportedEnGB("en", "Latn", "GB");
+    const LSR *supportedList[1] = { &supportedEnGB };
+    const int32_t indexAndDistance = getBestIndexAndDistance(
+            desiredEnUS, supportedList, 1, 50, ULOCMATCH_FAVOR_LANGUAGE);
+    // Keep only the distance stored in the low byte.
+    defaultDemotionPerDesiredLocale = indexAndDistance & 0xff;
+}
+
+// Scans supportedLSRs for the entry closest to `desired`.
+//
+// Each candidate's distance is accumulated as language + script + region;
+// a candidate is dropped as soon as its partial distance reaches `threshold`.
+// Whenever a candidate beats the threshold, the threshold is lowered to that
+// candidate's distance, so later candidates must be strictly better.
+// A candidate with distance 0 ends the search immediately.
+//
+// Returns (index << 8) | distance for the best candidate, or
+// 0xffffff00 | ABOVE_THRESHOLD if no candidate is below the threshold.
+int32_t LocaleDistance::getBestIndexAndDistance(
+        const LSR &desired,
+        const LSR **supportedLSRs, int32_t supportedLSRsLength,
+        int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
+    BytesTrie iter(trie);
+    // Look up the desired language only once for all supported LSRs.
+    // Its "distance" is either a match point value of 0, or a non-match negative value.
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t desLangDistance = trieNext(iter, desired.language, false);
+    // Save the trie state after the desired language only if it will be reused
+    // (successful lookup and more than one supported LSR).
+    uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
+    // Index of the supported LSR with the lowest distance.
+    int32_t bestIndex = -1;
+    for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
+        const LSR &supported = *supportedLSRs[slIndex];
+        // star: the language pair was not found in the trie, so default distances apply.
+        bool star = false;
+        int32_t distance = desLangDistance;
+        if (distance >= 0) {
+            U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
+            // The first iteration still has the iterator right after the desired language.
+            if (slIndex != 0) {
+                iter.resetToState64(desLangState);
+            }
+            distance = trieNext(iter, supported.language, true);
+        }
+        // Note: The data builder verifies that there are no rules with "any" (*) language and
+        // real (non *) script or region subtags.
+        // This means that if the lookup for either language fails we can use
+        // the default distances without further lookups.
+        int32_t flags;
+        if (distance >= 0) {
+            // Split the trie value into its flag bits and the plain distance.
+            flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+            distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+        } else {  // <*, *>
+            if (uprv_strcmp(desired.language, supported.language) == 0) {
+                distance = 0;
+            } else {
+                distance = defaultLanguageDistance;
+            }
+            flags = 0;
+            star = true;
+        }
+        U_ASSERT(0 <= distance && distance <= 100);
+        // We implement "favor subtag" by reducing the language subtag distance
+        // (unscientifically reducing it to a quarter of the normal value),
+        // so that the script distance is relatively more important.
+        // For example, given a default language distance of 80, we reduce it to 20,
+        // which is below the default threshold of 50, which is the default script distance.
+        if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
+            distance >>= 2;
+        }
+        if (distance >= threshold) {
+            continue;
+        }
+
+        int32_t scriptDistance;
+        // No trie lookup for the script when the language lookup fell back to
+        // defaults or the language value carried a FINAL / SKIP_SCRIPT flag.
+        if (star || flags != 0) {
+            if (uprv_strcmp(desired.script, supported.script) == 0) {
+                scriptDistance = 0;
+            } else {
+                scriptDistance = defaultScriptDistance;
+            }
+        } else {
+            scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+                    desired.script, supported.script);
+            flags = scriptDistance & DISTANCE_IS_FINAL;
+            scriptDistance &= ~DISTANCE_IS_FINAL;
+        }
+        distance += scriptDistance;
+        if (distance >= threshold) {
+            continue;
+        }
+
+        if (uprv_strcmp(desired.region, supported.region) == 0) {
+            // regionDistance = 0
+        } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
+            distance += defaultRegionDistance;
+        } else {
+            int32_t remainingThreshold = threshold - distance;
+            // Even the smallest possible region distance would not fit: skip the lookup.
+            if (minRegionDistance >= remainingThreshold) {
+                continue;
+            }
+
+            // From here on we know the regions are not equal.
+            // Map each region to zero or more partitions. (zero = one non-matching string)
+            // (Each array of single-character partition strings is encoded as one string.)
+            // If either side has more than one, then we find the maximum distance.
+            // This could be optimized by adding some more structure, but probably not worth it.
+            distance += getRegionPartitionsDistance(
+                    iter, iter.getState64(),
+                    partitionsForRegion(desired),
+                    partitionsForRegion(supported),
+                    remainingThreshold);
+        }
+        if (distance < threshold) {
+            // A perfect match cannot be improved upon.
+            if (distance == 0) {
+                return slIndex << 8;
+            }
+            bestIndex = slIndex;
+            // Later candidates must be strictly closer than this one.
+            threshold = distance;
+        }
+    }
+    return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+}
+
+// Looks up the distance for a (desired, supported) script pair, continuing
+// from the iterator's current position. If the pair is not in the trie,
+// restarts from startState and uses the <*, *> entry instead
+// (0 for identical scripts, otherwise the trie value at '*').
+// The fallback result has DISTANCE_IS_FINAL ORed in when the '*' value is final.
+int32_t LocaleDistance::getDesSuppScriptDistance(
+        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t pairDistance = trieNext(iter, desired, false);
+    if (pairDistance >= 0) {
+        pairDistance = trieNext(iter, supported, true);
+    }
+    if (pairDistance >= 0) {
+        return pairDistance;  // explicit rule for this script pair
+    }
+
+    // No explicit rule: fall back to <*, *>.
+    const UStringTrieResult starResult = iter.resetToState64(startState).next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(starResult));
+    int32_t fallbackDistance = 0;  // same script
+    if (uprv_strcmp(desired, supported) != 0) {
+        fallbackDistance = iter.getValue();
+        U_ASSERT(fallbackDistance >= 0);
+    }
+    return starResult == USTRINGTRIE_FINAL_VALUE ?
+            (fallbackDistance | DISTANCE_IS_FINAL) : fallbackDistance;
+}
+
+// Computes the region distance between two partition strings.
+//
+// desiredPartitions and supportedPartitions are NUL-terminated strings in which
+// every character is one partition; both must be non-empty (asserted).
+// iter must be positioned at startState, the state after the script lookup.
+//
+// For a single desired and a single supported partition, the pair is looked up
+// directly, falling back to the <*, *> value. Otherwise every
+// (desired, supported) pair is examined and the maximum distance is returned;
+// the first pair distance that is >= threshold is returned immediately.
+int32_t LocaleDistance::getRegionPartitionsDistance(
+        BytesTrie &iter, uint64_t startState,
+        const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
+    char desired = *desiredPartitions++;
+    char supported = *supportedPartitions++;
+    U_ASSERT(desired != 0 && supported != 0);
+    // See if we have single desired/supported partitions, from NUL-terminated
+    // partition strings without explicit length.
+    bool suppLengthGt1 = *supportedPartitions != 0;  // gt1: more than 1 character
+    // equivalent to: if (desLength == 1 && suppLength == 1)
+    if (*desiredPartitions == 0 && !suppLengthGt1) {
+        // Fastpath for single desired/supported partitions.
+        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+        if (USTRINGTRIE_HAS_NEXT(result)) {
+            result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+            if (USTRINGTRIE_HAS_VALUE(result)) {
+                return iter.getValue();
+            }
+        }
+        return getFallbackRegionDistance(iter, startState);
+    }
+
+    const char *supportedStart = supportedPartitions - 1;  // for restart of inner loop
+    // Running maximum over all pair distances seen so far.
+    int32_t regionDistance = 0;
+    // Fall back to * only once, not for each pair of partition strings.
+    bool star = false;
+    for (;;) {
+        // Look up each desired-partition string only once,
+        // not for each (desired, supported) pair.
+        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+        if (USTRINGTRIE_HAS_NEXT(result)) {
+            // The state is only needed for resetting when there are further supported partitions.
+            uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
+            for (;;) {
+                result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+                int32_t d;
+                if (USTRINGTRIE_HAS_VALUE(result)) {
+                    d = iter.getValue();
+                } else if (star) {
+                    // The fallback distance has already been folded into regionDistance.
+                    d = 0;
+                } else {
+                    d = getFallbackRegionDistance(iter, startState);
+                    star = true;
+                }
+                if (d >= threshold) {
+                    return d;
+                } else if (regionDistance < d) {
+                    regionDistance = d;
+                }
+                if ((supported = *supportedPartitions++) != 0) {
+                    iter.resetToState64(desState);
+                } else {
+                    break;
+                }
+            }
+        } else if (!star) {
+            // The desired partition has no entry at all: use the <*, *> value once.
+            int32_t d = getFallbackRegionDistance(iter, startState);
+            if (d >= threshold) {
+                return d;
+            } else if (regionDistance < d) {
+                regionDistance = d;
+            }
+            star = true;
+        }
+        if ((desired = *desiredPartitions++) != 0) {
+            // Next desired partition: rewind the trie and the supported-partition cursor.
+            iter.resetToState64(startState);
+            supportedPartitions = supportedStart;
+            supported = *supportedPartitions++;
+        } else {
+            break;
+        }
+    }
+    return regionDistance;
+}
+
+// Resets iter to startState, steps over '*' and returns the trie value found
+// there, i.e. the <*, *> region distance. The debug build asserts that a value
+// exists at that position and that it is non-negative.
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
+    // The lookup result is only captured in debug builds, where U_ASSERT uses it.
+#if U_DEBUG
+    UStringTrieResult result =
+#endif
+    iter.resetToState64(startState).next(u'*');  // <*, *>
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    int32_t distance = iter.getValue();
+    U_ASSERT(distance >= 0);
+    return distance;
+}
+
+// Feeds the subtag s into the trie, with END_OF_SUBTAG set on its last character.
+//
+// Returns -1 if s is empty or the trie does not match.
+// With wantValue: returns the trie value after the subtag, with
+// DISTANCE_IS_FINAL ORed in when that value is final.
+// Without wantValue: returns 0 if the trie can continue after the subtag.
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
+    uint8_t current = *s;
+    if (current == 0) {
+        return -1;  // no empty subtags in the distance data
+    }
+    // Consume every character that is followed by another one.
+    uint8_t following;
+    while ((following = *++s) != 0) {
+        // EBCDIC: If the character is not invariant, the converted byte is 0
+        // and will simply not match anything, which is harmless.
+        const uint8_t ascii = uprv_invCharToAscii(current);
+        if (!USTRINGTRIE_HAS_NEXT(iter.next(ascii))) {
+            return -1;
+        }
+        current = following;
+    }
+
+    // last character of this subtag
+    const uint8_t last = uprv_invCharToAscii(current);
+    const UStringTrieResult result = iter.next(last | END_OF_SUBTAG);
+    if (!wantValue) {
+        return USTRINGTRIE_HAS_NEXT(result) ? 0 : -1;
+    }
+    if (!USTRINGTRIE_HAS_VALUE(result)) {
+        return -1;
+    }
+    int32_t value = iter.getValue();
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        value |= DISTANCE_IS_FINAL;
+    }
+    return value;
+}
+
+// Reports whether lsr equals one of the paradigm LSRs.
+UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
+    // The list is very short (length 6 as of 2019), so a linear scan is enough.
+    // Switch to a hash set if it ever becomes long.
+    U_ASSERT(paradigmLSRsLength <= 15);
+    int32_t index = 0;
+    while (index < paradigmLSRsLength) {
+        if (lsr == paradigmLSRs[index]) {
+            return true;
+        }
+        ++index;
+    }
+    return false;
+}
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/locdistance.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/locdistance.h
@@ -0,0 +1,109 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Offline-built data for LocaleMatcher.
+ * Wraps the locale distance data (distance trie, region partitions,
+ * paradigm LSRs and default distances) and finds, for a desired LSR,
+ * the closest one among a list of supported LSRs.
+ */
+class LocaleDistance final : public UMemory {
+public:
+    /**
+     * Returns the shared instance, creating it on first use.
+     * Returns nullptr if errorCode indicates a failure on entry.
+     */
+    static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+    /**
+     * Finds the supported LSR with the smallest distance from the desired one.
+     * Equivalent LSR subtags must be normalized into a canonical form.
+     *
+     * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+     * (negative if none has a distance below the threshold),
+     * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+     */
+    int32_t getBestIndexAndDistance(const LSR &desired,
+                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
+                                    int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
+
+    /** Returns the number of paradigm LSRs. */
+    int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
+
+    /** Returns true if lsr is one of the paradigm LSRs. */
+    UBool isParadigmLSR(const LSR &lsr) const;
+
+    /** Returns the script distance used when there is no specific rule for a script pair. */
+    int32_t getDefaultScriptDistance() const {
+        return defaultScriptDistance;
+    }
+
+    /** Returns the default demotion value, computed once in the constructor. */
+    int32_t getDefaultDemotionPerDesiredLocale() const {
+        return defaultDemotionPerDesiredLocale;
+    }
+
+private:
+    // Instances are created only by initLocaleDistance() and are not copyable.
+    LocaleDistance(const LocaleDistanceData &data);
+    LocaleDistance(const LocaleDistance &other) = delete;
+    LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+    // Init-once callback that builds the singleton.
+    static void initLocaleDistance(UErrorCode &errorCode);
+
+    // Script-pair distance lookup with fallback to the <*, *> entry at startState.
+    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+                                            const char *desired, const char *supported);
+
+    // Region distance between two NUL-terminated partition strings.
+    static int32_t getRegionPartitionsDistance(
+        BytesTrie &iter, uint64_t startState,
+        const char *desiredPartitions, const char *supportedPartitions,
+        int32_t threshold);
+
+    // Region distance stored at the <*, *> entry reachable from startState.
+    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+    // Advances iter over one subtag; returns a value/continuation indicator or -1.
+    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+    // Two-step lookup: region index -> partitions index -> partitions string.
+    const char *partitionsForRegion(const LSR &lsr) const {
+        // ill-formed region -> one non-matching string
+        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+        return partitionArrays[pIndex];
+    }
+
+    int32_t getDefaultRegionDistance() const {
+        return defaultRegionDistance;
+    }
+
+    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+    // There is also a trie value for each subsequence of whole subtags.
+    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+    BytesTrie trie;
+
+    /**
+     * Maps each region to zero or more single-character partitions.
+     */
+    const uint8_t *regionToPartitionsIndex;
+    const char **partitionArrays;
+
+    /**
+     * Used to get the paradigm region for a cluster, if there is one.
+     */
+    const LSR *paradigmLSRs;
+    int32_t paradigmLSRsLength;
+
+    // Default and minimum distances, read from the data's distances array.
+    int32_t defaultLanguageDistance;
+    int32_t defaultScriptDistance;
+    int32_t defaultRegionDistance;
+    int32_t minRegionDistance;
+    // Derived in the constructor from the en-Latn-US vs. en-Latn-GB distance.
+    int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCDISTANCE_H__
--- a/external/duckdb/extension/icu/third_party/icu/common/locdspnm.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locdspnm.cpp
--- a/external/duckdb/extension/icu/third_party/icu/common/locid.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locid.cpp
--- a/external/duckdb/extension/icu/third_party/icu/common/loclikely.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/loclikely.cpp
--- a/external/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp
@@ -0,0 +1,638 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uresdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// Prefix characters, one per pseudo-locale kind.
+constexpr char PSEUDO_ACCENTS_PREFIX{'\''};  // -XA, -PSACCENT
+constexpr char PSEUDO_BIDI_PREFIX{'+'};  // -XB, -PSBIDI
+constexpr char PSEUDO_CRACKED_PREFIX{','};  // -XC, -PSCRACK
+
+}  // namespace
+
+/**
+ * Stores NUL-terminated strings with duplicate elimination.
+ * Checks for unique UTF-16 string pointers and converts to invariant characters.
+ *
+ * Usage: add() all strings, then freeze(), then get() them by number.
+ * The object owns a hash table and a heap-allocated CharString,
+ * so it is deliberately not copyable.
+ */
+class UniqueCharStrings {
+public:
+    /**
+     * Initializes the hash table and allocates the string storage.
+     * Sets errorCode on failure; U_MEMORY_ALLOCATION_ERROR if the storage cannot be allocated.
+     */
+    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
+        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        strings = new CharString();
+        if (strings == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    ~UniqueCharStrings() {
+        uhash_close(&map);
+        delete strings;
+    }
+
+    // A copy would close the same hash table and delete the same CharString twice.
+    UniqueCharStrings(const UniqueCharStrings &other) = delete;
+    UniqueCharStrings &operator=(const UniqueCharStrings &other) = delete;
+
+    /** Returns/orphans the CharString that contains all strings. */
+    CharString *orphanCharStrings() {
+        CharString *result = strings;
+        strings = nullptr;
+        return result;
+    }
+
+    /**
+     * Adds a string and returns a unique number for it.
+     * Returns 0 and leaves the object unchanged if errorCode indicates a failure on entry;
+     * returns 0 with U_NO_WRITE_PERMISSION if the object is already frozen.
+     */
+    int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return 0; }
+        if (isFrozen) {
+            errorCode = U_NO_WRITE_PERMISSION;
+            return 0;
+        }
+        // The string points into the resource bundle.
+        const char16_t *p = s.getBuffer();
+        // 0 means "not found": a real string never gets number 0
+        // because a NUL is appended before each string's start is recorded.
+        int32_t oldIndex = uhash_geti(&map, p);
+        if (oldIndex != 0) {  // found duplicate
+            return oldIndex;
+        }
+        // Explicit NUL terminator for the previous string.
+        // The strings object is also terminated with one implicit NUL.
+        strings->append(0, errorCode);
+        // The unique number is the offset of the string inside the storage.
+        int32_t newIndex = strings->length();
+        strings->appendInvariantChars(s, errorCode);
+        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
+        return newIndex;
+    }
+
+    /** Ends the add() phase; get() returns strings only after this call. */
+    void freeze() { isFrozen = true; }
+
+    /**
+     * Returns a string pointer for its unique number, if this object is frozen.
+     * Otherwise nullptr.
+     */
+    const char *get(int32_t i) const {
+        U_ASSERT(isFrozen);
+        return isFrozen && i > 0 ? strings->data() + i : nullptr;
+    }
+
+private:
+    // Maps each added string to its unique number.
+    UHashtable map;
+    // All strings, each preceded by a NUL; nullptr after orphanCharStrings().
+    CharString *strings;
+    bool isFrozen = false;
+};
+
+// Move constructor: takes over all pointers and lengths from data.
+// Only partitions and paradigms are cleared in the source, because those are
+// the two members that ~LocaleDistanceData() releases; clearing them keeps
+// the moved-from object from freeing what this object now owns.
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :
+        distanceTrieBytes(data.distanceTrieBytes),
+        regionToPartitions(data.regionToPartitions),
+        partitions(data.partitions),
+        paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
+        distances(data.distances) {
+    data.partitions = nullptr;
+    data.paradigms = nullptr;
+}
+
+// Releases the two owned arrays: the paradigm LSRs and the partitions pointer table.
+LocaleDistanceData::~LocaleDistanceData() {
+    delete[] paradigms;
+    uprv_free(partitions);
+}
+
+// TODO(ICU-20777): Rename to just LikelySubtagsData.
+// Loader for the "langInfo" resource bundle.
+// load() reads the "likely" table and, if present, the "match" table, and
+// leaves the results in the members below, from which XLikelySubtags takes them over.
+struct XLikelySubtagsData {
+    // Opened by load(); closed by the destructor unless ownership has been taken over.
+    UResourceBundle *langInfoBundle = nullptr;
+    // De-duplicated invariant-character copies of all strings read from the bundle.
+    UniqueCharStrings strings;
+    CharStringMap languageAliases;
+    CharStringMap regionAliases;
+    const uint8_t *trieBytes = nullptr;
+    // Array of lsrsLength LSRs built from the "lsrs" string triples.
+    LSR *lsrs = nullptr;
+    int32_t lsrsLength = 0;
+
+    LocaleDistanceData distanceData;
+
+    XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
+
+    ~XLikelySubtagsData() {
+        ures_close(langInfoBundle);
+        delete[] lsrs;
+    }
+
+    // Reads the likely-subtags data and, if available, the distance/matcher data.
+    // Sets errorCode and returns early on any failure:
+    // U_INVALID_FORMAT_ERROR for malformed array lengths,
+    // U_MISSING_RESOURCE_ERROR for missing "lsrs" entries or a missing "trie",
+    // U_MEMORY_ALLOCATION_ERROR for failed allocations.
+    // A missing "match" table is not an error.
+    void load(UErrorCode &errorCode) {
+        langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        StackUResourceBundle stackTempBundle;
+        ResourceDataValue value;
+        ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
+                                  value, errorCode);
+        ResourceTable likelyTable = value.getTable(errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+
+        // Read all strings in the resource bundle and convert them to invariant char *.
+        LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
+        int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+        if (!readStrings(likelyTable, "languageAliases", value,
+                         languageIndexes, languagesLength, errorCode) ||
+                !readStrings(likelyTable, "regionAliases", value,
+                             regionIndexes, regionsLength, errorCode) ||
+                !readStrings(likelyTable, "lsrs", value,
+                             lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+            return;
+        }
+        // Aliases come in (alias, canonical) pairs; LSRs in (language, script, region) triples.
+        if ((languagesLength & 1) != 0 ||
+                (regionsLength & 1) != 0 ||
+                (lsrSubtagsLength % 3) != 0) {
+            errorCode = U_INVALID_FORMAT_ERROR;
+            return;
+        }
+        if (lsrSubtagsLength == 0) {
+            errorCode = U_MISSING_RESOURCE_ERROR;
+            return;
+        }
+
+        if (!likelyTable.findValue("trie", value)) {
+            errorCode = U_MISSING_RESOURCE_ERROR;
+            return;
+        }
+        int32_t length;
+        trieBytes = value.getBinary(length, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+
+        // Also read distance/matcher data if available,
+        // to open & keep only one resource bundle pointer
+        // and to use one single UniqueCharStrings.
+        // A separate error code lets a missing "match" table be tolerated below.
+        UErrorCode matchErrorCode = U_ZERO_ERROR;
+        ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
+                                  value, matchErrorCode);
+        LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+        int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+        if (U_SUCCESS(matchErrorCode)) {
+            ResourceTable matchTable = value.getTable(errorCode);
+            if (U_FAILURE(errorCode)) { return; }
+
+            if (matchTable.findValue("trie", value)) {
+                distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+            }
+
+            if (matchTable.findValue("regionToPartitions", value)) {
+                distanceData.regionToPartitions = value.getBinary(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+                // The table is indexed by region index, so it must cover all of them.
+                if (length < LSR::REGION_INDEX_LIMIT) {
+                    errorCode = U_INVALID_FORMAT_ERROR;
+                    return;
+                }
+            }
+
+            if (!readStrings(matchTable, "partitions", value,
+                             partitionIndexes, partitionsLength, errorCode) ||
+                    !readStrings(matchTable, "paradigms", value,
+                                 paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+                return;
+            }
+            if ((paradigmSubtagsLength % 3) != 0) {
+                errorCode = U_INVALID_FORMAT_ERROR;
+                return;
+            }
+
+            if (matchTable.findValue("distances", value)) {
+                distanceData.distances = value.getIntVector(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+                if (length < 4) {  // LocaleDistance IX_LIMIT
+                    errorCode = U_INVALID_FORMAT_ERROR;
+                    return;
+                }
+            }
+        } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+            // ok for likely subtags
+        } else {  // error other than missing resource
+            errorCode = matchErrorCode;
+            return;
+        }
+
+        // Fetch & store invariant-character versions of strings
+        // only after we have collected and de-duplicated all of them.
+        strings.freeze();
+
+        languageAliases = CharStringMap(languagesLength / 2, errorCode);
+        for (int32_t i = 0; i < languagesLength; i += 2) {
+            languageAliases.put(strings.get(languageIndexes[i]),
+                                strings.get(languageIndexes[i + 1]), errorCode);
+        }
+
+        regionAliases = CharStringMap(regionsLength / 2, errorCode);
+        for (int32_t i = 0; i < regionsLength; i += 2) {
+            regionAliases.put(strings.get(regionIndexes[i]),
+                              strings.get(regionIndexes[i + 1]), errorCode);
+        }
+        if (U_FAILURE(errorCode)) { return; }
+
+        lsrsLength = lsrSubtagsLength / 3;
+        lsrs = new LSR[lsrsLength];
+        if (lsrs == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // i walks the subtag strings in steps of three, j the resulting LSRs.
+        for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
+            lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+                          strings.get(lsrSubtagIndexes[i + 1]),
+                          strings.get(lsrSubtagIndexes[i + 2]));
+        }
+
+        if (partitionsLength > 0) {
+            // Allocated with uprv_malloc() to match uprv_free() in ~LocaleDistanceData().
+            distanceData.partitions = static_cast<const char **>(
+                uprv_malloc(partitionsLength * sizeof(const char *)));
+            if (distanceData.partitions == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            for (int32_t i = 0; i < partitionsLength; ++i) {
+                distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+            }
+        }
+
+        if (paradigmSubtagsLength > 0) {
+            distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+            LSR *paradigms = new LSR[distanceData.paradigmsLength];
+            if (paradigms == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+                paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+                                   strings.get(paradigmSubtagIndexes[i + 1]),
+                                   strings.get(paradigmSubtagIndexes[i + 2]));
+            }
+            distanceData.paradigms = paradigms;
+        }
+    }
+
+private:
+    // Reads the string array stored under key in table, adds every string to
+    // `strings`, and stores the resulting unique numbers in indexes and their
+    // count in length.
+    // Returns true on success, including when the key is absent (length is then
+    // left as the caller set it) or the array is empty.
+    // Returns false with errorCode set on failure.
+    // value is used as scratch space and is overwritten.
+    bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
+                     LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
+        if (table.findValue(key, value)) {
+            ResourceArray stringArray = value.getArray(errorCode);
+            if (U_FAILURE(errorCode)) { return false; }
+            length = stringArray.getSize();
+            if (length == 0) { return true; }
+            int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
+            if (rawIndexes == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return false;
+            }
+            for (int i = 0; i < length; ++i) {
+                stringArray.getValue(i, value);  // returns TRUE because i < length
+                rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
+                if (U_FAILURE(errorCode)) { return false; }
+            }
+        }
+        return true;
+    }
+};
+
+namespace {
+
+// Singleton instance and the init-once guard that protects its creation.
+XLikelySubtags *gLikelySubtags = nullptr;
+UInitOnce loclikelysubtags_gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup hook: drops the singleton and re-arms the init-once guard.
+UBool U_CALLCONV loclikelysubtags_cleanup() {
+    XLikelySubtags *stale = gLikelySubtags;
+    gLikelySubtags = nullptr;
+    delete stale;
+    loclikelysubtags_gInitOnce.reset();
+    return TRUE;
+}
+
+}  // namespace
+
+// Loads the resource data and builds the XLikelySubtags singleton from it.
+// On failure errorCode is set and gLikelySubtags stays nullptr.
+void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLikelySubtags == nullptr);
+    XLikelySubtagsData data(errorCode);
+    data.load(errorCode);
+    if (U_SUCCESS(errorCode)) {
+        gLikelySubtags = new XLikelySubtags(data);
+        if (gLikelySubtags != nullptr) {
+            ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, loclikelysubtags_cleanup);
+        } else {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Returns the lazily created singleton, or nullptr if errorCode already
+// indicates a failure on entry. Initialization errors are reported via errorCode.
+const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(loclikelysubtags_gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+        return gLikelySubtags;
+    }
+    return nullptr;
+}
+
+// Takes over the loaded data and precomputes trie states for fast lookups.
+// Ownership of the resource bundle, the string storage and the LSR array
+// moves from data to this object; data's pointers are cleared so that its
+// destructor does not release them.
+XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
+        langInfoBundle(data.langInfoBundle),
+        strings(data.strings.orphanCharStrings()),
+        languageAliases(std::move(data.languageAliases)),
+        regionAliases(std::move(data.regionAliases)),
+        trie(data.trieBytes),
+        lsrs(data.lsrs),
+#if U_DEBUG
+        lsrsLength(data.lsrsLength),
+#endif
+        distanceData(std::move(data.distanceData)) {
+    data.langInfoBundle = nullptr;
+    data.lsrs = nullptr;
+
+    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+    UStringTrieResult result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndZzzzState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    defaultLsrIndex = trie.getValue();
+    trie.reset();
+
+    // Cache the trie state after each possible first letter of a language.
+    for (char16_t c = u'a'; c <= u'z'; ++c) {
+        result = trie.next(c);
+        // Write every slot, so that a letter without a cached state never
+        // exposes whatever the array happened to contain.
+        // NOTE(review): assumes 0 means "no cached state" to the lookup code,
+        // which is not visible here -- confirm.
+        trieFirstLetterStates[c - u'a'] =
+                result == USTRINGTRIE_NO_VALUE ? trie.getState64() : 0;
+        trie.reset();
+    }
+}
+
+// Releases what the constructor took over from XLikelySubtagsData:
+// closes the resource bundle, deletes the strings object and the LSR array.
+XLikelySubtags::~XLikelySubtags() {
+    ures_close(langInfoBundle);
+    delete strings;
+    delete[] lsrs;
+}
+
+/**
+ * Produces the maximized language/script/region triple for a Locale.
+ *
+ * @param locale    the locale to maximize
+ * @param errorCode in/out error code, forwarded to makeMaximizedLsr()
+ * @return for a name starting with "@x=" an LSR holding the whole name as its
+ *         language with empty script and region; otherwise the result of
+ *         makeMaximizedLsr() for the locale's subtags.
+ */
+LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
+    const char *fullName = locale.getName();
+    // name.startsWith("@x="): private use language tag x-subtag-subtag...
+    const bool isPrivateUse =
+        uprv_isAtSign(fullName[0]) && fullName[1] == 'x' && fullName[2] == '=';
+    if (!isPrivateUse) {
+        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                                locale.getVariant(), errorCode);
+    }
+    return LSR(fullName, "", "");
+}
+
+namespace {
+
+// Looks alias up in the alias map and returns the mapped value;
+// if the map has no entry, the alias itself is returned unchanged.
+const char *getCanonical(const CharStringMap &aliases, const char *alias) {
+    const char *mapped = aliases.get(alias);
+    if (mapped != nullptr) {
+        return mapped;
+    }
+    return alias;
+}
+
+}  // namespace
+
+/**
+ * Maximizes the given subtags, with special treatment for pseudolocales.
+ *
+ * Pseudolocales like en-XA, ar-XB, fr-PSCRACK should match only themselves,
+ * not other locales with what looks like the same language and script subtags,
+ * so they get an LSR built with a pseudo-locale prefix instead of a trie lookup.
+ *
+ * @param language  language subtag
+ * @param script    script subtag
+ * @param region    region subtag
+ * @param variant   variant subtag
+ * @param errorCode in/out error code, passed to the prefixed LSR constructor
+ * @return the pseudo-locale LSR, or the result of maximize() after alias resolution.
+ */
+LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+                                     const char *variant, UErrorCode &errorCode) const {
+    // Pseudolocale expressed through a two-character region XA / XB / XC.
+    if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
+        const char pseudoKind = region[1];
+        if (pseudoKind == 'A') {
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode);
+        }
+        if (pseudoKind == 'B') {
+            return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode);
+        }
+        if (pseudoKind == 'C') {
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode);
+        }
+        // Any other X? region is a normal locale.
+    }
+
+    // Pseudolocale expressed through a PS* variant; an empty region is
+    // replaced with the matching XA / XB / XC code.
+    if (variant[0] == 'P' && variant[1] == 'S') {
+        const bool regionIsEmpty = (*region == 0);
+        if (uprv_strcmp(variant, "PSACCENT") == 0) {
+            const char *pseudoRegion = regionIsEmpty ? "XA" : region;
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSBIDI") == 0) {
+            const char *pseudoRegion = regionIsEmpty ? "XB" : region;
+            return LSR(PSEUDO_BIDI_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSCRACK") == 0) {
+            const char *pseudoRegion = regionIsEmpty ? "XC" : region;
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        // Any other PS* variant is a normal locale.
+    }
+
+    // Normal locale: resolve language and region aliases (there are no script mappings).
+    const char *canonicalLanguage = getCanonical(languageAliases, language);
+    const char *canonicalRegion = getCanonical(regionAliases, region);
+    return maximize(canonicalLanguage, script, canonicalRegion);
+}
+
+/**
+ * Fills in missing subtags of a language/script/region triple from the
+ * likely-subtags trie.
+ *
+ * The trie is walked subtag by subtag (language, then script, then region).
+ * Whenever a subtag is not found, the walk continues from the corresponding
+ * wildcard ('*') state instead. The resulting trie value is an index into lsrs.
+ *
+ * @param language language subtag; "und" is treated like an empty string
+ * @param script   script subtag; "Zzzz" is treated like an empty string
+ * @param region   region subtag; "ZZ" is treated like an empty string
+ * @return an LSR combining the caller's non-empty subtags with the looked-up ones.
+ */
+LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
+    // Normalize the explicit "unknown" codes to empty strings.
+    if (uprv_strcmp(language, "und") == 0) {
+        language = "";
+    }
+    if (uprv_strcmp(script, "Zzzz") == 0) {
+        script = "";
+    }
+    if (uprv_strcmp(region, "ZZ") == 0) {
+        region = "";
+    }
+    if (*script != 0 && *region != 0 && *language != 0) {
+        return LSR(language, script, region);  // already maximized
+    }
+
+    // Bit mask of the subtags to keep from the input rather than take from the
+    // lookup result (see the end of this function): 4=language, 2=script, 1=region.
+    uint32_t retainOldMask = 0;
+    // Work on a copy of the trie so that the member trie is not modified.
+    BytesTrie iter(trie);
+    uint64_t state;
+    int32_t value;
+    // Small optimization: Array lookup for first language letter.
+    int32_t c0;
+    if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+            language[1] != 0 &&  // language.length() >= 2
+            (state = trieFirstLetterStates[c0]) != 0) {
+        value = trieNext(iter.resetToState64(state), language, 1);
+    } else {
+        value = trieNext(iter, language, 0);
+    }
+    if (value >= 0) {
+        if (*language != 0) {
+            retainOldMask |= 4;
+        }
+        state = iter.getState64();
+    } else {
+        // Language not in the trie: keep it, and continue as if it were "und".
+        // state == 0 records that this fallback happened.
+        retainOldMask |= 4;
+        iter.resetToState64(trieUndState);  // "und" ("*")
+        state = 0;
+    }
+
+    if (value > 0) {
+        // Intermediate or final value from just language.
+        if (value == SKIP_SCRIPT) {
+            value = 0;
+        }
+        if (*script != 0) {
+            retainOldMask |= 2;
+        }
+    } else {
+        value = trieNext(iter, script, 0);
+        if (value >= 0) {
+            if (*script != 0) {
+                retainOldMask |= 2;
+            }
+            state = iter.getState64();
+        } else {
+            // Script not found after this language: keep the caller's script
+            // and continue from the wildcard script instead.
+            retainOldMask |= 2;
+            if (state == 0) {
+                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", 0);
+                U_ASSERT(value >= 0);
+                state = iter.getState64();
+            }
+        }
+    }
+
+    if (value > 0) {
+        // Final value from just language or language+script.
+        if (*region != 0) {
+            retainOldMask |= 1;
+        }
+    } else {
+        value = trieNext(iter, region, 0);
+        if (value >= 0) {
+            if (*region != 0) {
+                retainOldMask |= 1;
+            }
+        } else {
+            // Region not found: keep the caller's region and use the wildcard
+            // region, or the cached default index if language also fell back.
+            retainOldMask |= 1;
+            if (state == 0) {
+                value = defaultLsrIndex;
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", 0);
+                U_ASSERT(value > 0);
+            }
+        }
+    }
+    U_ASSERT(value < lsrsLength);
+    const LSR &result = lsrs[value];
+
+    if (*language == 0) {
+        language = "und";
+    }
+
+    if (retainOldMask == 0) {
+        // Quickly return a copy of the lookup-result LSR
+        // without new allocation of the subtags.
+        return LSR(result.language, result.script, result.region);
+    }
+    // For each cleared bit, replace the caller's subtag with the looked-up one.
+    if ((retainOldMask & 4) == 0) {
+        language = result.language;
+    }
+    if ((retainOldMask & 2) == 0) {
+        script = result.script;
+    }
+    if ((retainOldMask & 1) == 0) {
+        region = result.region;
+    }
+    return LSR(language, script, region);
+}
+
+/**
+ * Advances the trie iterator over one subtag.
+ *
+ * An empty subtag is fed as the single character '*'. Otherwise the characters
+ * of s starting at index i are fed one by one, and the last character is fed
+ * with bit 0x80 set.
+ *
+ * @param iter trie iterator to advance
+ * @param s    NUL-terminated subtag
+ * @param i    index of the first character of s to feed
+ * @return -1 if there is no match, 0 if the trie matched without a value,
+ *         SKIP_SCRIPT for an intermediate value, or the final trie value.
+ */
+int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
+    UStringTrieResult result;
+    uint8_t current = s[i];
+    if (current == 0) {
+        result = iter.next(u'*');
+    } else {
+        uint8_t following;
+        while ((following = s[++i]) != 0) {
+            // Not yet the last character of this subtag.
+            // EBCDIC: If the character is not an invariant character,
+            // then it converts to 0 and will simply not match anything, which is harmless.
+            const uint8_t ascii = uprv_invCharToAscii(current);
+            if (!USTRINGTRIE_HAS_NEXT(iter.next(ascii))) {
+                return -1;
+            }
+            current = following;
+        }
+        // last character of this subtag
+        const uint8_t ascii = uprv_invCharToAscii(current);
+        result = iter.next(ascii | 0x80);
+    }
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        return iter.getValue();
+    }
+    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
+        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+        return SKIP_SCRIPT;
+    }
+    if (result == USTRINGTRIE_NO_VALUE) {
+        return 0;
+    }
+    // USTRINGTRIE_NO_MATCH or anything unexpected
+    return -1;
+}
+
+// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+// in loclikely.cpp to this new code, including activating this
+// minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
+                                    const char *regionIn, ULocale.Minimize fieldToFavor,
+                                    UErrorCode &errorCode) const {
+    LSR result = maximize(languageIn, scriptIn, regionIn);
+
+    // We could try just a series of checks, like:
+    // LSR result2 = addLikelySubtags(languageIn, "", "");
+    // if result.equals(result2) return result2;
+    // However, we can optimize 2 of the cases:
+    //   (languageIn, "", "")
+    //   (languageIn, "", regionIn)
+
+    // value00 = lookup(result.language, "", "")
+    BytesTrie iter = new BytesTrie(trie);
+    int value = trieNext(iter, result.language, 0);
+    U_ASSERT(value >= 0);
+    if (value == 0) {
+        value = trieNext(iter, "", 0);
+        U_ASSERT(value >= 0);
+        if (value == 0) {
+            value = trieNext(iter, "", 0);
+        }
+    }
+    U_ASSERT(value > 0);
+    LSR value00 = lsrs[value];
+    boolean favorRegionOk = false;
+    if (result.script.equals(value00.script)) { //script is default
+        if (result.region.equals(value00.region)) {
+            return new LSR(result.language, "", "");
+        } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+            return new LSR(result.language, "", result.region);
+        } else {
+            favorRegionOk = true;
+        }
+    }
+
+    // The last case is not as easy to optimize.
+    // Maybe do later, but for now use the straightforward code.
+    LSR result2 = maximize(languageIn, scriptIn, "");
+    if (result2.equals(result)) {
+        return new LSR(result.language, result.script, "");
+    } else if (favorRegionOk) {
+        return new LSR(result.language, "", result.region);
+    }
+    return result;
+}
+#endif
+
+U_NAMESPACE_END
--- a/external/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.h
@@ -0,0 +1,143 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCLIKELYSUBTAGS_H__
+#define __LOCLIKELYSUBTAGS_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "lsr.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+struct XLikelySubtagsData;
+
+/**
+ * Map of const char * keys & values.
+ * Stores pointers as is: Does not own/copy/adopt/release strings.
+ */
+class CharStringMap final : public UMemory {
+public:
+    /** Constructs an unusable non-map. */
+    CharStringMap() : map(nullptr) {}
+    /**
+     * Opens the underlying hash table with uhash_openSize(), using C-string
+     * hashing and comparison for both keys and values.
+     * @param size      size argument passed to uhash_openSize()
+     * @param errorCode in/out error code passed to uhash_openSize()
+     */
+    CharStringMap(int32_t size, UErrorCode &errorCode) {
+        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+                             size, &errorCode);
+    }
+    /** Takes over other's hash table; other is left as an unusable non-map. */
+    CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
+        other.map = nullptr;
+    }
+    CharStringMap(const CharStringMap &other) = delete;
+    ~CharStringMap() {
+        uhash_close(map);
+    }
+
+    /**
+     * Move assignment: closes the hash table currently held (if any) and takes
+     * over other's table. Self-assignment leaves the object unchanged.
+     */
+    CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
+        if (this != &other) {
+            // Without closing the old table first, it would be leaked when
+            // assigning to a map that already owns one.
+            uhash_close(map);
+            map = other.map;
+            other.map = nullptr;
+        }
+        return *this;
+    }
+    CharStringMap &operator=(const CharStringMap &other) = delete;
+
+    /** Returns the value stored for key, as returned by uhash_get(). */
+    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+    /** Stores the key/value pointers as is; the strings themselves are not copied. */
+    void put(const char *key, const char *value, UErrorCode &errorCode) {
+        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+    }
+
+private:
+    // Owned hash table; nullptr for a default-constructed or moved-from object.
+    UHashtable *map;
+};
+
+/**
+ * Holder for the locale distance/matcher data: a set of raw pointers plus the
+ * length of the paradigms array. All members default to null / 0.
+ *
+ * Declares a move constructor and a destructor (both defined elsewhere);
+ * copy assignment is deleted.
+ * NOTE(review): which of the pointed-to arrays the destructor releases is not
+ * visible in this header; check its definition before relying on ownership.
+ */
+struct LocaleDistanceData {
+    LocaleDistanceData() = default;
+    LocaleDistanceData(LocaleDistanceData &&data);
+    ~LocaleDistanceData();
+
+    const uint8_t *distanceTrieBytes = nullptr;
+    const uint8_t *regionToPartitions = nullptr;
+    const char **partitions = nullptr;
+    const LSR *paradigms = nullptr;
+    // Presumably the number of elements in paradigms; confirm against the loader.
+    int32_t paradigmsLength = 0;
+    const int32_t *distances = nullptr;
+
+private:
+    LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
+};
+
+// TODO(ICU-20777): Rename to just LikelySubtags.
+/**
+ * Likely-subtags lookup: maps a language/script/region triple to its
+ * maximized form using a byte trie and a table of LSR results.
+ * Instances are not copyable; the only public way to get one is getSingleton().
+ */
+class XLikelySubtags final : public UMemory {
+public:
+    ~XLikelySubtags();
+
+    // Trie value returned by trieNext() for an intermediate (non-final) match.
+    static constexpr int32_t SKIP_SCRIPT = 1;
+
+    // VisibleForTesting
+    static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
+
+    // VisibleForTesting
+    LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
+
+    // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+    // in loclikely.cpp to this new code, including activating this
+    // minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+    LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
+                        ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
+#endif
+
+    // visible for LocaleDistance
+    const LocaleDistanceData &getDistanceData() const { return distanceData; }
+
+private:
+    // Takes over the resources held by data.
+    XLikelySubtags(XLikelySubtagsData &data);
+    XLikelySubtags(const XLikelySubtags &other) = delete;
+    XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
+
+    // Init-once callback that creates the singleton.
+    static void initLikelySubtags(UErrorCode &errorCode);
+
+    LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
+                         const char *variant, UErrorCode &errorCode) const;
+
+    /**
+     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+     */
+    LSR maximize(const char *language, const char *script, const char *region) const;
+
+    // Feeds the subtag s (starting at index i) into the trie iterator and
+    // returns -1 for no match, otherwise a value >= 0.
+    static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
+
+    // NOTE: the constructor's initializer list relies on the declaration order
+    // of the members below; do not reorder them.
+    UResourceBundle *langInfoBundle;
+    // We could store the strings by value, except that if there were few enough strings,
+    // moving the contents could copy it to a different array,
+    // invalidating the pointers stored in the maps.
+    CharString *strings;
+    CharStringMap languageAliases;
+    CharStringMap regionAliases;
+
+    // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+    // There is also a trie value for each intermediate lang and lang+script.
+    // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+    BytesTrie trie;
+    // Cached trie state after "*" (language "und").
+    uint64_t trieUndState;
+    // Cached trie state after "**" ("und-Zzzz").
+    uint64_t trieUndZzzzState;
+    // Trie value for "***", used when neither language nor region is found.
+    int32_t defaultLsrIndex;
+    // Cached trie state after each first language letter 'a'..'z';
+    // maximize() treats a 0 entry as "no cached state".
+    uint64_t trieFirstLetterStates[26];
+    const LSR *lsrs;
+#if U_DEBUG
+    int32_t lsrsLength;
+#endif
+
+    // distance/matcher data: see comment in XLikelySubtagsData::load()
+    LocaleDistanceData distanceData;
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCLIKELYSUBTAGS_H__
--- a/external/duckdb/extension/icu/third_party/icu/common/locmap.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locmap.cpp
--- a/external/duckdb/extension/icu/third_party/icu/common/locmap.h
+++ b/external/duckdb/extension/icu/third_party/icu/common/locmap.h
@@ -0,0 +1,40 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1996-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File locmap.h      : Locale Mapping Classes
+* 
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date        Name        Description
+*  3/11/97     aliu        Added setId().
+*  4/20/99     Madhu       Added T_convertToPosix()
+* 09/18/00     george      Removed the memory leaks.
+* 08/23/01     george      Convert to C
+*============================================================================
+*/
+
+#ifndef LOCMAP_H
+#define LOCMAP_H
+
+#include "unicode/utypes.h"
+
+/*
+ * Masks a host ID down to its low 10 bits (0x03FF) and yields them as uint16_t.
+ * Both the argument and the whole expansion are parenthesized so that the macro
+ * evaluates correctly when it is given an operator expression such as (a | b)
+ * or is used inside a larger expression.
+ */
+#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
+
+U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);
+
+/* Don't call these functions directly. Use uloc_getLCID instead. */
+U_CAPI uint32_t uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status); // Leverage platform conversion if possible
+U_CAPI uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status);
+
+#endif /* LOCMAP_H */
+
--- a/external/duckdb/extension/icu/third_party/icu/common/locresdata.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locresdata.cpp
@@ -0,0 +1,220 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1997-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  locresdata.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010feb25
+*   created by: Markus W. Scherer
+*
+*   Code for miscellaneous locale-related resource bundle data access,
+*   separated out from other .cpp files
+*   that then do not depend on resource bundle code and this data.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "uresimp.h"
+
+/*
+ * Lookup a resource bundle table item with fallback on the table level.
+ * Regular resource bundle lookups perform fallback to parent locale bundles
+ * and eventually the root bundle, but only for top-level items.
+ * This function takes the name of a top-level table and of an item in that table
+ * and performs a lookup of both, falling back until a bundle contains a table
+ * with this item.
+ *
+ * Note: Only the opening of entire bundles falls back through the default locale
+ * before root. Once a bundle is open, item lookups do not go through the
+ * default locale because that would result in a mix of languages that is
+ * unpredictable to the programmer and most likely useless.
+ *
+ * @param path        bundle path passed to ures_open()
+ * @param locale      locale whose bundle is opened first
+ * @param tableKey    key of the top-level table
+ * @param subTableKey key of a table inside tableKey, or NULL to look up itemKey
+ *                    directly in the top-level table
+ * @param itemKey     key of the string item
+ * @param pLength     receives the length of the returned string
+ * @param pErrorCode  receives the warning or error code of the lookup
+ * @return the item string; NULL if the bundle could not be opened
+ *         (check *pErrorCode for other failures)
+ */
+U_CAPI const UChar * U_EXPORT2
+uloc_getTableStringWithFallback(const char *path, const char *locale,
+                              const char *tableKey, const char *subTableKey,
+                              const char *itemKey,
+                              int32_t *pLength,
+                              UErrorCode *pErrorCode)
+{
+/*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
+    const UChar *item=NULL;
+    UErrorCode errorCode;
+    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
+
+    /*
+     * open the bundle for the current locale
+     * this falls back through the locale's chain to root
+     */
+    errorCode=U_ZERO_ERROR;
+    icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));
+
+    if(U_FAILURE(errorCode)) {
+        /* total failure, not even root could be opened */
+        *pErrorCode=errorCode;
+        return NULL;
+    } else if(errorCode==U_USING_DEFAULT_WARNING ||
+                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
+    ) {
+        /* set the "strongest" error code (success->fallback->default->failure) */
+        *pErrorCode=errorCode;
+    }
+
+    for(;;){
+        icu::StackUResourceBundle table;
+        icu::StackUResourceBundle subTable;
+        ures_getByKeyWithFallback(rb.getAlias(), tableKey, table.getAlias(), &errorCode);
+
+        if (subTableKey != NULL) {
+            /*
+            ures_getByKeyWithFallback(table.getAlias(), subTableKey, subTable.getAlias(), &errorCode);
+            item = ures_getStringByKeyWithFallback(subTable.getAlias(), itemKey, pLength, &errorCode);
+            if(U_FAILURE(errorCode)){
+                *pErrorCode = errorCode;
+            }
+
+            break;*/
+
+            ures_getByKeyWithFallback(table.getAlias(), subTableKey, table.getAlias(), &errorCode);
+        }
+        if(U_SUCCESS(errorCode)){
+            item = ures_getStringByKeyWithFallback(table.getAlias(), itemKey, pLength, &errorCode);
+            if(U_FAILURE(errorCode)){
+                const char* replacement = NULL;
+                *pErrorCode = errorCode; /*save the errorCode*/
+                errorCode = U_ZERO_ERROR;
+                /* may be a deprecated code */
+                if(uprv_strcmp(tableKey, "Countries")==0){
+                    replacement =  uloc_getCurrentCountryID(itemKey);
+                }else if(uprv_strcmp(tableKey, "Languages")==0){
+                    replacement =  uloc_getCurrentLanguageID(itemKey);
+                }
+                /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself if replacement is not found*/
+                if(replacement!=NULL && itemKey != replacement){
+                    item = ures_getStringByKeyWithFallback(table.getAlias(), replacement, pLength, &errorCode);
+                    if(U_SUCCESS(errorCode)){
+                        *pErrorCode = errorCode;
+                        break;
+                    }
+                }
+            }else{
+                break;
+            }
+        }
+
+        if(U_FAILURE(errorCode)){
+
+            /* still can't figure out ?.. try the fallback mechanism */
+            int32_t len = 0;
+            const UChar* fallbackLocale =  NULL;
+            *pErrorCode = errorCode;
+            errorCode = U_ZERO_ERROR;
+
+            fallbackLocale = ures_getStringByKeyWithFallback(table.getAlias(), "Fallback", &len, &errorCode);
+            if(U_FAILURE(errorCode)){
+               *pErrorCode = errorCode;
+                break;
+            }
+
+            /*
+             * The fallback name must fit into the fixed-size buffer together with
+             * its terminating NUL; refuse anything longer instead of overrunning it.
+             */
+            if(len < 0 || len >= ULOC_FULLNAME_CAPACITY){
+                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
+            /*
+             * Terminate explicitly: the buffer is reused across loop iterations,
+             * so a shorter name must not pick up the tail of a previous, longer one.
+             */
+            explicitFallbackName[len] = 0;
+
+            /* guard against recursive fallback */
+            if(uprv_strcmp(explicitFallbackName, locale)==0){
+                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
+                break;
+            }
+            rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode));
+            if(U_FAILURE(errorCode)){
+                *pErrorCode = errorCode;
+                break;
+            }
+            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
+        }else{
+            break;
+        }
+    }
+
+    return item;
+}
+
+/*
+ * Shared implementation of the orientation getters: canonicalizes the locale ID,
+ * reads the string stored under `key` in the "layout" table and maps its first
+ * character (b / l / r / t) to a ULayoutType.
+ * Returns ULOC_LAYOUT_UNKNOWN if *status is or becomes a failure, or if the
+ * string is empty; an unexpected first character sets U_INTERNAL_PROGRAM_ERROR.
+ */
+static ULayoutType
+_uloc_getOrientationHelper(const char* localeId,
+                           const char* key,
+                           UErrorCode *status)
+{
+    if (U_FAILURE(*status)) {
+        return ULOC_LAYOUT_UNKNOWN;
+    }
+
+    char canonicalId[ULOC_FULLNAME_CAPACITY];
+    uloc_canonicalize(localeId, canonicalId, sizeof(canonicalId), status);
+    if (U_FAILURE(*status)) {
+        return ULOC_LAYOUT_UNKNOWN;
+    }
+
+    int32_t textLength = 0;
+    const UChar* const text =
+        uloc_getTableStringWithFallback(NULL, canonicalId, "layout", NULL, key,
+                                        &textLength, status);
+    if (U_FAILURE(*status) || textLength == 0) {
+        return ULOC_LAYOUT_UNKNOWN;
+    }
+
+    switch (text[0]) {
+    case 0x0062: /* 'b' */
+        return ULOC_LAYOUT_BTT;
+    case 0x006C: /* 'l' */
+        return ULOC_LAYOUT_LTR;
+    case 0x0072: /* 'r' */
+        return ULOC_LAYOUT_RTL;
+    case 0x0074: /* 't' */
+        return ULOC_LAYOUT_TTB;
+    default:
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return ULOC_LAYOUT_UNKNOWN;
+    }
+}
+
+/**
+ * Get the layout character orientation for the specified locale.
+ * Looks up the "characters" item via _uloc_getOrientationHelper().
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+                             UErrorCode *status)
+{
+    return _uloc_getOrientationHelper(localeId, "characters", status);
+}
+
+/**
+ * Get the layout line orientation for the specified locale.
+ * Looks up the "lines" item via _uloc_getOrientationHelper().
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+                        UErrorCode *status)
+{
+    return _uloc_getOrientationHelper(localeId, "lines", status);
+}
--- a/external/duckdb/extension/icu/third_party/icu/common/locutil.cpp
+++ b/external/duckdb/extension/icu/third_party/icu/common/locutil.cpp
@@ -0,0 +1,275 @@
+// // © 2016 and later: Unicode, Inc. and others.
+// // License & terms of use: http://www.unicode.org/copyright.html
+// /*
+//  *******************************************************************************
+//  * Copyright (C) 2002-2014, International Business Machines Corporation and
+//  * others. All Rights Reserved.
+//  *******************************************************************************
+//  */
+// #include "unicode/utypes.h"
+
+// #if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
+
+// #include "unicode/resbund.h"
+// #include "unicode/uenum.h"
+// #include "cmemory.h"
+// #include "ustrfmt.h"
+// #include "locutil.h"
+// #include "charstr.h"
+// #include "ucln_cmn.h"
+// #include "uassert.h"
+// #include "umutex.h"
+
+// // see LocaleUtility::getAvailableLocaleNames
+// static icu::UInitOnce   LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
+// static icu::Hashtable * LocaleUtility_cache = NULL;
+
+// #define UNDERSCORE_CHAR ((UChar)0x005f)
+// #define AT_SIGN_CHAR    ((UChar)64)
+// #define PERIOD_CHAR     ((UChar)46)
+
+// /*
+//  ******************************************************************
+//  */
+
+// /**
+//  * Release all static memory held by Locale Utility.
+//  */
+// U_CDECL_BEGIN
+// static UBool U_CALLCONV service_cleanup(void) {
+//     if (LocaleUtility_cache) {
+//         delete LocaleUtility_cache;
+//         LocaleUtility_cache = NULL;
+//     }
+//     return TRUE;
+// }
+
+
+// static void U_CALLCONV locale_utility_init(UErrorCode &status) {
+//     using namespace icu;
+//     U_ASSERT(LocaleUtility_cache == NULL);
+//     ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
+//     LocaleUtility_cache = new Hashtable(status);
+//     if (U_FAILURE(status)) {
+//         delete LocaleUtility_cache;
+//         LocaleUtility_cache = NULL;
+//         return;
+//     }
+//     if (LocaleUtility_cache == NULL) {
+//         status = U_MEMORY_ALLOCATION_ERROR;
+//         return;
+//     }
+//     LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
+// }
+
+// U_CDECL_END
+
+// U_NAMESPACE_BEGIN
+
+// UnicodeString&
+// LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
+// {
+//   if (id == NULL) {
+//     result.setToBogus();
+//   } else {
+//     // Fix case only (no other changes) up to the first '@' or '.' or
+//     // end of string, whichever comes first.  In 3.0 I changed this to
+//     // stop at first '@' or '.'.  It used to run out to the end of
+//     // string.  My fix makes the tests pass but is probably
+//     // structurally incorrect.  See below.  [alan 3.0]
+
+//     // TODO: Doug, you might want to revise this...
+//     result = *id;
+//     int32_t i = 0;
+//     int32_t end = result.indexOf(AT_SIGN_CHAR);
+//     int32_t n = result.indexOf(PERIOD_CHAR);
+//     if (n >= 0 && n < end) {
+//         end = n;
+//     }
+//     if (end < 0) {
+//         end = result.length();
+//     }
+//     n = result.indexOf(UNDERSCORE_CHAR);
+//     if (n < 0) {
+//       n = end;
+//     }
+//     for (; i < n; ++i) {
+//       UChar c = result.charAt(i);
+//       if (c >= 0x0041 && c <= 0x005a) {
+//         c += 0x20;
+//         result.setCharAt(i, c);
+//       }
+//     }
+//     for (n = end; i < n; ++i) {
+//       UChar c = result.charAt(i);
+//       if (c >= 0x0061 && c <= 0x007a) {
+//         c -= 0x20;
+//         result.setCharAt(i, c);
+//       }
+//     }
+//   }
+//   return result;
+
+// #if 0
+//     // This code does a proper full level 2 canonicalization of id.
+//     // It's nasty to go from UChar to char to char to UChar -- but
+//     // that's what you have to do to use the uloc_canonicalize
+//     // function on UnicodeStrings.
+
+//     // I ended up doing the alternate fix (see above) not for
+//     // performance reasons, although performance will certainly be
+//     // better, but because doing a full level 2 canonicalization
+//     // causes some tests to fail.  [alan 3.0]
+
+//     // TODO: Doug, you might want to revisit this...
+//     result.setToBogus();
+//     if (id != 0) {
+//         int32_t buflen = id->length() + 8; // space for NUL
+//         char* buf = (char*) uprv_malloc(buflen);
+//         char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
+//         if (buf != 0 && canon != 0) {
+//             U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
+//             UErrorCode ec = U_ZERO_ERROR;
+//             uloc_canonicalize(buf, canon, buflen, &ec);
+//             if (U_SUCCESS(ec)) {
+//                 result = UnicodeString(canon);
+//             }
+//         }
+//         uprv_free(buf);
+//         uprv_free(canon);
+//     }
+//     return result;
+// #endif
+// }
+
+// Locale&
+// LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
+// {
+//     enum { BUFLEN = 128 }; // larger than ever needed
+
+//     if (id.isBogus() || id.length() >= BUFLEN) {
+//         result.setToBogus();
+//     } else {
+//         /*
+//          * We need to convert from a UnicodeString to char * in order to
+//          * create a Locale.
+//          *
+//          * Problem: Locale ID strings may contain '@' which is a variant
+//          * character and cannot be handled by invariant-character conversion.
+//          *
+//          * Hack: Since ICU code can handle locale IDs with multiple encodings
+//          * of '@' (at least for EBCDIC; it's not known to be a problem for
+//          * ASCII-based systems),
+//          * we use regular invariant-character conversion for everything else
+//          * and manually convert U+0040 into a compiler-char-constant '@'.
+//          * While this compilation-time constant may not match the runtime
+//          * encoding of '@', it should be one of the encodings which ICU
+//          * recognizes.
+//          *
+//          * There should be only at most one '@' in a locale ID.
+//          */
+//         char buffer[BUFLEN];
+//         int32_t prev, i;
+//         prev = 0;
+//         for(;;) {
+//             i = id.indexOf((UChar)0x40, prev);
+//             if(i < 0) {
+//                 // no @ between prev and the rest of the string
+//                 id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
+//                 break; // done
+//             } else {
+//                 // normal invariant-character conversion for text between @s
+//                 id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
+//                 // manually "convert" U+0040 at id[i] into '@' at buffer[i]
+//                 buffer[i] = '@';
+//                 prev = i + 1;
+//             }
+//         }
+//         result = Locale::createFromName(buffer);
+//     }
+//     return result;
+// }
+
+// UnicodeString&
+// LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
+// {
+//     if (locale.isBogus()) {
+//         result.setToBogus();
+//     } else {
+//         result.append(UnicodeString(locale.getName(), -1, US_INV));
+//     }
+//     return result;
+// }
+
+// const Hashtable*
+// LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
+// {
+//     // LocaleUtility_cache is a hash-of-hashes.  The top-level keys
+//     // are path strings ('bundleID') passed to
+//     // ures_openAvailableLocales.  The top-level values are
+//     // second-level hashes.  The second-level keys are the strings
+//     // enumerated from the result of ures_openAvailableLocales.  The
+//     // second-level values are meaningless placeholders (the code
+//     // below stores the second-level hash's own pointer).
+
+//     UErrorCode status = U_ZERO_ERROR;
+//     umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
+//     Hashtable *cache = LocaleUtility_cache;
+//     if (cache == NULL) {
+//         // Catastrophic failure.
+//         return NULL;
+//     }
+
+//     Hashtable* htp;
+//     umtx_lock(NULL);
+//     htp = (Hashtable*) cache->get(bundleID);
+//     umtx_unlock(NULL);
+
+//     if (htp == NULL) {
+//         htp = new Hashtable(status);
+//         if (htp && U_SUCCESS(status)) {
+//             CharString cbundleID;
+//             cbundleID.appendInvariantChars(bundleID, status);
+//             const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
+//             icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));
+//             for (;;) {
+//                 const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status);
+//                 if (id == NULL) {
+//                     break;
+//                 }
+//                 htp->put(UnicodeString(id), (void*)htp, status);
+//             }
+//             if (U_FAILURE(status)) {
+//                 delete htp;
+//                 return NULL;
+//             }
+//             umtx_lock(NULL);
+//             Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
+//             if (t != NULL) {
+//                 // Another thread raced through this code, creating the cache entry first.
+//                 // Discard ours and return theirs.
+//                 umtx_unlock(NULL);
+//                 delete htp;
+//                 htp = t;
+//             } else {
+//                 cache->put(bundleID, (void*)htp, status);
+//                 umtx_unlock(NULL);
+//             }
+//         }
+//     }
+//     return htp;
+// }
+
+// UBool
+// LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
+// {
+//     return child.indexOf(root) == 0 &&
+//       (child.length() == root.length() ||
+//        child.charAt(root.length()) == UNDERSCORE_CHAR);
+// }
+
+// U_NAMESPACE_END
+
+// /* !UCONFIG_NO_SERVICE */
+// #endif
+
+
--- a/Show More
+++ b/Show More