should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletion


@@ -0,0 +1,91 @@
cmake_minimum_required(VERSION 3.5...3.29)
project(ParquetExtension)
include_directories(
include ../../third_party/lz4 ../../third_party/parquet
../../third_party/thrift ../../third_party/snappy
../../third_party/brotli/include)
add_subdirectory(decoder)
add_subdirectory(reader)
add_subdirectory(writer)
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES}
column_reader.cpp
column_writer.cpp
parquet_crypto.cpp
parquet_extension.cpp
parquet_file_metadata_cache.cpp
parquet_float16.cpp
parquet_multi_file_info.cpp
parquet_metadata.cpp
parquet_reader.cpp
parquet_field_id.cpp
parquet_statistics.cpp
parquet_timestamp.cpp
parquet_writer.cpp
parquet_shredding.cpp
serialize_parquet.cpp
zstd_file_system.cpp
geo_parquet.cpp)
if(NOT CLANG_TIDY)
# parquet/thrift/snappy
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES}
../../third_party/parquet/parquet_types.cpp
../../third_party/thrift/thrift/protocol/TProtocol.cpp
../../third_party/thrift/thrift/transport/TTransportException.cpp
../../third_party/thrift/thrift/transport/TBufferTransports.cpp
../../third_party/snappy/snappy.cc
../../third_party/snappy/snappy-sinksource.cc)
# lz4
set(PARQUET_EXTENSION_FILES ${PARQUET_EXTENSION_FILES}
../../third_party/lz4/lz4.cpp)
# brotli
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES}
../../third_party/brotli/enc/dictionary_hash.cpp
../../third_party/brotli/enc/backward_references_hq.cpp
../../third_party/brotli/enc/histogram.cpp
../../third_party/brotli/enc/memory.cpp
../../third_party/brotli/enc/entropy_encode.cpp
../../third_party/brotli/enc/compound_dictionary.cpp
../../third_party/brotli/enc/compress_fragment_two_pass.cpp
../../third_party/brotli/enc/block_splitter.cpp
../../third_party/brotli/enc/command.cpp
../../third_party/brotli/enc/encode.cpp
../../third_party/brotli/enc/encoder_dict.cpp
../../third_party/brotli/enc/cluster.cpp
../../third_party/brotli/enc/backward_references.cpp
../../third_party/brotli/enc/utf8_util.cpp
../../third_party/brotli/enc/compress_fragment.cpp
../../third_party/brotli/enc/fast_log.cpp
../../third_party/brotli/enc/brotli_bit_stream.cpp
../../third_party/brotli/enc/bit_cost.cpp
../../third_party/brotli/enc/static_dict.cpp
../../third_party/brotli/enc/literal_cost.cpp
../../third_party/brotli/enc/metablock.cpp
../../third_party/brotli/common/dictionary.cpp
../../third_party/brotli/common/constants.cpp
../../third_party/brotli/common/transform.cpp
../../third_party/brotli/common/platform.cpp
../../third_party/brotli/common/shared_dictionary.cpp
../../third_party/brotli/common/context.cpp
../../third_party/brotli/dec/state.cpp
../../third_party/brotli/dec/decode.cpp
../../third_party/brotli/dec/huffman.cpp
../../third_party/brotli/dec/bit_reader.cpp)
endif()
build_static_extension(parquet ${PARQUET_EXTENSION_FILES})
set(PARAMETERS "-warnings")
build_loadable_extension(parquet ${PARAMETERS} ${PARQUET_EXTENSION_FILES})
target_link_libraries(parquet_loadable_extension duckdb_mbedtls duckdb_zstd)
install(
TARGETS parquet_extension
EXPORT "${DUCKDB_EXPORT_SET}"
LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
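
The targets above yield both a statically linked extension and a loadable parquet extension. A minimal, illustrative sketch of exercising the loadable build from the DuckDB C++ API follows; it assumes the extension is discoverable by DuckDB (or already linked in statically, in which case the LOAD succeeds trivially) and uses example.parquet purely as a placeholder path.

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);    // in-memory database
	duckdb::Connection con(db);
	con.Query("LOAD parquet;");    // needed only when the extension is not built in
	auto result = con.Query("SELECT count(*) FROM read_parquet('example.parquet');");
	result->Print();
	return 0;
}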


@@ -0,0 +1,911 @@
#include "column_reader.hpp"
#include "reader/boolean_column_reader.hpp"
#include "brotli/decode.h"
#include "reader/callback_column_reader.hpp"
#include "reader/decimal_column_reader.hpp"
#include "duckdb.hpp"
#include "reader/expression_column_reader.hpp"
#include "reader/interval_column_reader.hpp"
#include "reader/list_column_reader.hpp"
#include "lz4.hpp"
#include "miniz_wrapper.hpp"
#include "reader/null_column_reader.hpp"
#include "parquet_reader.hpp"
#include "parquet_timestamp.hpp"
#include "parquet_float16.hpp"
#include "reader/row_number_column_reader.hpp"
#include "snappy.h"
#include "reader/string_column_reader.hpp"
#include "reader/struct_column_reader.hpp"
#include "reader/templated_column_reader.hpp"
#include "reader/uuid_column_reader.hpp"
#include "zstd.h"
#include "duckdb/storage/table/column_segment.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/types/bit.hpp"
namespace duckdb {
using duckdb_parquet::CompressionCodec;
using duckdb_parquet::ConvertedType;
using duckdb_parquet::Encoding;
using duckdb_parquet::PageType;
using duckdb_parquet::Type;
const uint64_t ParquetDecodeUtils::BITPACK_MASKS[] = {0,
1,
3,
7,
15,
31,
63,
127,
255,
511,
1023,
2047,
4095,
8191,
16383,
32767,
65535,
131071,
262143,
524287,
1048575,
2097151,
4194303,
8388607,
16777215,
33554431,
67108863,
134217727,
268435455,
536870911,
1073741823,
2147483647,
4294967295,
8589934591,
17179869183,
34359738367,
68719476735,
137438953471,
274877906943,
549755813887,
1099511627775,
2199023255551,
4398046511103,
8796093022207,
17592186044415,
35184372088831,
70368744177663,
140737488355327,
281474976710655,
562949953421311,
1125899906842623,
2251799813685247,
4503599627370495,
9007199254740991,
18014398509481983,
36028797018963967,
72057594037927935,
144115188075855871,
288230376151711743,
576460752303423487,
1152921504606846975,
2305843009213693951,
4611686018427387903,
9223372036854775807,
18446744073709551615ULL};
const uint64_t ParquetDecodeUtils::BITPACK_MASKS_SIZE = sizeof(ParquetDecodeUtils::BITPACK_MASKS) / sizeof(uint64_t);
const uint8_t ParquetDecodeUtils::BITPACK_DLEN = 8;
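// The mask table above is simply BITPACK_MASKS[w] == (1ULL << w) - 1 for each width w, with the final
// entry being the all-ones 64-bit mask, and BITPACK_DLEN is the number of bits per byte. As a hedged
// illustration (the helper below is hypothetical, not part of this file), such a mask keeps only the
// low `width` bits of a partially unpacked word:
//
// static uint64_t MaskLowBits(uint64_t word, uint8_t width) {
// 	return word & ParquetDecodeUtils::BITPACK_MASKS[width];
// }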
ColumnReader::ColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema_p)
: column_schema(schema_p), reader(reader), page_rows_available(0), dictionary_decoder(*this),
delta_binary_packed_decoder(*this), rle_decoder(*this), delta_length_byte_array_decoder(*this),
delta_byte_array_decoder(*this), byte_stream_split_decoder(*this) {
}
ColumnReader::~ColumnReader() {
}
Allocator &ColumnReader::GetAllocator() {
return reader.allocator;
}
ParquetReader &ColumnReader::Reader() {
return reader;
}
void ColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) {
if (chunk) {
uint64_t size = chunk->meta_data.total_compressed_size;
transport.RegisterPrefetch(FileOffset(), size, allow_merge);
}
}
unique_ptr<BaseStatistics> ColumnReader::Stats(idx_t row_group_idx_p, const vector<ColumnChunk> &columns) {
return Schema().Stats(*reader.GetFileMetadata(), reader.parquet_options, row_group_idx_p, columns);
}
uint64_t ColumnReader::TotalCompressedSize() {
if (!chunk) {
return 0;
}
return chunk->meta_data.total_compressed_size;
}
// Note: It's not trivial to determine where all Column data is stored. Chunk->file_offset
// apparently is not the first page of the data. Therefore we determine the address of the first page by taking the
// minimum of all page offsets.
idx_t ColumnReader::FileOffset() const {
if (!chunk) {
throw std::runtime_error("FileOffset called on ColumnReader with no chunk");
}
auto min_offset = NumericLimits<idx_t>::Maximum();
if (chunk->meta_data.__isset.dictionary_page_offset) {
min_offset = MinValue<idx_t>(min_offset, chunk->meta_data.dictionary_page_offset);
}
if (chunk->meta_data.__isset.index_page_offset) {
min_offset = MinValue<idx_t>(min_offset, chunk->meta_data.index_page_offset);
}
min_offset = MinValue<idx_t>(min_offset, chunk->meta_data.data_page_offset);
return min_offset;
}
idx_t ColumnReader::GroupRowsAvailable() {
return group_rows_available;
}
void ColumnReader::PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values) {
throw NotImplementedException("PlainSkip not implemented");
}
void ColumnReader::Plain(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values, // NOLINT
idx_t result_offset, Vector &result) {
throw NotImplementedException("Plain not implemented");
}
void ColumnReader::Plain(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
idx_t result_offset, Vector &result) {
Plain(*plain_data, defines, num_values, result_offset, result);
}
void ColumnReader::PlainSelect(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
Vector &result, const SelectionVector &sel, idx_t count) {
throw NotImplementedException("PlainSelect not implemented");
}
void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) {
D_ASSERT(ColumnIndex() < columns.size());
chunk = &columns[ColumnIndex()];
protocol = &protocol_p;
D_ASSERT(chunk);
D_ASSERT(chunk->__isset.meta_data);
if (chunk->__isset.file_path) {
throw InvalidInputException("Failed to read file \"%s\": Only inlined data files are supported (no references)",
Reader().GetFileName());
}
// ugh. sometimes there is an extra offset for the dict. sometimes it's wrong.
chunk_read_offset = chunk->meta_data.data_page_offset;
if (chunk->meta_data.__isset.dictionary_page_offset && chunk->meta_data.dictionary_page_offset >= 4) {
// this assumes the data pages follow the dict pages directly.
chunk_read_offset = chunk->meta_data.dictionary_page_offset;
}
group_rows_available = chunk->meta_data.num_values;
}
bool ColumnReader::PageIsFilteredOut(PageHeader &page_hdr) {
if (!dictionary_decoder.HasFilteredOutAllValues()) {
return false;
}
if (page_hdr.type != PageType::DATA_PAGE && page_hdr.type != PageType::DATA_PAGE_V2) {
// we can only filter out data pages
return false;
}
bool is_v1 = page_hdr.type == PageType::DATA_PAGE;
auto &v1_header = page_hdr.data_page_header;
auto &v2_header = page_hdr.data_page_header_v2;
auto page_encoding = is_v1 ? v1_header.encoding : v2_header.encoding;
if (page_encoding != Encoding::PLAIN_DICTIONARY && page_encoding != Encoding::RLE_DICTIONARY) {
// not a dictionary page
return false;
}
// the page has been filtered out!
// skip forward
auto &trans = reinterpret_cast<ThriftFileTransport &>(*protocol->getTransport());
trans.Skip(page_hdr.compressed_page_size);
page_rows_available = is_v1 ? v1_header.num_values : v2_header.num_values;
encoding = ColumnEncoding::DICTIONARY;
page_is_filtered_out = true;
return true;
}
void ColumnReader::PrepareRead(optional_ptr<const TableFilter> filter, optional_ptr<TableFilterState> filter_state) {
encoding = ColumnEncoding::INVALID;
defined_decoder.reset();
page_is_filtered_out = false;
block.reset();
PageHeader page_hdr;
auto &trans = reinterpret_cast<ThriftFileTransport &>(*protocol->getTransport());
if (trans.HasPrefetch()) {
// Already has some data prefetched, let's not mess with it
reader.Read(page_hdr, *protocol);
} else {
// No prefetch yet, prefetch the full header in one go (so thrift won't read byte-by-byte from storage)
// 256 bytes should cover almost all headers (unless it's a V2 header with really LONG string statistics)
static constexpr idx_t ASSUMED_HEADER_SIZE = 256;
const auto prefetch_size = MinValue(trans.GetSize() - trans.GetLocation(), ASSUMED_HEADER_SIZE);
trans.Prefetch(trans.GetLocation(), prefetch_size);
reader.Read(page_hdr, *protocol);
trans.ClearPrefetch();
}
// some basic sanity check
if (page_hdr.compressed_page_size < 0 || page_hdr.uncompressed_page_size < 0) {
throw InvalidInputException("Failed to read file \"%s\": Page sizes can't be < 0", Reader().GetFileName());
}
if (PageIsFilteredOut(page_hdr)) {
// this page has been filtered out so we don't need to read it
return;
}
switch (page_hdr.type) {
case PageType::DATA_PAGE_V2:
PreparePageV2(page_hdr);
PrepareDataPage(page_hdr);
break;
case PageType::DATA_PAGE:
PreparePage(page_hdr);
PrepareDataPage(page_hdr);
break;
case PageType::DICTIONARY_PAGE: {
PreparePage(page_hdr);
auto dictionary_size = page_hdr.dictionary_page_header.num_values;
if (dictionary_size < 0) {
throw InvalidInputException("Failed to read file \"%s\": Invalid dictionary page header (num_values < 0)",
Reader().GetFileName());
}
dictionary_decoder.InitializeDictionary(dictionary_size, filter, filter_state, HasDefines());
break;
}
default:
break; // ignore INDEX page type and any other custom extensions
}
ResetPage();
}
void ColumnReader::ResetPage() {
}
void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
D_ASSERT(page_hdr.type == PageType::DATA_PAGE_V2);
AllocateBlock(page_hdr.uncompressed_page_size + 1);
bool uncompressed = false;
if (page_hdr.data_page_header_v2.__isset.is_compressed && !page_hdr.data_page_header_v2.is_compressed) {
uncompressed = true;
}
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
if (page_hdr.compressed_page_size != page_hdr.uncompressed_page_size) {
throw InvalidInputException("Failed to read file \"%s\": Page size mismatch", Reader().GetFileName());
}
uncompressed = true;
}
if (uncompressed) {
reader.ReadData(*protocol, block->ptr, page_hdr.compressed_page_size);
return;
}
// copy repeats & defines as-is because FOR SOME REASON they are uncompressed
auto uncompressed_bytes = page_hdr.data_page_header_v2.repetition_levels_byte_length +
page_hdr.data_page_header_v2.definition_levels_byte_length;
if (uncompressed_bytes > page_hdr.uncompressed_page_size) {
throw InvalidInputException(
"Failed to read file \"%s\": header inconsistency, uncompressed_page_size needs to be larger than "
"repetition_levels_byte_length + definition_levels_byte_length",
Reader().GetFileName());
}
reader.ReadData(*protocol, block->ptr, uncompressed_bytes);
auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
if (compressed_bytes > 0) {
ResizeableBuffer compressed_buffer;
compressed_buffer.resize(GetAllocator(), compressed_bytes);
reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes,
block->ptr + uncompressed_bytes, page_hdr.uncompressed_page_size - uncompressed_bytes);
}
}
void ColumnReader::AllocateBlock(idx_t size) {
if (!block) {
block = make_shared_ptr<ResizeableBuffer>(GetAllocator(), size);
} else {
block->resize(GetAllocator(), size);
}
}
void ColumnReader::PreparePage(PageHeader &page_hdr) {
AllocateBlock(page_hdr.uncompressed_page_size + 1);
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
if (page_hdr.compressed_page_size != page_hdr.uncompressed_page_size) {
throw std::runtime_error("Page size mismatch");
}
reader.ReadData(*protocol, block->ptr, page_hdr.compressed_page_size);
return;
}
ResizeableBuffer compressed_buffer;
compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,
page_hdr.uncompressed_page_size);
}
void ColumnReader::DecompressInternal(CompressionCodec::type codec, const_data_ptr_t src, idx_t src_size,
data_ptr_t dst, idx_t dst_size) {
switch (codec) {
case CompressionCodec::UNCOMPRESSED:
throw InternalException("Parquet data unexpectedly uncompressed");
case CompressionCodec::GZIP: {
MiniZStream s;
s.Decompress(const_char_ptr_cast(src), src_size, char_ptr_cast(dst), dst_size);
break;
}
case CompressionCodec::LZ4_RAW: {
auto res =
duckdb_lz4::LZ4_decompress_safe(const_char_ptr_cast(src), char_ptr_cast(dst),
UnsafeNumericCast<int32_t>(src_size), UnsafeNumericCast<int32_t>(dst_size));
if (res != NumericCast<int>(dst_size)) {
throw InvalidInputException("Failed to read file \"%s\": LZ4 decompression failure",
Reader().GetFileName());
}
break;
}
case CompressionCodec::SNAPPY: {
{
size_t uncompressed_size = 0;
auto res = duckdb_snappy::GetUncompressedLength(const_char_ptr_cast(src), src_size, &uncompressed_size);
if (!res) {
throw InvalidInputException("Failed to read file \"%s\": Snappy decompression failure",
Reader().GetFileName());
}
if (uncompressed_size != dst_size) {
throw InvalidInputException(
"Failed to read file \"%s\": Snappy decompression failure: Uncompressed data size mismatch",
Reader().GetFileName());
}
}
auto res = duckdb_snappy::RawUncompress(const_char_ptr_cast(src), src_size, char_ptr_cast(dst));
if (!res) {
throw InvalidInputException("Failed to read file \"%s\": Snappy decompression failure",
Reader().GetFileName());
}
break;
}
case CompressionCodec::ZSTD: {
auto res = duckdb_zstd::ZSTD_decompress(dst, dst_size, src, src_size);
if (duckdb_zstd::ZSTD_isError(res) || res != dst_size) {
throw InvalidInputException("Failed to read file \"%s\": ZSTD Decompression failure",
Reader().GetFileName());
}
break;
}
case CompressionCodec::BROTLI: {
auto state = duckdb_brotli::BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
size_t total_out = 0;
auto src_size_size_t = NumericCast<size_t>(src_size);
auto dst_size_size_t = NumericCast<size_t>(dst_size);
auto res = duckdb_brotli::BrotliDecoderDecompressStream(state, &src_size_size_t, &src, &dst_size_size_t, &dst,
&total_out);
if (res != duckdb_brotli::BROTLI_DECODER_RESULT_SUCCESS) {
throw InvalidInputException("Failed to read file \"%s\": Brotli Decompression failure",
Reader().GetFileName());
}
duckdb_brotli::BrotliDecoderDestroyInstance(state);
break;
}
default: {
duckdb::stringstream codec_name;
codec_name << codec;
throw InvalidInputException("Failed to read file \"%s\": Unsupported compression codec \"%s\". Supported "
"options are uncompressed, brotli, gzip, lz4_raw, snappy or zstd",
Reader().GetFileName(), codec_name.str());
}
}
}
void ColumnReader::PrepareDataPage(PageHeader &page_hdr) {
if (page_hdr.type == PageType::DATA_PAGE && !page_hdr.__isset.data_page_header) {
throw InvalidInputException("Failed to read file \"%s\": Missing data page header from data page",
Reader().GetFileName());
}
if (page_hdr.type == PageType::DATA_PAGE_V2 && !page_hdr.__isset.data_page_header_v2) {
throw InvalidInputException("Failed to read file \"%s\": Missing data page header from data page v2",
Reader().GetFileName());
}
bool is_v1 = page_hdr.type == PageType::DATA_PAGE;
bool is_v2 = page_hdr.type == PageType::DATA_PAGE_V2;
auto &v1_header = page_hdr.data_page_header;
auto &v2_header = page_hdr.data_page_header_v2;
page_rows_available = is_v1 ? v1_header.num_values : v2_header.num_values;
auto page_encoding = is_v1 ? v1_header.encoding : v2_header.encoding;
if (HasRepeats()) {
uint32_t rep_length = is_v1 ? block->read<uint32_t>() : v2_header.repetition_levels_byte_length;
block->available(rep_length);
repeated_decoder = make_uniq<RleBpDecoder>(block->ptr, rep_length, RleBpDecoder::ComputeBitWidth(MaxRepeat()));
block->inc(rep_length);
} else if (is_v2 && v2_header.repetition_levels_byte_length > 0) {
block->inc(v2_header.repetition_levels_byte_length);
}
if (HasDefines()) {
uint32_t def_length = is_v1 ? block->read<uint32_t>() : v2_header.definition_levels_byte_length;
block->available(def_length);
defined_decoder = make_uniq<RleBpDecoder>(block->ptr, def_length, RleBpDecoder::ComputeBitWidth(MaxDefine()));
block->inc(def_length);
} else if (is_v2 && v2_header.definition_levels_byte_length > 0) {
block->inc(v2_header.definition_levels_byte_length);
}
switch (page_encoding) {
case Encoding::RLE_DICTIONARY:
case Encoding::PLAIN_DICTIONARY: {
encoding = ColumnEncoding::DICTIONARY;
dictionary_decoder.InitializePage();
break;
}
case Encoding::RLE: {
encoding = ColumnEncoding::RLE;
rle_decoder.InitializePage();
break;
}
case Encoding::DELTA_BINARY_PACKED: {
encoding = ColumnEncoding::DELTA_BINARY_PACKED;
delta_binary_packed_decoder.InitializePage();
break;
}
case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
encoding = ColumnEncoding::DELTA_LENGTH_BYTE_ARRAY;
delta_length_byte_array_decoder.InitializePage();
break;
}
case Encoding::DELTA_BYTE_ARRAY: {
encoding = ColumnEncoding::DELTA_BYTE_ARRAY;
delta_byte_array_decoder.InitializePage();
break;
}
case Encoding::BYTE_STREAM_SPLIT: {
encoding = ColumnEncoding::BYTE_STREAM_SPLIT;
byte_stream_split_decoder.InitializePage();
break;
}
case Encoding::PLAIN:
// nothing to do here, will be read directly below
encoding = ColumnEncoding::PLAIN;
break;
default:
throw InvalidInputException("Failed to read file \"%s\": Unsupported page encoding", Reader().GetFileName());
}
}
void ColumnReader::BeginRead(data_ptr_t define_out, data_ptr_t repeat_out) {
// we need to reset the location because multiple column readers share the same protocol
auto &trans = reinterpret_cast<ThriftFileTransport &>(*protocol->getTransport());
trans.SetLocation(chunk_read_offset);
// Perform any skips that were not applied yet.
if (define_out && repeat_out) {
ApplyPendingSkips(define_out, repeat_out);
}
}
idx_t ColumnReader::ReadPageHeaders(idx_t max_read, optional_ptr<const TableFilter> filter,
optional_ptr<TableFilterState> filter_state) {
while (page_rows_available == 0) {
PrepareRead(filter, filter_state);
}
return MinValue<idx_t>(MinValue<idx_t>(max_read, page_rows_available), STANDARD_VECTOR_SIZE);
}
bool ColumnReader::PrepareRead(idx_t read_now, data_ptr_t define_out, data_ptr_t repeat_out, idx_t result_offset) {
D_ASSERT(block);
D_ASSERT(read_now + result_offset <= STANDARD_VECTOR_SIZE);
D_ASSERT(!page_is_filtered_out);
if (HasRepeats()) {
D_ASSERT(repeated_decoder);
repeated_decoder->GetBatch<uint8_t>(repeat_out + result_offset, read_now);
}
if (HasDefines()) {
D_ASSERT(defined_decoder);
const auto max_define = NumericCast<uint8_t>(MaxDefine());
if (!HasRepeats() && defined_decoder->HasRepeatedBatch<uint8_t>(read_now, max_define)) {
// Fast path: no repeats and all valid
defined_decoder->GetRepeatedBatch<uint8_t>(read_now, max_define);
return true;
}
defined_decoder->GetBatch<uint8_t>(define_out + result_offset, read_now);
return false;
}
return true; // No defines, so everything is valid
}
void ColumnReader::ReadData(idx_t read_now, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result,
idx_t result_offset) {
// flatten the result vector if required
if (result_offset != 0 && result.GetVectorType() != VectorType::FLAT_VECTOR) {
result.Flatten(result_offset);
result.Resize(result_offset, STANDARD_VECTOR_SIZE);
}
if (page_is_filtered_out) {
// page is filtered out - emit NULL for any rows
auto &validity = FlatVector::Validity(result);
for (idx_t i = 0; i < read_now; i++) {
validity.SetInvalid(result_offset + i);
}
page_rows_available -= read_now;
return;
}
// read the defines/repeats
const auto all_valid = PrepareRead(read_now, define_out, repeat_out, result_offset);
// read the data according to the encoder
const auto define_ptr = all_valid ? nullptr : static_cast<uint8_t *>(define_out);
switch (encoding) {
case ColumnEncoding::DICTIONARY:
dictionary_decoder.Read(define_ptr, read_now, result, result_offset);
break;
case ColumnEncoding::DELTA_BINARY_PACKED:
delta_binary_packed_decoder.Read(define_ptr, read_now, result, result_offset);
break;
case ColumnEncoding::RLE:
rle_decoder.Read(define_ptr, read_now, result, result_offset);
break;
case ColumnEncoding::DELTA_LENGTH_BYTE_ARRAY:
delta_length_byte_array_decoder.Read(block, define_ptr, read_now, result, result_offset);
break;
case ColumnEncoding::DELTA_BYTE_ARRAY:
delta_byte_array_decoder.Read(define_ptr, read_now, result, result_offset);
break;
case ColumnEncoding::BYTE_STREAM_SPLIT:
byte_stream_split_decoder.Read(define_ptr, read_now, result, result_offset);
break;
default:
Plain(block, define_ptr, read_now, result_offset, result);
break;
}
page_rows_available -= read_now;
}
void ColumnReader::FinishRead(idx_t read_count) {
auto &trans = reinterpret_cast<ThriftFileTransport &>(*protocol->getTransport());
chunk_read_offset = trans.GetLocation();
group_rows_available -= read_count;
}
idx_t ColumnReader::ReadInternal(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
idx_t result_offset = 0;
auto to_read = num_values;
D_ASSERT(to_read <= STANDARD_VECTOR_SIZE);
while (to_read > 0) {
auto read_now = ReadPageHeaders(to_read);
ReadData(read_now, define_out, repeat_out, result, result_offset);
result_offset += read_now;
to_read -= read_now;
}
FinishRead(num_values);
return num_values;
}
idx_t ColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
BeginRead(define_out, repeat_out);
return ReadInternal(num_values, define_out, repeat_out, result);
}
void ColumnReader::Select(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out,
const SelectionVector &sel, idx_t approved_tuple_count) {
if (SupportsDirectSelect() && approved_tuple_count < num_values) {
DirectSelect(num_values, define_out, repeat_out, result_out, sel, approved_tuple_count);
return;
}
Read(num_values, define_out, repeat_out, result_out);
}
void ColumnReader::DirectSelect(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result,
const SelectionVector &sel, idx_t approved_tuple_count) {
auto to_read = num_values;
// prepare the first read if we haven't yet
BeginRead(define_out, repeat_out);
auto read_now = ReadPageHeaders(num_values);
// we can only push the filter into the decoder if we are reading the ENTIRE vector in one go
if (read_now == to_read && encoding == ColumnEncoding::PLAIN) {
const auto all_valid = PrepareRead(read_now, define_out, repeat_out, 0);
const auto define_ptr = all_valid ? nullptr : static_cast<uint8_t *>(define_out);
PlainSelect(block, define_ptr, read_now, result, sel, approved_tuple_count);
page_rows_available -= read_now;
FinishRead(num_values);
return;
}
// fallback to regular read + filter
ReadInternal(num_values, define_out, repeat_out, result);
}
void ColumnReader::Filter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result,
const TableFilter &filter, TableFilterState &filter_state, SelectionVector &sel,
idx_t &approved_tuple_count, bool is_first_filter) {
if (SupportsDirectFilter() && is_first_filter) {
DirectFilter(num_values, define_out, repeat_out, result, filter, filter_state, sel, approved_tuple_count);
return;
}
Select(num_values, define_out, repeat_out, result, sel, approved_tuple_count);
ApplyFilter(result, filter, filter_state, num_values, sel, approved_tuple_count);
}
void ColumnReader::DirectFilter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result,
const TableFilter &filter, TableFilterState &filter_state, SelectionVector &sel,
idx_t &approved_tuple_count) {
auto to_read = num_values;
// prepare the first read if we haven't yet
BeginRead(define_out, repeat_out);
auto read_now = ReadPageHeaders(num_values, &filter, &filter_state);
// we can only push the filter into the decoder if we are reading the ENTIRE vector in one go
if (encoding == ColumnEncoding::DICTIONARY && read_now == to_read && dictionary_decoder.HasFilter()) {
if (page_is_filtered_out) {
// the page has been filtered out entirely - skip
approved_tuple_count = 0;
} else {
// Push filter into dictionary directly
// read the defines/repeats
const auto all_valid = PrepareRead(read_now, define_out, repeat_out, 0);
const auto define_ptr = all_valid ? nullptr : static_cast<uint8_t *>(define_out);
dictionary_decoder.Filter(define_ptr, read_now, result, sel, approved_tuple_count);
}
page_rows_available -= read_now;
FinishRead(num_values);
return;
}
// fallback to regular read + filter
ReadInternal(num_values, define_out, repeat_out, result);
ApplyFilter(result, filter, filter_state, num_values, sel, approved_tuple_count);
}
void ColumnReader::ApplyFilter(Vector &v, const TableFilter &filter, TableFilterState &filter_state, idx_t scan_count,
SelectionVector &sel, idx_t &approved_tuple_count) {
UnifiedVectorFormat vdata;
v.ToUnifiedFormat(scan_count, vdata);
ColumnSegment::FilterSelection(sel, v, vdata, filter, filter_state, scan_count, approved_tuple_count);
}
void ColumnReader::Skip(idx_t num_values) {
pending_skips += num_values;
}
void ColumnReader::ApplyPendingSkips(data_ptr_t define_out, data_ptr_t repeat_out) {
if (pending_skips == 0) {
return;
}
idx_t num_values = pending_skips;
pending_skips = 0;
auto to_skip = num_values;
// start reading but do not apply skips (we are skipping now)
BeginRead(nullptr, nullptr);
while (to_skip > 0) {
auto skip_now = ReadPageHeaders(to_skip);
if (page_is_filtered_out) {
// the page has been filtered out entirely - skip
page_rows_available -= skip_now;
to_skip -= skip_now;
continue;
}
const auto all_valid = PrepareRead(skip_now, define_out, repeat_out, 0);
const auto define_ptr = all_valid ? nullptr : static_cast<uint8_t *>(define_out);
switch (encoding) {
case ColumnEncoding::DICTIONARY:
dictionary_decoder.Skip(define_ptr, skip_now);
break;
case ColumnEncoding::DELTA_BINARY_PACKED:
delta_binary_packed_decoder.Skip(define_ptr, skip_now);
break;
case ColumnEncoding::RLE:
rle_decoder.Skip(define_ptr, skip_now);
break;
case ColumnEncoding::DELTA_LENGTH_BYTE_ARRAY:
delta_length_byte_array_decoder.Skip(define_ptr, skip_now);
break;
case ColumnEncoding::DELTA_BYTE_ARRAY:
delta_byte_array_decoder.Skip(define_ptr, skip_now);
break;
case ColumnEncoding::BYTE_STREAM_SPLIT:
byte_stream_split_decoder.Skip(define_ptr, skip_now);
break;
default:
PlainSkip(*block, define_ptr, skip_now);
break;
}
page_rows_available -= skip_now;
to_skip -= skip_now;
}
FinishRead(num_values);
}
//===--------------------------------------------------------------------===//
// Create Column Reader
//===--------------------------------------------------------------------===//
template <class T>
static unique_ptr<ColumnReader> CreateDecimalReader(ParquetReader &reader, const ParquetColumnSchema &schema) {
switch (schema.type.InternalType()) {
case PhysicalType::INT16:
return make_uniq<TemplatedColumnReader<int16_t, TemplatedParquetValueConversion<T>>>(reader, schema);
case PhysicalType::INT32:
return make_uniq<TemplatedColumnReader<int32_t, TemplatedParquetValueConversion<T>>>(reader, schema);
case PhysicalType::INT64:
return make_uniq<TemplatedColumnReader<int64_t, TemplatedParquetValueConversion<T>>>(reader, schema);
case PhysicalType::INT128:
return make_uniq<TemplatedColumnReader<hugeint_t, TemplatedParquetValueConversion<T>>>(reader, schema);
default:
throw NotImplementedException("Unimplemented internal type for CreateDecimalReader");
}
}
unique_ptr<ColumnReader> ColumnReader::CreateReader(ParquetReader &reader, const ParquetColumnSchema &schema) {
switch (schema.type.id()) {
case LogicalTypeId::BOOLEAN:
return make_uniq<BooleanColumnReader>(reader, schema);
case LogicalTypeId::UTINYINT:
return make_uniq<TemplatedColumnReader<uint8_t, TemplatedParquetValueConversion<uint32_t>>>(reader, schema);
case LogicalTypeId::USMALLINT:
return make_uniq<TemplatedColumnReader<uint16_t, TemplatedParquetValueConversion<uint32_t>>>(reader, schema);
case LogicalTypeId::UINTEGER:
return make_uniq<TemplatedColumnReader<uint32_t, TemplatedParquetValueConversion<uint32_t>>>(reader, schema);
case LogicalTypeId::UBIGINT:
return make_uniq<TemplatedColumnReader<uint64_t, TemplatedParquetValueConversion<uint64_t>>>(reader, schema);
case LogicalTypeId::TINYINT:
return make_uniq<TemplatedColumnReader<int8_t, TemplatedParquetValueConversion<int32_t>>>(reader, schema);
case LogicalTypeId::SMALLINT:
return make_uniq<TemplatedColumnReader<int16_t, TemplatedParquetValueConversion<int32_t>>>(reader, schema);
case LogicalTypeId::INTEGER:
return make_uniq<TemplatedColumnReader<int32_t, TemplatedParquetValueConversion<int32_t>>>(reader, schema);
case LogicalTypeId::BIGINT:
return make_uniq<TemplatedColumnReader<int64_t, TemplatedParquetValueConversion<int64_t>>>(reader, schema);
case LogicalTypeId::FLOAT:
if (schema.type_info == ParquetExtraTypeInfo::FLOAT16) {
return make_uniq<CallbackColumnReader<uint16_t, float, Float16ToFloat32>>(reader, schema);
}
return make_uniq<TemplatedColumnReader<float, TemplatedParquetValueConversion<float>>>(reader, schema);
case LogicalTypeId::DOUBLE:
if (schema.type_info == ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY) {
return ParquetDecimalUtils::CreateReader(reader, schema);
}
return make_uniq<TemplatedColumnReader<double, TemplatedParquetValueConversion<double>>>(reader, schema);
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
switch (schema.type_info) {
case ParquetExtraTypeInfo::IMPALA_TIMESTAMP:
return make_uniq<CallbackColumnReader<Int96, timestamp_t, ImpalaTimestampToTimestamp>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_t, ParquetTimestampMsToTimestamp>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MICROS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_t, ParquetTimestampMicrosToTimestamp>>(reader,
schema);
case ParquetExtraTypeInfo::UNIT_NS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_t, ParquetTimestampNsToTimestamp>>(reader, schema);
default:
throw InternalException("TIMESTAMP requires type info");
}
case LogicalTypeId::TIMESTAMP_NS:
switch (schema.type_info) {
case ParquetExtraTypeInfo::IMPALA_TIMESTAMP:
return make_uniq<CallbackColumnReader<Int96, timestamp_ns_t, ImpalaTimestampToTimestampNS>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_ns_t, ParquetTimestampMsToTimestampNs>>(reader,
schema);
case ParquetExtraTypeInfo::UNIT_MICROS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_ns_t, ParquetTimestampUsToTimestampNs>>(reader,
schema);
case ParquetExtraTypeInfo::UNIT_NS:
return make_uniq<CallbackColumnReader<int64_t, timestamp_ns_t, ParquetTimestampNsToTimestampNs>>(reader,
schema);
default:
throw InternalException("TIMESTAMP_NS requires type info");
}
case LogicalTypeId::DATE:
return make_uniq<CallbackColumnReader<int32_t, date_t, ParquetIntToDate>>(reader, schema);
case LogicalTypeId::TIME:
switch (schema.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return make_uniq<CallbackColumnReader<int32_t, dtime_t, ParquetMsIntToTime>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MICROS:
return make_uniq<CallbackColumnReader<int64_t, dtime_t, ParquetIntToTime>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_NS:
return make_uniq<CallbackColumnReader<int64_t, dtime_t, ParquetNsIntToTime>>(reader, schema);
default:
throw InternalException("TIME requires type info");
}
case LogicalTypeId::TIME_NS:
switch (schema.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return make_uniq<CallbackColumnReader<int32_t, dtime_ns_t, ParquetMsIntToTimeNs>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MICROS:
return make_uniq<CallbackColumnReader<int64_t, dtime_ns_t, ParquetUsIntToTimeNs>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_NS:
return make_uniq<CallbackColumnReader<int64_t, dtime_ns_t, ParquetIntToTimeNs>>(reader, schema);
default:
throw InternalException("TIME requires type info");
}
case LogicalTypeId::TIME_TZ:
switch (schema.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return make_uniq<CallbackColumnReader<int32_t, dtime_tz_t, ParquetIntToTimeMsTZ>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_MICROS:
return make_uniq<CallbackColumnReader<int64_t, dtime_tz_t, ParquetIntToTimeTZ>>(reader, schema);
case ParquetExtraTypeInfo::UNIT_NS:
return make_uniq<CallbackColumnReader<int64_t, dtime_tz_t, ParquetIntToTimeNsTZ>>(reader, schema);
default:
throw InternalException("TIME_TZ requires type info");
}
case LogicalTypeId::BLOB:
case LogicalTypeId::VARCHAR:
return make_uniq<StringColumnReader>(reader, schema);
case LogicalTypeId::DECIMAL:
// we have to figure out what kind of int we need
switch (schema.type_info) {
case ParquetExtraTypeInfo::DECIMAL_INT32:
return CreateDecimalReader<int32_t>(reader, schema);
case ParquetExtraTypeInfo::DECIMAL_INT64:
return CreateDecimalReader<int64_t>(reader, schema);
case ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY:
return ParquetDecimalUtils::CreateReader(reader, schema);
default:
throw NotImplementedException("Unrecognized Parquet type for Decimal");
}
break;
case LogicalTypeId::UUID:
return make_uniq<UUIDColumnReader>(reader, schema);
case LogicalTypeId::INTERVAL:
return make_uniq<IntervalColumnReader>(reader, schema);
case LogicalTypeId::SQLNULL:
return make_uniq<NullColumnReader>(reader, schema);
default:
break;
}
throw NotImplementedException(schema.type.ToString());
}
} // namespace duckdb


@@ -0,0 +1,669 @@
#include "column_writer.hpp"
#include "duckdb.hpp"
#include "geo_parquet.hpp"
#include "parquet_rle_bp_decoder.hpp"
#include "parquet_bss_encoder.hpp"
#include "parquet_statistics.hpp"
#include "parquet_writer.hpp"
#include "writer/array_column_writer.hpp"
#include "writer/boolean_column_writer.hpp"
#include "writer/decimal_column_writer.hpp"
#include "writer/enum_column_writer.hpp"
#include "writer/list_column_writer.hpp"
#include "writer/primitive_column_writer.hpp"
#include "writer/struct_column_writer.hpp"
#include "writer/variant_column_writer.hpp"
#include "writer/templated_column_writer.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/operator/comparison_operators.hpp"
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_map_set.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "brotli/encode.h"
#include "lz4.hpp"
#include "miniz_wrapper.hpp"
#include "snappy.h"
#include "zstd.h"
#include <cmath>
namespace duckdb {
using namespace duckdb_parquet; // NOLINT
using namespace duckdb_miniz; // NOLINT
using duckdb_parquet::CompressionCodec;
using duckdb_parquet::ConvertedType;
using duckdb_parquet::Encoding;
using duckdb_parquet::FieldRepetitionType;
using duckdb_parquet::FileMetaData;
using duckdb_parquet::PageHeader;
using duckdb_parquet::PageType;
using ParquetRowGroup = duckdb_parquet::RowGroup;
using duckdb_parquet::Type;
constexpr uint16_t ColumnWriter::PARQUET_DEFINE_VALID;
//===--------------------------------------------------------------------===//
// ColumnWriterStatistics
//===--------------------------------------------------------------------===//
ColumnWriterStatistics::~ColumnWriterStatistics() {
}
bool ColumnWriterStatistics::HasStats() {
return false;
}
string ColumnWriterStatistics::GetMin() {
return string();
}
string ColumnWriterStatistics::GetMax() {
return string();
}
string ColumnWriterStatistics::GetMinValue() {
return string();
}
string ColumnWriterStatistics::GetMaxValue() {
return string();
}
bool ColumnWriterStatistics::CanHaveNaN() {
return false;
}
bool ColumnWriterStatistics::HasNaN() {
return false;
}
bool ColumnWriterStatistics::MinIsExact() {
return true;
}
bool ColumnWriterStatistics::MaxIsExact() {
return true;
}
bool ColumnWriterStatistics::HasGeoStats() {
return false;
}
optional_ptr<GeometryStatsData> ColumnWriterStatistics::GetGeoStats() {
return nullptr;
}
void ColumnWriterStatistics::WriteGeoStats(duckdb_parquet::GeospatialStatistics &stats) {
D_ASSERT(false); // this should never be called
}
//===--------------------------------------------------------------------===//
// ColumnWriter
//===--------------------------------------------------------------------===//
ColumnWriter::ColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
vector<string> schema_path_p, bool can_have_nulls)
: writer(writer), column_schema(column_schema), schema_path(std::move(schema_path_p)),
can_have_nulls(can_have_nulls) {
}
ColumnWriter::~ColumnWriter() {
}
ColumnWriterState::~ColumnWriterState() {
}
void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
AllocatedData &compressed_buf) {
switch (writer.GetCodec()) {
case CompressionCodec::UNCOMPRESSED:
compressed_size = temp_writer.GetPosition();
compressed_data = temp_writer.GetData();
break;
case CompressionCodec::SNAPPY: {
compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(),
char_ptr_cast(compressed_buf.get()), &compressed_size);
compressed_data = compressed_buf.get();
D_ASSERT(compressed_size <= duckdb_snappy::MaxCompressedLength(temp_writer.GetPosition()));
break;
}
case CompressionCodec::LZ4_RAW: {
compressed_size = duckdb_lz4::LZ4_compressBound(UnsafeNumericCast<int32_t>(temp_writer.GetPosition()));
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
compressed_size = duckdb_lz4::LZ4_compress_default(
const_char_ptr_cast(temp_writer.GetData()), char_ptr_cast(compressed_buf.get()),
UnsafeNumericCast<int32_t>(temp_writer.GetPosition()), UnsafeNumericCast<int32_t>(compressed_size));
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::GZIP: {
MiniZStream s;
compressed_size = s.MaxCompressedLength(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
s.Compress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(),
char_ptr_cast(compressed_buf.get()), &compressed_size);
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::ZSTD: {
compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
(const void *)temp_writer.GetData(), temp_writer.GetPosition(),
UnsafeNumericCast<int32_t>(writer.CompressionLevel()));
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::BROTLI: {
compressed_size = duckdb_brotli::BrotliEncoderMaxCompressedSize(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
duckdb_brotli::BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
temp_writer.GetPosition(), temp_writer.GetData(), &compressed_size,
compressed_buf.get());
compressed_data = compressed_buf.get();
break;
}
default:
throw InternalException("Unsupported codec for Parquet Writer");
}
if (compressed_size > idx_t(NumericLimits<int32_t>::Maximum())) {
throw InternalException("Parquet writer: %d compressed page size out of range for type integer",
temp_writer.GetPosition());
}
}
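// Every codec above follows the same pattern: compute a worst-case output bound, allocate a buffer of
// that size, compress into it, and leave compressed_data/compressed_size pointing at the result (the
// UNCOMPRESSED case simply aliases the temporary stream). The final range check exists because the
// Parquet page header stores compressed_page_size as a signed 32-bit integer.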
void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterState *parent, idx_t count) const {
if (!parent) {
// no repeat levels without a parent node
return;
}
if (state.repetition_levels.size() >= parent->repetition_levels.size()) {
return;
}
state.repetition_levels.insert(state.repetition_levels.end(),
parent->repetition_levels.begin() + state.repetition_levels.size(),
parent->repetition_levels.end());
}
void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
const idx_t count, const uint16_t define_value, const uint16_t null_value) const {
if (parent) {
// parent node: inherit definition level from the parent
idx_t vector_index = 0;
while (state.definition_levels.size() < parent->definition_levels.size()) {
idx_t current_index = state.definition_levels.size();
if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) {
//! Inherit nulls from parent
state.definition_levels.push_back(parent->definition_levels[current_index]);
state.parent_null_count++;
} else if (validity.RowIsValid(vector_index)) {
//! Produce a non-null define
state.definition_levels.push_back(define_value);
} else {
//! Produce a null define
if (!can_have_nulls) {
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
}
state.null_count++;
state.definition_levels.push_back(null_value);
}
D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size());
if (parent->is_empty.empty() || !parent->is_empty[current_index]) {
vector_index++;
}
}
return;
}
// no parent: set definition levels only from this validity mask
if (validity.AllValid()) {
state.definition_levels.insert(state.definition_levels.end(), count, define_value);
} else {
for (idx_t i = 0; i < count; i++) {
const auto is_null = !validity.RowIsValid(i);
state.definition_levels.emplace_back(is_null ? null_value : define_value);
state.null_count += is_null;
}
}
if (!can_have_nulls && state.null_count != 0) {
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
}
}
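// A worked example of the definition levels handled above, using standard Parquet semantics (the rows
// are illustrative only): for an OPTIONAL struct s containing an OPTIONAL INTEGER v, the leaf has two
// levels of optionality. A row with v = 7 produces definition level 2 (everything defined), a row with
// s = {v: NULL} produces level 1 (the null_value path for the leaf), and a row where s itself is NULL
// produces level 0, which is the level this function inherits from the parent writer state.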
//===--------------------------------------------------------------------===//
// Create Column Writer
//===--------------------------------------------------------------------===//
ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
const LogicalType &type, const string &name, bool allow_geometry,
optional_ptr<const ChildFieldIDs> field_ids,
optional_ptr<const ShreddingType> shredding_types, idx_t max_repeat,
idx_t max_define, bool can_have_nulls) {
auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
if (!can_have_nulls) {
max_define--;
}
idx_t schema_idx = schemas.size();
optional_ptr<const FieldID> field_id;
optional_ptr<const ChildFieldIDs> child_field_ids;
if (field_ids) {
auto field_id_it = field_ids->ids->find(name);
if (field_id_it != field_ids->ids->end()) {
field_id = &field_id_it->second;
child_field_ids = &field_id->child_field_ids;
}
}
optional_ptr<const ShreddingType> shredding_type;
if (shredding_types) {
shredding_type = shredding_types->GetChild(name);
}
if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
// variant type
// variants are stored as follows:
// group <name> VARIANT {
// metadata BYTE_ARRAY,
// value BYTE_ARRAY,
// [<typed_value>]
// }
const bool is_shredded = shredding_type != nullptr;
child_list_t<LogicalType> child_types;
child_types.emplace_back("metadata", LogicalType::BLOB);
child_types.emplace_back("value", LogicalType::BLOB);
if (is_shredded) {
auto &typed_value_type = shredding_type->type;
if (typed_value_type.id() != LogicalTypeId::ANY) {
child_types.emplace_back("typed_value",
VariantColumnWriter::TransformTypedValueRecursive(typed_value_type));
}
}
// variant group
duckdb_parquet::SchemaElement top_element;
top_element.repetition_type = null_type;
top_element.num_children = child_types.size();
top_element.logicalType.__isset.VARIANT = true;
top_element.logicalType.VARIANT.__isset.specification_version = true;
top_element.logicalType.VARIANT.specification_version = 1;
top_element.__isset.logicalType = true;
top_element.__isset.num_children = true;
top_element.__isset.repetition_type = true;
top_element.name = name;
schemas.push_back(std::move(top_element));
ParquetColumnSchema variant_column(name, type, max_define, max_repeat, schema_idx, 0);
variant_column.children.reserve(child_types.size());
for (auto &child_type : child_types) {
auto &child_name = child_type.first;
bool is_optional;
if (child_name == "metadata") {
is_optional = false;
} else if (child_name == "value") {
if (is_shredded) {
//! When shredding the variant, the 'value' becomes optional
is_optional = true;
} else {
is_optional = false;
}
} else {
D_ASSERT(child_name == "typed_value");
is_optional = true;
}
variant_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
allow_geometry, child_field_ids, shredding_type,
max_repeat, max_define + 1, is_optional));
}
return variant_column;
}
if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
auto &child_types = StructType::GetChildTypes(type);
// set up the schema element for this struct
duckdb_parquet::SchemaElement schema_element;
schema_element.repetition_type = null_type;
schema_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
schema_element.__isset.num_children = true;
schema_element.__isset.type = false;
schema_element.__isset.repetition_type = true;
schema_element.name = name;
if (field_id && field_id->set) {
schema_element.__isset.field_id = true;
schema_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(schema_element));
ParquetColumnSchema struct_column(name, type, max_define, max_repeat, schema_idx, 0);
// construct the child schemas recursively
struct_column.children.reserve(child_types.size());
for (auto &child_type : child_types) {
struct_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
allow_geometry, child_field_ids, shredding_type,
max_repeat, max_define + 1, true));
}
return struct_column;
}
if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
auto is_list = type.id() == LogicalTypeId::LIST;
auto &child_type = is_list ? ListType::GetChildType(type) : ArrayType::GetChildType(type);
// set up the two schema elements for the list
// for some reason we only set the converted type in the OPTIONAL element
// first an OPTIONAL element
duckdb_parquet::SchemaElement optional_element;
optional_element.repetition_type = null_type;
optional_element.num_children = 1;
optional_element.converted_type = ConvertedType::LIST;
optional_element.__isset.num_children = true;
optional_element.__isset.type = false;
optional_element.__isset.repetition_type = true;
optional_element.__isset.converted_type = true;
optional_element.name = name;
if (field_id && field_id->set) {
optional_element.__isset.field_id = true;
optional_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(optional_element));
// then a REPEATED element
duckdb_parquet::SchemaElement repeated_element;
repeated_element.repetition_type = FieldRepetitionType::REPEATED;
repeated_element.num_children = 1;
repeated_element.__isset.num_children = true;
repeated_element.__isset.type = false;
repeated_element.__isset.repetition_type = true;
repeated_element.name = "list";
schemas.push_back(std::move(repeated_element));
ParquetColumnSchema list_column(name, type, max_define, max_repeat, schema_idx, 0);
list_column.children.push_back(FillParquetSchema(schemas, child_type, "element", allow_geometry,
child_field_ids, shredding_type, max_repeat + 1,
max_define + 2, true));
return list_column;
}
if (type.id() == LogicalTypeId::MAP) {
// map type
// maps are stored as follows:
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
// top map element
duckdb_parquet::SchemaElement top_element;
top_element.repetition_type = null_type;
top_element.num_children = 1;
top_element.converted_type = ConvertedType::MAP;
top_element.__isset.repetition_type = true;
top_element.__isset.num_children = true;
top_element.__isset.converted_type = true;
top_element.__isset.type = false;
top_element.name = name;
if (field_id && field_id->set) {
top_element.__isset.field_id = true;
top_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(top_element));
// key_value element
duckdb_parquet::SchemaElement kv_element;
kv_element.repetition_type = FieldRepetitionType::REPEATED;
kv_element.num_children = 2;
kv_element.__isset.repetition_type = true;
kv_element.__isset.num_children = true;
kv_element.__isset.type = false;
kv_element.name = "key_value";
schemas.push_back(std::move(kv_element));
// construct the child types recursively
vector<LogicalType> kv_types {MapType::KeyType(type), MapType::ValueType(type)};
vector<string> kv_names {"key", "value"};
ParquetColumnSchema map_column(name, type, max_define, max_repeat, schema_idx, 0);
map_column.children.reserve(2);
for (idx_t i = 0; i < 2; i++) {
// key needs to be marked as REQUIRED
bool is_key = i == 0;
auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], allow_geometry, child_field_ids,
shredding_type, max_repeat + 1, max_define + 2, !is_key);
map_column.children.push_back(std::move(child_schema));
}
return map_column;
}
duckdb_parquet::SchemaElement schema_element;
schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
schema_element.repetition_type = null_type;
schema_element.__isset.num_children = false;
schema_element.__isset.type = true;
schema_element.__isset.repetition_type = true;
schema_element.name = name;
if (field_id && field_id->set) {
schema_element.__isset.field_id = true;
schema_element.field_id = field_id->field_id;
}
ParquetWriter::SetSchemaProperties(type, schema_element, allow_geometry);
schemas.push_back(std::move(schema_element));
return ParquetColumnSchema(name, type, max_define, max_repeat, schema_idx, 0);
}
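// As a hedged illustration of the fallthrough case above: a nullable INTEGER column named "i" becomes a
// single leaf SchemaElement corresponding to `optional int32 i;` in Parquet schema notation, whereas the
// nested branches earlier in this function emit the multi-element group layouts (for example the MAP and
// VARIANT layouts sketched in their comments).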
unique_ptr<ColumnWriter>
ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
const vector<duckdb_parquet::SchemaElement> &parquet_schemas,
const ParquetColumnSchema &schema, vector<string> path_in_schema) {
auto &type = schema.type;
auto can_have_nulls = parquet_schemas[schema.schema_index].repetition_type == FieldRepetitionType::OPTIONAL;
path_in_schema.push_back(schema.name);
if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(schema.children.size());
for (idx_t i = 0; i < schema.children.size(); i++) {
child_writers.push_back(
CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema));
}
return make_uniq<VariantColumnWriter>(writer, schema, path_in_schema, std::move(child_writers), can_have_nulls);
}
if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
// construct the child writers recursively
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(schema.children.size());
for (auto &child_column : schema.children) {
child_writers.push_back(
CreateWriterRecursive(context, writer, parquet_schemas, child_column, path_in_schema));
}
return make_uniq<StructColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writers),
can_have_nulls);
}
if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
auto is_list = type.id() == LogicalTypeId::LIST;
path_in_schema.push_back("list");
auto child_writer = CreateWriterRecursive(context, writer, parquet_schemas, schema.children[0], path_in_schema);
if (is_list) {
return make_uniq<ListColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
can_have_nulls);
} else {
return make_uniq<ArrayColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
can_have_nulls);
}
}
if (type.id() == LogicalTypeId::MAP) {
path_in_schema.push_back("key_value");
// construct the child types recursively
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(2);
for (idx_t i = 0; i < 2; i++) {
// key needs to be marked as REQUIRED
auto child_writer =
CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema);
child_writers.push_back(std::move(child_writer));
}
auto struct_writer =
make_uniq<StructColumnWriter>(writer, schema, path_in_schema, std::move(child_writers), can_have_nulls);
return make_uniq<ListColumnWriter>(writer, schema, path_in_schema, std::move(struct_writer), can_have_nulls);
}
if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB") {
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetGeometryOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
}
switch (type.id()) {
case LogicalTypeId::BOOLEAN:
return make_uniq<BooleanColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TINYINT:
return make_uniq<StandardColumnWriter<int8_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::SMALLINT:
return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::INTEGER:
case LogicalTypeId::DATE:
return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::BIGINT:
case LogicalTypeId::TIME:
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
case LogicalTypeId::TIMESTAMP_MS:
return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::TIME_TZ:
return make_uniq<StandardColumnWriter<dtime_tz_t, int64_t, ParquetTimeTZOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::HUGEINT:
return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UHUGEINT:
return make_uniq<StandardColumnWriter<uhugeint_t, double, ParquetUhugeintOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TIMESTAMP_NS:
return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampNSOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TIMESTAMP_SEC:
return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampSOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UTINYINT:
return make_uniq<StandardColumnWriter<uint8_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::USMALLINT:
return make_uniq<StandardColumnWriter<uint16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::UINTEGER:
return make_uniq<StandardColumnWriter<uint32_t, uint32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::UBIGINT:
return make_uniq<StandardColumnWriter<uint64_t, uint64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::FLOAT:
return make_uniq<StandardColumnWriter<float_na_equal, float, FloatingPointOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::DOUBLE:
return make_uniq<StandardColumnWriter<double_na_equal, double, FloatingPointOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::DECIMAL:
switch (type.InternalType()) {
case PhysicalType::INT16:
return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case PhysicalType::INT32:
return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case PhysicalType::INT64:
return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
default:
return make_uniq<FixedDecimalColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
}
case LogicalTypeId::BLOB:
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetBlobOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::VARCHAR:
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UUID:
return make_uniq<StandardColumnWriter<hugeint_t, ParquetUUIDTargetType, ParquetUUIDOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::INTERVAL:
return make_uniq<StandardColumnWriter<interval_t, ParquetIntervalTargetType, ParquetIntervalOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::ENUM:
return make_uniq<EnumColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
default:
throw InternalException("Unsupported type \"%s\" in Parquet writer", type.ToString());
}
}
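// float_na_equal and double_na_equal wrap float/double with NaN-aware equality; the specializations
// below let the templated writer code treat them like the plain floating-point types
// (e.g. for min/max statistics and hashing).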
template <>
struct NumericLimits<float_na_equal> {
static constexpr float Minimum() {
return std::numeric_limits<float>::lowest();
};
static constexpr float Maximum() {
return std::numeric_limits<float>::max();
};
static constexpr bool IsSigned() {
return std::is_signed<float>::value;
}
static constexpr bool IsIntegral() {
return std::is_integral<float>::value;
}
};
template <>
struct NumericLimits<double_na_equal> {
static constexpr double Minimum() {
return std::numeric_limits<double>::lowest();
};
static constexpr double Maximum() {
return std::numeric_limits<double>::max();
};
static constexpr bool IsSigned() {
return std::is_signed<double>::value;
}
static constexpr bool IsIntegral() {
return std::is_integral<double>::value;
}
};
template <>
hash_t Hash(ParquetIntervalTargetType val) {
return Hash(const_char_ptr_cast(val.bytes), ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
}
template <>
hash_t Hash(ParquetUUIDTargetType val) {
return Hash(const_char_ptr_cast(val.bytes), ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}
template <>
hash_t Hash(float_na_equal val) {
if (std::isnan(val.val)) {
return Hash<float>(std::numeric_limits<float>::quiet_NaN());
}
return Hash<float>(val.val);
}
template <>
hash_t Hash(double_na_equal val) {
if (std::isnan(val.val)) {
return Hash<double>(std::numeric_limits<double>::quiet_NaN());
}
return Hash<double>(val.val);
}
} // namespace duckdb

View File

@@ -0,0 +1,12 @@
add_library_unity(
duckdb_parquet_decoders
OBJECT
byte_stream_split_decoder.cpp
delta_binary_packed_decoder.cpp
delta_byte_array_decoder.cpp
delta_length_byte_array_decoder.cpp
dictionary_decoder.cpp
rle_decoder.cpp)
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_parquet_decoders>
PARENT_SCOPE)

View File

@@ -0,0 +1,54 @@
#include "decoder/byte_stream_split_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
ByteStreamSplitDecoder::ByteStreamSplitDecoder(ColumnReader &reader)
: reader(reader), decoded_data_buffer(reader.encoding_buffers[0]) {
}
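// BYTE_STREAM_SPLIT stores the k-th byte of every value in its own contiguous stream
// (k streams for k-byte FLOAT/DOUBLE values); the BssDecoder interleaves the streams
// back into complete values when reading.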
void ByteStreamSplitDecoder::InitializePage() {
auto &block = reader.block;
// Subtract 1 from the length: the block is allocated with 1 extra byte,
// but the byte stream split decoder needs to know the exact data size.
bss_decoder = make_uniq<BssDecoder>(block->ptr, block->len - 1);
block->inc(block->len);
}
void ByteStreamSplitDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
idx_t valid_count = reader.GetValidCount(defines, read_count, result_offset);
auto &allocator = reader.reader.allocator;
decoded_data_buffer.reset();
switch (reader.Schema().parquet_type) {
case duckdb_parquet::Type::FLOAT:
decoded_data_buffer.resize(allocator, sizeof(float) * valid_count);
bss_decoder->GetBatch<float>(decoded_data_buffer.ptr, valid_count);
break;
case duckdb_parquet::Type::DOUBLE:
decoded_data_buffer.resize(allocator, sizeof(double) * valid_count);
bss_decoder->GetBatch<double>(decoded_data_buffer.ptr, valid_count);
break;
default:
throw std::runtime_error("BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data");
}
reader.Plain(decoded_data_buffer, defines, read_count, result_offset, result);
}
void ByteStreamSplitDecoder::Skip(uint8_t *defines, idx_t skip_count) {
idx_t valid_count = reader.GetValidCount(defines, skip_count);
switch (reader.Schema().parquet_type) {
case duckdb_parquet::Type::FLOAT:
bss_decoder->Skip<float>(valid_count);
break;
case duckdb_parquet::Type::DOUBLE:
bss_decoder->Skip<double>(valid_count);
break;
default:
throw std::runtime_error("BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data");
}
}
} // namespace duckdb

View File

@@ -0,0 +1,54 @@
#include "decoder/delta_binary_packed_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
DeltaBinaryPackedDecoder::DeltaBinaryPackedDecoder(ColumnReader &reader)
: reader(reader), decoded_data_buffer(reader.encoding_buffers[0]) {
}
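// DELTA_BINARY_PACKED stores a first value followed by blocks of zigzag-encoded deltas that are
// bit-packed per miniblock; the encoding is only defined for INT32 and INT64 columns.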
void DeltaBinaryPackedDecoder::InitializePage() {
auto &block = reader.block;
dbp_decoder = make_uniq<DbpDecoder>(block->ptr, block->len);
block->inc(block->len);
}
void DeltaBinaryPackedDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
idx_t valid_count = reader.GetValidCount(defines, read_count, result_offset);
auto &allocator = reader.reader.allocator;
decoded_data_buffer.reset();
switch (reader.Schema().parquet_type) {
case duckdb_parquet::Type::INT32:
decoded_data_buffer.resize(allocator, sizeof(int32_t) * (valid_count));
dbp_decoder->GetBatch<int32_t>(decoded_data_buffer.ptr, valid_count);
break;
case duckdb_parquet::Type::INT64:
decoded_data_buffer.resize(allocator, sizeof(int64_t) * (valid_count));
dbp_decoder->GetBatch<int64_t>(decoded_data_buffer.ptr, valid_count);
break;
default:
throw std::runtime_error("DELTA_BINARY_PACKED should only be INT32 or INT64");
}
// Plain() will put NULLs in the right place
reader.Plain(decoded_data_buffer, defines, read_count, result_offset, result);
}
void DeltaBinaryPackedDecoder::Skip(uint8_t *defines, idx_t skip_count) {
idx_t valid_count = reader.GetValidCount(defines, skip_count);
switch (reader.Schema().parquet_type) {
case duckdb_parquet::Type::INT32:
dbp_decoder->Skip<int32_t>(valid_count);
break;
case duckdb_parquet::Type::INT64:
dbp_decoder->Skip<int64_t>(valid_count);
break;
default:
throw std::runtime_error("DELTA_BINARY_PACKED should only be INT32 or INT64");
}
}
} // namespace duckdb

View File

@@ -0,0 +1,103 @@
#include "decoder/delta_byte_array_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
DeltaByteArrayDecoder::DeltaByteArrayDecoder(ColumnReader &reader) : reader(reader) {
}
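// DELTA_BYTE_ARRAY encodes each string as a prefix length (bytes shared with the previous string)
// plus a suffix: the prefix lengths and suffix lengths are both DELTA_BINARY_PACKED, followed by the
// concatenated suffix bytes. InitializePage materializes the full strings up front.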
void DeltaByteArrayDecoder::ReadDbpData(Allocator &allocator, ResizeableBuffer &buffer, ResizeableBuffer &result_buffer,
idx_t &value_count) {
auto decoder = make_uniq<DbpDecoder>(buffer.ptr, buffer.len);
value_count = decoder->TotalValues();
result_buffer.reset();
result_buffer.resize(allocator, sizeof(uint32_t) * value_count);
decoder->GetBatch<uint32_t>(result_buffer.ptr, value_count);
decoder->Finalize();
buffer.inc(buffer.len - decoder->BufferPtr().len);
}
void DeltaByteArrayDecoder::InitializePage() {
if (reader.Type().InternalType() != PhysicalType::VARCHAR) {
throw std::runtime_error("Delta Byte Array encoding is only supported for string/blob data");
}
auto &block = *reader.block;
auto &allocator = reader.reader.allocator;
idx_t prefix_count, suffix_count;
auto &prefix_buffer = reader.encoding_buffers[0];
auto &suffix_buffer = reader.encoding_buffers[1];
ReadDbpData(allocator, block, prefix_buffer, prefix_count);
ReadDbpData(allocator, block, suffix_buffer, suffix_count);
if (prefix_count != suffix_count) {
throw std::runtime_error("DELTA_BYTE_ARRAY - prefix and suffix counts are different - corrupt file?");
}
if (prefix_count == 0) {
// no values
byte_array_data = make_uniq<Vector>(LogicalType::VARCHAR, nullptr);
return;
}
auto prefix_data = reinterpret_cast<uint32_t *>(prefix_buffer.ptr);
auto suffix_data = reinterpret_cast<uint32_t *>(suffix_buffer.ptr);
byte_array_data = make_uniq<Vector>(LogicalType::VARCHAR, prefix_count);
byte_array_count = prefix_count;
delta_offset = 0;
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
for (idx_t i = 0; i < prefix_count; i++) {
auto str_len = prefix_data[i] + suffix_data[i];
block.available(suffix_data[i]);
string_data[i] = StringVector::EmptyString(*byte_array_data, str_len);
auto result_data = string_data[i].GetDataWriteable();
if (prefix_data[i] > 0) {
if (i == 0 || prefix_data[i] > string_data[i - 1].GetSize()) {
throw std::runtime_error("DELTA_BYTE_ARRAY - prefix is out of range - corrupt file?");
}
memcpy(result_data, string_data[i - 1].GetData(), prefix_data[i]);
}
memcpy(result_data + prefix_data[i], block.ptr, suffix_data[i]);
block.inc(suffix_data[i]);
string_data[i].Finalize();
}
}
void DeltaByteArrayDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
if (!byte_array_data) {
throw std::runtime_error("Internal error - DeltaByteArray called but there was no byte_array_data set");
}
auto result_ptr = FlatVector::GetData<string_t>(result);
auto &result_mask = FlatVector::Validity(result);
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
for (idx_t row_idx = 0; row_idx < read_count; row_idx++) {
if (defines && defines[row_idx + result_offset] != reader.MaxDefine()) {
result_mask.SetInvalid(row_idx + result_offset);
continue;
}
if (delta_offset >= byte_array_count) {
throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
delta_offset + 1, byte_array_count);
}
result_ptr[row_idx + result_offset] = string_data[delta_offset++];
}
StringVector::AddHeapReference(result, *byte_array_data);
}
void DeltaByteArrayDecoder::Skip(uint8_t *defines, idx_t skip_count) {
if (!byte_array_data) {
throw std::runtime_error("Internal error - DeltaByteArray called but there was no byte_array_data set");
}
for (idx_t row_idx = 0; row_idx < skip_count; row_idx++) {
if (defines && defines[row_idx] != reader.MaxDefine()) {
continue;
}
if (delta_offset >= byte_array_count) {
throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
delta_offset + 1, byte_array_count);
}
delta_offset++;
}
}
} // namespace duckdb

View File

@@ -0,0 +1,128 @@
#include "decoder/delta_length_byte_array_decoder.hpp"
#include "decoder/delta_byte_array_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
#include "reader/string_column_reader.hpp"
#include "utf8proc_wrapper.hpp"
namespace duckdb {
DeltaLengthByteArrayDecoder::DeltaLengthByteArrayDecoder(ColumnReader &reader)
: reader(reader), length_buffer(reader.encoding_buffers[0]), length_idx(0) {
}
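// DELTA_LENGTH_BYTE_ARRAY stores all string lengths (DELTA_BINARY_PACKED) up front, followed by the
// concatenated string bytes; because the bytes are contiguous, the reader can reference the page
// buffer directly instead of copying every string.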
void DeltaLengthByteArrayDecoder::InitializePage() {
if (reader.Type().InternalType() != PhysicalType::VARCHAR) {
throw std::runtime_error("Delta Length Byte Array encoding is only supported for string/blob data");
}
// read the binary packed lengths
auto &block = *reader.block;
auto &allocator = reader.reader.allocator;
DeltaByteArrayDecoder::ReadDbpData(allocator, block, length_buffer, byte_array_count);
// Verify that the sum of DBP string lengths match up with the available string data
idx_t total_string_length = 0;
const auto length_data = reinterpret_cast<uint32_t *>(length_buffer.ptr);
for (idx_t i = 0; i < byte_array_count; i++) {
total_string_length += length_data[i];
}
block.available(total_string_length);
length_idx = 0;
}
void DeltaLengthByteArrayDecoder::Read(shared_ptr<ResizeableBuffer> &block_ref, uint8_t *defines, idx_t read_count,
Vector &result, idx_t result_offset) {
if (defines) {
ReadInternal<true>(block_ref, defines, read_count, result, result_offset);
} else {
ReadInternal<false>(block_ref, defines, read_count, result, result_offset);
}
}
template <bool HAS_DEFINES>
void DeltaLengthByteArrayDecoder::ReadInternal(shared_ptr<ResizeableBuffer> &block_ref, uint8_t *const defines,
const idx_t read_count, Vector &result, const idx_t result_offset) {
auto &block = *block_ref;
const auto length_data = reinterpret_cast<uint32_t *>(length_buffer.ptr);
auto result_data = FlatVector::GetData<string_t>(result);
auto &result_mask = FlatVector::Validity(result);
if (!HAS_DEFINES) {
// Fast path: take this out of the loop below
if (length_idx + read_count > byte_array_count) {
throw IOException(
"DELTA_LENGTH_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
length_idx + read_count, byte_array_count);
}
}
const auto start_ptr = block.ptr;
for (idx_t row_idx = 0; row_idx < read_count; row_idx++) {
const auto result_idx = result_offset + row_idx;
if (HAS_DEFINES) {
if (defines[result_idx] != reader.MaxDefine()) {
result_mask.SetInvalid(result_idx);
continue;
}
if (length_idx >= byte_array_count) {
throw IOException(
"DELTA_LENGTH_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
length_idx, byte_array_count);
}
}
const auto &str_len = length_data[length_idx++];
result_data[result_idx] = string_t(char_ptr_cast(block.ptr), str_len);
block.unsafe_inc(str_len);
}
// Verify that the strings we read are valid UTF-8
reader.Cast<StringColumnReader>().VerifyString(char_ptr_cast(start_ptr), block.ptr - start_ptr);
StringColumnReader::ReferenceBlock(result, block_ref);
}
void DeltaLengthByteArrayDecoder::Skip(uint8_t *defines, idx_t skip_count) {
if (defines) {
SkipInternal<true>(defines, skip_count);
} else {
SkipInternal<false>(defines, skip_count);
}
}
template <bool HAS_DEFINES>
void DeltaLengthByteArrayDecoder::SkipInternal(uint8_t *defines, idx_t skip_count) {
auto &block = *reader.block;
const auto length_data = reinterpret_cast<uint32_t *>(length_buffer.ptr);
if (!HAS_DEFINES) {
// Fast path: take this out of the loop below
if (length_idx + skip_count > byte_array_count) {
throw IOException(
"DELTA_LENGTH_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
length_idx + skip_count, byte_array_count);
}
}
idx_t skip_bytes = 0;
for (idx_t row_idx = 0; row_idx < skip_count; row_idx++) {
if (HAS_DEFINES) {
if (defines[row_idx] != reader.MaxDefine()) {
continue;
}
if (length_idx >= byte_array_count) {
throw IOException(
"DELTA_LENGTH_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted "
"read of %d from %d entries) - corrupt file?",
length_idx, byte_array_count);
}
}
skip_bytes += length_data[length_idx++];
}
block.inc(skip_bytes);
}
} // namespace duckdb

View File

@@ -0,0 +1,229 @@
#include "decoder/dictionary_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/planner/filter/expression_filter.hpp"
#include "duckdb/planner/table_filter_state.hpp"
namespace duckdb {
DictionaryDecoder::DictionaryDecoder(ColumnReader &reader)
: reader(reader), offset_buffer(reader.encoding_buffers[0]), valid_sel(STANDARD_VECTOR_SIZE),
dictionary_selection_vector(STANDARD_VECTOR_SIZE), dictionary_size(0) {
}
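// The dictionary page is decoded into a reusable DuckDB dictionary vector; data pages then only
// contain RLE/bit-packed indices into it, which also allows table filters to be evaluated once per
// dictionary entry instead of once per row (see InitializeDictionary and Filter below).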
void DictionaryDecoder::InitializeDictionary(idx_t new_dictionary_size, optional_ptr<const TableFilter> filter,
optional_ptr<TableFilterState> filter_state, bool has_defines) {
dictionary_size = new_dictionary_size;
filter_result.reset();
filter_count = 0;
can_have_nulls = has_defines;
// we append one extra entry at the end to represent NULL, since rows read through a dictionary
// vector do not carry a separate validity mask of their own
const auto duckdb_dictionary_size = dictionary_size + can_have_nulls;
dictionary = DictionaryVector::CreateReusableDictionary(reader.Type(), duckdb_dictionary_size);
auto &dict_validity = FlatVector::Validity(dictionary->data);
dict_validity.Reset(duckdb_dictionary_size);
if (can_have_nulls) {
dict_validity.SetInvalid(dictionary_size);
}
// now read the non-NULL values from Parquet
reader.Plain(reader.block, nullptr, dictionary_size, 0, dictionary->data);
// immediately filter the dictionary, if applicable
if (filter && CanFilter(*filter, *filter_state)) {
// no filter result yet - apply filter to the dictionary
// initialize the filter result - setting everything to false
filter_result = make_unsafe_uniq_array<bool>(duckdb_dictionary_size);
// apply the filter
UnifiedVectorFormat vdata;
dictionary->data.ToUnifiedFormat(duckdb_dictionary_size, vdata);
SelectionVector dict_sel;
filter_count = duckdb_dictionary_size;
ColumnSegment::FilterSelection(dict_sel, dictionary->data, vdata, *filter, *filter_state,
duckdb_dictionary_size, filter_count);
// now set all matching tuples to true
for (idx_t i = 0; i < filter_count; i++) {
auto idx = dict_sel.get_index(i);
filter_result[idx] = true;
}
}
}
void DictionaryDecoder::InitializePage() {
// the bit width of the dictionary indices is stored as the first byte of the data page
auto &block = reader.block;
auto dict_width = block->read<uint8_t>();
dict_decoder = make_uniq<RleBpDecoder>(block->ptr, block->len, dict_width);
block->inc(block->len);
}
void DictionaryDecoder::ConvertDictToSelVec(uint32_t *offsets, const SelectionVector &rows, idx_t count) {
D_ASSERT(count <= STANDARD_VECTOR_SIZE);
for (idx_t idx = 0; idx < count; idx++) {
auto row_idx = rows.get_index(idx);
auto offset = offsets[idx];
if (offset >= dictionary_size) {
throw std::runtime_error("Parquet file is likely corrupted, dictionary offset out of range");
}
dictionary_selection_vector.set_index(row_idx, offset);
}
}
idx_t DictionaryDecoder::GetValidValues(uint8_t *defines, idx_t read_count, idx_t result_offset) {
idx_t valid_count = read_count;
if (defines) {
D_ASSERT(can_have_nulls);
valid_count = 0;
for (idx_t i = 0; i < read_count; i++) {
valid_sel.set_index(valid_count, i);
dictionary_selection_vector.set_index(i, dictionary_size);
valid_count += defines[result_offset + i] == reader.MaxDefine();
}
}
return valid_count;
}
idx_t DictionaryDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
if (!dictionary || dictionary_size < 0) {
throw std::runtime_error("Parquet file is likely corrupted, missing dictionary");
}
idx_t valid_count = GetValidValues(defines, read_count, result_offset);
if (valid_count == read_count) {
// all values are valid - we can directly decompress the offsets into the selection vector
dict_decoder->GetBatch<uint32_t>(data_ptr_cast(dictionary_selection_vector.data()),
NumericCast<uint32_t>(valid_count));
// we do still need to verify the offsets though
uint32_t max_index = 0;
for (idx_t idx = 0; idx < valid_count; idx++) {
max_index = MaxValue(max_index, dictionary_selection_vector[idx]);
}
if (max_index >= dictionary_size) {
throw std::runtime_error("Parquet file is likely corrupted, dictionary offset out of range");
}
} else if (valid_count > 0) {
// for the valid entries - decode the offsets
offset_buffer.resize(reader.reader.allocator, sizeof(uint32_t) * valid_count);
dict_decoder->GetBatch<uint32_t>(offset_buffer.ptr, NumericCast<uint32_t>(valid_count));
ConvertDictToSelVec(reinterpret_cast<uint32_t *>(offset_buffer.ptr), valid_sel, valid_count);
}
#ifdef DEBUG
dictionary_selection_vector.Verify(read_count, dictionary_size + can_have_nulls);
#endif
if (result_offset == 0) {
result.Dictionary(dictionary, dictionary_selection_vector);
D_ASSERT(result.GetVectorType() == VectorType::DICTIONARY_VECTOR);
} else {
D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
VectorOperations::Copy(dictionary->data, result, dictionary_selection_vector, read_count, 0, result_offset);
}
return valid_count;
}
void DictionaryDecoder::Skip(uint8_t *defines, idx_t skip_count) {
if (!dictionary || dictionary_size < 0) {
throw std::runtime_error("Parquet file is likely corrupted, missing dictionary");
}
idx_t valid_count = reader.GetValidCount(defines, skip_count);
// skip past the valid offsets
dict_decoder->Skip(NumericCast<uint32_t>(valid_count));
}
bool DictionaryDecoder::DictionarySupportsFilter(const TableFilter &filter, TableFilterState &filter_state) {
switch (filter.filter_type) {
case TableFilterType::CONJUNCTION_OR: {
auto &conjunction = filter.Cast<ConjunctionOrFilter>();
auto &state = filter_state.Cast<ConjunctionOrFilterState>();
for (idx_t child_idx = 0; child_idx < conjunction.child_filters.size(); child_idx++) {
auto &child_filter = *conjunction.child_filters[child_idx];
auto &child_state = *state.child_states[child_idx];
if (!DictionarySupportsFilter(child_filter, child_state)) {
return false;
}
}
return true;
}
case TableFilterType::CONJUNCTION_AND: {
auto &conjunction = filter.Cast<ConjunctionAndFilter>();
auto &state = filter_state.Cast<ConjunctionAndFilterState>();
for (idx_t child_idx = 0; child_idx < conjunction.child_filters.size(); child_idx++) {
auto &child_filter = *conjunction.child_filters[child_idx];
auto &child_state = *state.child_states[child_idx];
if (!DictionarySupportsFilter(child_filter, child_state)) {
return false;
}
}
return true;
}
case TableFilterType::CONSTANT_COMPARISON:
case TableFilterType::IS_NOT_NULL:
return true;
case TableFilterType::EXPRESSION_FILTER: {
// expression filters can only be pushed into the dictionary if they filter out NULL values
auto &expr_filter = filter.Cast<ExpressionFilter>();
auto &state = filter_state.Cast<ExpressionFilterState>();
auto emits_nulls = expr_filter.EvaluateWithConstant(state.executor, Value(reader.Type()));
return !emits_nulls;
}
case TableFilterType::IS_NULL:
case TableFilterType::DYNAMIC_FILTER:
case TableFilterType::OPTIONAL_FILTER:
case TableFilterType::STRUCT_EXTRACT:
default:
return false;
}
}
bool DictionaryDecoder::CanFilter(const TableFilter &filter, TableFilterState &filter_state) {
if (dictionary_size == 0) {
return false;
}
// We can only push the filter if the filter removes NULL values
if (!DictionarySupportsFilter(filter, filter_state)) {
return false;
}
return true;
}
void DictionaryDecoder::Filter(uint8_t *defines, const idx_t read_count, Vector &result, SelectionVector &sel,
idx_t &approved_tuple_count) {
if (!dictionary || dictionary_size < 0) {
throw std::runtime_error("Parquet file is likely corrupted, missing dictionary");
}
D_ASSERT(filter_count > 0);
// read the dictionary values
const auto valid_count = Read(defines, read_count, result, 0);
if (valid_count == 0) {
// all values are NULL
approved_tuple_count = 0;
return;
}
// apply the filter by checking the dictionary offsets directly
uint32_t *offsets;
if (valid_count == read_count) {
offsets = dictionary_selection_vector.data();
} else {
offsets = reinterpret_cast<uint32_t *>(offset_buffer.ptr);
}
D_ASSERT(offsets);
SelectionVector new_sel(valid_count);
approved_tuple_count = 0;
for (idx_t idx = 0; idx < valid_count; idx++) {
auto row_idx = valid_count == read_count ? idx : valid_sel.get_index(idx);
auto offset = offsets[idx];
if (!filter_result[offset]) {
// does not pass the filter
continue;
}
new_sel.set_index(approved_tuple_count++, row_idx);
}
if (approved_tuple_count < read_count) {
sel.Initialize(new_sel);
}
}
} // namespace duckdb

View File

@@ -0,0 +1,36 @@
#include "decoder/rle_decoder.hpp"
#include "column_reader.hpp"
#include "parquet_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
RLEDecoder::RLEDecoder(ColumnReader &reader) : reader(reader), decoded_data_buffer(reader.encoding_buffers[0]) {
}
void RLEDecoder::InitializePage() {
if (reader.Type().id() != LogicalTypeId::BOOLEAN) {
throw std::runtime_error("RLE encoding is only supported for boolean data");
}
auto &block = reader.block;
block->inc(sizeof(uint32_t));
rle_decoder = make_uniq<RleBpDecoder>(block->ptr, block->len, 1);
}
void RLEDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) {
// RLE encoding for boolean
D_ASSERT(reader.Type().id() == LogicalTypeId::BOOLEAN);
idx_t valid_count = reader.GetValidCount(defines, read_count, result_offset);
decoded_data_buffer.reset();
decoded_data_buffer.resize(reader.reader.allocator, sizeof(bool) * valid_count);
rle_decoder->GetBatch<uint8_t>(decoded_data_buffer.ptr, valid_count);
reader.PlainTemplated<bool, TemplatedParquetValueConversion<bool>>(decoded_data_buffer, defines, read_count,
result_offset, result);
}
void RLEDecoder::Skip(uint8_t *defines, idx_t skip_count) {
idx_t valid_count = reader.GetValidCount(defines, skip_count);
rle_decoder->Skip(valid_count);
}
} // namespace duckdb

View File

@@ -0,0 +1,332 @@
#include "geo_parquet.hpp"
#include "column_reader.hpp"
#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/main/extension_helper.hpp"
#include "reader/expression_column_reader.hpp"
#include "parquet_reader.hpp"
#include "yyjson.hpp"
namespace duckdb {
using namespace duckdb_yyjson; // NOLINT
//------------------------------------------------------------------------------
// GeoParquetFileMetadata
//------------------------------------------------------------------------------
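// GeoParquet stores its metadata as a JSON document under the "geo" key of the file-level key/value
// metadata; TryRead parses that document (version, per-column encoding and geometry types) and
// returns nullptr if it is absent or GeoParquet conversion is disabled.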
unique_ptr<GeoParquetFileMetadata> GeoParquetFileMetadata::TryRead(const duckdb_parquet::FileMetaData &file_meta_data,
const ClientContext &context) {
// Conversion not enabled, or the spatial extension is not loaded
if (!IsGeoParquetConversionEnabled(context)) {
return nullptr;
}
for (auto &kv : file_meta_data.key_value_metadata) {
if (kv.key == "geo") {
const auto geo_metadata = yyjson_read(kv.value.c_str(), kv.value.size(), 0);
if (!geo_metadata) {
// Could not parse the JSON
return nullptr;
}
try {
// Check the root object
const auto root = yyjson_doc_get_root(geo_metadata);
if (!yyjson_is_obj(root)) {
throw InvalidInputException("Geoparquet metadata is not an object");
}
// We don't actually care about the version for now, as we only support V1+native
auto result = make_uniq<GeoParquetFileMetadata>(GeoParquetVersion::BOTH);
// Check and parse the version
const auto version_val = yyjson_obj_get(root, "version");
if (!yyjson_is_str(version_val)) {
throw InvalidInputException("Geoparquet metadata does not have a version");
}
auto version = yyjson_get_str(version_val);
if (StringUtil::StartsWith(version, "3")) {
// Guard against a breaking future 3.0 version
throw InvalidInputException("Geoparquet version %s is not supported", version);
}
// Check and parse the geometry columns
const auto columns_val = yyjson_obj_get(root, "columns");
if (!yyjson_is_obj(columns_val)) {
throw InvalidInputException("Geoparquet metadata does not have a columns object");
}
// Iterate over all geometry columns
yyjson_obj_iter iter = yyjson_obj_iter_with(columns_val);
yyjson_val *column_key;
while ((column_key = yyjson_obj_iter_next(&iter))) {
const auto column_val = yyjson_obj_iter_get_val(column_key);
const auto column_name = yyjson_get_str(column_key);
auto &column = result->geometry_columns[column_name];
if (!yyjson_is_obj(column_val)) {
throw InvalidInputException("Geoparquet column '%s' is not an object", column_name);
}
// Parse the encoding
const auto encoding_val = yyjson_obj_get(column_val, "encoding");
if (!yyjson_is_str(encoding_val)) {
throw InvalidInputException("Geoparquet column '%s' does not have an encoding", column_name);
}
const auto encoding_str = yyjson_get_str(encoding_val);
if (strcmp(encoding_str, "WKB") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::WKB;
} else if (strcmp(encoding_str, "point") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::POINT;
} else if (strcmp(encoding_str, "linestring") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::LINESTRING;
} else if (strcmp(encoding_str, "polygon") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::POLYGON;
} else if (strcmp(encoding_str, "multipoint") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::MULTIPOINT;
} else if (strcmp(encoding_str, "multilinestring") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::MULTILINESTRING;
} else if (strcmp(encoding_str, "multipolygon") == 0) {
column.geometry_encoding = GeoParquetColumnEncoding::MULTIPOLYGON;
} else {
throw InvalidInputException("Geoparquet column '%s' has an unsupported encoding", column_name);
}
// Parse the geometry types
const auto geometry_types_val = yyjson_obj_get(column_val, "geometry_types");
if (!yyjson_is_arr(geometry_types_val)) {
throw InvalidInputException("Geoparquet column '%s' does not have geometry types", column_name);
}
// We don't care about the geometry types for now.
// TODO: Parse the bounding box and other metadata that might be useful.
// (Only encoding and geometry types are required to be present)
}
// Return the result
// Make sure to free the JSON document
yyjson_doc_free(geo_metadata);
return result;
} catch (...) {
// Make sure to free the JSON document in case of an exception
yyjson_doc_free(geo_metadata);
throw;
}
}
}
return nullptr;
}
void GeoParquetFileMetadata::AddGeoParquetStats(const string &column_name, const LogicalType &type,
const GeometryStatsData &stats) {
// Lock the metadata
lock_guard<mutex> glock(write_lock);
auto it = geometry_columns.find(column_name);
if (it == geometry_columns.end()) {
auto &column = geometry_columns[column_name];
column.stats.Merge(stats);
column.insertion_index = geometry_columns.size() - 1;
} else {
it->second.stats.Merge(stats);
}
}
void GeoParquetFileMetadata::Write(duckdb_parquet::FileMetaData &file_meta_data) {
// GeoParquet does not support M or ZM coordinates, so remove any columns that have them.
unordered_set<string> invalid_columns;
for (auto &column : geometry_columns) {
if (column.second.stats.extent.HasM()) {
invalid_columns.insert(column.first);
}
}
for (auto &col_name : invalid_columns) {
geometry_columns.erase(col_name);
}
// No columns remaining, nothing to write
if (geometry_columns.empty()) {
return;
}
// Find the primary geometry column
const auto &random_first_column = *geometry_columns.begin();
auto primary_geometry_column = random_first_column.first;
auto primary_insertion_index = random_first_column.second.insertion_index;
for (auto &column : geometry_columns) {
if (column.second.insertion_index < primary_insertion_index) {
primary_insertion_index = column.second.insertion_index;
primary_geometry_column = column.first;
}
}
yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr);
yyjson_mut_val *root = yyjson_mut_obj(doc);
yyjson_mut_doc_set_root(doc, root);
// Add the version
switch (version) {
case GeoParquetVersion::V1:
case GeoParquetVersion::BOTH:
yyjson_mut_obj_add_strcpy(doc, root, "version", "1.0.0");
break;
case GeoParquetVersion::V2:
yyjson_mut_obj_add_strcpy(doc, root, "version", "2.0.0");
break;
case GeoParquetVersion::NONE:
default:
// Should never happen, we should not be writing anything
yyjson_mut_doc_free(doc);
throw InternalException("GeoParquetVersion::NONE should not write metadata");
}
// Add the primary column
yyjson_mut_obj_add_strncpy(doc, root, "primary_column", primary_geometry_column.c_str(),
primary_geometry_column.size());
// Add the columns
const auto json_columns = yyjson_mut_obj_add_obj(doc, root, "columns");
for (auto &column : geometry_columns) {
const auto column_json = yyjson_mut_obj_add_obj(doc, json_columns, column.first.c_str());
yyjson_mut_obj_add_str(doc, column_json, "encoding", "WKB");
const auto geometry_types = yyjson_mut_obj_add_arr(doc, column_json, "geometry_types");
for (auto &type_name : column.second.stats.types.ToString(false)) {
yyjson_mut_arr_add_strcpy(doc, geometry_types, type_name.c_str());
}
const auto &bbox = column.second.stats.extent;
if (bbox.HasXY()) {
const auto bbox_arr = yyjson_mut_obj_add_arr(doc, column_json, "bbox");
if (!column.second.stats.extent.HasZ()) {
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.x_min);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.y_min);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.x_max);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.y_max);
} else {
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.x_min);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.y_min);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.z_min);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.x_max);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.y_max);
yyjson_mut_arr_add_real(doc, bbox_arr, bbox.z_max);
}
}
// If the CRS is present, add it
if (!column.second.projjson.empty()) {
const auto crs_doc = yyjson_read(column.second.projjson.c_str(), column.second.projjson.size(), 0);
if (!crs_doc) {
yyjson_mut_doc_free(doc);
throw InvalidInputException("Failed to parse CRS JSON");
}
const auto crs_root = yyjson_doc_get_root(crs_doc);
const auto crs_val = yyjson_val_mut_copy(doc, crs_root);
const auto crs_key = yyjson_mut_strcpy(doc, "projjson");
yyjson_mut_obj_add(column_json, crs_key, crs_val);
yyjson_doc_free(crs_doc);
}
}
yyjson_write_err err;
size_t len;
char *json = yyjson_mut_write_opts(doc, 0, nullptr, &len, &err);
if (!json) {
yyjson_mut_doc_free(doc);
throw SerializationException("Failed to write JSON string: %s", err.msg);
}
// Create a string from the JSON
duckdb_parquet::KeyValue kv;
kv.__set_key("geo");
kv.__set_value(string(json, len));
// Free the JSON and the document
free(json);
yyjson_mut_doc_free(doc);
file_meta_data.key_value_metadata.push_back(kv);
file_meta_data.__isset.key_value_metadata = true;
}
bool GeoParquetFileMetadata::IsGeometryColumn(const string &column_name) const {
return geometry_columns.find(column_name) != geometry_columns.end();
}
bool GeoParquetFileMetadata::IsGeoParquetConversionEnabled(const ClientContext &context) {
Value geoparquet_enabled;
if (!context.TryGetCurrentSetting("enable_geoparquet_conversion", geoparquet_enabled)) {
return false;
}
if (!geoparquet_enabled.GetValue<bool>()) {
// Disabled by setting
return false;
}
if (!context.db->ExtensionIsLoaded("spatial")) {
// The spatial extension is not loaded, so we can't convert anyway
return false;
}
return true;
}
LogicalType GeoParquetFileMetadata::GeometryType() {
auto blob_type = LogicalType(LogicalTypeId::BLOB);
blob_type.SetAlias("GEOMETRY");
return blob_type;
}
const unordered_map<string, GeoParquetColumnMetadata> &GeoParquetFileMetadata::GetColumnMeta() const {
return geometry_columns;
}
unique_ptr<ColumnReader> GeoParquetFileMetadata::CreateColumnReader(ParquetReader &reader,
const ParquetColumnSchema &schema,
ClientContext &context) {
// Get the catalog
auto &catalog = Catalog::GetSystemCatalog(context);
// WKB encoding
if (schema.children[0].type.id() == LogicalTypeId::BLOB) {
// Look for a conversion function in the catalog
auto &conversion_func_set =
catalog.GetEntry<ScalarFunctionCatalogEntry>(context, DEFAULT_SCHEMA, "st_geomfromwkb");
auto conversion_func = conversion_func_set.functions.GetFunctionByArguments(context, {LogicalType::BLOB});
// Create a bound function call expression
auto args = vector<unique_ptr<Expression>>();
args.push_back(make_uniq<BoundReferenceExpression>(LogicalType::BLOB, 0));
auto expr =
make_uniq<BoundFunctionExpression>(conversion_func.return_type, conversion_func, std::move(args), nullptr);
// Create a child reader
auto child_reader = ColumnReader::CreateReader(reader, schema.children[0]);
// Create an expression reader that applies the conversion function to the child reader
return make_uniq<ExpressionColumnReader>(context, std::move(child_reader), std::move(expr), schema);
}
// Otherwise, unrecognized encoding
throw NotImplementedException("Unsupported geometry encoding");
}
} // namespace duckdb

View File

@@ -0,0 +1,340 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_bss_decoder.hpp"
#include "parquet_statistics.hpp"
#include "parquet_types.h"
#include "resizable_buffer.hpp"
#include "thrift_tools.hpp"
#include "decoder/byte_stream_split_decoder.hpp"
#include "decoder/delta_binary_packed_decoder.hpp"
#include "decoder/dictionary_decoder.hpp"
#include "decoder/rle_decoder.hpp"
#include "decoder/delta_length_byte_array_decoder.hpp"
#include "decoder/delta_byte_array_decoder.hpp"
#include "parquet_column_schema.hpp"
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/types/vector_cache.hpp"
namespace duckdb {
class ParquetReader;
struct TableFilterState;
using duckdb_apache::thrift::protocol::TProtocol;
using duckdb_parquet::ColumnChunk;
using duckdb_parquet::CompressionCodec;
using duckdb_parquet::FieldRepetitionType;
using duckdb_parquet::PageHeader;
using duckdb_parquet::SchemaElement;
using duckdb_parquet::Type;
enum class ColumnEncoding {
INVALID,
DICTIONARY,
DELTA_BINARY_PACKED,
RLE,
DELTA_LENGTH_BYTE_ARRAY,
DELTA_BYTE_ARRAY,
BYTE_STREAM_SPLIT,
PLAIN
};
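//! ColumnReader scans a single column chunk of a row group: it reads and decompresses the data
//! pages, decodes the repetition/definition levels, and dispatches the values to the
//! encoding-specific decoders declared above.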
class ColumnReader {
friend class ByteStreamSplitDecoder;
friend class DeltaBinaryPackedDecoder;
friend class DeltaByteArrayDecoder;
friend class DeltaLengthByteArrayDecoder;
friend class DictionaryDecoder;
friend class RLEDecoder;
public:
ColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema_p);
virtual ~ColumnReader();
public:
static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const ParquetColumnSchema &schema);
virtual void InitializeRead(idx_t row_group_index, const vector<ColumnChunk> &columns, TProtocol &protocol_p);
virtual idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out);
virtual void Select(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out,
const SelectionVector &sel, idx_t approved_tuple_count);
virtual void Filter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out,
const TableFilter &filter, TableFilterState &filter_state, SelectionVector &sel,
idx_t &approved_tuple_count, bool is_first_filter);
static void ApplyFilter(Vector &v, const TableFilter &filter, TableFilterState &filter_state, idx_t scan_count,
SelectionVector &sel, idx_t &approved_tuple_count);
virtual void Skip(idx_t num_values);
ParquetReader &Reader();
const LogicalType &Type() const {
return column_schema.type;
}
const ParquetColumnSchema &Schema() const {
return column_schema;
}
inline idx_t ColumnIndex() const {
return column_schema.column_index;
}
inline idx_t MaxDefine() const {
return column_schema.max_define;
}
idx_t MaxRepeat() const {
return column_schema.max_repeat;
}
virtual idx_t FileOffset() const;
virtual uint64_t TotalCompressedSize();
virtual idx_t GroupRowsAvailable();
// register the range this reader will touch for prefetching
virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
unique_ptr<BaseStatistics> Stats(idx_t row_group_idx_p, const vector<ColumnChunk> &columns);
template <class VALUE_TYPE, class CONVERSION, bool HAS_DEFINES>
void PlainTemplatedDefines(ByteBuffer &plain_data, const uint8_t *defines, uint64_t num_values, idx_t result_offset,
Vector &result) {
if (CONVERSION::PlainAvailable(plain_data, num_values)) {
PlainTemplatedInternal<VALUE_TYPE, CONVERSION, HAS_DEFINES, false>(plain_data, defines, num_values,
result_offset, result);
} else {
PlainTemplatedInternal<VALUE_TYPE, CONVERSION, HAS_DEFINES, true>(plain_data, defines, num_values,
result_offset, result);
}
}
template <class VALUE_TYPE, class CONVERSION>
void PlainTemplated(ByteBuffer &plain_data, const uint8_t *defines, uint64_t num_values, idx_t result_offset,
Vector &result) {
if (HasDefines() && defines) {
PlainTemplatedDefines<VALUE_TYPE, CONVERSION, true>(plain_data, defines, num_values, result_offset, result);
} else {
PlainTemplatedDefines<VALUE_TYPE, CONVERSION, false>(plain_data, defines, num_values, result_offset,
result);
}
}
template <class CONVERSION, bool HAS_DEFINES>
void PlainSkipTemplatedDefines(ByteBuffer &plain_data, const uint8_t *defines, uint64_t num_values) {
if (CONVERSION::PlainAvailable(plain_data, num_values)) {
PlainSkipTemplatedInternal<CONVERSION, HAS_DEFINES, false>(plain_data, defines, num_values);
} else {
PlainSkipTemplatedInternal<CONVERSION, HAS_DEFINES, true>(plain_data, defines, num_values);
}
}
template <class CONVERSION>
void PlainSkipTemplated(ByteBuffer &plain_data, const uint8_t *defines, uint64_t num_values) {
if (HasDefines() && defines) {
PlainSkipTemplatedDefines<CONVERSION, true>(plain_data, defines, num_values);
} else {
PlainSkipTemplatedDefines<CONVERSION, false>(plain_data, defines, num_values);
}
}
template <class VALUE_TYPE, class CONVERSION>
void PlainSelectTemplated(ByteBuffer &plain_data, const uint8_t *defines, uint64_t num_values, Vector &result,
const SelectionVector &sel, idx_t approved_tuple_count) {
if (HasDefines() && defines) {
PlainSelectTemplatedInternal<VALUE_TYPE, CONVERSION, true, true>(plain_data, defines, num_values, result,
sel, approved_tuple_count);
} else {
PlainSelectTemplatedInternal<VALUE_TYPE, CONVERSION, false, true>(plain_data, defines, num_values, result,
sel, approved_tuple_count);
}
}
idx_t GetValidCount(uint8_t *defines, idx_t count, idx_t offset = 0) const {
if (!defines) {
return count;
}
idx_t valid_count = 0;
for (idx_t i = offset; i < offset + count; i++) {
valid_count += defines[i] == MaxDefine();
}
return valid_count;
}
protected:
virtual bool SupportsDirectFilter() const {
return false;
}
virtual bool SupportsDirectSelect() const {
return false;
}
void DirectFilter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out,
const TableFilter &filter, TableFilterState &filter_state, SelectionVector &sel,
idx_t &approved_tuple_count);
void DirectSelect(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result,
const SelectionVector &sel, idx_t approved_tuple_count);
private:
//! Check if a previous table filter has filtered out this page
bool PageIsFilteredOut(PageHeader &page_hdr);
void BeginRead(data_ptr_t define_out, data_ptr_t repeat_out);
void FinishRead(idx_t read_count);
idx_t ReadPageHeaders(idx_t max_read, optional_ptr<const TableFilter> filter = nullptr,
optional_ptr<TableFilterState> filter_state = nullptr);
idx_t ReadInternal(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result);
//! Prepare a read of up to "read_count" rows and read the defines/repeats.
//! Returns whether all values are valid (i.e., not NULL)
bool PrepareRead(idx_t read_count, data_ptr_t define_out, data_ptr_t repeat_out, idx_t result_offset);
void ReadData(idx_t read_now, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result, idx_t result_offset);
template <class VALUE_TYPE, class CONVERSION, bool HAS_DEFINES, bool CHECKED>
void PlainTemplatedInternal(ByteBuffer &plain_data, const uint8_t *__restrict defines, const uint64_t num_values,
const idx_t result_offset, Vector &result) {
const auto result_ptr = FlatVector::GetData<VALUE_TYPE>(result);
if (!HAS_DEFINES && !CHECKED && CONVERSION::PlainConstantSize() == sizeof(VALUE_TYPE)) {
// we can memcpy
idx_t copy_count = num_values * CONVERSION::PlainConstantSize();
memcpy(result_ptr + result_offset, plain_data.ptr, copy_count);
plain_data.unsafe_inc(copy_count);
return;
}
auto &result_mask = FlatVector::Validity(result);
for (idx_t row_idx = result_offset; row_idx < result_offset + num_values; row_idx++) {
if (HAS_DEFINES && defines[row_idx] != MaxDefine()) {
result_mask.SetInvalid(row_idx);
continue;
}
result_ptr[row_idx] = CONVERSION::template PlainRead<CHECKED>(plain_data, *this);
}
}
template <class CONVERSION, bool HAS_DEFINES, bool CHECKED>
void PlainSkipTemplatedInternal(ByteBuffer &plain_data, const uint8_t *__restrict defines,
const uint64_t num_values, idx_t row_offset = 0) {
if (!HAS_DEFINES && CONVERSION::PlainConstantSize() > 0) {
if (CHECKED) {
plain_data.inc(num_values * CONVERSION::PlainConstantSize());
} else {
plain_data.unsafe_inc(num_values * CONVERSION::PlainConstantSize());
}
return;
}
for (idx_t row_idx = row_offset; row_idx < row_offset + num_values; row_idx++) {
if (HAS_DEFINES && defines[row_idx] != MaxDefine()) {
continue;
}
CONVERSION::template PlainSkip<CHECKED>(plain_data, *this);
}
}
template <class VALUE_TYPE, class CONVERSION, bool HAS_DEFINES, bool CHECKED>
void PlainSelectTemplatedInternal(ByteBuffer &plain_data, const uint8_t *__restrict defines,
const uint64_t num_values, Vector &result, const SelectionVector &sel,
idx_t approved_tuple_count) {
const auto result_ptr = FlatVector::GetData<VALUE_TYPE>(result);
auto &result_mask = FlatVector::Validity(result);
idx_t current_entry = 0;
for (idx_t i = 0; i < approved_tuple_count; i++) {
auto next_entry = sel.get_index(i);
D_ASSERT(current_entry <= next_entry);
// perform any skips forward if required
PlainSkipTemplatedInternal<CONVERSION, HAS_DEFINES, CHECKED>(plain_data, defines,
next_entry - current_entry, current_entry);
// read this row
if (HAS_DEFINES && defines[next_entry] != MaxDefine()) {
result_mask.SetInvalid(next_entry);
} else {
result_ptr[next_entry] = CONVERSION::template PlainRead<CHECKED>(plain_data, *this);
}
current_entry = next_entry + 1;
}
if (current_entry < num_values) {
// skip forward to the end of where we are selecting
PlainSkipTemplatedInternal<CONVERSION, HAS_DEFINES, CHECKED>(plain_data, defines,
num_values - current_entry, current_entry);
}
}
protected:
Allocator &GetAllocator();
// readers that use the default Read() need to implement these
virtual void PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values);
virtual void Plain(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values, idx_t result_offset, Vector &result);
virtual void Plain(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
idx_t result_offset, Vector &result);
virtual void PlainSelect(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
Vector &result, const SelectionVector &sel, idx_t count);
// applies any skips that were registered using Skip()
virtual void ApplyPendingSkips(data_ptr_t define_out, data_ptr_t repeat_out);
inline bool HasDefines() const {
return MaxDefine() > 0;
}
inline bool HasRepeats() const {
return MaxRepeat() > 0;
}
protected:
const ParquetColumnSchema &column_schema;
ParquetReader &reader;
idx_t pending_skips = 0;
bool page_is_filtered_out = false;
virtual void ResetPage();
private:
void AllocateBlock(idx_t size);
void PrepareRead(optional_ptr<const TableFilter> filter, optional_ptr<TableFilterState> filter_state);
void PreparePage(PageHeader &page_hdr);
void PrepareDataPage(PageHeader &page_hdr);
void PreparePageV2(PageHeader &page_hdr);
void DecompressInternal(CompressionCodec::type codec, const_data_ptr_t src, idx_t src_size, data_ptr_t dst,
idx_t dst_size);
const ColumnChunk *chunk = nullptr;
TProtocol *protocol;
idx_t page_rows_available;
idx_t group_rows_available;
idx_t chunk_read_offset;
shared_ptr<ResizeableBuffer> block;
ColumnEncoding encoding = ColumnEncoding::INVALID;
unique_ptr<RleBpDecoder> defined_decoder;
unique_ptr<RleBpDecoder> repeated_decoder;
DictionaryDecoder dictionary_decoder;
DeltaBinaryPackedDecoder delta_binary_packed_decoder;
RLEDecoder rle_decoder;
DeltaLengthByteArrayDecoder delta_length_byte_array_decoder;
DeltaByteArrayDecoder delta_byte_array_decoder;
ByteStreamSplitDecoder byte_stream_split_decoder;
//! Resizeable buffers used for the various encodings above
ResizeableBuffer encoding_buffers[2];
public:
template <class TARGET>
TARGET &Cast() {
if (TARGET::TYPE != PhysicalType::INVALID && Type().InternalType() != TARGET::TYPE) {
throw InternalException("Failed to cast column reader to type - type mismatch");
}
return reinterpret_cast<TARGET &>(*this);
}
template <class TARGET>
const TARGET &Cast() const {
if (TARGET::TYPE != PhysicalType::INVALID && Type().InternalType() != TARGET::TYPE) {
throw InternalException("Failed to cast column reader to type - type mismatch");
}
return reinterpret_cast<const TARGET &>(*this);
}
};
} // namespace duckdb

View File

@@ -0,0 +1,145 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_types.h"
#include "parquet_column_schema.hpp"
namespace duckdb {
class MemoryStream;
class ParquetWriter;
class ColumnWriterPageState;
class PrimitiveColumnWriterState;
struct ChildFieldIDs;
struct ShreddingType;
class ResizeableBuffer;
class ParquetBloomFilter;
class ColumnWriterState {
public:
virtual ~ColumnWriterState();
unsafe_vector<uint16_t> definition_levels;
unsafe_vector<uint16_t> repetition_levels;
unsafe_vector<uint8_t> is_empty;
idx_t parent_null_count = 0;
idx_t null_count = 0;
public:
template <class TARGET>
TARGET &Cast() {
DynamicCastCheck<TARGET>(this);
return reinterpret_cast<TARGET &>(*this);
}
template <class TARGET>
const TARGET &Cast() const {
D_ASSERT(dynamic_cast<const TARGET *>(this));
return reinterpret_cast<const TARGET &>(*this);
}
};
class ColumnWriterPageState {
public:
virtual ~ColumnWriterPageState() {
}
public:
template <class TARGET>
TARGET &Cast() {
DynamicCastCheck<TARGET>(this);
return reinterpret_cast<TARGET &>(*this);
}
template <class TARGET>
const TARGET &Cast() const {
D_ASSERT(dynamic_cast<const TARGET *>(this));
return reinterpret_cast<const TARGET &>(*this);
}
};
class ColumnWriter {
protected:
static constexpr uint16_t PARQUET_DEFINE_VALID = UINT16_C(65535);
public:
ColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path,
bool can_have_nulls);
virtual ~ColumnWriter();
public:
const LogicalType &Type() const {
return column_schema.type;
}
const ParquetColumnSchema &Schema() const {
return column_schema;
}
inline idx_t SchemaIndex() const {
return column_schema.schema_index;
}
inline idx_t MaxDefine() const {
return column_schema.max_define;
}
idx_t MaxRepeat() const {
return column_schema.max_repeat;
}
static ParquetColumnSchema FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
const LogicalType &type, const string &name, bool allow_geometry,
optional_ptr<const ChildFieldIDs> field_ids,
optional_ptr<const ShreddingType> shredding_types,
idx_t max_repeat = 0, idx_t max_define = 1,
bool can_have_nulls = true);
//! Create the column writer for a specific type recursively
static unique_ptr<ColumnWriter> CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
const vector<duckdb_parquet::SchemaElement> &parquet_schemas,
const ParquetColumnSchema &schema,
vector<string> path_in_schema);
virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) = 0;
//! Indicates whether the writer needs to analyze the data before preparing it
virtual bool HasAnalyze() {
return false;
}
virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
throw NotImplementedException("Writer does not need analysis");
}
//! Called after all data has been passed to Analyze
virtual void FinalizeAnalyze(ColumnWriterState &state) {
throw NotImplementedException("Writer does not need analysis");
}
virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
bool vector_can_span_multiple_pages) = 0;
virtual void BeginWrite(ColumnWriterState &state) = 0;
virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
virtual void FinalizeWrite(ColumnWriterState &state) = 0;
protected:
void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
const idx_t count, const uint16_t define_value, const uint16_t null_value) const;
void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count) const;
void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
AllocatedData &compressed_buf);
public:
ParquetWriter &writer;
const ParquetColumnSchema &column_schema;
vector<string> schema_path;
bool can_have_nulls;
protected:
vector<unique_ptr<ColumnWriter>> child_writers;
};
} // namespace duckdb

View File

@@ -0,0 +1,221 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decode_utils.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/fast_mem.hpp"
#include "duckdb/common/bitpacking.hpp"
#include "resizable_buffer.hpp"
namespace duckdb {
class ParquetDecodeUtils {
//===--------------------------------------------------------------------===//
// Bitpacking
//===--------------------------------------------------------------------===//
private:
static const uint64_t BITPACK_MASKS[];
static const uint64_t BITPACK_MASKS_SIZE;
static const uint8_t BITPACK_DLEN;
static void CheckWidth(const uint8_t width) {
if (width >= BITPACK_MASKS_SIZE) {
throw InvalidInputException("The width (%d) of the bitpacked data exceeds the supported max width (%d), "
"the file might be corrupted.",
width, BITPACK_MASKS_SIZE);
}
}
public:
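// BitUnpack reads `count` values of `width` bits each from the buffer, carrying the current bit
// offset in `bitpack_pos` across calls; when the read starts on a byte boundary and spans at least
// one bitpacking group, the bulk is handled by the vectorized aligned path.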
template <class T>
static void BitUnpack(ByteBuffer &src, bitpacking_width_t &bitpack_pos, T *dst, idx_t count,
const bitpacking_width_t width) {
CheckWidth(width);
const auto mask = BITPACK_MASKS[width];
src.available(count * width / BITPACK_DLEN); // check if buffer has enough space available once
if (bitpack_pos == 0 && count >= BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE) {
idx_t remainder = count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
idx_t aligned_count = count - remainder;
BitUnpackAlignedInternal(src, dst, aligned_count, width);
dst += aligned_count;
count = remainder;
}
for (idx_t i = 0; i < count; i++) {
auto val = (src.unsafe_get<uint8_t>() >> bitpack_pos) & mask;
bitpack_pos += width;
while (bitpack_pos > BITPACK_DLEN) {
src.unsafe_inc(1);
val |= (static_cast<T>(src.unsafe_get<uint8_t>())
<< static_cast<T>(BITPACK_DLEN - (bitpack_pos - width))) &
mask;
bitpack_pos -= BITPACK_DLEN;
}
dst[i] = val;
}
}
static void Skip(ByteBuffer &src, bitpacking_width_t &bitpack_pos, idx_t count, const bitpacking_width_t width) {
CheckWidth(width);
src.available(count * width / BITPACK_DLEN); // check if buffer has enough space available once
if (bitpack_pos == 0 && count >= BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE) {
idx_t remainder = count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
idx_t aligned_count = count - remainder;
SkipAligned(src, aligned_count, width);
count = remainder;
}
// FIXME: we should be able to just do this in one go instead of having this loop
for (idx_t i = 0; i < count; i++) {
bitpack_pos += width;
while (bitpack_pos > BITPACK_DLEN) {
src.unsafe_inc(1);
bitpack_pos -= BITPACK_DLEN;
}
}
}
template <class T>
static void BitPackAligned(T *src, data_ptr_t dst, const idx_t count, const bitpacking_width_t width) {
D_ASSERT(width < BITPACK_MASKS_SIZE);
D_ASSERT(count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0);
BitpackingPrimitives::PackBuffer<T, true>(dst, src, count, width);
}
template <class T>
static void BitUnpackAlignedInternal(ByteBuffer &src, T *dst, const idx_t count, const bitpacking_width_t width) {
D_ASSERT(count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0);
if (cast_pointer_to_uint64(src.ptr) % sizeof(T) == 0) {
// Fast path: aligned
BitpackingPrimitives::UnPackBuffer<T>(data_ptr_cast(dst), src.ptr, count, width);
src.unsafe_inc(count * width / BITPACK_DLEN);
return;
}
for (idx_t i = 0; i < count; i += BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE) {
const auto next_read = BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE * width / BITPACK_DLEN;
// Buffer for alignment
T aligned_data[BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE];
// Copy over to aligned buffer
FastMemcpy(aligned_data, src.ptr, next_read);
// Unpack
BitpackingPrimitives::UnPackBlock<T>(data_ptr_cast(dst), data_ptr_cast(aligned_data), width, true);
src.unsafe_inc(next_read);
dst += BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
}
}
template <class T>
static void BitUnpackAligned(ByteBuffer &src, T *dst, const idx_t count, const bitpacking_width_t width) {
CheckWidth(width);
if (count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE != 0) {
throw InvalidInputException("Aligned bitpacking count must be a multiple of %llu",
BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE);
}
const auto read_size = count * width / BITPACK_DLEN;
src.available(read_size); // check if buffer has enough space available once
BitUnpackAlignedInternal(src, dst, count, width);
}
static void SkipAligned(ByteBuffer &src, const idx_t count, const bitpacking_width_t width) {
CheckWidth(width);
if (count % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE != 0) {
throw InvalidInputException("Aligned bitpacking count must be a multiple of %llu",
BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE);
}
const auto read_size = count * width / BITPACK_DLEN;
src.inc(read_size);
}
//===--------------------------------------------------------------------===//
// Zigzag
//===--------------------------------------------------------------------===//
private:
//! https://lemire.me/blog/2022/11/25/making-all-your-integers-positive-with-zigzag-encoding/
template <class UNSIGNED>
static typename std::enable_if<std::is_unsigned<UNSIGNED>::value, typename std::make_signed<UNSIGNED>::type>::type
ZigzagToIntInternal(UNSIGNED x) {
return (x >> 1) ^ (-(x & 1));
}
template <typename SIGNED>
static typename std::enable_if<std::is_signed<SIGNED>::value, typename std::make_unsigned<SIGNED>::type>::type
IntToZigzagInternal(SIGNED x) {
using UNSIGNED = typename std::make_unsigned<SIGNED>::type;
return (static_cast<UNSIGNED>(x) << 1) ^ static_cast<UNSIGNED>(x >> (sizeof(SIGNED) * 8 - 1));
}
public:
template <class UNSIGNED>
static typename std::enable_if<std::is_unsigned<UNSIGNED>::value, typename std::make_signed<UNSIGNED>::type>::type
ZigzagToInt(UNSIGNED x) {
auto integer = ZigzagToIntInternal(x);
D_ASSERT(x == IntToZigzagInternal(integer)); // test roundtrip
return integer;
}
template <typename SIGNED>
static typename std::enable_if<std::is_signed<SIGNED>::value, typename std::make_unsigned<SIGNED>::type>::type
IntToZigzag(SIGNED x) {
auto zigzag = IntToZigzagInternal(x);
D_ASSERT(x == ZigzagToIntInternal(zigzag)); // test roundtrip
return zigzag;
}
//===--------------------------------------------------------------------===//
// Varint
//===--------------------------------------------------------------------===//
public:
template <class T>
static uint8_t GetVarintSize(T val) {
uint8_t res = 0;
do {
val >>= 7;
res++;
} while (val != 0);
return res;
}
template <class T>
static void VarintEncode(T val, WriteStream &ser) {
do {
uint8_t byte = val & 127;
val >>= 7;
if (val != 0) {
byte |= 128;
}
ser.Write<uint8_t>(byte);
} while (val != 0);
}
template <class T, bool CHECKED = true>
static T VarintDecode(ByteBuffer &buf) {
T result = 0;
uint8_t shift = 0;
while (true) {
uint8_t byte;
if (CHECKED) {
byte = buf.read<uint8_t>();
} else {
byte = buf.unsafe_read<uint8_t>();
}
result |= T(byte & 127) << shift;
if ((byte & 128) == 0) {
break;
}
shift += 7;
if (shift > sizeof(T) * 8) {
throw std::runtime_error("Varint-decoding found too large number");
}
}
return result;
}
};
} // namespace duckdb
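
For reference, a minimal standalone sketch of the two encodings above, zigzag followed by ULEB128 varints, written outside of the ParquetDecodeUtils class. The helper names and the test value are illustrative only, and the decoder omits the overflow check that the class performs.

#include <cassert>
#include <cstdint>
#include <vector>

// Zigzag: map signed integers to unsigned so small magnitudes stay small.
static uint64_t IntToZigzag(int64_t x) {
    return (static_cast<uint64_t>(x) << 1) ^ static_cast<uint64_t>(x >> 63);
}
static int64_t ZigzagToInt(uint64_t z) {
    // (~(z & 1) + 1) is 0 for even z and all-ones for odd z (i.e. -(z & 1))
    return static_cast<int64_t>((z >> 1) ^ (~(z & 1) + 1));
}

// ULEB128 varint: 7 payload bits per byte, high bit marks continuation.
static void VarintEncode(uint64_t v, std::vector<uint8_t> &out) {
    do {
        uint8_t byte = v & 0x7F;
        v >>= 7;
        if (v != 0) {
            byte |= 0x80;
        }
        out.push_back(byte);
    } while (v != 0);
}
static uint64_t VarintDecode(const std::vector<uint8_t> &in, size_t &pos) {
    uint64_t result = 0;
    uint8_t shift = 0;
    while (true) {
        uint8_t byte = in[pos++];
        result |= static_cast<uint64_t>(byte & 0x7F) << shift;
        if ((byte & 0x80) == 0) {
            break;
        }
        shift += 7; // no overflow check in this sketch
    }
    return result;
}

int main() {
    int64_t value = -1234;
    std::vector<uint8_t> buf;
    VarintEncode(IntToZigzag(value), buf); // -1234 -> zigzag 2467 -> 2 bytes
    size_t pos = 0;
    assert(ZigzagToInt(VarintDecode(buf, pos)) == value);
    return 0;
}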

View File

@@ -0,0 +1,32 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/byte_stream_split_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_bss_decoder.hpp"
namespace duckdb {
class ColumnReader;
class ByteStreamSplitDecoder {
public:
explicit ByteStreamSplitDecoder(ColumnReader &reader);
public:
void InitializePage();
void Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
private:
ColumnReader &reader;
ResizeableBuffer &decoded_data_buffer;
unique_ptr<BssDecoder> bss_decoder;
};
} // namespace duckdb

View File

@@ -0,0 +1,33 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/delta_binary_packed_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_dbp_decoder.hpp"
#include "resizable_buffer.hpp"
namespace duckdb {
class ColumnReader;
class DeltaBinaryPackedDecoder {
public:
explicit DeltaBinaryPackedDecoder(ColumnReader &reader);
public:
void InitializePage();
void Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
private:
ColumnReader &reader;
ResizeableBuffer &decoded_data_buffer;
unique_ptr<DbpDecoder> dbp_decoder;
};
} // namespace duckdb

View File

@@ -0,0 +1,38 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/delta_byte_array_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_dbp_decoder.hpp"
#include "resizable_buffer.hpp"
namespace duckdb {
class ColumnReader;
class DeltaByteArrayDecoder {
public:
explicit DeltaByteArrayDecoder(ColumnReader &reader);
public:
void InitializePage();
void Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
static void ReadDbpData(Allocator &allocator, ResizeableBuffer &buffer, ResizeableBuffer &result_buffer,
idx_t &value_count);
private:
ColumnReader &reader;
unique_ptr<Vector> byte_array_data;
idx_t byte_array_count = 0;
idx_t delta_offset = 0;
};
} // namespace duckdb

View File

@@ -0,0 +1,43 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/delta_length_byte_array_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_dbp_decoder.hpp"
#include "resizable_buffer.hpp"
namespace duckdb {
class ColumnReader;
class DeltaLengthByteArrayDecoder {
public:
explicit DeltaLengthByteArrayDecoder(ColumnReader &reader);
public:
void InitializePage();
void Read(shared_ptr<ResizeableBuffer> &block, uint8_t *defines, idx_t read_count, Vector &result,
idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
private:
template <bool HAS_DEFINES>
void ReadInternal(shared_ptr<ResizeableBuffer> &block, uint8_t *defines, idx_t read_count, Vector &result,
idx_t result_offset);
template <bool HAS_DEFINES>
void SkipInternal(uint8_t *defines, idx_t skip_count);
private:
ColumnReader &reader;
ResizeableBuffer &length_buffer;
idx_t byte_array_count = 0;
idx_t length_idx;
};
} // namespace duckdb

View File

@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/dictionary_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_rle_bp_decoder.hpp"
#include "resizable_buffer.hpp"
namespace duckdb {
class ColumnReader;
struct TableFilterState;
class DictionaryDecoder {
public:
explicit DictionaryDecoder(ColumnReader &reader);
public:
void InitializeDictionary(idx_t dictionary_size, optional_ptr<const TableFilter> filter,
optional_ptr<TableFilterState> filter_state, bool has_defines);
void InitializePage();
idx_t Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
bool CanFilter(const TableFilter &filter, TableFilterState &filter_state);
bool DictionarySupportsFilter(const TableFilter &filter, TableFilterState &filter_state);
void Filter(uint8_t *defines, idx_t read_count, Vector &result, SelectionVector &sel, idx_t &approved_tuple_count);
bool HasFilter() const {
return filter_result.get();
}
bool HasFilteredOutAllValues() const {
return HasFilter() && filter_count == 0;
}
private:
idx_t GetValidValues(uint8_t *defines, idx_t read_count, idx_t result_offset);
void ConvertDictToSelVec(uint32_t *offsets, const SelectionVector &rows, idx_t count);
private:
ColumnReader &reader;
ResizeableBuffer &offset_buffer;
unique_ptr<RleBpDecoder> dict_decoder;
SelectionVector valid_sel;
SelectionVector dictionary_selection_vector;
idx_t dictionary_size;
buffer_ptr<VectorChildBuffer> dictionary;
unsafe_unique_array<bool> filter_result;
idx_t filter_count;
bool can_have_nulls;
};
} // namespace duckdb
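
The filter members above (filter_result, filter_count) exist so that a table filter can be evaluated once per dictionary entry rather than once per row. A simplified sketch of that idea, using plain STL containers and a hard-coded predicate instead of DuckDB vectors, selection vectors and TableFilter objects:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

int main() {
    // Dictionary page: distinct values; data page: one dictionary index per row.
    std::vector<std::string> dictionary = {"NL", "DE", "US", "FR"};
    std::vector<uint32_t> row_indices = {2, 0, 0, 3, 2, 1};

    // Evaluate the predicate once per dictionary entry instead of once per row.
    std::vector<bool> dict_passes(dictionary.size());
    for (size_t i = 0; i < dictionary.size(); i++) {
        dict_passes[i] = (dictionary[i] == "US"); // e.g. WHERE country = 'US'
    }

    // Row-level filtering is then just an index lookup.
    std::vector<size_t> selection;
    for (size_t row = 0; row < row_indices.size(); row++) {
        if (dict_passes[row_indices[row]]) {
            selection.push_back(row);
        }
    }
    assert(selection.size() == 2); // rows 0 and 4 reference "US"
    return 0;
}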

View File

@@ -0,0 +1,32 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// decoder/rle_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_rle_bp_decoder.hpp"
namespace duckdb {
class ColumnReader;
class RLEDecoder {
public:
explicit RLEDecoder(ColumnReader &reader);
public:
void InitializePage();
void Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset);
void Skip(uint8_t *defines, idx_t skip_count);
private:
ColumnReader &reader;
ResizeableBuffer &decoded_data_buffer;
unique_ptr<RleBpDecoder> rle_decoder;
};
} // namespace duckdb
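
Both RLE decoders consume Parquet's RLE/bit-packing hybrid format. Below is a hand-decoded sketch of a tiny stream, assuming bit width 1 (as used for the define levels of a non-nested nullable column) and run headers small enough to fit in a single varint byte; the byte values are made up for illustration.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    // Stream: [0x08, 0x01, 0x03, 0b10110100]
    //  0x08 -> indicator 8, LSB 0 => repeated run of 8 >> 1 = 4 values,
    //          followed by the value in ceil(1 / 8) = 1 byte: 0x01.
    //  0x03 -> indicator 3, LSB 1 => literal run of 3 >> 1 = 1 group of 8
    //          bit-packed values, stored LSB-first in the next byte.
    std::vector<uint8_t> stream = {0x08, 0x01, 0x03, 0b10110100};
    std::vector<uint8_t> values;

    size_t pos = 0;
    while (pos < stream.size()) {
        uint8_t indicator = stream[pos++]; // varints <= 127 fit in one byte
        if ((indicator & 1) == 0) {
            uint32_t count = indicator >> 1;
            uint8_t value = stream[pos++]; // bit width 1 -> value stored in 1 byte
            values.insert(values.end(), count, value);
        } else {
            uint32_t groups = indicator >> 1;
            for (uint32_t g = 0; g < groups; g++) {
                uint8_t packed = stream[pos++];
                for (int bit = 0; bit < 8; bit++) {
                    values.push_back((packed >> bit) & 1);
                }
            }
        }
    }
    assert(values.size() == 12);
    assert(values[0] == 1 && values[4] == 0 && values[6] == 1);
    return 0;
}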

View File

@@ -0,0 +1,102 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// geo_parquet.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "parquet_types.h"
namespace duckdb {
struct ParquetColumnSchema;
class ParquetReader;
class ColumnReader;
class ClientContext;
class ExpressionExecutor;
enum class GeoParquetColumnEncoding : uint8_t {
WKB = 1,
POINT,
LINESTRING,
POLYGON,
MULTIPOINT,
MULTILINESTRING,
MULTIPOLYGON,
};
enum class GeoParquetVersion : uint8_t {
// Write GeoParquet 1.0 metadata
// GeoParquet 1.0 has the widest support among readers and writers
V1,
// Write GeoParquet 2.0
// The GeoParquet 2.0 options is identical to GeoParquet 1.0 except the underlying storage
// of spatial columns is Parquet native geometry, where the Parquet writer will include
// native statistics according to the underlying Parquet options. Compared to 'BOTH', this will
// actually write the metadata as containing GeoParquet version 2.0.0
// However, V2 isn't standardized yet, so this option is still a bit experimental
V2,
// Write GeoParquet 1.0 metadata, with native Parquet geometry types
// This is a bit of a hold-over option for compatibility with systems that
// reject GeoParquet 2.0 metadata, but can read Parquet native geometry types as they simply ignore the extra
// logical type. DuckDB v1.4.0 falls into this category.
BOTH,
// Do not write GeoParquet metadata
// This option suppresses GeoParquet metadata; however, spatial types will be written as
// Parquet native Geometry/Geography.
NONE,
};
struct GeoParquetColumnMetadata {
// The encoding of the geometry column
GeoParquetColumnEncoding geometry_encoding;
// The statistics of the geometry column
GeometryStatsData stats;
// The crs of the geometry column (if any) in PROJJSON format
string projjson;
// Used to track the "primary" geometry column (if any)
idx_t insertion_index = 0;
};
class GeoParquetFileMetadata {
public:
explicit GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) {
}
void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStatsData &stats);
void Write(duckdb_parquet::FileMetaData &file_meta_data);
// Try to read GeoParquet metadata. Returns nullptr if not found, invalid or the required spatial extension is not
// available.
static unique_ptr<GeoParquetFileMetadata> TryRead(const duckdb_parquet::FileMetaData &file_meta_data,
const ClientContext &context);
const unordered_map<string, GeoParquetColumnMetadata> &GetColumnMeta() const;
static unique_ptr<ColumnReader> CreateColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
ClientContext &context);
bool IsGeometryColumn(const string &column_name) const;
static bool IsGeoParquetConversionEnabled(const ClientContext &context);
static LogicalType GeometryType();
private:
mutex write_lock;
unordered_map<string, GeoParquetColumnMetadata> geometry_columns;
GeoParquetVersion version;
};
} // namespace duckdb

View File

@@ -0,0 +1,196 @@
[
{
"class": "ParquetColumnDefinition",
"includes": [
"parquet_reader.hpp"
],
"members": [
{
"id": 100,
"name": "field_id",
"type": "int32_t"
},
{
"id": 101,
"name": "name",
"type": "string"
},
{
"id": 103,
"name": "type",
"type": "LogicalType"
},
{
"id": 104,
"name": "default_value",
"type": "Value"
},
{
"id": 105,
"name": "identifier",
"type": "Value",
"default": "Value()"
}
],
"pointer_type": "none"
},
{
"class": "ParquetEncryptionConfig",
"includes": [
"parquet_crypto.hpp"
],
"members": [
{
"id": 100,
"name": "footer_key",
"type": "string"
},
{
"id": 101,
"name": "column_keys",
"type": "unordered_map<string, string>"
}
],
"pointer_type": "shared_ptr"
},
{
"class": "ParquetOptionsSerialization",
"includes": [
"parquet_reader.hpp"
],
"members": [
{
"id": 100,
"name": "binary_as_string",
"type": "bool",
"property": "parquet_options.binary_as_string"
},
{
"id": 101,
"name": "file_row_number",
"type": "bool",
"property": "parquet_options.file_row_number"
},
{
"id": 102,
"name": "file_options",
"type": "MultiFileOptions"
},
{
"id": 103,
"name": "schema",
"type": "vector<ParquetColumnDefinition>",
"property": "parquet_options.schema"
},
{
"id": 104,
"name": "encryption_config",
"type": "shared_ptr<ParquetEncryptionConfig>",
"default": "nullptr",
"property": "parquet_options.encryption_config"
},
{
"id": 105,
"name": "debug_use_openssl",
"type": "bool",
"default": "true",
"property": "parquet_options.debug_use_openssl"
},
{
"id": 106,
"name": "explicit_cardinality",
"type": "idx_t",
"default": "0",
"property": "parquet_options.explicit_cardinality"
},
{
"id": 107,
"name": "can_have_nan",
"type": "bool",
"default": "false",
"property": "parquet_options.can_have_nan"
}
],
"pointer_type": "none"
},
{
"class": "FieldID",
"includes": [
"parquet_field_id.hpp"
],
"members": [
{
"id": 100,
"name": "set",
"type": "bool"
},
{
"id": 101,
"name": "field_id",
"type": "int32_t"
},
{
"id": 102,
"name": "child_field_ids",
"type": "ChildFieldIDs"
}
],
"pointer_type": "none"
},
{
"class": "ChildFieldIDs",
"includes": [
"parquet_field_id.hpp"
],
"members": [
{
"id": 100,
"name": "ids",
"type": "case_insensitive_map_t<FieldID>",
"serialize_property": "ids.operator*()",
"deserialize_property": "ids.operator*()"
}
],
"pointer_type": "none"
},
{
"class": "ShreddingType",
"includes": [
"parquet_shredding.hpp"
],
"members": [
{
"id": 100,
"name": "set",
"type": "bool"
},
{
"id": 101,
"name": "type",
"type": "LogicalType"
},
{
"id": 102,
"name": "children",
"type": "ChildShreddingTypes"
}
],
"pointer_type": "none"
},
{
"class": "ChildShreddingTypes",
"includes": [
"parquet_shredding.hpp"
],
"members": [
{
"id": 100,
"name": "types",
"type": "case_insensitive_map_t<ShreddingType>",
"serialize_property": "types.operator*()",
"deserialize_property": "types.operator*()"
}
],
"pointer_type": "none"
}
]

View File

@@ -0,0 +1,61 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_bss_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "parquet_types.h"
#include "resizable_buffer.hpp"
namespace duckdb {
/// Decoder for the Byte Stream Split encoding
class BssDecoder {
public:
/// Create a decoder object. buffer/buffer_len is the encoded data.
BssDecoder(data_ptr_t buffer, uint32_t buffer_len) : buffer_(buffer, buffer_len), value_offset_(0) {
}
public:
template <typename T>
void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) {
if (buffer_.len % sizeof(T) != 0) {
duckdb::stringstream error;
error << "Data buffer size for the BYTE_STREAM_SPLIT encoding (" << buffer_.len
<< ") should be a multiple of the type size (" << sizeof(T) << ")";
throw std::runtime_error(error.str());
}
uint32_t num_buffer_values = buffer_.len / sizeof(T);
buffer_.available((value_offset_ + batch_size) * sizeof(T));
for (uint32_t byte_offset = 0; byte_offset < sizeof(T); ++byte_offset) {
data_ptr_t input_bytes = buffer_.ptr + byte_offset * num_buffer_values + value_offset_;
for (uint32_t i = 0; i < batch_size; ++i) {
values_target_ptr[byte_offset + i * sizeof(T)] = *(input_bytes + i);
}
}
value_offset_ += batch_size;
}
template <typename T>
void Skip(uint32_t batch_size) {
if (buffer_.len % sizeof(T) != 0) {
duckdb::stringstream error;
error << "Data buffer size for the BYTE_STREAM_SPLIT encoding (" << buffer_.len
<< ") should be a multiple of the type size (" << sizeof(T) << ")";
throw std::runtime_error(error.str());
}
buffer_.available((value_offset_ + batch_size) * sizeof(T));
value_offset_ += batch_size;
}
private:
ByteBuffer buffer_;
uint32_t value_offset_;
};
} // namespace duckdb
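
A standalone sketch of what BYTE_STREAM_SPLIT does for float data: byte i of every value is written contiguously as stream i, and GetBatch above gathers the bytes back per value. Toy data, no DuckDB types; the real decoder additionally tracks a value offset into the page buffer.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
    // Byte i of every value is stored contiguously (stream 0 first, then stream 1,
    // ...), which typically compresses better than interleaved float bytes.
    std::vector<float> values = {1.5f, -2.25f, 1000.0f, 0.125f};
    const size_t n = values.size();

    // Encode: scatter the bytes of each value into sizeof(float) streams.
    std::vector<uint8_t> encoded(n * sizeof(float));
    for (size_t i = 0; i < n; i++) {
        uint8_t bytes[sizeof(float)];
        std::memcpy(bytes, &values[i], sizeof(float));
        for (size_t b = 0; b < sizeof(float); b++) {
            encoded[b * n + i] = bytes[b];
        }
    }

    // Decode: gather byte b of value i from stream b (what GetBatch does).
    std::vector<float> decoded(n);
    for (size_t i = 0; i < n; i++) {
        uint8_t bytes[sizeof(float)];
        for (size_t b = 0; b < sizeof(float); b++) {
            bytes[b] = encoded[b * n + i];
        }
        std::memcpy(&decoded[i], bytes, sizeof(float));
    }
    assert(decoded == values);
    return 0;
}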

View File

@@ -0,0 +1,47 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_bss_encoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "decode_utils.hpp"
namespace duckdb {
class BssEncoder {
public:
explicit BssEncoder(const idx_t total_value_count_p, const idx_t bit_width_p)
: total_value_count(total_value_count_p), bit_width(bit_width_p), count(0) {
}
public:
void BeginWrite(Allocator &allocator) {
buffer = allocator.Allocate(total_value_count * bit_width + 1);
}
template <class T>
void WriteValue(const T &value) {
D_ASSERT(sizeof(T) == bit_width);
for (idx_t i = 0; i < sizeof(T); i++) {
buffer.get()[i * total_value_count + count] = reinterpret_cast<const_data_ptr_t>(&value)[i];
}
count++;
}
void FinishWrite(WriteStream &writer) {
writer.WriteData(buffer.get(), total_value_count * bit_width);
}
private:
const idx_t total_value_count;
const idx_t bit_width;
idx_t count;
AllocatedData buffer;
};
} // namespace duckdb

View File

@@ -0,0 +1,58 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_column_schema.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "parquet_types.h"
namespace duckdb {
using duckdb_parquet::FileMetaData;
struct ParquetOptions;
enum class ParquetColumnSchemaType { COLUMN, FILE_ROW_NUMBER, GEOMETRY, EXPRESSION, VARIANT };
enum class ParquetExtraTypeInfo {
NONE,
IMPALA_TIMESTAMP,
UNIT_NS,
UNIT_MS,
UNIT_MICROS,
DECIMAL_BYTE_ARRAY,
DECIMAL_INT32,
DECIMAL_INT64,
FLOAT16
};
struct ParquetColumnSchema {
ParquetColumnSchema() = default;
ParquetColumnSchema(idx_t max_define, idx_t max_repeat, idx_t schema_index, idx_t file_index,
ParquetColumnSchemaType schema_type = ParquetColumnSchemaType::COLUMN);
ParquetColumnSchema(string name, LogicalType type, idx_t max_define, idx_t max_repeat, idx_t schema_index,
idx_t column_index, ParquetColumnSchemaType schema_type = ParquetColumnSchemaType::COLUMN);
ParquetColumnSchema(ParquetColumnSchema parent, LogicalType result_type, ParquetColumnSchemaType schema_type);
ParquetColumnSchemaType schema_type;
string name;
LogicalType type;
idx_t max_define;
idx_t max_repeat;
idx_t schema_index;
idx_t column_index;
optional_idx parent_schema_index;
uint32_t type_length = 0;
uint32_t type_scale = 0;
duckdb_parquet::Type::type parquet_type = duckdb_parquet::Type::INT32;
ParquetExtraTypeInfo type_info = ParquetExtraTypeInfo::NONE;
vector<ParquetColumnSchema> children;
unique_ptr<BaseStatistics> Stats(const FileMetaData &file_meta_data, const ParquetOptions &parquet_options,
idx_t row_group_idx_p, const vector<duckdb_parquet::ColumnChunk> &columns) const;
};
} // namespace duckdb

View File

@@ -0,0 +1,89 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_crypto.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "parquet_types.h"
#include "duckdb/common/encryption_state.hpp"
#include "duckdb/storage/object_cache.hpp"
namespace duckdb {
using duckdb_apache::thrift::TBase;
using duckdb_apache::thrift::protocol::TProtocol;
class BufferedFileWriter;
class ParquetKeys : public ObjectCacheEntry {
public:
static ParquetKeys &Get(ClientContext &context);
public:
void AddKey(const string &key_name, const string &key);
bool HasKey(const string &key_name) const;
const string &GetKey(const string &key_name) const;
public:
static string ObjectType();
string GetObjectType() override;
private:
unordered_map<string, string> keys;
};
class ParquetEncryptionConfig {
public:
explicit ParquetEncryptionConfig();
ParquetEncryptionConfig(ClientContext &context, const Value &arg);
ParquetEncryptionConfig(string footer_key);
public:
static shared_ptr<ParquetEncryptionConfig> Create(ClientContext &context, const Value &arg);
const string &GetFooterKey() const;
public:
void Serialize(Serializer &serializer) const;
static shared_ptr<ParquetEncryptionConfig> Deserialize(Deserializer &deserializer);
private:
//! The encryption key used for the footer
string footer_key;
//! Mapping from column name to key name
unordered_map<string, string> column_keys;
};
class ParquetCrypto {
public:
//! Encrypted modules
static constexpr idx_t LENGTH_BYTES = 4;
static constexpr idx_t NONCE_BYTES = 12;
static constexpr idx_t TAG_BYTES = 16;
//! Block size we encrypt/decrypt
static constexpr idx_t CRYPTO_BLOCK_SIZE = 4096;
static constexpr idx_t BLOCK_SIZE = 16;
public:
//! Decrypt and read a Thrift object from the transport protocol
static uint32_t Read(TBase &object, TProtocol &iprot, const string &key, const EncryptionUtil &encryption_util_p);
//! Encrypt and write a Thrift object to the transport protocol
static uint32_t Write(const TBase &object, TProtocol &oprot, const string &key,
const EncryptionUtil &encryption_util_p);
//! Decrypt and read a buffer
static uint32_t ReadData(TProtocol &iprot, const data_ptr_t buffer, const uint32_t buffer_size, const string &key,
const EncryptionUtil &encryption_util_p);
//! Encrypt and write a buffer to a file
static uint32_t WriteData(TProtocol &oprot, const const_data_ptr_t buffer, const uint32_t buffer_size,
const string &key, const EncryptionUtil &encryption_util_p);
public:
static void AddKey(ClientContext &context, const FunctionParameters &parameters);
static bool ValidKey(const std::string &key);
};
} // namespace duckdb
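
Assuming the usual Parquet modular-encryption GCM framing of length prefix, nonce, ciphertext and tag (the exact framing produced by ParquetCrypto::WriteData is not shown in this header), the constants above imply the on-disk size of an encrypted module as follows; the helper name is illustrative.

#include <cassert>
#include <cstdint>

// Sizes taken from the ParquetCrypto constants above.
static constexpr uint64_t LENGTH_BYTES = 4;
static constexpr uint64_t NONCE_BYTES = 12;
static constexpr uint64_t TAG_BYTES = 16;

// Assumed layout: length prefix | nonce | ciphertext | tag, where GCM ciphertext
// has the same size as the plaintext.
static uint64_t EncryptedModuleSize(uint64_t plaintext_size) {
    return LENGTH_BYTES + NONCE_BYTES + plaintext_size + TAG_BYTES;
}

int main() {
    assert(EncryptedModuleSize(1000) == 1032);
    return 0;
}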

View File

@@ -0,0 +1,163 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_dbp_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "decode_utils.hpp"
namespace duckdb {
class DbpDecoder {
public:
DbpDecoder(const data_ptr_t buffer, const uint32_t buffer_len)
: buffer_(buffer, buffer_len),
//<block size in values> <number of miniblocks in a block> <total value count> <first value>
block_size_in_values(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_)),
number_of_miniblocks_per_block(DecodeNumberOfMiniblocksPerBlock(buffer_)),
number_of_values_in_a_miniblock(block_size_in_values / number_of_miniblocks_per_block),
total_value_count(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_)),
previous_value(ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_))),
// init state to something sane
is_first_value(true), read_values(0), min_delta(NumericLimits<int64_t>::Maximum()),
miniblock_index(number_of_miniblocks_per_block - 1), list_of_bitwidths_of_miniblocks(nullptr),
miniblock_offset(number_of_values_in_a_miniblock),
unpacked_data_offset(BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE) {
if (!(block_size_in_values % number_of_miniblocks_per_block == 0 &&
number_of_values_in_a_miniblock % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0)) {
throw InvalidInputException("Parquet file has invalid block sizes for DELTA_BINARY_PACKED");
}
}
ByteBuffer BufferPtr() const {
return buffer_;
}
uint64_t TotalValues() const {
return total_value_count;
}
template <typename T>
void GetBatch(const data_ptr_t target_values_ptr, const idx_t batch_size) {
if (read_values + batch_size > total_value_count) {
throw std::runtime_error("DBP decode did not find enough values");
}
read_values += batch_size;
GetBatchInternal<T>(target_values_ptr, batch_size);
}
template <class T>
void Skip(idx_t skip_count) {
if (read_values + skip_count > total_value_count) {
throw std::runtime_error("DBP decode did not find enough values");
}
read_values += skip_count;
GetBatchInternal<T, true>(nullptr, skip_count);
}
void Finalize() {
if (miniblock_offset == number_of_values_in_a_miniblock) {
return;
}
auto data = make_unsafe_uniq_array<int64_t>(number_of_values_in_a_miniblock);
GetBatchInternal<int64_t>(data_ptr_cast(data.get()), number_of_values_in_a_miniblock - miniblock_offset);
}
private:
static idx_t DecodeNumberOfMiniblocksPerBlock(ByteBuffer &buffer) {
auto res = ParquetDecodeUtils::VarintDecode<uint64_t>(buffer);
if (res == 0) {
throw InvalidInputException(
"Parquet file has invalid number of miniblocks per block for DELTA_BINARY_PACKED");
}
return res;
}
template <typename T, bool SKIP_READ = false>
void GetBatchInternal(const data_ptr_t target_values_ptr, const idx_t batch_size) {
if (batch_size == 0) {
return;
}
D_ASSERT(target_values_ptr || SKIP_READ);
T *target_values = nullptr;
if (!SKIP_READ) {
target_values = reinterpret_cast<T *>(target_values_ptr);
}
idx_t target_values_offset = 0;
if (is_first_value) {
if (!SKIP_READ) {
target_values[0] = static_cast<T>(previous_value);
}
target_values_offset++;
is_first_value = false;
}
while (target_values_offset < batch_size) {
// Copy over any remaining data
const idx_t next = MinValue(batch_size - target_values_offset,
BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE - unpacked_data_offset);
if (next != 0) {
for (idx_t i = 0; i < next; i++) {
const auto &unpacked_value = unpacked_data[unpacked_data_offset + i];
auto current_value = static_cast<T>(static_cast<uint64_t>(previous_value) +
static_cast<uint64_t>(min_delta) + unpacked_value);
if (!SKIP_READ) {
target_values[target_values_offset + i] = current_value;
}
previous_value = static_cast<int64_t>(current_value);
}
target_values_offset += next;
unpacked_data_offset += next;
continue;
}
// Move to next miniblock / block
D_ASSERT(unpacked_data_offset == BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE);
D_ASSERT(miniblock_index < number_of_miniblocks_per_block);
D_ASSERT(miniblock_offset <= number_of_values_in_a_miniblock);
if (miniblock_offset == number_of_values_in_a_miniblock) {
miniblock_offset = 0;
if (++miniblock_index == number_of_miniblocks_per_block) {
// <min delta> <list of bitwidths of miniblocks> <miniblocks>
min_delta = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_));
buffer_.available(number_of_miniblocks_per_block);
list_of_bitwidths_of_miniblocks = buffer_.ptr;
buffer_.unsafe_inc(number_of_miniblocks_per_block);
miniblock_index = 0;
}
}
// Unpack from current miniblock
ParquetDecodeUtils::BitUnpackAligned(buffer_, unpacked_data,
BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE,
list_of_bitwidths_of_miniblocks[miniblock_index]);
unpacked_data_offset = 0;
miniblock_offset += BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
}
}
private:
ByteBuffer buffer_;
const idx_t block_size_in_values;
const idx_t number_of_miniblocks_per_block;
const idx_t number_of_values_in_a_miniblock;
const idx_t total_value_count;
int64_t previous_value;
bool is_first_value;
idx_t read_values;
//! Block stuff
int64_t min_delta;
idx_t miniblock_index;
bitpacking_width_t *list_of_bitwidths_of_miniblocks;
idx_t miniblock_offset;
uint64_t unpacked_data[BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE];
idx_t unpacked_data_offset;
};
} // namespace duckdb

View File

@@ -0,0 +1,229 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_dbp_encoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "decode_utils.hpp"
namespace duckdb {
class DbpEncoder {
private:
static constexpr uint64_t BLOCK_SIZE_IN_VALUES = 2048;
static constexpr uint64_t NUMBER_OF_MINIBLOCKS_IN_A_BLOCK = 8;
static constexpr uint64_t NUMBER_OF_VALUES_IN_A_MINIBLOCK = BLOCK_SIZE_IN_VALUES / NUMBER_OF_MINIBLOCKS_IN_A_BLOCK;
public:
explicit DbpEncoder(const idx_t total_value_count_p) : total_value_count(total_value_count_p), count(0) {
}
public:
template <class T>
void BeginWrite(WriteStream &writer, const T &first_value) {
throw InternalException("DbpEncoder should only be used with integers");
}
template <class T>
void WriteValue(WriteStream &writer, const T &value) {
throw InternalException("DbpEncoder should only be used with integers");
}
void FinishWrite(WriteStream &writer) {
if (count + block_count != total_value_count) {
throw InternalException("value count mismatch when writing DELTA_BINARY_PACKED");
}
if (block_count != 0) {
WriteBlock(writer);
}
}
private:
void BeginWriteInternal(WriteStream &writer, const int64_t &first_value) {
// <block size in values> <number of miniblocks in a block> <total value count> <first value>
// the block size is a multiple of 128; it is stored as a ULEB128 int
ParquetDecodeUtils::VarintEncode(BLOCK_SIZE_IN_VALUES, writer);
// the miniblock count per block is a divisor of the block size such that their quotient,
// the number of values in a miniblock, is a multiple of 32
static_assert(BLOCK_SIZE_IN_VALUES % NUMBER_OF_MINIBLOCKS_IN_A_BLOCK == 0 &&
NUMBER_OF_VALUES_IN_A_MINIBLOCK % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0,
"invalid block sizes for DELTA_BINARY_PACKED");
// it is stored as a ULEB128 int
ParquetDecodeUtils::VarintEncode(NUMBER_OF_MINIBLOCKS_IN_A_BLOCK, writer);
// the total value count is stored as a ULEB128 int
ParquetDecodeUtils::VarintEncode(total_value_count, writer);
// the first value is stored as a zigzag ULEB128 int
ParquetDecodeUtils::VarintEncode(ParquetDecodeUtils::IntToZigzag(first_value), writer);
// initialize
if (total_value_count != 0) {
count++;
}
previous_value = first_value;
min_delta = NumericLimits<int64_t>::Maximum();
block_count = 0;
}
void WriteValueInternal(WriteStream &writer, const int64_t &value) {
// 1. Compute the differences between consecutive elements. For the first element in the block,
// use the last element in the previous block or, in the case of the first block,
// use the first value of the whole sequence, stored in the header.
// Subtractions in steps 1) and 2) may incur signed arithmetic overflow,
// and so will the corresponding additions when decoding.
// Overflow should be allowed and handled as wrapping around in 2s complement notation
// so that the original values are correctly restituted.
// This may require explicit care in some programming languages
// (for example by doing all arithmetic in the unsigned domain).
const auto delta = static_cast<int64_t>(static_cast<uint64_t>(value) - static_cast<uint64_t>(previous_value));
previous_value = value;
// Compute the frame of reference (the minimum of the deltas in the block).
min_delta = MinValue(min_delta, delta);
// append. if block is full, write it out
data[block_count++] = delta;
if (block_count == BLOCK_SIZE_IN_VALUES) {
WriteBlock(writer);
}
}
void WriteBlock(WriteStream &writer) {
D_ASSERT(count + block_count == total_value_count || block_count == BLOCK_SIZE_IN_VALUES);
const auto number_of_miniblocks =
(block_count + NUMBER_OF_VALUES_IN_A_MINIBLOCK - 1) / NUMBER_OF_VALUES_IN_A_MINIBLOCK;
for (idx_t miniblock_idx = 0; miniblock_idx < number_of_miniblocks; miniblock_idx++) {
for (idx_t i = 0; i < NUMBER_OF_VALUES_IN_A_MINIBLOCK; i++) {
const idx_t index = miniblock_idx * NUMBER_OF_VALUES_IN_A_MINIBLOCK + i;
auto &value = data[index];
if (index < block_count) {
// 2. Compute the frame of reference (the minimum of the deltas in the block).
// Subtract this min delta from all deltas in the block.
// This guarantees that all values are non-negative.
D_ASSERT(min_delta <= value);
value = static_cast<int64_t>(static_cast<uint64_t>(value) - static_cast<uint64_t>(min_delta));
} else {
// If there are not enough values to fill the last miniblock, we pad the miniblock
// so that its length is always the number of values in a full miniblock multiplied by the bit
// width. The values of the padding bits should be zero, but readers must accept paddings consisting
// of arbitrary bits as well.
value = 0;
}
}
}
for (idx_t miniblock_idx = 0; miniblock_idx < NUMBER_OF_MINIBLOCKS_IN_A_BLOCK; miniblock_idx++) {
auto &width = list_of_bitwidths_of_miniblocks[miniblock_idx];
if (miniblock_idx < number_of_miniblocks) {
const auto src = &data[miniblock_idx * NUMBER_OF_VALUES_IN_A_MINIBLOCK];
width = BitpackingPrimitives::MinimumBitWidth(reinterpret_cast<uint64_t *>(src),
NUMBER_OF_VALUES_IN_A_MINIBLOCK);
D_ASSERT(width <= sizeof(int64_t) * 8);
} else {
// If, in the last block, less than <number of miniblocks in a block> miniblocks are needed to store the
// values, the bytes storing the bit widths of the unneeded miniblocks are still present, their value
// should be zero, but readers must accept arbitrary values as well. There are no additional padding
// bytes for the miniblock bodies though, as if their bit widths were 0 (regardless of the actual byte
// values). The reader knows when to stop reading by keeping track of the number of values read.
width = 0;
}
}
// 3. Encode the frame of reference (min delta) as a zigzag ULEB128 int
// followed by the bit widths of the miniblocks
// and the delta values (minus the min delta) bit-packed per miniblock.
// <min delta> <list of bitwidths of miniblocks> <miniblocks>
// the min delta is a zigzag ULEB128 int (we compute a minimum as we need positive integers for bit packing)
ParquetDecodeUtils::VarintEncode(ParquetDecodeUtils::IntToZigzag(min_delta), writer);
// the bitwidth of each block is stored as a byte
writer.WriteData(list_of_bitwidths_of_miniblocks, NUMBER_OF_MINIBLOCKS_IN_A_BLOCK);
// each miniblock is a list of bit packed ints according to the bit width stored at the beginning of the block
for (idx_t miniblock_idx = 0; miniblock_idx < number_of_miniblocks; miniblock_idx++) {
const auto src = &data[miniblock_idx * NUMBER_OF_VALUES_IN_A_MINIBLOCK];
const auto &width = list_of_bitwidths_of_miniblocks[miniblock_idx];
memset(data_packed, 0, sizeof(data_packed));
ParquetDecodeUtils::BitPackAligned(reinterpret_cast<uint64_t *>(src), data_packed,
NUMBER_OF_VALUES_IN_A_MINIBLOCK, width);
const auto write_size = NUMBER_OF_VALUES_IN_A_MINIBLOCK * width / 8;
#ifdef DEBUG
// immediately verify that unpacking yields the input data
int64_t verification_data[NUMBER_OF_VALUES_IN_A_MINIBLOCK];
ByteBuffer byte_buffer(data_ptr_cast(data_packed), write_size);
bitpacking_width_t bitpack_pos = 0;
ParquetDecodeUtils::BitUnpack(byte_buffer, bitpack_pos, reinterpret_cast<uint64_t *>(verification_data),
NUMBER_OF_VALUES_IN_A_MINIBLOCK, width);
for (idx_t i = 0; i < NUMBER_OF_VALUES_IN_A_MINIBLOCK; i++) {
D_ASSERT(src[i] == verification_data[i]);
}
#endif
writer.WriteData(data_packed, write_size);
}
count += block_count;
min_delta = NumericLimits<int64_t>::Maximum();
block_count = 0;
}
private:
//! Overall fields
const idx_t total_value_count;
idx_t count;
int64_t previous_value;
//! Block-specific fields
int64_t min_delta;
int64_t data[BLOCK_SIZE_IN_VALUES];
idx_t block_count;
//! Bitpacking fields
bitpacking_width_t list_of_bitwidths_of_miniblocks[NUMBER_OF_MINIBLOCKS_IN_A_BLOCK];
data_t data_packed[NUMBER_OF_VALUES_IN_A_MINIBLOCK * sizeof(int64_t)];
};
template <>
inline void DbpEncoder::BeginWrite(WriteStream &writer, const int32_t &first_value) {
BeginWriteInternal(writer, first_value);
}
template <>
inline void DbpEncoder::BeginWrite(WriteStream &writer, const int64_t &first_value) {
BeginWriteInternal(writer, first_value);
}
template <>
inline void DbpEncoder::BeginWrite(WriteStream &writer, const uint32_t &first_value) {
BeginWriteInternal(writer, first_value);
}
template <>
inline void DbpEncoder::BeginWrite(WriteStream &writer, const uint64_t &first_value) {
BeginWriteInternal(writer, first_value);
}
template <>
inline void DbpEncoder::WriteValue(WriteStream &writer, const int32_t &first_value) {
WriteValueInternal(writer, first_value);
}
template <>
inline void DbpEncoder::WriteValue(WriteStream &writer, const int64_t &first_value) {
WriteValueInternal(writer, first_value);
}
template <>
inline void DbpEncoder::WriteValue(WriteStream &writer, const uint32_t &first_value) {
WriteValueInternal(writer, first_value);
}
template <>
inline void DbpEncoder::WriteValue(WriteStream &writer, const uint64_t &first_value) {
WriteValueInternal(writer, first_value);
}
} // namespace duckdb
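
A worked sketch of steps 1-3 from the comments above, using a toy block instead of the real 2048-value blocks and leaving out the varint header and the actual bit-packing; it also shows how the decoder reverses the transform.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    std::vector<int64_t> values = {7, 5, 3, 1, 2, 3, 4, 5};

    // 1. Deltas against the previous value; the first value goes into the header.
    std::vector<int64_t> deltas;
    for (size_t i = 1; i < values.size(); i++) {
        deltas.push_back(values[i] - values[i - 1]); // {-2, -2, -2, 1, 1, 1, 1}
    }

    // 2. Frame of reference: subtract the minimum delta so every delta is >= 0.
    int64_t min_delta = *std::min_element(deltas.begin(), deltas.end()); // -2
    std::vector<uint64_t> packed_input;
    for (auto d : deltas) {
        packed_input.push_back(static_cast<uint64_t>(d - min_delta)); // {0,0,0,3,3,3,3}
    }

    // 3. The miniblock then needs only 2 bits per value (max adjusted delta is 3).
    //    The decoder reverses this: value = previous + min_delta + unpacked.
    int64_t previous = values[0];
    for (auto u : packed_input) {
        previous = previous + min_delta + static_cast<int64_t>(u);
    }
    assert(previous == values.back());
    return 0;
}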

View File

@@ -0,0 +1,55 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_decimal_utils.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
class ParquetDecimalUtils {
public:
template <class PHYSICAL_TYPE>
static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size, const ParquetColumnSchema &) {
PHYSICAL_TYPE res = 0;
auto res_ptr = (uint8_t *)&res;
bool positive = (*pointer & 0x80) == 0;
// numbers are stored as two's complement so some muckery is required
for (idx_t i = 0; i < MinValue<idx_t>(size, sizeof(PHYSICAL_TYPE)); i++) {
auto byte = *(pointer + (size - i - 1));
res_ptr[i] = positive ? byte : byte ^ 0xFF;
}
// Verify that there are only 0s here
if (size > sizeof(PHYSICAL_TYPE)) {
for (idx_t i = sizeof(PHYSICAL_TYPE); i < size; i++) {
auto byte = *(pointer + (size - i - 1));
if (!positive) {
byte ^= 0xFF;
}
if (byte != 0) {
throw InvalidInputException("Invalid decimal encoding in Parquet file");
}
}
}
if (!positive) {
res += 1;
return -res;
}
return res;
}
static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const ParquetColumnSchema &schema);
};
template <>
double ParquetDecimalUtils::ReadDecimalValue(const_data_ptr_t pointer, idx_t size, const ParquetColumnSchema &schema);
} // namespace duckdb
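
ReadDecimalValue above converts big-endian two's complement bytes into a native integer. A simplified stand-in without the truncation checks (the decimal scale is applied elsewhere); the function name and test values are illustrative.

#include <cassert>
#include <cstdint>
#include <vector>

// Parquet DECIMAL values backed by BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY store the
// unscaled integer as big-endian two's complement.
static int64_t DecodeBigEndianDecimal(const std::vector<uint8_t> &bytes) {
    // Sign-extend from the most significant byte, then accumulate unsigned.
    uint64_t result = (bytes[0] & 0x80) ? ~uint64_t(0) : 0;
    for (uint8_t b : bytes) {
        result = (result << 8) | b;
    }
    return static_cast<int64_t>(result);
}

int main() {
    // DECIMAL(4,2) value -1.23 is stored as the unscaled integer -123.
    assert(DecodeBigEndianDecimal({0xFF, 0x85}) == -123);
    assert(DecodeBigEndianDecimal({0x01, 0x00}) == 256);
    return 0;
}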

View File

@@ -0,0 +1,69 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_dlba_encoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "parquet_dbp_encoder.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
namespace duckdb {
class DlbaEncoder {
public:
DlbaEncoder(const idx_t total_value_count_p, const idx_t total_string_size_p)
: dbp_encoder(total_value_count_p), total_string_size(total_string_size_p) {
}
public:
template <class T>
void BeginWrite(Allocator &, WriteStream &, const T &) {
throw InternalException("DlbaEncoder should only be used with strings");
}
template <class T>
void WriteValue(WriteStream &, const T &) {
throw InternalException("DlbaEncoder should only be used with strings");
}
void FinishWrite(WriteStream &writer) {
dbp_encoder.FinishWrite(writer);
writer.WriteData(buffer.get(), stream->GetPosition());
}
template <class SRC>
static idx_t GetStringSize(const SRC &) {
return 0;
}
private:
DbpEncoder dbp_encoder;
const idx_t total_string_size;
AllocatedData buffer;
unsafe_unique_ptr<MemoryStream> stream;
};
template <>
inline void DlbaEncoder::BeginWrite(Allocator &allocator, WriteStream &writer, const string_t &first_value) {
buffer = allocator.Allocate(total_string_size + 1);
stream = make_unsafe_uniq<MemoryStream>(buffer.get(), buffer.GetSize());
dbp_encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value.GetSize()));
stream->WriteData(const_data_ptr_cast(first_value.GetData()), first_value.GetSize());
}
template <>
inline void DlbaEncoder::WriteValue(WriteStream &writer, const string_t &value) {
dbp_encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value.GetSize()));
stream->WriteData(const_data_ptr_cast(value.GetData()), value.GetSize());
}
template <>
inline idx_t DlbaEncoder::GetStringSize(const string_t &src_value) {
return src_value.GetSize();
}
} // namespace duckdb
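
DlbaEncoder implements DELTA_LENGTH_BYTE_ARRAY: all string lengths are written first (DELTA_BINARY_PACKED in the real format), followed by the concatenated string bytes. A sketch of the layout with a plain length vector standing in for the DBP-encoded lengths:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> values = {"duck", "db", "parquet"};

    // Encode: lengths first, then one concatenated byte buffer.
    std::vector<uint32_t> lengths;
    std::string data;
    for (const auto &v : values) {
        lengths.push_back(static_cast<uint32_t>(v.size()));
        data += v;
    }
    assert(data == "duckdbparquet");

    // Decode: slice the concatenated buffer back using running offsets.
    std::vector<std::string> decoded;
    size_t offset = 0;
    for (auto len : lengths) {
        decoded.push_back(data.substr(offset, len));
        offset += len;
    }
    assert(decoded == values);
    return 0;
}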

View File

@@ -0,0 +1,22 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_extension.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
namespace duckdb {
class ParquetExtension : public Extension {
public:
void Load(ExtensionLoader &loader) override;
std::string Name() override;
std::string Version() const override;
};
} // namespace duckdb

View File

@@ -0,0 +1,39 @@
#pragma once
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
namespace duckdb {
struct FieldID;
struct ChildFieldIDs {
ChildFieldIDs();
ChildFieldIDs Copy() const;
unique_ptr<case_insensitive_map_t<FieldID>> ids;
void Serialize(Serializer &serializer) const;
static ChildFieldIDs Deserialize(Deserializer &source);
};
struct FieldID {
public:
static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id";
FieldID();
explicit FieldID(int32_t field_id);
FieldID Copy() const;
bool set;
int32_t field_id;
ChildFieldIDs child_field_ids;
void Serialize(Serializer &serializer) const;
static FieldID Deserialize(Deserializer &source);
public:
static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
const vector<LogicalType> &sql_types);
static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
unordered_set<uint32_t> &unique_field_ids,
const case_insensitive_map_t<LogicalType> &name_to_type_map);
};
} // namespace duckdb

View File

@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_file_metadata_cache.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/storage/object_cache.hpp"
#include "geo_parquet.hpp"
#include "parquet_types.h"
namespace duckdb {
struct CachingFileHandle;
enum class ParquetCacheValidity { VALID, INVALID, UNKNOWN };
class ParquetFileMetadataCache : public ObjectCacheEntry {
public:
ParquetFileMetadataCache(unique_ptr<duckdb_parquet::FileMetaData> file_metadata, CachingFileHandle &handle,
unique_ptr<GeoParquetFileMetadata> geo_metadata, idx_t footer_size);
~ParquetFileMetadataCache() override = default;
//! Parquet file metadata
unique_ptr<const duckdb_parquet::FileMetaData> metadata;
//! GeoParquet metadata
unique_ptr<GeoParquetFileMetadata> geo_metadata;
//! Parquet footer size
idx_t footer_size;
public:
static string ObjectType();
string GetObjectType() override;
bool IsValid(CachingFileHandle &new_handle) const;
//! Check if a cache entry is valid based ONLY on the OpenFileInfo (without doing any file system calls)
//! If the OpenFileInfo does not have enough information this can return UNKNOWN
ParquetCacheValidity IsValid(const OpenFileInfo &info) const;
private:
bool validate;
timestamp_t last_modified;
string version_tag;
};
} // namespace duckdb

View File

@@ -0,0 +1,17 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_float16.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
namespace duckdb {
float Float16ToFloat32(const uint16_t &float16_value);
} // namespace duckdb

View File

@@ -0,0 +1,41 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_metadata.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "parquet_reader.hpp"
#include "duckdb/function/function_set.hpp"
namespace duckdb {
class ParquetMetaDataFunction : public TableFunction {
public:
ParquetMetaDataFunction();
};
class ParquetSchemaFunction : public TableFunction {
public:
ParquetSchemaFunction();
};
class ParquetKeyValueMetadataFunction : public TableFunction {
public:
ParquetKeyValueMetadataFunction();
};
class ParquetFileMetadataFunction : public TableFunction {
public:
ParquetFileMetadataFunction();
};
class ParquetBloomProbeFunction : public TableFunction {
public:
ParquetBloomProbeFunction();
};
} // namespace duckdb

View File

@@ -0,0 +1,66 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_multi_file_info.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/multi_file/multi_file_function.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
class ParquetFileReaderOptions : public BaseFileReaderOptions {
public:
explicit ParquetFileReaderOptions(ParquetOptions options_p) : options(std::move(options_p)) {
}
explicit ParquetFileReaderOptions(ClientContext &context) : options(context) {
}
ParquetOptions options;
};
struct ParquetMultiFileInfo : MultiFileReaderInterface {
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) override;
bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
BaseFileReaderOptions &options, vector<string> &expected_names,
vector<LogicalType> &expected_types) override;
bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options,
BaseFileReaderOptions &options) override;
void BindReader(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
MultiFileBindData &bind_data) override;
unique_ptr<TableFunctionData> InitializeBindData(MultiFileBindData &multi_file_data,
unique_ptr<BaseFileReaderOptions> options) override;
void FinalizeBindData(MultiFileBindData &multi_file_data) override;
void GetBindInfo(const TableFunctionData &bind_data, BindInfo &info) override;
optional_idx MaxThreads(const MultiFileBindData &bind_data, const MultiFileGlobalState &global_state,
FileExpandResult expand_result) override;
unique_ptr<GlobalTableFunctionState> InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data,
MultiFileGlobalState &global_state) override;
unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
BaseUnionData &union_data, const MultiFileBindData &bind_data_p) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
const OpenFileInfo &file, idx_t file_idx,
const MultiFileBindData &bind_data) override;
shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
BaseFileReaderOptions &options,
const MultiFileOptions &file_options) override;
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
void GetVirtualColumns(ClientContext &context, MultiFileBindData &bind_data, virtual_column_map_t &result) override;
unique_ptr<MultiFileReaderInterface> Copy() override;
FileGlobInput GetGlobInput() override;
};
class ParquetScanFunction {
public:
static TableFunctionSet GetFunctionSet();
};
} // namespace duckdb

View File

@@ -0,0 +1,239 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/storage/caching_file_system.hpp"
#include "duckdb/common/common.hpp"
#include "duckdb/common/encryption_state.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/multi_file/base_file_reader.hpp"
#include "duckdb/common/multi_file/multi_file_options.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "column_reader.hpp"
#include "parquet_file_metadata_cache.hpp"
#include "parquet_rle_bp_decoder.hpp"
#include "parquet_types.h"
#include "resizable_buffer.hpp"
#include "duckdb/execution/adaptive_filter.hpp"
#include <exception>
namespace duckdb_parquet {
namespace format {
class FileMetaData;
}
} // namespace duckdb_parquet
namespace duckdb {
class Allocator;
class ClientContext;
class BaseStatistics;
class TableFilterSet;
class ParquetEncryptionConfig;
class ParquetReader;
struct ParquetReaderPrefetchConfig {
// Percentage of data in a row group span that should be scanned for enabling whole group prefetch
static constexpr double WHOLE_GROUP_PREFETCH_MINIMUM_SCAN = 0.95;
};
struct ParquetScanFilter {
ParquetScanFilter(ClientContext &context, idx_t filter_idx, TableFilter &filter);
~ParquetScanFilter();
ParquetScanFilter(ParquetScanFilter &&) = default;
idx_t filter_idx;
TableFilter &filter;
unique_ptr<TableFilterState> filter_state;
};
struct ParquetReaderScanState {
vector<idx_t> group_idx_list;
int64_t current_group;
idx_t offset_in_group;
idx_t group_offset;
unique_ptr<CachingFileHandle> file_handle;
unique_ptr<ColumnReader> root_reader;
duckdb_base_std::unique_ptr<duckdb_apache::thrift::protocol::TProtocol> thrift_file_proto;
bool finished;
SelectionVector sel;
ResizeableBuffer define_buf;
ResizeableBuffer repeat_buf;
bool prefetch_mode = false;
bool current_group_prefetched = false;
//! Adaptive filter
unique_ptr<AdaptiveFilter> adaptive_filter;
//! Table filter list
vector<ParquetScanFilter> scan_filters;
//! (optional) pointer to the PhysicalOperator for logging
optional_ptr<const PhysicalOperator> op;
};
struct ParquetColumnDefinition {
public:
static ParquetColumnDefinition FromSchemaValue(ClientContext &context, const Value &column_value);
public:
// DEPRECATED, use 'identifier' instead
int32_t field_id;
string name;
LogicalType type;
Value default_value;
Value identifier;
public:
void Serialize(Serializer &serializer) const;
static ParquetColumnDefinition Deserialize(Deserializer &deserializer);
};
struct ParquetOptions {
explicit ParquetOptions() {
}
explicit ParquetOptions(ClientContext &context);
bool binary_as_string = false;
bool variant_legacy_encoding = false;
bool file_row_number = false;
shared_ptr<ParquetEncryptionConfig> encryption_config;
bool debug_use_openssl = true;
vector<ParquetColumnDefinition> schema;
idx_t explicit_cardinality = 0;
bool can_have_nan = false; // if floats or doubles can contain NaN values
};
struct ParquetOptionsSerialization {
ParquetOptionsSerialization() = default;
ParquetOptionsSerialization(ParquetOptions parquet_options_p, MultiFileOptions file_options_p)
: parquet_options(std::move(parquet_options_p)), file_options(std::move(file_options_p)) {
}
ParquetOptions parquet_options;
MultiFileOptions file_options;
public:
void Serialize(Serializer &serializer) const;
static ParquetOptionsSerialization Deserialize(Deserializer &deserializer);
};
struct ParquetUnionData : public BaseUnionData {
explicit ParquetUnionData(OpenFileInfo file_p) : BaseUnionData(std::move(file_p)) {
}
~ParquetUnionData() override;
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, const string &name) override;
ParquetOptions options;
shared_ptr<ParquetFileMetadataCache> metadata;
unique_ptr<ParquetColumnSchema> root_schema;
};
class ParquetReader : public BaseFileReader {
public:
ParquetReader(ClientContext &context, OpenFileInfo file, ParquetOptions parquet_options,
shared_ptr<ParquetFileMetadataCache> metadata = nullptr);
~ParquetReader() override;
CachingFileSystem fs;
Allocator &allocator;
shared_ptr<ParquetFileMetadataCache> metadata;
ParquetOptions parquet_options;
unique_ptr<ParquetColumnSchema> root_schema;
shared_ptr<EncryptionUtil> encryption_util;
//! How many rows have been read from this file
atomic<idx_t> rows_read;
public:
string GetReaderType() const override {
return "Parquet";
}
shared_ptr<BaseUnionData> GetUnionData(idx_t file_idx) override;
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, const string &name) override;
bool TryInitializeScan(ClientContext &context, GlobalTableFunctionState &gstate,
LocalTableFunctionState &lstate) override;
void Scan(ClientContext &context, GlobalTableFunctionState &global_state, LocalTableFunctionState &local_state,
DataChunk &chunk) override;
void FinishFile(ClientContext &context, GlobalTableFunctionState &gstate_p) override;
double GetProgressInFile(ClientContext &context) override;
public:
void InitializeScan(ClientContext &context, ParquetReaderScanState &state, vector<idx_t> groups_to_read);
void Scan(ClientContext &context, ParquetReaderScanState &state, DataChunk &output);
idx_t NumRows() const;
idx_t NumRowGroups() const;
const duckdb_parquet::FileMetaData *GetFileMetadata() const;
uint32_t Read(duckdb_apache::thrift::TBase &object, TProtocol &iprot);
uint32_t ReadData(duckdb_apache::thrift::protocol::TProtocol &iprot, const data_ptr_t buffer,
const uint32_t buffer_size);
unique_ptr<BaseStatistics> ReadStatistics(const string &name);
CachingFileHandle &GetHandle() {
return *file_handle;
}
static unique_ptr<BaseStatistics> ReadStatistics(ClientContext &context, ParquetOptions parquet_options,
shared_ptr<ParquetFileMetadataCache> metadata, const string &name);
static unique_ptr<BaseStatistics> ReadStatistics(const ParquetUnionData &union_data, const string &name);
LogicalType DeriveLogicalType(const SchemaElement &s_ele, ParquetColumnSchema &schema) const;
void AddVirtualColumn(column_t virtual_column_id) override;
void GetPartitionStats(vector<PartitionStatistics> &result);
static void GetPartitionStats(const duckdb_parquet::FileMetaData &metadata, vector<PartitionStatistics> &result);
static bool MetadataCacheEnabled(ClientContext &context);
static shared_ptr<ParquetFileMetadataCache> GetMetadataCacheEntry(ClientContext &context, const OpenFileInfo &file);
private:
//! Construct a parquet reader but **do not** open a file, used in ReadStatistics only
ParquetReader(ClientContext &context, ParquetOptions parquet_options,
shared_ptr<ParquetFileMetadataCache> metadata);
void InitializeSchema(ClientContext &context);
bool ScanInternal(ClientContext &context, ParquetReaderScanState &state, DataChunk &output);
//! Parse the schema of the file
unique_ptr<ParquetColumnSchema> ParseSchema(ClientContext &context);
ParquetColumnSchema ParseSchemaRecursive(idx_t depth, idx_t max_define, idx_t max_repeat, idx_t &next_schema_idx,
idx_t &next_file_idx, ClientContext &context);
unique_ptr<ColumnReader> CreateReader(ClientContext &context);
unique_ptr<ColumnReader> CreateReaderRecursive(ClientContext &context, const vector<ColumnIndex> &indexes,
const ParquetColumnSchema &schema);
const duckdb_parquet::RowGroup &GetGroup(ParquetReaderScanState &state);
uint64_t GetGroupCompressedSize(ParquetReaderScanState &state);
idx_t GetGroupOffset(ParquetReaderScanState &state);
// Group span is the distance between the min page offset and the max page offset plus the max page compressed size
uint64_t GetGroupSpan(ParquetReaderScanState &state);
void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
ParquetColumnSchema ParseColumnSchema(const SchemaElement &s_ele, idx_t max_define, idx_t max_repeat,
idx_t schema_index, idx_t column_index,
ParquetColumnSchemaType type = ParquetColumnSchemaType::COLUMN);
MultiFileColumnDefinition ParseColumnDefinition(const duckdb_parquet::FileMetaData &file_meta_data,
ParquetColumnSchema &element);
private:
unique_ptr<CachingFileHandle> file_handle;
};
} // namespace duckdb

View File

@@ -0,0 +1,158 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_rle_bp_decoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "decode_utils.hpp"
#include "parquet_types.h"
#include "resizable_buffer.hpp"
#include "thrift_tools.hpp"
namespace duckdb {
class RleBpDecoder {
public:
/// Create a decoder object. buffer/buffer_len is the decoded data.
/// bit_width is the width of each value (before encoding).
RleBpDecoder(data_ptr_t buffer, uint32_t buffer_len, uint32_t bit_width)
: buffer_(buffer, buffer_len), bit_width_(bit_width), current_value_(0), repeat_count_(0), literal_count_(0) {
if (bit_width >= 64) {
throw std::runtime_error("Decode bit width too large");
}
byte_encoded_len = ((bit_width_ + 7) / 8);
max_val = (uint64_t(1) << bit_width_) - 1;
}
template <class T>
bool HasRepeatedBatch(const uint32_t batch_size, const T value) {
if (repeat_count_ == 0 && literal_count_ == 0) {
NextCounts();
}
return repeat_count_ >= batch_size && current_value_ == static_cast<uint64_t>(value);
}
template <typename T>
void GetRepeatedBatch(const uint32_t batch_size, const T value) {
D_ASSERT(repeat_count_ >= batch_size && current_value_ == static_cast<uint64_t>(value));
repeat_count_ -= batch_size;
}
template <typename T>
void GetBatch(data_ptr_t values_target_ptr, const uint32_t batch_size) {
auto values = reinterpret_cast<T *>(values_target_ptr);
uint32_t values_read = 0;
while (values_read < batch_size) {
if (repeat_count_ > 0) {
auto repeat_batch = MinValue<uint32_t>(batch_size - values_read, repeat_count_);
std::fill_n(values + values_read, repeat_batch, static_cast<T>(current_value_));
repeat_count_ -= repeat_batch;
values_read += repeat_batch;
} else if (literal_count_ > 0) {
auto literal_batch = MinValue<uint32_t>(batch_size - values_read, literal_count_);
ParquetDecodeUtils::BitUnpack<T>(buffer_, bitpack_pos, values + values_read, literal_batch, bit_width_);
literal_count_ -= literal_batch;
values_read += literal_batch;
} else {
NextCounts();
}
}
D_ASSERT(values_read == batch_size);
}
void Skip(uint32_t batch_size) {
uint32_t values_skipped = 0;
while (values_skipped < batch_size) {
if (repeat_count_ > 0) {
auto repeat_batch = MinValue<uint32_t>(batch_size - values_skipped, repeat_count_);
repeat_count_ -= repeat_batch;
values_skipped += repeat_batch;
} else if (literal_count_ > 0) {
auto literal_batch = MinValue<uint32_t>(batch_size - values_skipped, literal_count_);
ParquetDecodeUtils::Skip(buffer_, bitpack_pos, literal_batch, bit_width_);
literal_count_ -= literal_batch;
values_skipped += literal_batch;
} else {
NextCounts();
}
}
D_ASSERT(values_skipped == batch_size);
}
static uint8_t ComputeBitWidth(idx_t val) {
if (val == 0) {
return 0;
}
uint8_t ret = 1;
while ((((idx_t)1u << (idx_t)ret) - 1) < val) {
ret++;
}
return ret;
}
private:
ByteBuffer buffer_;
/// Number of bits needed to encode the value. Must be less than 64.
uint32_t bit_width_;
uint64_t current_value_;
uint32_t repeat_count_;
uint32_t literal_count_;
uint8_t byte_encoded_len;
uint64_t max_val;
uint8_t bitpack_pos = 0;
/// Fills literal_count_ and repeat_count_ with the counts of the next run.
template <bool CHECKED>
void NextCountsTemplated() {
// Read the next run's indicator int, it could be a literal or repeated run.
// The int is encoded as a vlq-encoded value.
if (bitpack_pos != 0) {
if (CHECKED) {
buffer_.inc(1);
} else {
buffer_.unsafe_inc(1);
}
bitpack_pos = 0;
}
auto indicator_value = ParquetDecodeUtils::VarintDecode<uint32_t, CHECKED>(buffer_);
// lsb indicates if it is a literal run or repeated run
bool is_literal = indicator_value & 1;
if (is_literal) {
literal_count_ = (indicator_value >> 1) * 8;
} else {
repeat_count_ = indicator_value >> 1;
// (ARROW-4018) this is not big-endian compatible, lol
current_value_ = 0;
if (CHECKED) {
buffer_.available(byte_encoded_len);
}
for (auto i = 0; i < byte_encoded_len; i++) {
auto next_byte = Load<uint8_t>(buffer_.ptr + i);
current_value_ |= (next_byte << (i * 8));
}
buffer_.unsafe_inc(byte_encoded_len);
// sanity check
if (repeat_count_ > 0 && current_value_ > max_val) {
throw std::runtime_error("Payload value bigger than allowed. Corrupted file?");
}
}
}
void NextCounts() {
if (buffer_.check_available(byte_encoded_len + sizeof(uint32_t) + 2)) {
NextCountsTemplated<false>();
} else {
NextCountsTemplated<true>();
}
}
};
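// Example (illustrative sketch): decoding a single RLE run with RleBpDecoder. The two bytes
// below encode "the value 3 repeated 8 times" at a bit width of 2: the varint indicator
// (8 << 1) | 0 = 0x10 marks a repeated run of length 8, and the next byte carries the value,
// padded up to a whole byte. ExampleDecodeRleRun is a made-up helper, not part of the reader.
inline void ExampleDecodeRleRun() {
	uint8_t encoded[] = {0x10, 0x03};
	RleBpDecoder decoder(encoded, sizeof(encoded), 2);
	uint8_t values[8];
	decoder.GetBatch<uint8_t>(values, 8); // values now holds {3, 3, 3, 3, 3, 3, 3, 3}
}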
} // namespace duckdb

View File

@@ -0,0 +1,155 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_rle_bp_encoder.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "decode_utils.hpp"
namespace duckdb {
class RleBpEncoder {
public:
explicit RleBpEncoder(uint32_t bit_width_p) : bit_width(bit_width_p), byte_width((bit_width + 7) / 8) {
}
public:
void BeginWrite() {
rle_count = 0;
bp_block_count = 0;
}
void WriteValue(WriteStream &writer, const uint32_t &value) {
if (bp_block_count != 0) {
// We already committed to a BP run
D_ASSERT(rle_count == 0);
bp_block[bp_block_count++] = value;
if (bp_block_count == BP_BLOCK_SIZE) {
WriteRun(writer);
}
return;
}
if (rle_count == 0) {
// Starting fresh, try for an RLE run first
rle_value = value;
rle_count = 1;
return;
}
// We're trying for an RLE run
if (rle_value == value) {
// Same as current RLE value
rle_count++;
return;
}
// Value differs from current RLE value
if (rle_count >= MINIMUM_RLE_COUNT) {
// We have enough values for an RLE run
WriteRun(writer);
rle_value = value;
rle_count = 1;
return;
}
// Not enough values, convert and commit to a BP run
D_ASSERT(bp_block_count == 0);
for (idx_t i = 0; i < rle_count; i++) {
bp_block[bp_block_count++] = rle_value;
}
bp_block[bp_block_count++] = value;
rle_count = 0;
}
void WriteMany(WriteStream &writer, uint32_t value, idx_t count) {
if (rle_count != 0) {
// If an RLE run is going on, write a single value to either finish it or convert to BP
WriteValue(writer, value);
count--;
}
if (bp_block_count != 0) {
// If a BP run is going on, finish it
while (bp_block_count != 0 && count > 0) {
WriteValue(writer, value);
count--;
}
}
// Set remaining as current RLE run
rle_value = value;
rle_count += count;
}
void FinishWrite(WriteStream &writer) {
WriteRun(writer);
}
private:
//! Meta information
uint32_t bit_width;
uint32_t byte_width;
//! RLE stuff
static constexpr idx_t MINIMUM_RLE_COUNT = 4;
uint32_t rle_value;
idx_t rle_count;
//! BP stuff
static constexpr idx_t BP_BLOCK_SIZE = 256;
static_assert(BP_BLOCK_SIZE % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE == 0,
"BP_BLOCK_SIZE must be divisible by BITPACKING_ALGORITHM_GROUP_SIZE");
uint32_t bp_block[BP_BLOCK_SIZE] = {0};
uint32_t bp_block_packed[BP_BLOCK_SIZE] = {0};
idx_t bp_block_count;
private:
void WriteRun(WriteStream &writer) {
if (rle_count != 0) {
WriteCurrentBlockRLE(writer);
} else {
WriteCurrentBlockBP(writer);
}
}
void WriteCurrentBlockRLE(WriteStream &writer) {
ParquetDecodeUtils::VarintEncode(rle_count << 1 | 0, writer); // (... | 0) signals RLE run
D_ASSERT(rle_value >> (byte_width * 8) == 0);
switch (byte_width) {
case 1:
writer.Write<uint8_t>(rle_value);
break;
case 2:
writer.Write<uint16_t>(rle_value);
break;
case 3:
writer.Write<uint8_t>(rle_value & 0xFF);
writer.Write<uint8_t>((rle_value >> 8) & 0xFF);
writer.Write<uint8_t>((rle_value >> 16) & 0xFF);
break;
case 4:
writer.Write<uint32_t>(rle_value);
break;
default:
throw InternalException("unsupported byte width for RLE encoding");
}
rle_count = 0;
}
void WriteCurrentBlockBP(WriteStream &writer) {
if (bp_block_count == 0) {
return;
}
ParquetDecodeUtils::VarintEncode(BP_BLOCK_SIZE / 8 << 1 | 1, writer); // (... | 1) signals BP run
ParquetDecodeUtils::BitPackAligned(bp_block, data_ptr_cast(bp_block_packed), BP_BLOCK_SIZE, bit_width);
writer.WriteData(data_ptr_cast(bp_block_packed), BP_BLOCK_SIZE * bit_width / 8);
bp_block_count = 0;
}
};
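// Example (illustrative sketch): writing definition levels with RleBpEncoder. Runs of at least
// MINIMUM_RLE_COUNT equal values become RLE runs; shorter runs are folded into a bit-packed
// block. ExampleEncodeDefineLevels is a made-up helper; any WriteStream implementation works
// as the sink.
inline void ExampleEncodeDefineLevels(WriteStream &out) {
	RleBpEncoder encoder(1); // bit width 1: levels are either 0 (NULL) or 1 (valid)
	encoder.BeginWrite();
	encoder.WriteMany(out, 1, 100); // 100 valid rows, buffered as a single RLE run
	encoder.WriteValue(out, 0);     // a NULL row; the differing value flushes the run of 100
	encoder.FinishWrite(out);       // flushes the trailing run holding the single 0
}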
} // namespace duckdb

View File

@@ -0,0 +1,49 @@
#pragma once
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/types/variant.hpp"
namespace duckdb {
struct ShreddingType;
struct ChildShreddingTypes {
public:
ChildShreddingTypes();
public:
ChildShreddingTypes Copy() const;
public:
void Serialize(Serializer &serializer) const;
static ChildShreddingTypes Deserialize(Deserializer &source);
public:
unique_ptr<case_insensitive_map_t<ShreddingType>> types;
};
struct ShreddingType {
public:
ShreddingType();
explicit ShreddingType(const LogicalType &type);
public:
ShreddingType Copy() const;
public:
void Serialize(Serializer &serializer) const;
static ShreddingType Deserialize(Deserializer &source);
public:
static ShreddingType GetShreddingTypes(const Value &val);
void AddChild(const string &name, ShreddingType &&child);
optional_ptr<const ShreddingType> GetChild(const string &name) const;
public:
bool set = false;
LogicalType type;
ChildShreddingTypes children;
};
} // namespace duckdb

View File

@@ -0,0 +1,111 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_statistics.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/storage/statistics/base_statistics.hpp"
#include "parquet_types.h"
#include "resizable_buffer.hpp"
namespace duckdb {
using duckdb_parquet::ColumnChunk;
using duckdb_parquet::SchemaElement;
struct LogicalType;
struct ParquetColumnSchema;
class ResizeableBuffer;
struct ParquetStatisticsUtils {
static unique_ptr<BaseStatistics> TransformColumnStatistics(const ParquetColumnSchema &reader,
const vector<ColumnChunk> &columns, bool can_have_nan);
static Value ConvertValue(const LogicalType &type, const ParquetColumnSchema &schema_ele, const std::string &stats);
static bool BloomFilterSupported(const LogicalTypeId &type_id);
static bool BloomFilterExcludes(const TableFilter &filter, const duckdb_parquet::ColumnMetaData &column_meta_data,
duckdb_apache::thrift::protocol::TProtocol &file_proto, Allocator &allocator);
static unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type, const ParquetColumnSchema &schema_ele,
const duckdb_parquet::Statistics &parquet_stats);
private:
static Value ConvertValueInternal(const LogicalType &type, const ParquetColumnSchema &schema_ele,
const std::string &stats);
};
class ParquetBloomFilter {
static constexpr const idx_t DEFAULT_BLOCK_COUNT = 32; // 4k filter
public:
ParquetBloomFilter(idx_t num_entries, double bloom_filter_false_positive_ratio);
ParquetBloomFilter(unique_ptr<ResizeableBuffer> data_p);
void FilterInsert(uint64_t x);
bool FilterCheck(uint64_t x);
void Shrink(idx_t new_block_count);
double OneRatio();
ResizeableBuffer *Get();
private:
unique_ptr<ResizeableBuffer> data;
idx_t block_count;
};
// see https://github.com/apache/parquet-format/blob/master/BloomFilter.md
struct ParquetBloomBlock {
struct ParquetBloomMaskResult {
uint8_t bit_set[8] = {0};
};
uint32_t block[8] = {0};
static bool check_bit(uint32_t &x, const uint8_t i) {
D_ASSERT(i < 32);
return (x >> i) & (uint32_t)1;
}
static void set_bit(uint32_t &x, const uint8_t i) {
D_ASSERT(i < 32);
x |= (uint32_t)1 << i;
D_ASSERT(check_bit(x, i));
}
static ParquetBloomMaskResult Mask(uint32_t x) {
static const uint32_t parquet_bloom_salt[8] = {0x47b6137bU, 0x44974d91U, 0x8824ad5bU, 0xa2b7289dU,
0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
ParquetBloomMaskResult result;
for (idx_t i = 0; i < 8; i++) {
result.bit_set[i] = (x * parquet_bloom_salt[i]) >> 27;
}
return result;
}
static void BlockInsert(ParquetBloomBlock &b, uint32_t x) {
auto masked = Mask(x);
for (idx_t i = 0; i < 8; i++) {
set_bit(b.block[i], masked.bit_set[i]);
D_ASSERT(check_bit(b.block[i], masked.bit_set[i]));
}
}
static bool BlockCheck(ParquetBloomBlock &b, uint32_t x) {
auto masked = Mask(x);
for (idx_t i = 0; i < 8; i++) {
if (!check_bit(b.block[i], masked.bit_set[i])) {
return false;
}
}
return true;
}
};
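// Example (illustrative sketch): each ParquetBloomBlock is a 256-bit split block. Mask derives
// eight bit positions from a 32-bit key using the salt constants above, BlockInsert sets them
// and BlockCheck tests them. Picking which block of the filter a hash lands in is handled by
// ParquetBloomFilter and is not shown here; ExampleBlockProbe is a made-up helper.
inline bool ExampleBlockProbe(ParquetBloomBlock &block, uint64_t hash) {
	auto key = static_cast<uint32_t>(hash); // the lower 32 bits select the bits within the block
	ParquetBloomBlock::BlockInsert(block, key);
	return ParquetBloomBlock::BlockCheck(block, key); // always true once the key is inserted
}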
} // namespace duckdb

View File

@@ -0,0 +1,621 @@
#pragma once
namespace duckdb {
class StripeStreams {
public:
virtual ~StripeStreams() = default;
/**
* get column selector for current stripe reading session
* @return column selector will hold column projection info
*/
virtual const dwio::common::ColumnSelector &getColumnSelector() const = 0;
// Get row reader options
virtual const dwio::common::RowReaderOptions &getRowReaderOptions() const = 0;
/**
* Get the encoding for the given column for this stripe.
*/
virtual const proto::ColumnEncoding &getEncoding(const EncodingKey &) const = 0;
/**
* Get the stream for the given column/kind in this stripe.
* @param streamId stream identifier object
* @param throwIfNotFound fail if a stream is required and not found
* @return the new stream
*/
virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
/**
* visit all streams of given node and execute visitor logic
* return number of streams visited
*/
virtual uint32_t visitStreamsOfNode(uint32_t node,
std::function<void(const StreamInformation &)> visitor) const = 0;
/**
* Get the value of useVInts for the given column in this stripe.
* Defaults to true.
* @param streamId stream identifier
*/
virtual bool getUseVInts(const StreamIdentifier &streamId) const = 0;
/**
* Get the memory pool for this reader.
*/
virtual memory::MemoryPool &getMemoryPool() const = 0;
/**
* Get the RowGroupIndex.
* @return a vector of RowIndex belonging to the stripe
*/
virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
/**
* Get stride index provider which is used by string dictionary reader to
* get the row index stride index where next() happens
*/
virtual const StrideIndexProvider &getStrideIndexProvider() const = 0;
};
class ColumnReader {
public:
ColumnReader(const EncodingKey &ek, StripeStreams &stripe);
virtual ~ColumnReader() = default;
/**
* Skip number of specified rows.
* @param numValues the number of values to skip
* @return the number of non-null values skipped
*/
virtual uint64_t skip(uint64_t numValues);
/**
* Read the next group of values into a RowVector.
* @param numValues the number of values to read
* @param vector to read into
*/
virtual void next(uint64_t numValues, VectorPtr &result, const uint64_t *nulls = nullptr) = 0;
};
class SelectiveColumnReader : public ColumnReader {
public:
static constexpr uint64_t kStringBufferSize = 16 * 1024;
SelectiveColumnReader(const EncodingKey &ek, StripeStreams &stripe, common::ScanSpec *scanSpec);
/**
* Read the next group of values into a RowVector.
* @param numValues the number of values to read
* @param vector to read into
*/
void next(uint64_t /*numValues*/, VectorPtr & /*result*/, const uint64_t * /*incomingNulls*/) override {
DATALIB_CHECK(false) << "next() is only defined in SelectiveStructColumnReader";
}
// Creates a reader for the given stripe.
static unique_ptr<SelectiveColumnReader> build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
const std::shared_ptr<const dwio::common::TypeWithId> &dataType,
StripeStreams &stripe, common::ScanSpec *scanSpec,
uint32_t sequence = 0);
// Seeks to offset and reads the rows in 'rows' and applies
// filters and value processing as given by 'scanSpec supplied at
// construction. 'offset' is relative to start of stripe. 'rows' are
// relative to 'offset', so that row 0 is the 'offset'th row from
// start of stripe. 'rows' is expected to stay constant
// between this and the next call to read.
virtual void read(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls) = 0;
// Extracts the values at 'rows' into '*result'. May rewrite or
// reallocate '*result'. 'rows' must be the same set or a subset of
// 'rows' passed to the last 'read().
virtual void getValues(RowSet rows, VectorPtr *result) = 0;
// Returns the rows that were selected/visited by the last
// read(). If 'this' has no filter, returns 'rows' passed to last
// read().
const RowSet outputRows() const {
if (scanSpec_->hasFilter()) {
return outputRows_;
}
return inputRows_;
}
// Advances to 'offset', so that the next item to be read is the
// offset-th from the start of stripe.
void seekTo(vector_size_t offset, bool readsNullsOnly);
// The below functions are called from ColumnVisitor to fill the result set.
inline void addOutputRow(vector_size_t row) {
outputRows_.push_back(row);
}
template <typename T>
inline void addNull() {
DATALIB_DCHECK(rawResultNulls_ && rawValues_ && (numValues_ + 1) * sizeof(T) < rawSize_);
anyNulls_ = true;
bits::setBit(rawResultNulls_, numValues_);
reinterpret_cast<T *>(rawValues_)[numValues_] = T();
numValues_++;
}
template <typename T>
inline void addValue(const T value) {
// @lint-ignore-every HOWTOEVEN ConstantArgumentPassByValue
static_assert(std::is_pod<T>::value, "General case of addValue is only for primitive types");
DATALIB_DCHECK(rawValues_ && (numValues_ + 1) * sizeof(T) < rawSize_);
reinterpret_cast<T *>(rawValues_)[numValues_] = value;
numValues_++;
}
void dropResults(vector_size_t count) {
outputRows_.resize(outputRows_.size() - count);
numValues_ -= count;
}
common::ScanSpec *scanSpec() const {
return scanSpec_;
}
auto readOffset() const {
return readOffset_;
}
void setReadOffset(vector_size_t readOffset) {
readOffset_ = readOffset;
}
protected:
static constexpr int8_t kNoValueSize = -1;
template <typename T>
void ensureValuesCapacity(vector_size_t numRows);
void prepareNulls(vector_size_t numRows, bool needNulls);
template <typename T>
void filterNulls(RowSet rows, bool isNull, bool extractValues);
template <typename T>
void prepareRead(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls);
void setOutputRows(RowSet rows) {
outputRows_.resize(rows.size());
if (!rows.size()) {
return;
}
memcpy(outputRows_.data(), &rows[0], rows.size() * sizeof(vector_size_t));
}
template <typename T, typename TVector>
void getFlatValues(RowSet rows, VectorPtr *result);
template <typename T, typename TVector>
void compactScalarValues(RowSet rows);
void addStringValue(folly::StringPiece value);
// Specification of filters, value extraction, pruning etc. The
// spec is assigned at construction and the contents may change at
// run time based on adaptation. Owned by caller.
common::ScanSpec *const scanSpec_;
// Row number after last read row, relative to stripe start.
vector_size_t readOffset_ = 0;
// The rows to process in read(). References memory supplied by
// caller. The values must remain live until the next call to read().
RowSet inputRows_;
// Rows passing the filter in readWithVisitor. Must stay
// constant between consecutive calls to read().
vector<vector_size_t> outputRows_;
// The row number corresponding to each element in 'values_'
vector<vector_size_t> valueRows_;
// The set of all nulls in the range of read(). Created when first
// needed and then reused. Not returned to callers.
BufferPtr nullsInReadRange_;
// Nulls buffer for readWithVisitor. Not set if no nulls. 'numValues'
// is the index of the first non-set bit.
BufferPtr resultNulls_;
uint64_t *rawResultNulls_ = nullptr;
// Buffer for gathering scalar values in readWithVisitor.
BufferPtr values_;
// Writable content in 'values'
void *rawValues_ = nullptr;
vector_size_t numValues_ = 0;
// Size of fixed width value in 'rawValues'. For integers, values
// are read at 64 bit width and can be compacted or extracted at a
// different width.
int8_t valueSize_ = kNoValueSize;
// Buffers backing the StringViews in 'values' when reading strings.
vector<BufferPtr> stringBuffers_;
// Writable contents of 'stringBuffers_.back()'.
char *rawStringBuffer_ = nullptr;
// Total writable bytes in 'rawStringBuffer_'.
int32_t rawStringSize_ = 0;
// Number of written bytes in 'rawStringBuffer_'.
uint32_t rawStringUsed_ = 0;
// True if last read() added any nulls.
bool anyNulls_ = false;
// True if all values in scope for last read() are null.
bool allNull_ = false;
};
struct ExtractValues {
static constexpr bool kSkipNulls = false;
bool acceptsNulls() const {
return true;
}
template <typename V>
void addValue(vector_size_t /*rowIndex*/, V /*value*/) {
}
void addNull(vector_size_t /*rowIndex*/) {
}
};
class Filter {
protected:
Filter(bool deterministic, bool nullAllowed, FilterKind kind)
: nullAllowed_(nullAllowed), deterministic_(deterministic), kind_(kind) {
}
public:
virtual ~Filter() = default;
// Templates parametrized on filter need to know determinism at compile
// time. If this is false, deterministic() will be consulted at
// runtime.
static constexpr bool deterministic = true;
FilterKind kind() const {
return kind_;
}
virtual unique_ptr<Filter> clone() const = 0;
/**
* A filter becomes non-deterministic when applies to nested column,
* e.g. a[1] > 10 is non-deterministic because > 10 filter applies only to
* some positions, e.g. first entry in a set of entries that correspond to a
* single top-level position.
*/
virtual bool isDeterministic() const {
return deterministic_;
}
/**
* When a filter applied to a nested column fails, the whole top-level
* position should fail. To enable this functionality, the filter keeps track
* of the boundaries of top-level positions and allows the caller to find out
* where the current top-level position started and how far it continues.
* @return number of positions from the start of the current top-level
* position up to the current position (excluding current position)
*/
virtual int getPrecedingPositionsToFail() const {
return 0;
}
/**
* @return number of positions remaining until the end of the current
* top-level position
*/
virtual int getSucceedingPositionsToFail() const {
return 0;
}
virtual bool testNull() const {
return nullAllowed_;
}
/**
* Used to apply is [not] null filters to complex types, e.g.
* a[1] is null AND a[3] is not null, where a is an array(array(T)).
*
* In these case, the exact values are not known, but it is known whether they
* are null or not. Furthermore, for some positions only nulls are allowed
* (a[1] is null), for others only non-nulls (a[3] is not null), and for the
* rest both are allowed (a[2] and a[N], where N > 3).
*/
virtual bool testNonNull() const {
DWIO_RAISE("not supported");
}
virtual bool testInt64(int64_t /* unused */) const {
DWIO_RAISE("not supported");
}
virtual bool testDouble(double /* unused */) const {
DWIO_RAISE("not supported");
}
virtual bool testFloat(float /* unused */) const {
DWIO_RAISE("not supported");
}
virtual bool testBool(bool /* unused */) const {
DWIO_RAISE("not supported");
}
virtual bool testBytes(const char * /* unused */, int32_t /* unused */) const {
DWIO_RAISE("not supported");
}
/**
* Filters like string equality and IN, as well as conditions on cardinality
* of lists and maps can be at least partly decided by looking at lengths
* alone. If this is false, then no further checks are needed. If true,
* eventual filters on the data itself need to be evaluated.
*/
virtual bool testLength(int32_t /* unused */) const {
DWIO_RAISE("not supported");
}
protected:
const bool nullAllowed_;
private:
const bool deterministic_;
const FilterKind kind_;
};
// Template parameter for controlling filtering and action on a set of rows.
template <typename T, typename TFilter, typename ExtractValues, bool isDense>
class ColumnVisitor {
public:
using FilterType = TFilter;
static constexpr bool dense = isDense;
ColumnVisitor(TFilter &filter, SelectiveColumnReader *reader, const RowSet &rows, ExtractValues values)
: filter_(filter), reader_(reader), allowNulls_(!TFilter::deterministic || filter.testNull()), rows_(&rows[0]),
numRows_(rows.size()), rowIndex_(0), values_(values) {
}
bool allowNulls() {
if (ExtractValues::kSkipNulls && TFilter::deterministic) {
return false;
}
return allowNulls_ && values_.acceptsNulls();
}
vector_size_t start() {
return isDense ? 0 : rowAt(0);
}
// Tests for a null value and processes it. If the value is not
// null, returns 0 and has no effect. If the value is null, advances
// to the next non-null value in 'rows_'. Returns the number of
// values (not including nulls) to skip to get to the next non-null.
// If there is no next non-null in 'rows_', sets 'atEnd'. If 'atEnd'
// is set and a non-zero skip is returned, the caller must perform
// the skip before returning.
FOLLY_ALWAYS_INLINE vector_size_t checkAndSkipNulls(const uint64_t *nulls, vector_size_t &current, bool &atEnd) {
auto testRow = currentRow();
// Check that the caller and the visitor are in sync about current row.
DATALIB_DCHECK(current == testRow);
uint32_t nullIndex = testRow >> 6;
uint64_t nullWord = nulls[nullIndex];
if (!nullWord) {
return 0;
}
uint8_t nullBit = testRow & 63;
if ((nullWord & (1UL << nullBit)) == 0) {
return 0;
}
// We have a null. We find the next non-null.
if (++rowIndex_ >= numRows_) {
atEnd = true;
return 0;
}
auto rowOfNullWord = testRow - nullBit;
if (isDense) {
if (nullBit == 63) {
nullBit = 0;
rowOfNullWord += 64;
nullWord = nulls[++nullIndex];
} else {
++nullBit;
// set all the bits below the row to null.
nullWord |= f4d::bits::lowMask(nullBit);
}
for (;;) {
auto nextNonNull = count_trailing_zeros(~nullWord);
if (rowOfNullWord + nextNonNull >= numRows_) {
// Nulls all the way to the end.
atEnd = true;
return 0;
}
if (nextNonNull < 64) {
DATALIB_CHECK(rowIndex_ <= rowOfNullWord + nextNonNull);
rowIndex_ = rowOfNullWord + nextNonNull;
current = currentRow();
return 0;
}
rowOfNullWord += 64;
nullWord = nulls[++nullIndex];
}
} else {
// Sparse row numbers. We find the first non-null and count
// how many non-nulls on rows not in 'rows_' we skipped.
int32_t toSkip = 0;
nullWord |= f4d::bits::lowMask(nullBit);
for (;;) {
testRow = currentRow();
while (testRow >= rowOfNullWord + 64) {
toSkip += __builtin_popcountll(~nullWord);
nullWord = nulls[++nullIndex];
rowOfNullWord += 64;
}
// testRow is inside nullWord. See if non-null.
nullBit = testRow & 63;
if ((nullWord & (1UL << nullBit)) == 0) {
toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(nullBit));
current = testRow;
return toSkip;
}
if (++rowIndex_ >= numRows_) {
// We end with a null. Add the non-nulls below the final null.
toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(testRow - rowOfNullWord));
atEnd = true;
return toSkip;
}
}
}
}
vector_size_t processNull(bool &atEnd) {
vector_size_t previous = currentRow();
if (filter_.testNull()) {
filterPassedForNull();
} else {
filterFailed();
}
if (++rowIndex_ >= numRows_) {
atEnd = true;
return rows_[numRows_ - 1] - previous;
}
if (TFilter::deterministic && isDense) {
return 0;
}
return currentRow() - previous - 1;
}
FOLLY_ALWAYS_INLINE vector_size_t process(T value, bool &atEnd) {
if (!TFilter::deterministic) {
auto previous = currentRow();
if (common::applyFilter(filter_, value)) {
filterPassed(value);
} else {
filterFailed();
}
if (++rowIndex_ >= numRows_) {
atEnd = true;
return rows_[numRows_ - 1] - previous;
}
return currentRow() - previous - 1;
}
// The filter passes or fails and we go to the next row if any.
if (common::applyFilter(filter_, value)) {
filterPassed(value);
} else {
filterFailed();
}
if (++rowIndex_ >= numRows_) {
atEnd = true;
return 0;
}
if (isDense) {
return 0;
}
return currentRow() - rows_[rowIndex_ - 1] - 1;
}
inline vector_size_t rowAt(vector_size_t index) {
if (isDense) {
return index;
}
return rows_[index];
}
vector_size_t currentRow() {
if (isDense) {
return rowIndex_;
}
return rows_[rowIndex_];
}
vector_size_t numRows() {
return numRows_;
}
void filterPassed(T value) {
addResult(value);
if (!std::is_same<TFilter, common::AlwaysTrue>::value) {
addOutputRow(currentRow());
}
}
inline void filterPassedForNull() {
addNull();
if (!std::is_same<TFilter, common::AlwaysTrue>::value) {
addOutputRow(currentRow());
}
}
FOLLY_ALWAYS_INLINE void filterFailed();
inline void addResult(T value);
inline void addNull();
inline void addOutputRow(vector_size_t row);
protected:
TFilter &filter_;
SelectiveColumnReader *reader_;
const bool allowNulls_;
const vector_size_t *rows_;
vector_size_t numRows_;
vector_size_t rowIndex_;
ExtractValues values_;
};
} // namespace duckdb

View File

@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_timestamp.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
namespace duckdb {
struct Int96 {
uint32_t value[3];
};
timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts);
timestamp_ns_t ImpalaTimestampToTimestampNS(const Int96 &raw_ts);
Int96 TimestampToImpalaTimestamp(timestamp_t &ts);
timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts);
timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts);
timestamp_t ParquetTimestampNsToTimestamp(const int64_t &raw_ts);
timestamp_ns_t ParquetTimestampMsToTimestampNs(const int64_t &raw_ms);
timestamp_ns_t ParquetTimestampUsToTimestampNs(const int64_t &raw_us);
timestamp_ns_t ParquetTimestampNsToTimestampNs(const int64_t &raw_ns);
date_t ParquetIntToDate(const int32_t &raw_date);
dtime_t ParquetMsIntToTime(const int32_t &raw_millis);
dtime_t ParquetIntToTime(const int64_t &raw_micros);
dtime_t ParquetNsIntToTime(const int64_t &raw_nanos);
dtime_ns_t ParquetMsIntToTimeNs(const int32_t &raw_millis);
dtime_ns_t ParquetUsIntToTimeNs(const int64_t &raw_micros);
dtime_ns_t ParquetIntToTimeNs(const int64_t &raw_nanos);
dtime_tz_t ParquetIntToTimeMsTZ(const int32_t &raw_millis);
dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_micros);
dtime_tz_t ParquetIntToTimeNsTZ(const int64_t &raw_nanos);
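// Example (illustrative sketch): the arithmetic behind the Int96 conversions, written out for
// clarity under the usual Impala layout assumption: value[0] and value[1] hold the nanoseconds
// elapsed within the day (low word first) and value[2] holds the Julian day number, with
// Julian day 2440588 corresponding to the Unix epoch. ExampleInt96ToMicrosSinceEpoch is a
// made-up helper, not the conversion used by the extension.
inline int64_t ExampleInt96ToMicrosSinceEpoch(const Int96 &raw_ts) {
	int64_t nanos_of_day = (static_cast<int64_t>(raw_ts.value[1]) << 32) | raw_ts.value[0];
	int64_t days_since_epoch = static_cast<int64_t>(raw_ts.value[2]) - 2440588;
	return days_since_epoch * 86400000000LL + nanos_of_day / 1000;
}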
} // namespace duckdb

View File

@@ -0,0 +1,182 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/common/common.hpp"
#include "duckdb/common/optional_idx.hpp"
#include "duckdb/common/encryption_state.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/atomic.hpp"
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/types/column/column_data_collection.hpp"
#include "duckdb/function/copy_function.hpp"
#include "parquet_statistics.hpp"
#include "column_writer.hpp"
#include "parquet_field_id.hpp"
#include "parquet_shredding.hpp"
#include "parquet_types.h"
#include "geo_parquet.hpp"
#include "writer/parquet_write_stats.hpp"
#include "thrift/protocol/TCompactProtocol.h"
namespace duckdb {
class FileSystem;
class FileOpener;
class ParquetEncryptionConfig;
class ParquetStatsAccumulator;
class Serializer;
class Deserializer;
class ColumnWriterStatistics;
struct CopyFunctionFileStatistics;
struct PreparedRowGroup {
duckdb_parquet::RowGroup row_group;
vector<unique_ptr<ColumnWriterState>> states;
};
struct ParquetBloomFilterEntry {
unique_ptr<ParquetBloomFilter> bloom_filter;
idx_t row_group_idx;
idx_t column_idx;
};
enum class ParquetVersion : uint8_t {
V1 = 1, //! Excludes DELTA_BINARY_PACKED, DELTA_LENGTH_BYTE_ARRAY, BYTE_STREAM_SPLIT
V2 = 2, //! Includes the encodings above
};
class ParquetWriter {
public:
ParquetWriter(ClientContext &context, FileSystem &fs, string file_name, vector<LogicalType> types,
vector<string> names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids,
ShreddingType shredding_types, const vector<pair<string, string>> &kv_metadata,
shared_ptr<ParquetEncryptionConfig> encryption_config, optional_idx dictionary_size_limit,
idx_t string_dictionary_page_size_limit, bool enable_bloom_filters,
double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
ParquetVersion parquet_version, GeoParquetVersion geoparquet_version);
~ParquetWriter();
public:
void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
void FlushRowGroup(PreparedRowGroup &row_group);
void Flush(ColumnDataCollection &buffer);
void Finalize();
static duckdb_parquet::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele,
bool allow_geometry);
ClientContext &GetContext() {
return context;
}
duckdb_apache::thrift::protocol::TProtocol *GetProtocol() {
return protocol.get();
}
duckdb_parquet::CompressionCodec::type GetCodec() {
return codec;
}
duckdb_parquet::Type::type GetType(idx_t schema_idx) {
return file_meta_data.schema[schema_idx].type;
}
LogicalType GetSQLType(idx_t schema_idx) const {
return sql_types[schema_idx];
}
BufferedFileWriter &GetWriter() {
return *writer;
}
idx_t FileSize() {
return total_written;
}
optional_idx DictionarySizeLimit() const {
return dictionary_size_limit;
}
idx_t StringDictionaryPageSizeLimit() const {
return string_dictionary_page_size_limit;
}
bool EnableBloomFilters() const {
return enable_bloom_filters;
}
double BloomFilterFalsePositiveRatio() const {
return bloom_filter_false_positive_ratio;
}
int64_t CompressionLevel() const {
return compression_level;
}
idx_t NumberOfRowGroups() {
return num_row_groups;
}
ParquetVersion GetParquetVersion() const {
return parquet_version;
}
GeoParquetVersion GetGeoParquetVersion() const {
return geoparquet_version;
}
const string &GetFileName() const {
return file_name;
}
uint32_t Write(const duckdb_apache::thrift::TBase &object);
uint32_t WriteData(const const_data_ptr_t buffer, const uint32_t buffer_size);
GeoParquetFileMetadata &GetGeoParquetData();
static bool TryGetParquetType(const LogicalType &duckdb_type,
optional_ptr<duckdb_parquet::Type::type> type = nullptr);
void BufferBloomFilter(idx_t col_idx, unique_ptr<ParquetBloomFilter> bloom_filter);
void SetWrittenStatistics(CopyFunctionFileStatistics &written_stats);
void FlushColumnStats(idx_t col_idx, duckdb_parquet::ColumnChunk &chunk,
optional_ptr<ColumnWriterStatistics> writer_stats);
private:
void GatherWrittenStatistics();
private:
ClientContext &context;
string file_name;
vector<LogicalType> sql_types;
vector<string> column_names;
duckdb_parquet::CompressionCodec::type codec;
ChildFieldIDs field_ids;
ShreddingType shredding_types;
shared_ptr<ParquetEncryptionConfig> encryption_config;
optional_idx dictionary_size_limit;
idx_t string_dictionary_page_size_limit;
bool enable_bloom_filters;
double bloom_filter_false_positive_ratio;
int64_t compression_level;
bool debug_use_openssl;
shared_ptr<EncryptionUtil> encryption_util;
ParquetVersion parquet_version;
GeoParquetVersion geoparquet_version;
vector<ParquetColumnSchema> column_schemas;
unique_ptr<BufferedFileWriter> writer;
//! Atomics to reduce contention when rotating writes to multiple Parquet files
atomic<idx_t> total_written;
atomic<idx_t> num_row_groups;
std::shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
duckdb_parquet::FileMetaData file_meta_data;
std::mutex lock;
vector<unique_ptr<ColumnWriter>> column_writers;
unique_ptr<GeoParquetFileMetadata> geoparquet_data;
vector<ParquetBloomFilterEntry> bloom_filters;
optional_ptr<CopyFunctionFileStatistics> written_stats;
unique_ptr<ParquetStatsAccumulator> stats_accumulator;
};
} // namespace duckdb

View File

@@ -0,0 +1,70 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/boolean_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
struct BooleanParquetValueConversion;
class BooleanColumnReader : public TemplatedColumnReader<bool, BooleanParquetValueConversion> {
public:
static constexpr const PhysicalType TYPE = PhysicalType::BOOL;
public:
BooleanColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: TemplatedColumnReader<bool, BooleanParquetValueConversion>(reader, schema), byte_pos(0) {
}
uint8_t byte_pos;
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override {
byte_pos = 0;
TemplatedColumnReader<bool, BooleanParquetValueConversion>::InitializeRead(row_group_idx_p, columns,
protocol_p);
}
void ResetPage() override {
byte_pos = 0;
}
};
struct BooleanParquetValueConversion {
template <bool CHECKED>
static bool PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
auto &byte_pos = reader.Cast<BooleanColumnReader>().byte_pos;
bool ret = (*plain_data.ptr >> byte_pos) & 1;
if (++byte_pos == 8) {
byte_pos = 0;
if (CHECKED) {
plain_data.inc(1);
} else {
plain_data.unsafe_inc(1);
}
}
return ret;
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
PlainRead<CHECKED>(plain_data, reader);
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return plain_data.check_available((count + 7) / 8);
}
static idx_t PlainConstantSize() {
return 0;
}
};
} // namespace duckdb

View File

@@ -0,0 +1,46 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/callback_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
template <class PARQUET_PHYSICAL_TYPE, class DUCKDB_PHYSICAL_TYPE,
DUCKDB_PHYSICAL_TYPE (*FUNC)(const PARQUET_PHYSICAL_TYPE &input)>
class CallbackColumnReader
: public TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
CallbackParquetValueConversion<PARQUET_PHYSICAL_TYPE, DUCKDB_PHYSICAL_TYPE, FUNC>> {
using BaseType =
TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
CallbackParquetValueConversion<PARQUET_PHYSICAL_TYPE, DUCKDB_PHYSICAL_TYPE, FUNC>>;
public:
static constexpr const PhysicalType TYPE = PhysicalType::INVALID;
public:
CallbackColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
CallbackParquetValueConversion<PARQUET_PHYSICAL_TYPE, DUCKDB_PHYSICAL_TYPE, FUNC>>(
reader, schema) {
}
protected:
void Dictionary(shared_ptr<ResizeableBuffer> dictionary_data, idx_t num_entries) {
BaseType::AllocateDict(num_entries * sizeof(DUCKDB_PHYSICAL_TYPE));
auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr;
for (idx_t i = 0; i < num_entries; i++) {
dict_ptr[i] = FUNC(dictionary_data->read<PARQUET_PHYSICAL_TYPE>());
}
}
};
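// Example (illustrative sketch): CallbackColumnReader binds a plain conversion function at
// compile time. A hypothetical reader that widens Parquet INT32 values to int64_t could be
// declared as below; WidenInt32 and WideningInt32Reader are made-up names for the example,
// but the callback readers in this extension are instantiated the same way.
inline int64_t WidenInt32(const int32_t &input) {
	return static_cast<int64_t>(input);
}
using WideningInt32Reader = CallbackColumnReader<int32_t, int64_t, WidenInt32>;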
} // namespace duckdb

View File

@@ -0,0 +1,65 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/decimal_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
#include "parquet_reader.hpp"
#include "parquet_decimal_utils.hpp"
namespace duckdb {
template <class DUCKDB_PHYSICAL_TYPE, bool FIXED_LENGTH>
struct DecimalParquetValueConversion {
template <bool CHECKED>
static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
idx_t byte_len;
if (FIXED_LENGTH) {
byte_len = reader.Schema().type_length;
} else {
byte_len = plain_data.read<uint32_t>();
}
plain_data.available(byte_len);
auto res = ParquetDecimalUtils::ReadDecimalValue<DUCKDB_PHYSICAL_TYPE>(const_data_ptr_cast(plain_data.ptr),
byte_len, reader.Schema());
plain_data.inc(byte_len);
return res;
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
uint32_t decimal_len = FIXED_LENGTH ? reader.Schema().type_length : plain_data.read<uint32_t>();
plain_data.inc(decimal_len);
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return true;
}
static idx_t PlainConstantSize() {
return 0;
}
};
template <class DUCKDB_PHYSICAL_TYPE, bool FIXED_LENGTH>
class DecimalColumnReader
: public TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>> {
using BaseType =
TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE, DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>>;
public:
DecimalColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: TemplatedColumnReader<DUCKDB_PHYSICAL_TYPE,
DecimalParquetValueConversion<DUCKDB_PHYSICAL_TYPE, FIXED_LENGTH>>(reader, schema) {
}
};
} // namespace duckdb

View File

@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/expression_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "duckdb/execution/expression_executor.hpp"
namespace duckdb {
//! A column reader that executes an expression over a child reader
class ExpressionColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::INVALID;
public:
ExpressionColumnReader(ClientContext &context, unique_ptr<ColumnReader> child_reader, unique_ptr<Expression> expr,
const ParquetColumnSchema &schema);
ExpressionColumnReader(ClientContext &context, unique_ptr<ColumnReader> child_reader, unique_ptr<Expression> expr,
unique_ptr<ParquetColumnSchema> owned_schema);
unique_ptr<ColumnReader> child_reader;
DataChunk intermediate_chunk;
unique_ptr<Expression> expr;
ExpressionExecutor executor;
// If this reader was created on top of a child reader, after-the-fact, the schema needs to live somewhere
unique_ptr<ParquetColumnSchema> owned_schema;
public:
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override;
idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) override;
void Skip(idx_t num_values) override;
idx_t GroupRowsAvailable() override;
uint64_t TotalCompressedSize() override {
return child_reader->TotalCompressedSize();
}
idx_t FileOffset() const override {
return child_reader->FileOffset();
}
void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override {
child_reader->RegisterPrefetch(transport, allow_merge);
}
};
} // namespace duckdb

View File

@@ -0,0 +1,67 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/interval_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// Interval Column Reader
//===--------------------------------------------------------------------===//
struct IntervalValueConversion {
static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
static interval_t ReadParquetInterval(const_data_ptr_t input) {
interval_t result;
result.months = Load<int32_t>(input);
result.days = Load<int32_t>(input + sizeof(uint32_t));
result.micros = int64_t(Load<uint32_t>(input + sizeof(uint32_t) * 2)) * 1000;
return result;
}
template <bool CHECKED>
static interval_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.available(PARQUET_INTERVAL_SIZE);
}
auto res = ReadParquetInterval(const_data_ptr_cast(plain_data.ptr));
plain_data.unsafe_inc(PARQUET_INTERVAL_SIZE);
return res;
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.inc(PARQUET_INTERVAL_SIZE);
} else {
plain_data.unsafe_inc(PARQUET_INTERVAL_SIZE);
}
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return plain_data.check_available(count * PARQUET_INTERVAL_SIZE);
}
static idx_t PlainConstantSize() {
return 0;
}
};
class IntervalColumnReader : public TemplatedColumnReader<interval_t, IntervalValueConversion> {
public:
IntervalColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: TemplatedColumnReader<interval_t, IntervalValueConversion>(reader, schema) {
}
};
} // namespace duckdb

View File

@@ -0,0 +1,62 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/list_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
class ListColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::LIST;
public:
ListColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
unique_ptr<ColumnReader> child_column_reader_p);
idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out) override;
void ApplyPendingSkips(data_ptr_t define_out, data_ptr_t repeat_out) override;
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override {
child_column_reader->InitializeRead(row_group_idx_p, columns, protocol_p);
}
idx_t GroupRowsAvailable() override {
return child_column_reader->GroupRowsAvailable() + overflow_child_count;
}
uint64_t TotalCompressedSize() override {
return child_column_reader->TotalCompressedSize();
}
void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override {
child_column_reader->RegisterPrefetch(transport, allow_merge);
}
protected:
template <class OP>
idx_t ReadInternal(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out,
optional_ptr<Vector> result_out);
private:
unique_ptr<ColumnReader> child_column_reader;
ResizeableBuffer child_defines;
ResizeableBuffer child_repeats;
uint8_t *child_defines_ptr;
uint8_t *child_repeats_ptr;
VectorCache read_cache;
Vector read_vector;
idx_t overflow_child_count;
};
} // namespace duckdb

View File

@@ -0,0 +1,38 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/null_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "duckdb/common/helper.hpp"
namespace duckdb {
class NullColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::INVALID;
public:
NullColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema) : ColumnReader(reader, schema) {};
shared_ptr<ResizeableBuffer> dict;
public:
void Plain(ByteBuffer &plain_data, uint8_t *defines, uint64_t num_values, idx_t result_offset,
Vector &result) override {
(void)defines;
(void)plain_data;
auto &result_mask = FlatVector::Validity(result);
for (idx_t row_idx = 0; row_idx < num_values; row_idx++) {
result_mask.SetInvalid(row_idx + result_offset);
}
}
};
} // namespace duckdb

View File

@@ -0,0 +1,52 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/row_number_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/limits.hpp"
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
//! Reads a file-absolute row number as a virtual column that's not actually stored in the file
class RowNumberColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::INT64;
public:
RowNumberColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema);
public:
idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) override;
void Filter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out,
const TableFilter &filter, TableFilterState &filter_state, SelectionVector &sel,
idx_t &approved_tuple_count, bool is_first_filter) override;
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override;
void Skip(idx_t num_values) override {
row_group_offset += num_values;
}
idx_t GroupRowsAvailable() override {
return NumericLimits<idx_t>::Maximum();
};
uint64_t TotalCompressedSize() override {
return 0;
}
idx_t FileOffset() const override {
return 0;
}
void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override {
}
private:
idx_t row_group_offset;
};
} // namespace duckdb

View File

@@ -0,0 +1,91 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/string_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
class StringColumnReader : public ColumnReader {
enum class StringColumnType : uint8_t { VARCHAR, JSON, OTHER };
static StringColumnType GetStringColumnType(const LogicalType &type) {
if (type.IsJSONType()) {
return StringColumnType::JSON;
}
if (type.id() == LogicalTypeId::VARCHAR) {
return StringColumnType::VARCHAR;
}
return StringColumnType::OTHER;
}
public:
static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR;
public:
StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema);
idx_t fixed_width_string_length;
const StringColumnType string_column_type;
public:
static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
void VerifyString(const char *str_data, uint32_t str_len);
static void ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block);
protected:
void Plain(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values, idx_t result_offset,
Vector &result) override {
throw NotImplementedException("StringColumnReader can only read plain data from a shared buffer");
}
void Plain(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values, idx_t result_offset,
Vector &result) override;
void PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values) override;
void PlainSelect(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values, Vector &result,
const SelectionVector &sel, idx_t count) override;
bool SupportsDirectFilter() const override {
return true;
}
bool SupportsDirectSelect() const override {
return true;
}
};
struct StringParquetValueConversion {
template <bool CHECKED>
static string_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
auto &scr = reader.Cast<StringColumnReader>();
uint32_t str_len =
scr.fixed_width_string_length == 0 ? plain_data.read<uint32_t>() : scr.fixed_width_string_length;
plain_data.available(str_len);
auto plain_str = char_ptr_cast(plain_data.ptr);
scr.VerifyString(plain_str, str_len);
auto ret_str = string_t(plain_str, str_len);
plain_data.inc(str_len);
return ret_str;
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
auto &scr = reader.Cast<StringColumnReader>();
uint32_t str_len =
scr.fixed_width_string_length == 0 ? plain_data.read<uint32_t>() : scr.fixed_width_string_length;
plain_data.inc(str_len);
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return false;
}
static idx_t PlainConstantSize() {
return 0;
}
};
} // namespace duckdb

View File

@@ -0,0 +1,39 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/struct_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
class StructColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::STRUCT;
public:
StructColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
vector<unique_ptr<ColumnReader>> child_readers_p);
vector<unique_ptr<ColumnReader>> child_readers;
public:
ColumnReader &GetChildReader(idx_t child_idx);
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override;
idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) override;
void Skip(idx_t num_values) override;
idx_t GroupRowsAvailable() override;
uint64_t TotalCompressedSize() override;
void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override;
};
} // namespace duckdb

View File

@@ -0,0 +1,110 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/templated_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "duckdb/common/helper.hpp"
namespace duckdb {
template <class VALUE_TYPE>
struct TemplatedParquetValueConversion {
template <bool CHECKED>
static VALUE_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
return plain_data.read<VALUE_TYPE>();
} else {
return plain_data.unsafe_read<VALUE_TYPE>();
}
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.inc(sizeof(VALUE_TYPE));
} else {
plain_data.unsafe_inc(sizeof(VALUE_TYPE));
}
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return plain_data.check_available(count * sizeof(VALUE_TYPE));
}
static idx_t PlainConstantSize() {
return sizeof(VALUE_TYPE);
}
};
template <class VALUE_TYPE, class VALUE_CONVERSION>
class TemplatedColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::INVALID;
public:
TemplatedColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema) : ColumnReader(reader, schema) {
}
shared_ptr<ResizeableBuffer> dict;
public:
void AllocateDict(idx_t size) {
if (!dict) {
dict = make_shared_ptr<ResizeableBuffer>(GetAllocator(), size);
} else {
dict->resize(GetAllocator(), size);
}
}
void Plain(ByteBuffer &plain_data, uint8_t *defines, uint64_t num_values, idx_t result_offset,
Vector &result) override {
PlainTemplated<VALUE_TYPE, VALUE_CONVERSION>(plain_data, defines, num_values, result_offset, result);
}
void PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values) override {
PlainSkipTemplated<VALUE_CONVERSION>(plain_data, defines, num_values);
}
bool SupportsDirectFilter() const override {
return true;
}
};
template <class PARQUET_PHYSICAL_TYPE, class DUCKDB_PHYSICAL_TYPE,
DUCKDB_PHYSICAL_TYPE (*FUNC)(const PARQUET_PHYSICAL_TYPE &input)>
struct CallbackParquetValueConversion {
template <bool CHECKED>
static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
return FUNC(plain_data.read<PARQUET_PHYSICAL_TYPE>());
} else {
return FUNC(plain_data.unsafe_read<PARQUET_PHYSICAL_TYPE>());
}
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.inc(sizeof(PARQUET_PHYSICAL_TYPE));
} else {
plain_data.unsafe_inc(sizeof(PARQUET_PHYSICAL_TYPE));
}
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return plain_data.check_available(count * sizeof(PARQUET_PHYSICAL_TYPE));
}
static idx_t PlainConstantSize() {
return 0;
}
};
} // namespace duckdb
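
The CHECKED flag above is what lets a reader do one bounds check per batch instead of one per value. A minimal usage sketch (illustrative, not part of this commit; the helper name is made up) of driving the conversion policy directly, assuming the ByteBuffer API from resizable_buffer.hpp further down:

// Sketch only: batch-read int32 PLAIN values through TemplatedParquetValueConversion.
static void ReadPlainInt32s(ByteBuffer &plain_data, ColumnReader &reader, idx_t count, int32_t *out) {
	using Conversion = TemplatedParquetValueConversion<int32_t>;
	if (Conversion::PlainAvailable(plain_data, count)) {
		// one up-front bounds check, then unchecked reads in the hot loop
		for (idx_t i = 0; i < count; i++) {
			out[i] = Conversion::PlainRead<false>(plain_data, reader);
		}
	} else {
		// buffer may be too short - fall back to per-value checked reads
		for (idx_t i = 0; i < count; i++) {
			out[i] = Conversion::PlainRead<true>(plain_data, reader);
		}
	}
}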


@@ -0,0 +1,60 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/uuid_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "templated_column_reader.hpp"
#include "parquet_reader.hpp"
#include "duckdb/common/types/uuid.hpp"
namespace duckdb {
struct UUIDValueConversion {
static hugeint_t ReadParquetUUID(const_data_ptr_t input) {
// Use the utility function from BaseUUID
return BaseUUID::FromBlob(input);
}
template <bool CHECKED>
static hugeint_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.available(sizeof(hugeint_t));
}
auto res = ReadParquetUUID(const_data_ptr_cast(plain_data.ptr));
plain_data.unsafe_inc(sizeof(hugeint_t));
return res;
}
template <bool CHECKED>
static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) {
if (CHECKED) {
plain_data.inc(sizeof(hugeint_t));
} else {
plain_data.unsafe_inc(sizeof(hugeint_t));
}
}
static bool PlainAvailable(const ByteBuffer &plain_data, const idx_t count) {
return plain_data.check_available(count * sizeof(hugeint_t));
}
static idx_t PlainConstantSize() {
return 0;
}
};
class UUIDColumnReader : public TemplatedColumnReader<hugeint_t, UUIDValueConversion> {
public:
UUIDColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: TemplatedColumnReader<hugeint_t, UUIDValueConversion>(reader, schema) {
}
};
} // namespace duckdb


@@ -0,0 +1,148 @@
#pragma once
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/common/types/value.hpp"
#include "reader/variant/variant_value.hpp"
using namespace duckdb_yyjson;
namespace duckdb {
//! ------------ Metadata ------------
struct VariantMetadataHeader {
public:
static VariantMetadataHeader FromHeaderByte(uint8_t byte);
public:
//! The version of the protocol used (only '1' supported for now)
uint8_t version;
//! Number of bytes per dictionary size and offset field
uint8_t offset_size;
//! Whether dictionary strings are sorted and unique
bool sorted_strings = false;
};
struct VariantMetadata {
public:
explicit VariantMetadata(const string_t &metadata);
public:
const string_t &metadata;
public:
VariantMetadataHeader header;
const_data_ptr_t offsets;
const_data_ptr_t bytes;
//! The json object keys have to be null-terminated
//! But we don't receive them null-terminated
vector<string> strings;
};
//! ------------ Value ------------
enum class VariantBasicType : uint8_t { PRIMITIVE = 0, SHORT_STRING = 1, OBJECT = 2, ARRAY = 3, INVALID };
enum class VariantPrimitiveType : uint8_t {
NULL_TYPE = 0,
BOOLEAN_TRUE = 1,
BOOLEAN_FALSE = 2,
INT8 = 3,
INT16 = 4,
INT32 = 5,
INT64 = 6,
DOUBLE = 7,
DECIMAL4 = 8,
DECIMAL8 = 9,
DECIMAL16 = 10,
DATE = 11,
TIMESTAMP_MICROS = 12,
TIMESTAMP_NTZ_MICROS = 13,
FLOAT = 14,
BINARY = 15,
STRING = 16,
TIME_NTZ_MICROS = 17,
TIMESTAMP_NANOS = 18,
TIMESTAMP_NTZ_NANOS = 19,
UUID = 20,
INVALID
};
struct VariantValueMetadata {
public:
VariantValueMetadata() {
}
public:
static VariantValueMetadata FromHeaderByte(uint8_t byte);
static VariantBasicType VariantBasicTypeFromByte(uint8_t byte) {
if (byte >= static_cast<uint8_t>(VariantBasicType::INVALID)) {
throw NotImplementedException("Variant BasicType (%d) is not supported", byte);
}
return static_cast<VariantBasicType>(byte);
}
static VariantPrimitiveType VariantPrimitiveTypeFromByte(uint8_t byte) {
if (byte >= static_cast<uint8_t>(VariantPrimitiveType::INVALID)) {
throw NotImplementedException("Variant PrimitiveType (%d) is not supported", byte);
}
return static_cast<VariantPrimitiveType>(byte);
}
public:
VariantBasicType basic_type;
public:
//! Primitive Type header
VariantPrimitiveType primitive_type;
public:
//! Short String header
uint8_t string_size;
public:
//! Object header | Array header
//! Size in bytes for each 'field_offset' entry
uint32_t field_offset_size;
//! Size in bytes for each 'field_id' entry
uint32_t field_id_size;
//! Whether the number of elements is encoded in 1 byte (false) or 4 bytes (true)
bool is_large;
};
struct VariantDecodeResult {
public:
VariantDecodeResult() = default;
~VariantDecodeResult() {
if (doc) {
yyjson_mut_doc_free(doc);
}
if (data) {
free(data);
}
}
public:
yyjson_mut_doc *doc = nullptr;
char *data = nullptr;
};
class VariantBinaryDecoder {
public:
VariantBinaryDecoder() = delete;
public:
static VariantValue Decode(const VariantMetadata &metadata, const_data_ptr_t data);
public:
static VariantValue PrimitiveTypeDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
static VariantValue ShortStringDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
static VariantValue ObjectDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
const_data_ptr_t data);
static VariantValue ArrayDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
const_data_ptr_t data);
};
} // namespace duckdb
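
FromHeaderByte for both headers is only declared here; the bit layout lives in the implementation. As a hedged reference, this is roughly what the decoding looks like under the Parquet Variant encoding spec (the shift/mask constants below are an assumption based on that spec, not copied from this commit):

// Sketch only: metadata and value header decoding per the Variant spec.
static VariantMetadataHeader DecodeMetadataHeader(uint8_t byte) {
	VariantMetadataHeader header;
	header.version = byte & 0x0F;                     // low 4 bits: version (only 1 is valid)
	header.sorted_strings = ((byte >> 4) & 0x1) != 0; // bit 4: dictionary is sorted and unique
	header.offset_size = ((byte >> 6) & 0x3) + 1;     // bits 6-7: offset width in bytes, minus one
	return header;
}

static VariantValueMetadata DecodeValueHeader(uint8_t byte) {
	VariantValueMetadata value;
	value.basic_type = VariantValueMetadata::VariantBasicTypeFromByte(byte & 0x3); // low 2 bits
	const uint8_t type_header = byte >> 2;                                         // remaining 6 bits
	switch (value.basic_type) {
	case VariantBasicType::PRIMITIVE:
		value.primitive_type = VariantValueMetadata::VariantPrimitiveTypeFromByte(type_header);
		break;
	case VariantBasicType::SHORT_STRING:
		value.string_size = type_header; // inline string of 0-63 bytes follows
		break;
	default:
		// OBJECT/ARRAY: field_offset_size, field_id_size and is_large are packed into type_header
		break;
	}
	return value;
}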


@@ -0,0 +1,24 @@
#pragma once
#include "reader/variant/variant_value.hpp"
#include "reader/variant/variant_binary_decoder.hpp"
namespace duckdb {
class VariantShreddedConversion {
public:
VariantShreddedConversion() = delete;
public:
static vector<VariantValue> Convert(Vector &metadata, Vector &group, idx_t offset, idx_t length, idx_t total_size,
bool is_field);
static vector<VariantValue> ConvertShreddedLeaf(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
idx_t length, idx_t total_size, const bool is_field);
static vector<VariantValue> ConvertShreddedArray(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
idx_t length, idx_t total_size, const bool is_field);
static vector<VariantValue> ConvertShreddedObject(Vector &metadata, Vector &value, Vector &typed_value,
idx_t offset, idx_t length, idx_t total_size,
const bool is_field);
};
} // namespace duckdb


@@ -0,0 +1,54 @@
#pragma once
#include "duckdb/common/map.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/types/value.hpp"
#include "yyjson.hpp"
using namespace duckdb_yyjson;
namespace duckdb {
enum class VariantValueType : uint8_t { PRIMITIVE, OBJECT, ARRAY, MISSING };
struct VariantValue {
public:
VariantValue() : value_type(VariantValueType::MISSING) {
}
explicit VariantValue(VariantValueType type) : value_type(type) {
}
explicit VariantValue(Value &&val) : value_type(VariantValueType::PRIMITIVE), primitive_value(std::move(val)) {
}
// Delete copy constructor and copy assignment operator
VariantValue(const VariantValue &) = delete;
VariantValue &operator=(const VariantValue &) = delete;
// Default move constructor and move assignment operator
VariantValue(VariantValue &&) noexcept = default;
VariantValue &operator=(VariantValue &&) noexcept = default;
public:
bool IsNull() const {
return value_type == VariantValueType::PRIMITIVE && primitive_value.IsNull();
}
bool IsMissing() const {
return value_type == VariantValueType::MISSING;
}
public:
void AddChild(const string &key, VariantValue &&val);
void AddItem(VariantValue &&val);
public:
yyjson_mut_val *ToJSON(ClientContext &context, yyjson_mut_doc *doc) const;
public:
VariantValueType value_type;
//! FIXME: how can we get a deterministic child order for a partially shredded object?
map<string, VariantValue> object_children;
vector<VariantValue> array_items;
Value primitive_value;
};
} // namespace duckdb
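
VariantValue is a small move-only tree: objects keep their children in a map, arrays in a vector, and primitives in a duckdb Value. A short illustrative sketch (not part of the commit) of building a nested value with the declared API:

// Sketch: build {"id": 42, "tags": ["a", NULL]} as a VariantValue tree.
static VariantValue BuildExampleVariant() {
	VariantValue obj(VariantValueType::OBJECT);
	obj.AddChild("id", VariantValue(Value::INTEGER(42)));

	VariantValue tags(VariantValueType::ARRAY);
	tags.AddItem(VariantValue(Value("a")));
	tags.AddItem(VariantValue(Value())); // SQL NULL: IsNull() is true, IsMissing() is false
	obj.AddChild("tags", std::move(tags));
	return obj; // copy is deleted, so the tree is moved out
}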


@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// reader/variant_column_reader.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_reader.hpp"
#include "reader/templated_column_reader.hpp"
namespace duckdb {
class VariantColumnReader : public ColumnReader {
public:
static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR;
public:
VariantColumnReader(ClientContext &context, ParquetReader &reader, const ParquetColumnSchema &schema,
vector<unique_ptr<ColumnReader>> child_readers_p);
ClientContext &context;
vector<unique_ptr<ColumnReader>> child_readers;
public:
ColumnReader &GetChildReader(idx_t child_idx);
void InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns, TProtocol &protocol_p) override;
idx_t Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) override;
void Skip(idx_t num_values) override;
idx_t GroupRowsAvailable() override;
uint64_t TotalCompressedSize() override;
void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override;
protected:
idx_t metadata_reader_idx;
idx_t value_reader_idx;
};
} // namespace duckdb


@@ -0,0 +1,114 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// resizable_buffer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/common/allocator.hpp"
#include <exception>
namespace duckdb {
class ByteBuffer { // on to the 10 thousandth impl
public:
ByteBuffer() {};
ByteBuffer(data_ptr_t ptr, uint64_t len) : ptr(ptr), len(len) {};
data_ptr_t ptr = nullptr;
uint64_t len = 0;
public:
void inc(const uint64_t increment) {
available(increment);
unsafe_inc(increment);
}
void unsafe_inc(const uint64_t increment) {
len -= increment;
ptr += increment;
}
template <class T>
T read() {
available(sizeof(T));
return unsafe_read<T>();
}
template <class T>
T unsafe_read() {
T val = unsafe_get<T>();
unsafe_inc(sizeof(T));
return val;
}
template <class T>
T get() {
available(sizeof(T));
return unsafe_get<T>();
}
template <class T>
T unsafe_get() {
return Load<T>(ptr);
}
void copy_to(char *dest, const uint64_t len) const {
available(len);
unsafe_copy_to(dest, len);
}
void unsafe_copy_to(char *dest, const uint64_t len) const {
std::memcpy(dest, ptr, len);
}
void zero() const {
std::memset(ptr, 0, len);
}
void available(const uint64_t req_len) const {
if (!check_available(req_len)) {
throw std::runtime_error("Out of buffer");
}
}
bool check_available(const uint64_t req_len) const {
return req_len <= len;
}
};
class ResizeableBuffer : public ByteBuffer {
public:
ResizeableBuffer() {
}
ResizeableBuffer(Allocator &allocator, const uint64_t new_size) {
resize(allocator, new_size);
}
void resize(Allocator &allocator, const uint64_t new_size) {
len = new_size;
if (new_size == 0) {
return;
}
if (new_size > alloc_len) {
alloc_len = NextPowerOfTwo(new_size);
allocated_data.Reset(); // Have to reset before allocating new buffer (otherwise we use ~2x the memory)
allocated_data = allocator.Allocate(alloc_len);
ptr = allocated_data.get();
}
}
void reset() {
ptr = allocated_data.get();
len = alloc_len;
}
private:
AllocatedData allocated_data;
idx_t alloc_len = 0;
};
} // namespace duckdb
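
ByteBuffer pairs every accessor with an unchecked variant, and ResizeableBuffer only reallocates when a resize crosses the current power-of-two capacity. A minimal sketch of the intended pattern (illustrative; assumes an Allocator instance), checking bounds once and then using the unsafe reads:

// Sketch: one bounds check up front, then unchecked reads that advance ptr/len.
static uint64_t SumUint32s(Allocator &allocator, const uint32_t *values, idx_t count) {
	ResizeableBuffer buffer(allocator, count * sizeof(uint32_t));
	std::memcpy(buffer.ptr, values, count * sizeof(uint32_t));

	uint64_t sum = 0;
	if (buffer.check_available(count * sizeof(uint32_t))) {
		for (idx_t i = 0; i < count; i++) {
			sum += buffer.unsafe_read<uint32_t>(); // advances ptr and shrinks len
		}
	}
	buffer.reset(); // ptr/len point at the full allocation again
	return sum;
}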


@@ -0,0 +1,228 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// thrift_tools.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include <list>
#include "thrift/protocol/TCompactProtocol.h"
#include "thrift/transport/TBufferTransports.h"
#include "duckdb.hpp"
#include "duckdb/storage/caching_file_system.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/allocator.hpp"
namespace duckdb {
// A ReadHead for prefetching data in a specific range
struct ReadHead {
ReadHead(idx_t location, uint64_t size) : location(location), size(size) {};
// Hint info
idx_t location;
uint64_t size;
// Current info
BufferHandle buffer_handle;
data_ptr_t buffer_ptr;
bool data_isset = false;
idx_t GetEnd() const {
return size + location;
}
};
// Comparator for ReadHeads that are either overlapping, adjacent, or within ALLOW_GAP bytes from each other
struct ReadHeadComparator {
static constexpr uint64_t ALLOW_GAP = 1 << 14; // 16 KiB
bool operator()(const ReadHead *a, const ReadHead *b) const {
auto a_start = a->location;
auto a_end = a->location + a->size;
auto b_start = b->location;
if (a_end <= NumericLimits<idx_t>::Maximum() - ALLOW_GAP) {
a_end += ALLOW_GAP;
}
return a_start < b_start && a_end < b_start;
}
};
// Two-step read ahead buffer
// 1: register all ranges that will be read, merging ranges that are consecutive
// 2: prefetch all registered ranges
struct ReadAheadBuffer {
explicit ReadAheadBuffer(CachingFileHandle &file_handle_p) : file_handle(file_handle_p) {
}
// The list of read heads
std::list<ReadHead> read_heads;
// Set for merging consecutive ranges
std::set<ReadHead *, ReadHeadComparator> merge_set;
CachingFileHandle &file_handle;
idx_t total_size = 0;
// Add a read head to the prefetching list
void AddReadHead(idx_t pos, uint64_t len, bool merge_buffers = true) {
// Attempt to merge with existing
if (merge_buffers) {
ReadHead new_read_head {pos, len};
auto lookup_set = merge_set.find(&new_read_head);
if (lookup_set != merge_set.end()) {
auto existing_head = *lookup_set;
auto new_start = MinValue<idx_t>(existing_head->location, new_read_head.location);
auto new_length = MaxValue<idx_t>(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start;
existing_head->location = new_start;
existing_head->size = new_length;
return;
}
}
read_heads.emplace_front(ReadHead(pos, len));
total_size += len;
auto &read_head = read_heads.front();
if (merge_buffers) {
merge_set.insert(&read_head);
}
if (read_head.GetEnd() > file_handle.GetFileSize()) {
throw std::runtime_error("Prefetch registered for bytes outside file: " + file_handle.GetPath() +
", attempted range: [" + std::to_string(pos) + ", " +
std::to_string(read_head.GetEnd()) +
"), file size: " + std::to_string(file_handle.GetFileSize()));
}
}
// Returns the relevant read head
ReadHead *GetReadHead(idx_t pos) {
for (auto &read_head : read_heads) {
if (pos >= read_head.location && pos < read_head.GetEnd()) {
return &read_head;
}
}
return nullptr;
}
// Prefetch all read heads
void Prefetch() {
for (auto &read_head : read_heads) {
if (read_head.GetEnd() > file_handle.GetFileSize()) {
throw std::runtime_error("Prefetch registered requested for bytes outside file");
}
read_head.buffer_handle = file_handle.Read(read_head.buffer_ptr, read_head.size, read_head.location);
D_ASSERT(read_head.buffer_handle.IsValid());
read_head.data_isset = true;
}
}
};
class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTransport<ThriftFileTransport> {
public:
static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000;
ThriftFileTransport(CachingFileHandle &file_handle_p, bool prefetch_mode_p)
: file_handle(file_handle_p), location(0), size(file_handle.GetFileSize()),
ra_buffer(ReadAheadBuffer(file_handle)), prefetch_mode(prefetch_mode_p) {
}
uint32_t read(uint8_t *buf, uint32_t len) {
auto prefetch_buffer = ra_buffer.GetReadHead(location);
if (prefetch_buffer != nullptr && location - prefetch_buffer->location + len <= prefetch_buffer->size) {
D_ASSERT(location - prefetch_buffer->location + len <= prefetch_buffer->size);
if (!prefetch_buffer->data_isset) {
prefetch_buffer->buffer_handle =
file_handle.Read(prefetch_buffer->buffer_ptr, prefetch_buffer->size, prefetch_buffer->location);
D_ASSERT(prefetch_buffer->buffer_handle.IsValid());
prefetch_buffer->data_isset = true;
}
D_ASSERT(prefetch_buffer->buffer_handle.IsValid());
memcpy(buf, prefetch_buffer->buffer_ptr + location - prefetch_buffer->location, len);
} else if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) {
Prefetch(location, MinValue<uint64_t>(PREFETCH_FALLBACK_BUFFERSIZE, file_handle.GetFileSize() - location));
auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location);
D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size);
memcpy(buf, prefetch_buffer_fallback->buffer_ptr + location - prefetch_buffer_fallback->location, len);
} else {
// No prefetch, do a regular (non-caching) read
file_handle.GetFileHandle().Read(context, buf, len, location);
}
location += len;
return len;
}
// Prefetch a single buffer
void Prefetch(idx_t pos, uint64_t len) {
RegisterPrefetch(pos, len, false);
FinalizeRegistration();
PrefetchRegistered();
}
// Register a buffer for prefetching
void RegisterPrefetch(idx_t pos, uint64_t len, bool can_merge = true) {
ra_buffer.AddReadHead(pos, len, can_merge);
}
// Prevents any further merges, should be called before PrefetchRegistered
void FinalizeRegistration() {
ra_buffer.merge_set.clear();
}
// Prefetch all previously registered ranges
void PrefetchRegistered() {
ra_buffer.Prefetch();
}
void ClearPrefetch() {
ra_buffer.read_heads.clear();
ra_buffer.merge_set.clear();
}
void Skip(idx_t skip_count) {
location += skip_count;
}
bool HasPrefetch() const {
return !ra_buffer.read_heads.empty() || !ra_buffer.merge_set.empty();
}
void SetLocation(idx_t location_p) {
location = location_p;
}
idx_t GetLocation() const {
return location;
}
optional_ptr<ReadHead> GetReadHead(idx_t pos) {
return ra_buffer.GetReadHead(pos);
}
idx_t GetSize() const {
return size;
}
private:
QueryContext context;
CachingFileHandle &file_handle;
idx_t location;
idx_t size;
// Multi-buffer prefetch
ReadAheadBuffer ra_buffer;
// Whether the prefetch mode is enabled. In this mode the DirectIO flag of the handle will be set and the parquet
// reader will manage the read buffering.
bool prefetch_mode;
};
} // namespace duckdb
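
The transport implements the two-step protocol described above: first register every byte range (overlapping or near-adjacent ranges, within the 16 KiB ALLOW_GAP, are merged into one read head), then issue the reads. An illustrative call sequence (the offsets and lengths are made up for the sketch):

// Sketch: two-step prefetch of two nearby ranges before Thrift deserialization.
static void PrefetchExample(ThriftFileTransport &transport) {
	// Step 1: register ranges; the second starts within ALLOW_GAP of the first,
	// so ReadAheadBuffer merges them into a single read head.
	transport.RegisterPrefetch(/*pos=*/4096, /*len=*/8192, /*can_merge=*/true);
	transport.RegisterPrefetch(/*pos=*/12800, /*len=*/4096, /*can_merge=*/true);

	// Step 2: stop merging, then fire the (merged) reads.
	transport.FinalizeRegistration();
	transport.PrefetchRegistered();

	// read() calls inside these ranges are now served from the prefetched buffer.
	transport.SetLocation(4096);
	uint8_t header[64];
	transport.read(header, sizeof(header));
}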


@@ -0,0 +1,34 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/array_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/list_column_writer.hpp"
namespace duckdb {
class ArrayColumnWriter : public ListColumnWriter {
public:
ArrayColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
unique_ptr<ColumnWriter> child_writer_p, bool can_have_nulls)
: ListColumnWriter(writer, column_schema, std::move(schema_path_p), std::move(child_writer_p), can_have_nulls) {
}
~ArrayColumnWriter() override = default;
public:
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
bool vector_can_span_multiple_pages) override;
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
protected:
void WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level,
idx_t define_value, const bool is_empty = false);
};
} // namespace duckdb


@@ -0,0 +1,33 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/boolean_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/primitive_column_writer.hpp"
namespace duckdb {
class BooleanColumnWriter : public PrimitiveColumnWriter {
public:
BooleanColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
bool can_have_nulls);
~BooleanColumnWriter() override = default;
public:
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override;
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p,
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override;
unique_ptr<ColumnWriterPageState> InitializePageState(PrimitiveColumnWriterState &state, idx_t page_idx) override;
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override;
idx_t GetRowSize(const Vector &vector, const idx_t index, const PrimitiveColumnWriterState &state) const override;
};
} // namespace duckdb


@@ -0,0 +1,30 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/decimal_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/primitive_column_writer.hpp"
namespace duckdb {
class FixedDecimalColumnWriter : public PrimitiveColumnWriter {
public:
FixedDecimalColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
vector<string> schema_path_p, bool can_have_nulls);
~FixedDecimalColumnWriter() override = default;
public:
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override;
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override;
idx_t GetRowSize(const Vector &vector, const idx_t index, const PrimitiveColumnWriterState &state) const override;
};
} // namespace duckdb


@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/enum_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/primitive_column_writer.hpp"
namespace duckdb {
class EnumWriterPageState;
class EnumColumnWriter : public PrimitiveColumnWriter {
public:
EnumColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
bool can_have_nulls);
~EnumColumnWriter() override = default;
uint32_t bit_width;
public:
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override;
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state_p,
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override;
unique_ptr<ColumnWriterPageState> InitializePageState(PrimitiveColumnWriterState &state, idx_t page_idx) override;
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override;
duckdb_parquet::Encoding::type GetEncoding(PrimitiveColumnWriterState &state) override;
bool HasDictionary(PrimitiveColumnWriterState &state) override;
idx_t DictionarySize(PrimitiveColumnWriterState &state_p) override;
void FlushDictionary(PrimitiveColumnWriterState &state, ColumnWriterStatistics *stats_p) override;
idx_t GetRowSize(const Vector &vector, const idx_t index, const PrimitiveColumnWriterState &state) const override;
private:
template <class T>
void WriteEnumInternal(WriteStream &temp_writer, Vector &input_column, idx_t chunk_start, idx_t chunk_end,
EnumWriterPageState &page_state);
};
} // namespace duckdb


@@ -0,0 +1,52 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/list_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
namespace duckdb {
class ListColumnWriterState : public ColumnWriterState {
public:
ListColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) : row_group(row_group), col_idx(col_idx) {
}
~ListColumnWriterState() override = default;
duckdb_parquet::RowGroup &row_group;
idx_t col_idx;
unique_ptr<ColumnWriterState> child_state;
idx_t parent_index = 0;
};
class ListColumnWriter : public ColumnWriter {
public:
ListColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
unique_ptr<ColumnWriter> child_writer_p, bool can_have_nulls)
: ColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) {
child_writers.push_back(std::move(child_writer_p));
}
~ListColumnWriter() override = default;
public:
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
bool HasAnalyze() override;
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
void FinalizeAnalyze(ColumnWriterState &state) override;
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
bool vector_can_span_multiple_pages) override;
void BeginWrite(ColumnWriterState &state) override;
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
void FinalizeWrite(ColumnWriterState &state) override;
protected:
ColumnWriter &GetChildWriter();
};
} // namespace duckdb


@@ -0,0 +1,326 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/parquet_write_operators.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/parquet_write_stats.hpp"
#include "zstd/common/xxhash.hpp"
#include "duckdb/common/types/uhugeint.hpp"
#include "duckdb/common/types/uuid.hpp"
namespace duckdb {
struct BaseParquetOperator {
template <class SRC, class TGT>
static void WriteToStream(const TGT &input, WriteStream &ser) {
ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
}
template <class SRC, class TGT>
static constexpr idx_t WriteSize(const TGT &input) {
return sizeof(TGT);
}
template <class SRC, class TGT>
static uint64_t XXHash64(const TGT &target_value) {
return duckdb_zstd::XXH64(&target_value, sizeof(target_value), 0);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return nullptr;
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
}
template <class SRC, class TGT>
static idx_t GetRowSize(const Vector &, idx_t) {
return sizeof(TGT);
}
};
struct ParquetCastOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return TGT(input);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
auto &numeric_stats = stats->Cast<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
if (LessThan::Operation(target_value, numeric_stats.min)) {
numeric_stats.min = target_value;
}
if (GreaterThan::Operation(target_value, numeric_stats.max)) {
numeric_stats.max = target_value;
}
}
};
struct FloatingPointOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return TGT(input);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<FloatingPointStatisticsState<SRC, TGT, BaseParquetOperator>>();
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
auto &numeric_stats = stats->Cast<FloatingPointStatisticsState<SRC, TGT, BaseParquetOperator>>();
if (Value::IsNan(target_value)) {
numeric_stats.has_nan = true;
} else {
if (LessThan::Operation(target_value, numeric_stats.min)) {
numeric_stats.min = target_value;
}
if (GreaterThan::Operation(target_value, numeric_stats.max)) {
numeric_stats.max = target_value;
}
}
}
};
struct ParquetTimestampNSOperator : public ParquetCastOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return TGT(input);
}
};
struct ParquetTimestampSOperator : public ParquetCastOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return Timestamp::FromEpochSecondsPossiblyInfinite(input).value;
}
};
// We will need a different operator for GEOGRAPHY later, so we define a base geo operator
struct ParquetBaseGeoOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return input;
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
auto &geo_stats = stats->Cast<GeoStatisticsState>();
geo_stats.Update(target_value);
}
template <class SRC, class TGT>
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
ser.Write<uint32_t>(target_value.GetSize());
ser.WriteData(const_data_ptr_cast(target_value.GetData()), target_value.GetSize());
}
template <class SRC, class TGT>
static idx_t WriteSize(const TGT &target_value) {
return sizeof(uint32_t) + target_value.GetSize();
}
template <class SRC, class TGT>
static uint64_t XXHash64(const TGT &target_value) {
return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
}
template <class SRC, class TGT>
static idx_t GetRowSize(const Vector &vector, idx_t index) {
// This needs to add the 4 bytes (just like WriteSize) otherwise we underestimate and we have to realloc
// This seriously harms performance, mostly by making it very inconsistent (see internal issue #4990)
return sizeof(uint32_t) + FlatVector::GetData<string_t>(vector)[index].GetSize();
}
};
struct ParquetGeometryOperator : public ParquetBaseGeoOperator {
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<GeoStatisticsState>();
}
};
struct ParquetBaseStringOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return input;
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
auto &string_stats = stats->Cast<StringStatisticsState>();
string_stats.Update(target_value);
}
template <class SRC, class TGT>
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
ser.Write<uint32_t>(target_value.GetSize());
ser.WriteData(const_data_ptr_cast(target_value.GetData()), target_value.GetSize());
}
template <class SRC, class TGT>
static idx_t WriteSize(const TGT &target_value) {
return sizeof(uint32_t) + target_value.GetSize();
}
template <class SRC, class TGT>
static uint64_t XXHash64(const TGT &target_value) {
return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
}
template <class SRC, class TGT>
static idx_t GetRowSize(const Vector &vector, idx_t index) {
// This needs to add the 4 bytes (just like WriteSize) otherwise we underestimate and we have to realloc
// This seriously harms performance, mostly by making it very inconsistent (see internal issue #4990)
return sizeof(uint32_t) + FlatVector::GetData<string_t>(vector)[index].GetSize();
}
};
struct ParquetBlobOperator : public ParquetBaseStringOperator {
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<StringStatisticsState>(LogicalTypeId::BLOB);
}
};
struct ParquetStringOperator : public ParquetBaseStringOperator {
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<StringStatisticsState>();
}
};
struct ParquetIntervalTargetType {
static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
data_t bytes[PARQUET_INTERVAL_SIZE];
};
struct ParquetIntervalOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
if (input.days < 0 || input.months < 0 || input.micros < 0) {
throw IOException("Parquet files do not support negative intervals");
}
TGT result;
Store<uint32_t>(input.months, result.bytes);
Store<uint32_t>(input.days, result.bytes + sizeof(uint32_t));
Store<uint32_t>(input.micros / 1000, result.bytes + sizeof(uint32_t) * 2);
return result;
}
template <class SRC, class TGT>
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
ser.WriteData(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
}
template <class SRC, class TGT>
static constexpr idx_t WriteSize(const TGT &target_value) {
return ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE;
}
template <class SRC, class TGT>
static uint64_t XXHash64(const TGT &target_value) {
return duckdb_zstd::XXH64(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE, 0);
}
};
struct ParquetUUIDTargetType {
static constexpr const idx_t PARQUET_UUID_SIZE = 16;
data_t bytes[PARQUET_UUID_SIZE];
};
struct ParquetUUIDOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
TGT result;
// Use the utility function from BaseUUID
BaseUUID::ToBlob(input, result.bytes);
return result;
}
template <class SRC, class TGT>
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
ser.WriteData(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}
template <class SRC, class TGT>
static constexpr idx_t WriteSize(const TGT &target_value) {
return ParquetUUIDTargetType::PARQUET_UUID_SIZE;
}
template <class SRC, class TGT>
static uint64_t XXHash64(const TGT &target_value) {
return duckdb_zstd::XXH64(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE, 0);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<UUIDStatisticsState>();
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats_p, TGT target_value) {
auto &stats = stats_p->Cast<UUIDStatisticsState>();
if (!stats.has_stats || memcmp(target_value.bytes, stats.min, ParquetUUIDTargetType::PARQUET_UUID_SIZE) < 0) {
memcpy(stats.min, target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}
if (!stats.has_stats || memcmp(target_value.bytes, stats.max, ParquetUUIDTargetType::PARQUET_UUID_SIZE) > 0) {
memcpy(stats.max, target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}
stats.has_stats = true;
}
};
struct ParquetTimeTZOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return input.time().micros;
}
};
struct ParquetHugeintOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return Hugeint::Cast<double>(input);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<ColumnWriterStatistics>();
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
}
};
struct ParquetUhugeintOperator : public BaseParquetOperator {
template <class SRC, class TGT>
static TGT Operation(SRC input) {
return Uhugeint::Cast<double>(input);
}
template <class SRC, class TGT>
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
return make_uniq<ColumnWriterStatistics>();
}
template <class SRC, class TGT>
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
}
};
} // namespace duckdb
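
ParquetIntervalOperator is the one operator above with a non-trivial byte layout: it packs months, days and milliseconds as three little-endian uint32 values into Parquet's fixed 12-byte INTERVAL. A small worked example (illustrative; the helper name is made up, and the byte comment assumes a little-endian target):

// Sketch: 1 month, 2 days, 3 seconds -> {1, 2, 3000} packed into 12 bytes.
static void IntervalWriteExample(WriteStream &ser) {
	interval_t value;
	value.months = 1;
	value.days = 2;
	value.micros = 3000000; // micros are stored as milliseconds (micros / 1000)

	auto target = ParquetIntervalOperator::Operation<interval_t, ParquetIntervalTargetType>(value);
	// target.bytes: 01 00 00 00 | 02 00 00 00 | B8 0B 00 00   (3000 == 0x0BB8)
	ParquetIntervalOperator::WriteToStream<interval_t, ParquetIntervalTargetType>(target, ser);
}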


@@ -0,0 +1,305 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/parquet_write_stats.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
#include "geo_parquet.hpp"
namespace duckdb {
class ColumnWriterStatistics {
public:
virtual ~ColumnWriterStatistics();
virtual bool HasStats();
virtual string GetMin();
virtual string GetMax();
virtual string GetMinValue();
virtual string GetMaxValue();
virtual bool CanHaveNaN();
virtual bool HasNaN();
virtual bool MinIsExact();
virtual bool MaxIsExact();
virtual bool HasGeoStats();
virtual optional_ptr<GeometryStatsData> GetGeoStats();
virtual void WriteGeoStats(duckdb_parquet::GeospatialStatistics &stats);
public:
template <class TARGET>
TARGET &Cast() {
DynamicCastCheck<TARGET>(this);
return reinterpret_cast<TARGET &>(*this);
}
template <class TARGET>
const TARGET &Cast() const {
D_ASSERT(dynamic_cast<const TARGET *>(this));
return reinterpret_cast<const TARGET &>(*this);
}
};
template <class SRC, class T, class OP>
class NumericStatisticsState : public ColumnWriterStatistics {
public:
NumericStatisticsState() : min(NumericLimits<T>::Maximum()), max(NumericLimits<T>::Minimum()) {
}
T min;
T max;
public:
bool HasStats() override {
return min <= max;
}
string GetMin() override {
return NumericLimits<SRC>::IsSigned() ? GetMinValue() : string();
}
string GetMax() override {
return NumericLimits<SRC>::IsSigned() ? GetMaxValue() : string();
}
string GetMinValue() override {
return HasStats() ? string(char_ptr_cast(&min), sizeof(T)) : string();
}
string GetMaxValue() override {
return HasStats() ? string(char_ptr_cast(&max), sizeof(T)) : string();
}
};
template <class SRC, class T, class OP>
class FloatingPointStatisticsState : public NumericStatisticsState<SRC, T, OP> {
public:
bool has_nan = false;
public:
bool CanHaveNaN() override {
return true;
}
bool HasNaN() override {
return has_nan;
}
};
class StringStatisticsState : public ColumnWriterStatistics {
static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 256;
public:
explicit StringStatisticsState(LogicalTypeId type = LogicalTypeId::VARCHAR)
: type(type), has_stats(false), min_truncated(false), max_truncated(false), min(), max() {
}
LogicalTypeId type;
bool has_stats;
bool min_truncated;
bool max_truncated;
bool failed_truncate = false;
string min;
string max;
public:
bool HasStats() override {
return has_stats;
}
void Update(const string_t &val) {
if (failed_truncate) {
return;
}
if (!has_stats || LessThan::Operation(val, string_t(min))) {
if (val.GetSize() > MAX_STRING_STATISTICS_SIZE) {
// string value exceeds our max string stats size - truncate
min = TruncateMin(val, MAX_STRING_STATISTICS_SIZE);
min_truncated = true;
} else {
min = val.GetString();
min_truncated = false;
}
}
if (!has_stats || GreaterThan::Operation(val, string_t(max))) {
if (val.GetSize() > MAX_STRING_STATISTICS_SIZE) {
// string value exceeds our max string stats size - truncate
if (!TryTruncateMax(val, MAX_STRING_STATISTICS_SIZE, max)) {
// we failed to truncate - this can happen in some edge cases
// skip stats for this column
failed_truncate = true;
has_stats = false;
min = string();
max = string();
return;
}
max_truncated = true;
} else {
max = val.GetString();
max_truncated = false;
}
}
has_stats = true;
}
static inline bool IsCharacter(char c) {
return (c & 0xc0) != 0x80;
}
string TruncateMin(string_t str, idx_t max_size) {
// truncate a string for the min value
// since 'AAA' < 'AAAA', we can just truncate the string
D_ASSERT(str.GetSize() > max_size);
if (type == LogicalTypeId::BLOB) {
// for blobs - just truncate directly
return string(str.GetData(), max_size);
}
D_ASSERT(type == LogicalTypeId::VARCHAR);
// for varchar we need to truncate to a valid UTF8 string - so we need to truncate to the last valid UTF8 byte
auto str_data = str.GetData();
for (; max_size > 0; max_size--) {
if (IsCharacter(str_data[max_size])) {
break;
}
}
return string(str_data, max_size);
}
bool TryTruncateMax(string_t str, idx_t max_size, string &result, data_t max_byte) {
auto data = const_data_ptr_cast(str.GetData());
// find the last position in the string which we can increment for the truncation
// if ALL characters are above the max byte we cannot truncate
idx_t increment_pos;
for (increment_pos = max_size; increment_pos > 0; increment_pos--) {
idx_t str_idx = increment_pos - 1;
if (data[str_idx] < max_byte) {
// found the increment position
break;
}
}
if (increment_pos == 0) {
// all characters are above the max byte - we cannot truncate - return false
return false;
}
// set up the result string - we don't care about anything after the increment pos
result = string(str.GetData(), increment_pos);
// actually increment
result[increment_pos - 1]++;
return true;
}
bool TryTruncateMax(string_t str, idx_t max_size, string &result) {
// truncate a string for the max value
// since 'XXX' < 'XXXX', we need to "increment" a byte to get a correct max value
// i.e. we need to generate 'XXY' as a string
// note that this is not necessarily always possible
D_ASSERT(str.GetSize() > max_size);
if (type == LogicalTypeId::BLOB) {
// for blobs we can always increment bytes - we just can't increment past the max of a single byte (2^8)
return TryTruncateMax(str, max_size, result, static_cast<data_t>(0xFF));
}
D_ASSERT(type == LogicalTypeId::VARCHAR);
// for varchar the situation is more complex - we need to truncate to a valid UTF8 string and increment
// for now we only increment ASCII characters (characters below 0x7F)
return TryTruncateMax(str, max_size, result, static_cast<data_t>(0x7F));
}
string GetMin() override {
return GetMinValue();
}
string GetMax() override {
return GetMaxValue();
}
string GetMinValue() override {
return HasStats() ? min : string();
}
string GetMaxValue() override {
return HasStats() ? max : string();
}
bool MinIsExact() override {
return !min_truncated;
}
bool MaxIsExact() override {
return !max_truncated;
}
};
class UUIDStatisticsState : public ColumnWriterStatistics {
public:
bool has_stats = false;
data_t min[16] = {0};
data_t max[16] = {0};
public:
bool HasStats() override {
return has_stats;
}
string GetMin() override {
return GetMinValue();
}
string GetMax() override {
return GetMaxValue();
}
string GetMinValue() override {
return HasStats() ? string(char_ptr_cast(min), 16) : string();
}
string GetMaxValue() override {
return HasStats() ? string(char_ptr_cast(max), 16) : string();
}
};
class GeoStatisticsState final : public ColumnWriterStatistics {
public:
explicit GeoStatisticsState() : has_stats(false) {
geo_stats.SetEmpty();
}
bool has_stats;
GeometryStatsData geo_stats;
public:
void Update(const string_t &val) {
geo_stats.Update(val);
has_stats = true;
}
bool HasGeoStats() override {
return has_stats;
}
optional_ptr<GeometryStatsData> GetGeoStats() override {
return geo_stats;
}
void WriteGeoStats(duckdb_parquet::GeospatialStatistics &stats) override {
const auto &types = geo_stats.types;
const auto &bbox = geo_stats.extent;
if (bbox.HasXY()) {
stats.__isset.bbox = true;
stats.bbox.xmin = bbox.x_min;
stats.bbox.xmax = bbox.x_max;
stats.bbox.ymin = bbox.y_min;
stats.bbox.ymax = bbox.y_max;
if (bbox.HasZ()) {
stats.bbox.__isset.zmin = true;
stats.bbox.__isset.zmax = true;
stats.bbox.zmin = bbox.z_min;
stats.bbox.zmax = bbox.z_max;
}
if (bbox.HasM()) {
stats.bbox.__isset.mmin = true;
stats.bbox.__isset.mmax = true;
stats.bbox.mmin = bbox.m_min;
stats.bbox.mmax = bbox.m_max;
}
}
stats.__isset.geospatial_types = true;
stats.geospatial_types = types.ToWKBList();
}
};
} // namespace duckdb
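
The string truncation rules above are easiest to see on a concrete value: a min bound can simply be cut, while a max bound has to be cut and have its last kept byte incremented so it still upper-bounds the original. A hedged sketch (illustrative; it calls the helpers directly with an 8-byte limit instead of the real 256-byte MAX_STRING_STATISTICS_SIZE):

// Sketch: min/max truncation for an over-long VARCHAR statistics value.
static void StringStatsTruncationExample() {
	StringStatisticsState stats; // VARCHAR rules: UTF-8 aware, ASCII-only max increments
	const string_t long_value("abcdefghij", 10);

	// Min just cuts: "abcdefgh" <= "abcdefghij", so it is a valid (inexact) lower bound.
	auto min_bound = stats.TruncateMin(long_value, 8); // "abcdefgh"

	// Max cuts and bumps the last kept byte: "abcdefgi" > "abcdefghij".
	string max_bound;
	const bool ok = stats.TryTruncateMax(long_value, 8, max_bound); // true, "abcdefgi"
	(void)min_bound;
	(void)max_bound;
	(void)ok;
}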


@@ -0,0 +1,115 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/primitive_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
#include "writer/parquet_write_stats.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "parquet_statistics.hpp"
namespace duckdb {
struct PageInformation {
idx_t offset = 0;
idx_t row_count = 0;
idx_t empty_count = 0;
idx_t estimated_page_size = 0;
idx_t null_count = 0;
};
struct PageWriteInformation {
duckdb_parquet::PageHeader page_header;
unique_ptr<MemoryStream> temp_writer;
unique_ptr<ColumnWriterPageState> page_state;
idx_t write_page_idx = 0;
idx_t write_count = 0;
idx_t max_write_count = 0;
size_t compressed_size;
data_ptr_t compressed_data;
AllocatedData compressed_buf;
};
class PrimitiveColumnWriterState : public ColumnWriterState {
public:
PrimitiveColumnWriterState(ParquetWriter &writer_p, duckdb_parquet::RowGroup &row_group, idx_t col_idx)
: writer(writer_p), row_group(row_group), col_idx(col_idx) {
page_info.emplace_back();
}
~PrimitiveColumnWriterState() override = default;
ParquetWriter &writer;
duckdb_parquet::RowGroup &row_group;
idx_t col_idx;
vector<PageInformation> page_info;
vector<PageWriteInformation> write_info;
unique_ptr<ColumnWriterStatistics> stats_state;
idx_t current_page = 0;
unique_ptr<ParquetBloomFilter> bloom_filter;
};
//! Base class for writing non-compound types (ex. numerics, strings)
class PrimitiveColumnWriter : public ColumnWriter {
public:
PrimitiveColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path,
bool can_have_nulls);
~PrimitiveColumnWriter() override = default;
//! We limit the uncompressed page size to 100MB
//! The max size in Parquet is 2GB, but we choose a more conservative limit
static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 104857600ULL;
//! Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page.
//! For this reason we go with a much higher, but still conservative, upper bound of 1GB.
static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1073741824ULL;
public:
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
bool vector_can_span_multiple_pages) override;
void BeginWrite(ColumnWriterState &state) override;
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
void FinalizeWrite(ColumnWriterState &state) override;
protected:
static void WriteLevels(Allocator &allocator, WriteStream &temp_writer, const unsafe_vector<uint16_t> &levels,
idx_t max_value, idx_t start_offset, idx_t count, optional_idx null_count = optional_idx());
virtual duckdb_parquet::Encoding::type GetEncoding(PrimitiveColumnWriterState &state);
void NextPage(PrimitiveColumnWriterState &state);
void FlushPage(PrimitiveColumnWriterState &state);
//! Initializes the state used to track statistics during writing. Only used for scalar types.
virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
//! Initialize the writer for a specific page. Only used for scalar types.
virtual unique_ptr<ColumnWriterPageState> InitializePageState(PrimitiveColumnWriterState &state, idx_t page_idx);
//! Flushes the writer for a specific page. Only used for scalar types.
virtual void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state);
//! Retrieves the row size of a vector at the specified location. Only used for scalar types.
virtual idx_t GetRowSize(const Vector &vector, const idx_t index, const PrimitiveColumnWriterState &state) const;
//! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
virtual void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
Vector &vector, idx_t chunk_start, idx_t chunk_end) = 0;
virtual bool HasDictionary(PrimitiveColumnWriterState &state_p) {
return false;
}
//! The number of elements in the dictionary
virtual idx_t DictionarySize(PrimitiveColumnWriterState &state_p);
void WriteDictionary(PrimitiveColumnWriterState &state, unique_ptr<MemoryStream> temp_writer, idx_t row_count);
virtual void FlushDictionary(PrimitiveColumnWriterState &state, ColumnWriterStatistics *stats);
void SetParquetStatistics(PrimitiveColumnWriterState &state, duckdb_parquet::ColumnChunk &column);
void RegisterToRowGroup(duckdb_parquet::RowGroup &row_group);
};
} // namespace duckdb
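
PrimitiveColumnWriter and the compound writers above all implement the same lifecycle. The call order below is inferred from the method names and the analyze/prepare/write split, so treat it as a hedged sketch of how a ColumnWriter is driven for one column of a row group, not as the actual driver code:

// Sketch (assumed call order): drive a ColumnWriter for a single column chunk.
static void DriveColumnWriter(ColumnWriter &writer, duckdb_parquet::RowGroup &row_group, Vector &column_data,
                              idx_t count) {
	auto state = writer.InitializeWriteState(row_group);
	if (writer.HasAnalyze()) {
		writer.Analyze(*state, nullptr, column_data, count); // e.g. dictionary/encoding selection
		writer.FinalizeAnalyze(*state);
	}
	writer.Prepare(*state, nullptr, column_data, count, false); // definition/repetition levels, page layout
	writer.BeginWrite(*state);
	writer.Write(*state, column_data, count);                   // serialize the values into pages
	writer.FinalizeWrite(*state);                               // flush pages, dictionary and statistics
}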


@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/struct_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
namespace duckdb {
class StructColumnWriter : public ColumnWriter {
public:
StructColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
vector<unique_ptr<ColumnWriter>> child_writers_p, bool can_have_nulls)
: ColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) {
child_writers = std::move(child_writers_p);
}
~StructColumnWriter() override = default;
public:
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
bool HasAnalyze() override;
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
void FinalizeAnalyze(ColumnWriterState &state) override;
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
bool vector_can_span_multiple_pages) override;
void BeginWrite(ColumnWriterState &state) override;
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
void FinalizeWrite(ColumnWriterState &state) override;
};
} // namespace duckdb


@@ -0,0 +1,444 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/templated_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "writer/primitive_column_writer.hpp"
#include "writer/parquet_write_operators.hpp"
#include "parquet_dbp_encoder.hpp"
#include "parquet_dlba_encoder.hpp"
#include "parquet_rle_bp_encoder.hpp"
#include "duckdb/common/primitive_dictionary.hpp"
namespace duckdb {
template <class SRC, class TGT, class OP = ParquetCastOperator, bool ALL_VALID>
static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, const idx_t chunk_start,
const idx_t chunk_end, const ValidityMask &mask, WriteStream &ser) {
static constexpr bool COPY_DIRECTLY_FROM_VECTOR = ALL_VALID && std::is_same<SRC, TGT>::value &&
std::is_arithmetic<TGT>::value &&
std::is_same<OP, ParquetCastOperator>::value;
const auto *const ptr = FlatVector::GetData<SRC>(col);
TGT local_write[STANDARD_VECTOR_SIZE];
idx_t local_write_count = 0;
for (idx_t r = chunk_start; r < chunk_end; r++) {
if (!ALL_VALID && !mask.RowIsValid(r)) {
continue;
}
TGT target_value = OP::template Operation<SRC, TGT>(ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
if (COPY_DIRECTLY_FROM_VECTOR) {
continue;
}
if (std::is_arithmetic<TGT>::value) {
local_write[local_write_count++] = target_value;
if (local_write_count == STANDARD_VECTOR_SIZE) {
ser.WriteData(data_ptr_cast(local_write), local_write_count * sizeof(TGT));
local_write_count = 0;
}
} else {
OP::template WriteToStream<SRC, TGT>(target_value, ser);
}
}
if (COPY_DIRECTLY_FROM_VECTOR) {
ser.WriteData(const_data_ptr_cast(&ptr[chunk_start]), (chunk_end - chunk_start) * sizeof(TGT));
return;
}
if (std::is_arithmetic<TGT>::value) {
ser.WriteData(data_ptr_cast(local_write), local_write_count * sizeof(TGT));
}
// Else we already wrote to stream
}
template <class SRC, class TGT, class OP>
class StandardColumnWriterState : public PrimitiveColumnWriterState {
public:
StandardColumnWriterState(ParquetWriter &writer, duckdb_parquet::RowGroup &row_group, idx_t col_idx)
: PrimitiveColumnWriterState(writer, row_group, col_idx),
dictionary(BufferAllocator::Get(writer.GetContext()),
writer.DictionarySizeLimit().IsValid() ? writer.DictionarySizeLimit().GetIndex()
: NumericCast<idx_t>(row_group.num_rows) / 5,
writer.StringDictionaryPageSizeLimit()),
encoding(duckdb_parquet::Encoding::PLAIN) {
}
~StandardColumnWriterState() override = default;
// analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
idx_t total_value_count = 0;
idx_t total_string_size = 0;
uint32_t key_bit_width = 0;
PrimitiveDictionary<SRC, TGT, OP> dictionary;
duckdb_parquet::Encoding::type encoding;
};
template <class SRC, class TGT, class OP>
class StandardWriterPageState : public ColumnWriterPageState {
public:
explicit StandardWriterPageState(const idx_t total_value_count, const idx_t total_string_size,
duckdb_parquet::Encoding::type encoding_p,
const PrimitiveDictionary<SRC, TGT, OP> &dictionary_p)
: encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false),
dlba_encoder(total_value_count, total_string_size), bss_initialized(false),
bss_encoder(total_value_count, sizeof(TGT)), dictionary(dictionary_p), dict_written_value(false),
dict_bit_width(RleBpDecoder::ComputeBitWidth(dictionary.GetSize())), dict_encoder(dict_bit_width) {
}
duckdb_parquet::Encoding::type encoding;
bool dbp_initialized;
DbpEncoder dbp_encoder;
bool dlba_initialized;
DlbaEncoder dlba_encoder;
bool bss_initialized;
BssEncoder bss_encoder;
const PrimitiveDictionary<SRC, TGT, OP> &dictionary;
bool dict_written_value;
uint32_t dict_bit_width;
RleBpEncoder dict_encoder;
};
template <class SRC, class TGT, class OP = ParquetCastOperator>
class StandardColumnWriter : public PrimitiveColumnWriter {
public:
StandardColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
vector<string> schema_path_p, // NOLINT
bool can_have_nulls)
: PrimitiveColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) {
}
~StandardColumnWriter() override = default;
public:
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
auto result = make_uniq<StandardColumnWriterState<SRC, TGT, OP>>(writer, row_group, row_group.columns.size());
result->encoding = duckdb_parquet::Encoding::RLE_DICTIONARY;
RegisterToRowGroup(row_group);
return std::move(result);
}
unique_ptr<ColumnWriterPageState> InitializePageState(PrimitiveColumnWriterState &state_p,
idx_t page_idx) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
const auto &page_info = state_p.page_info[page_idx];
auto result = make_uniq<StandardWriterPageState<SRC, TGT, OP>>(
page_info.row_count - (page_info.empty_count + page_info.null_count), state.total_string_size,
state.encoding, state.dictionary);
return std::move(result);
}
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
auto &page_state = state_p->Cast<StandardWriterPageState<SRC, TGT, OP>>();
switch (page_state.encoding) {
case duckdb_parquet::Encoding::DELTA_BINARY_PACKED:
if (!page_state.dbp_initialized) {
page_state.dbp_encoder.BeginWrite(temp_writer, 0);
}
page_state.dbp_encoder.FinishWrite(temp_writer);
break;
case duckdb_parquet::Encoding::RLE_DICTIONARY:
D_ASSERT(page_state.dict_bit_width != 0);
if (!page_state.dict_written_value) {
// all values are null
// just write the bit width
temp_writer.Write<uint8_t>(page_state.dict_bit_width);
return;
}
page_state.dict_encoder.FinishWrite(temp_writer);
break;
case duckdb_parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY:
if (!page_state.dlba_initialized) {
page_state.dlba_encoder.BeginWrite(BufferAllocator::Get(writer.GetContext()), temp_writer,
string_t(""));
}
page_state.dlba_encoder.FinishWrite(temp_writer);
break;
case duckdb_parquet::Encoding::BYTE_STREAM_SPLIT:
if (!page_state.bss_initialized) {
page_state.bss_encoder.BeginWrite(BufferAllocator::Get(writer.GetContext()));
}
page_state.bss_encoder.FinishWrite(temp_writer);
break;
case duckdb_parquet::Encoding::PLAIN:
break;
default:
throw InternalException("Unknown encoding");
}
}
duckdb_parquet::Encoding::type GetEncoding(PrimitiveColumnWriterState &state_p) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
return state.encoding;
}
bool HasAnalyze() override {
return true;
}
void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
auto data_ptr = FlatVector::GetData<SRC>(vector);
idx_t vector_index = 0;
const bool check_parent_empty = parent && !parent->is_empty.empty();
const idx_t parent_index = state.definition_levels.size();
D_ASSERT(!check_parent_empty || parent_index < parent->is_empty.size());
const idx_t vcount =
check_parent_empty ? parent->definition_levels.size() - state.definition_levels.size() : count;
const auto &validity = FlatVector::Validity(vector);
if (!check_parent_empty && validity.AllValid()) {
// Fast path
for (; vector_index < vcount; vector_index++) {
const auto &src_value = data_ptr[vector_index];
state.dictionary.Insert(src_value);
state.total_value_count++;
state.total_string_size += DlbaEncoder::GetStringSize(src_value);
}
} else {
for (idx_t i = 0; i < vcount; i++) {
if (check_parent_empty && parent->is_empty[parent_index + i]) {
continue;
}
if (validity.RowIsValid(vector_index)) {
const auto &src_value = data_ptr[vector_index];
state.dictionary.Insert(src_value);
state.total_value_count++;
state.total_string_size += DlbaEncoder::GetStringSize(src_value);
}
vector_index++;
}
}
}
void FinalizeAnalyze(ColumnWriterState &state_p) override {
const auto type = writer.GetType(SchemaIndex());
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
if (state.dictionary.GetSize() == 0 || state.dictionary.IsFull()) {
state.dictionary.Reset();
if (writer.GetParquetVersion() == ParquetVersion::V1) {
// Can't do the cool stuff for V1
state.encoding = duckdb_parquet::Encoding::PLAIN;
} else {
// If we aren't doing dictionary encoding, these encodings are virtually always better than PLAIN
switch (type) {
case duckdb_parquet::Type::type::INT32:
case duckdb_parquet::Type::type::INT64:
state.encoding = duckdb_parquet::Encoding::DELTA_BINARY_PACKED;
break;
case duckdb_parquet::Type::type::BYTE_ARRAY:
state.encoding = duckdb_parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY;
break;
case duckdb_parquet::Type::type::FLOAT:
case duckdb_parquet::Type::type::DOUBLE:
state.encoding = duckdb_parquet::Encoding::BYTE_STREAM_SPLIT;
break;
default:
state.encoding = duckdb_parquet::Encoding::PLAIN;
}
}
} else {
state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.GetSize());
}
}
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
return OP::template InitializeStats<SRC, TGT>();
}
bool HasDictionary(PrimitiveColumnWriterState &state_p) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
return state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY;
}
idx_t DictionarySize(PrimitiveColumnWriterState &state_p) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
return state.dictionary.GetSize();
}
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p,
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
const auto &mask = FlatVector::Validity(input_column);
if (mask.AllValid()) {
WriteVectorInternal<true>(temp_writer, stats, page_state_p, input_column, chunk_start, chunk_end);
} else {
WriteVectorInternal<false>(temp_writer, stats, page_state_p, input_column, chunk_start, chunk_end);
}
}
void FlushDictionary(PrimitiveColumnWriterState &state_p, ColumnWriterStatistics *stats) override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
D_ASSERT(state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY);
if (writer.EnableBloomFilters()) {
state.bloom_filter =
make_uniq<ParquetBloomFilter>(state.dictionary.GetSize(), writer.BloomFilterFalsePositiveRatio());
}
state.dictionary.IterateValues([&](const SRC &src_value, const TGT &tgt_value) {
// update the statistics
OP::template HandleStats<SRC, TGT>(stats, tgt_value);
if (state.bloom_filter) {
// update the bloom filter
auto hash = OP::template XXHash64<SRC, TGT>(tgt_value);
state.bloom_filter->FilterInsert(hash);
}
});
// flush the dictionary page and add it to the to-be-written pages
WriteDictionary(state, state.dictionary.GetTargetMemoryStream(), state.dictionary.GetSize());
// bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
}
idx_t GetRowSize(const Vector &vector, const idx_t index,
const PrimitiveColumnWriterState &state_p) const override {
auto &state = state_p.Cast<StandardColumnWriterState<SRC, TGT, OP>>();
if (state.encoding == duckdb_parquet::Encoding::RLE_DICTIONARY) {
return (state.key_bit_width + 7) / 8;
} else {
return OP::template GetRowSize<SRC, TGT>(vector, index);
}
}
private:
template <bool ALL_VALID>
void WriteVectorInternal(WriteStream &temp_writer, ColumnWriterStatistics *stats,
ColumnWriterPageState *page_state_p, Vector &input_column, idx_t chunk_start,
idx_t chunk_end) {
auto &page_state = page_state_p->Cast<StandardWriterPageState<SRC, TGT, OP>>();
const auto &mask = FlatVector::Validity(input_column);
const auto *data_ptr = FlatVector::GetData<SRC>(input_column);
switch (page_state.encoding) {
case duckdb_parquet::Encoding::RLE_DICTIONARY: {
idx_t r = chunk_start;
if (!page_state.dict_written_value) {
// find first non-null value
for (; r < chunk_end; r++) {
if (!mask.RowIsValid(r)) {
continue;
}
// write the bit-width as a one-byte entry and initialize writer
temp_writer.Write<uint8_t>(page_state.dict_bit_width);
page_state.dict_encoder.BeginWrite();
page_state.dict_written_value = true;
break;
}
}
for (; r < chunk_end; r++) {
if (!ALL_VALID && !mask.RowIsValid(r)) {
continue;
}
const auto &src_value = data_ptr[r];
const auto value_index = page_state.dictionary.GetIndex(src_value);
page_state.dict_encoder.WriteValue(temp_writer, value_index);
}
break;
}
case duckdb_parquet::Encoding::DELTA_BINARY_PACKED: {
idx_t r = chunk_start;
if (!page_state.dbp_initialized) {
// find first non-null value
for (; r < chunk_end; r++) {
if (!mask.RowIsValid(r)) {
continue;
}
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
page_state.dbp_encoder.BeginWrite(temp_writer, target_value);
page_state.dbp_initialized = true;
r++; // skip over
break;
}
}
for (; r < chunk_end; r++) {
if (!ALL_VALID && !mask.RowIsValid(r)) {
continue;
}
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
page_state.dbp_encoder.WriteValue(temp_writer, target_value);
}
break;
}
case duckdb_parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY: {
idx_t r = chunk_start;
if (!page_state.dlba_initialized) {
// find first non-null value
for (; r < chunk_end; r++) {
if (!mask.RowIsValid(r)) {
continue;
}
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
page_state.dlba_encoder.BeginWrite(BufferAllocator::Get(writer.GetContext()), temp_writer,
target_value);
page_state.dlba_initialized = true;
r++; // skip over
break;
}
}
for (; r < chunk_end; r++) {
if (!ALL_VALID && !mask.RowIsValid(r)) {
continue;
}
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
page_state.dlba_encoder.WriteValue(temp_writer, target_value);
}
break;
}
case duckdb_parquet::Encoding::BYTE_STREAM_SPLIT: {
if (!page_state.bss_initialized) {
page_state.bss_encoder.BeginWrite(BufferAllocator::Get(writer.GetContext()));
page_state.bss_initialized = true;
}
for (idx_t r = chunk_start; r < chunk_end; r++) {
if (!ALL_VALID && !mask.RowIsValid(r)) {
continue;
}
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
OP::template HandleStats<SRC, TGT>(stats, target_value);
page_state.bss_encoder.WriteValue(target_value);
}
break;
}
case duckdb_parquet::Encoding::PLAIN: {
D_ASSERT(page_state.encoding == duckdb_parquet::Encoding::PLAIN);
if (mask.AllValid()) {
TemplatedWritePlain<SRC, TGT, OP, true>(input_column, stats, chunk_start, chunk_end, mask, temp_writer);
} else {
TemplatedWritePlain<SRC, TGT, OP, false>(input_column, stats, chunk_start, chunk_end, mask,
temp_writer);
}
break;
}
default:
throw InternalException("Unknown encoding");
}
}
};
} // namespace duckdb
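Editorial note on the dictionary path above: GetRowSize charges (key_bit_width + 7) / 8 bytes per row, where key_bit_width is derived from the dictionary size via RleBpDecoder::ComputeBitWidth. The following is a minimal standalone sketch of that sizing rule only; the helper name is made up and it is not the extension's actual ComputeBitWidth implementation.

#include <cstdint>
#include <cstdio>

// Illustrative helper: smallest bit width whose range covers `dict_size` dictionary keys.
static uint32_t ApproxKeyBitWidth(uint64_t dict_size) {
	uint32_t bits = 0;
	while ((1ULL << bits) < dict_size) {
		bits++;
	}
	return bits;
}

int main() {
	// A dictionary with 1000 distinct values needs 10-bit keys,
	// so the per-row size estimate is (10 + 7) / 8 = 2 bytes.
	const uint32_t bits = ApproxKeyBitWidth(1000);
	std::printf("bit width: %u, estimated bytes per row: %u\n", bits, (bits + 7) / 8);
	return 0;
}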

View File

@@ -0,0 +1,30 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// writer/variant_column_writer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "struct_column_writer.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
namespace duckdb {
class VariantColumnWriter : public StructColumnWriter {
public:
VariantColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector<string> schema_path_p,
vector<unique_ptr<ColumnWriter>> child_writers_p, bool can_have_nulls)
: StructColumnWriter(writer, column_schema, std::move(schema_path_p), std::move(child_writers_p),
can_have_nulls) {
}
~VariantColumnWriter() override = default;
public:
static ScalarFunction GetTransformFunction();
static LogicalType TransformTypedValueRecursive(const LogicalType &type);
};
} // namespace duckdb

View File

@@ -0,0 +1,33 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// zstd_file_system.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
#include "duckdb/common/compressed_file_system.hpp"
namespace duckdb {
class ZStdFileSystem : public CompressedFileSystem {
public:
unique_ptr<FileHandle> OpenCompressedFile(QueryContext context, unique_ptr<FileHandle> handle, bool write) override;
std::string GetName() const override {
return "ZStdFileSystem";
}
unique_ptr<StreamWrapper> CreateStream() override;
idx_t InBufferSize() override;
idx_t OutBufferSize() override;
static int64_t DefaultCompressionLevel();
static int64_t MinimumCompressionLevel();
static int64_t MaximumCompressionLevel();
};
} // namespace duckdb

View File

@@ -0,0 +1,84 @@
import os
# list all include directories
include_directories = [
os.path.sep.join(x.split('/'))
for x in [
'extension/parquet/include',
'third_party/parquet',
'third_party/thrift',
'third_party/lz4',
'third_party/brotli/include',
'third_party/brotli/common',
'third_party/brotli/dec',
'third_party/brotli/enc',
'third_party/snappy',
'third_party/mbedtls',
'third_party/mbedtls/include',
'third_party/zstd/include',
]
]
prefix = os.path.join('extension', 'parquet')
def list_files_recursive(rootdir, suffix):
file_list = []
for root, _, files in os.walk(rootdir):
file_list += [os.path.join(root, f) for f in files if f.endswith(suffix)]
return file_list
source_files = list_files_recursive(prefix, '.cpp')
# parquet/thrift/snappy
source_files += [
os.path.sep.join(x.split('/'))
for x in [
'third_party/parquet/parquet_types.cpp',
'third_party/thrift/thrift/protocol/TProtocol.cpp',
'third_party/thrift/thrift/transport/TTransportException.cpp',
'third_party/thrift/thrift/transport/TBufferTransports.cpp',
'third_party/snappy/snappy.cc',
'third_party/snappy/snappy-sinksource.cc',
]
]
# lz4
source_files += [os.path.sep.join(x.split('/')) for x in ['third_party/lz4/lz4.cpp']]
# brotli
source_files += [
os.path.sep.join(x.split('/'))
for x in [
'third_party/brotli/common/constants.cpp',
'third_party/brotli/common/context.cpp',
'third_party/brotli/common/dictionary.cpp',
'third_party/brotli/common/platform.cpp',
'third_party/brotli/common/shared_dictionary.cpp',
'third_party/brotli/common/transform.cpp',
'third_party/brotli/dec/bit_reader.cpp',
'third_party/brotli/dec/decode.cpp',
'third_party/brotli/dec/huffman.cpp',
'third_party/brotli/dec/state.cpp',
'third_party/brotli/enc/backward_references.cpp',
'third_party/brotli/enc/backward_references_hq.cpp',
'third_party/brotli/enc/bit_cost.cpp',
'third_party/brotli/enc/block_splitter.cpp',
'third_party/brotli/enc/brotli_bit_stream.cpp',
'third_party/brotli/enc/cluster.cpp',
'third_party/brotli/enc/command.cpp',
'third_party/brotli/enc/compound_dictionary.cpp',
'third_party/brotli/enc/compress_fragment.cpp',
'third_party/brotli/enc/compress_fragment_two_pass.cpp',
'third_party/brotli/enc/dictionary_hash.cpp',
'third_party/brotli/enc/encode.cpp',
'third_party/brotli/enc/encoder_dict.cpp',
'third_party/brotli/enc/entropy_encode.cpp',
'third_party/brotli/enc/fast_log.cpp',
'third_party/brotli/enc/histogram.cpp',
'third_party/brotli/enc/literal_cost.cpp',
'third_party/brotli/enc/memory.cpp',
'third_party/brotli/enc/metablock.cpp',
'third_party/brotli/enc/static_dict.cpp',
'third_party/brotli/enc/utf8_util.cpp',
]
]

View File

@@ -0,0 +1,406 @@
#include "parquet_crypto.hpp"
#include "mbedtls_wrapper.hpp"
#include "thrift_tools.hpp"
#include "duckdb/common/exception/conversion_exception.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/types/blob.hpp"
#include "duckdb/storage/arena_allocator.hpp"
namespace duckdb {
ParquetKeys &ParquetKeys::Get(ClientContext &context) {
auto &cache = ObjectCache::GetObjectCache(context);
if (!cache.Get<ParquetKeys>(ParquetKeys::ObjectType())) {
cache.Put(ParquetKeys::ObjectType(), make_shared_ptr<ParquetKeys>());
}
return *cache.Get<ParquetKeys>(ParquetKeys::ObjectType());
}
void ParquetKeys::AddKey(const string &key_name, const string &key) {
keys[key_name] = key;
}
bool ParquetKeys::HasKey(const string &key_name) const {
return keys.find(key_name) != keys.end();
}
const string &ParquetKeys::GetKey(const string &key_name) const {
D_ASSERT(HasKey(key_name));
return keys.at(key_name);
}
string ParquetKeys::ObjectType() {
return "parquet_keys";
}
string ParquetKeys::GetObjectType() {
return ObjectType();
}
ParquetEncryptionConfig::ParquetEncryptionConfig() {
}
ParquetEncryptionConfig::ParquetEncryptionConfig(string footer_key_p) : footer_key(std::move(footer_key_p)) {
}
ParquetEncryptionConfig::ParquetEncryptionConfig(ClientContext &context, const Value &arg) {
if (arg.type().id() != LogicalTypeId::STRUCT) {
throw BinderException("Parquet encryption_config must be of type STRUCT");
}
const auto &child_types = StructType::GetChildTypes(arg.type());
auto &children = StructValue::GetChildren(arg);
const auto &keys = ParquetKeys::Get(context);
for (idx_t i = 0; i < StructType::GetChildCount(arg.type()); i++) {
auto &struct_key = child_types[i].first;
if (StringUtil::Lower(struct_key) == "footer_key") {
const auto footer_key_name = StringValue::Get(children[i].DefaultCastAs(LogicalType::VARCHAR));
if (!keys.HasKey(footer_key_name)) {
throw BinderException(
"No key with name \"%s\" exists. Add it with PRAGMA add_parquet_key('<key_name>','<key>');",
footer_key_name);
}
// footer key name provided - read the key from the already-fetched key registry
footer_key = keys.GetKey(footer_key_name);
} else if (StringUtil::Lower(struct_key) == "footer_key_value") {
footer_key = StringValue::Get(children[i].DefaultCastAs(LogicalType::BLOB));
} else if (StringUtil::Lower(struct_key) == "column_keys") {
throw NotImplementedException("Parquet encryption_config column_keys not yet implemented");
} else {
throw BinderException("Unknown key in encryption_config \"%s\"", struct_key);
}
}
}
shared_ptr<ParquetEncryptionConfig> ParquetEncryptionConfig::Create(ClientContext &context, const Value &arg) {
return shared_ptr<ParquetEncryptionConfig>(new ParquetEncryptionConfig(context, arg));
}
const string &ParquetEncryptionConfig::GetFooterKey() const {
return footer_key;
}
using duckdb_apache::thrift::protocol::TCompactProtocolFactoryT;
using duckdb_apache::thrift::transport::TTransport;
//! Encryption wrapper for a transport protocol
class EncryptionTransport : public TTransport {
public:
EncryptionTransport(TProtocol &prot_p, const string &key, const EncryptionUtil &encryption_util_p)
: prot(prot_p), trans(*prot.getTransport()),
aes(encryption_util_p.CreateEncryptionState(EncryptionTypes::GCM, key.size())),
allocator(Allocator::DefaultAllocator(), ParquetCrypto::CRYPTO_BLOCK_SIZE) {
Initialize(key);
}
bool isOpen() const override {
return trans.isOpen();
}
void open() override {
trans.open();
}
void close() override {
trans.close();
}
void write_virt(const uint8_t *buf, uint32_t len) override {
memcpy(allocator.Allocate(len), buf, len);
}
uint32_t Finalize() {
// Write length
const auto ciphertext_length = allocator.SizeInBytes();
const uint32_t total_length = ParquetCrypto::NONCE_BYTES + ciphertext_length + ParquetCrypto::TAG_BYTES;
trans.write(const_data_ptr_cast(&total_length), ParquetCrypto::LENGTH_BYTES);
// Write nonce at beginning of encrypted chunk
trans.write(nonce, ParquetCrypto::NONCE_BYTES);
data_t aes_buffer[ParquetCrypto::CRYPTO_BLOCK_SIZE];
auto current = allocator.GetTail();
// Loop through the whole chunk
while (current != nullptr) {
for (idx_t pos = 0; pos < current->current_position; pos += ParquetCrypto::CRYPTO_BLOCK_SIZE) {
auto next = MinValue<idx_t>(current->current_position - pos, ParquetCrypto::CRYPTO_BLOCK_SIZE);
auto write_size =
aes->Process(current->data.get() + pos, next, aes_buffer, ParquetCrypto::CRYPTO_BLOCK_SIZE);
trans.write(aes_buffer, write_size);
}
current = current->prev;
}
// Finalize the last encrypted data
data_t tag[ParquetCrypto::TAG_BYTES];
auto write_size = aes->Finalize(aes_buffer, 0, tag, ParquetCrypto::TAG_BYTES);
trans.write(aes_buffer, write_size);
// Write tag for verification
trans.write(tag, ParquetCrypto::TAG_BYTES);
return ParquetCrypto::LENGTH_BYTES + total_length;
}
private:
void Initialize(const string &key) {
// Generate Nonce
aes->GenerateRandomData(nonce, ParquetCrypto::NONCE_BYTES);
// Initialize Encryption
aes->InitializeEncryption(nonce, ParquetCrypto::NONCE_BYTES, reinterpret_cast<const_data_ptr_t>(key.data()),
key.size());
}
private:
//! Protocol and corresponding transport that we're wrapping
TProtocol &prot;
TTransport &trans;
//! AES context and buffers
shared_ptr<EncryptionState> aes;
//! Nonce created by Initialize()
data_t nonce[ParquetCrypto::NONCE_BYTES];
//! Arena Allocator to fully materialize in memory before encrypting
ArenaAllocator allocator;
};
//! Decryption wrapper for a transport protocol
class DecryptionTransport : public TTransport {
public:
DecryptionTransport(TProtocol &prot_p, const string &key, const EncryptionUtil &encryption_util_p)
: prot(prot_p), trans(*prot.getTransport()),
aes(encryption_util_p.CreateEncryptionState(EncryptionTypes::GCM, key.size())), read_buffer_size(0),
read_buffer_offset(0) {
Initialize(key);
}
uint32_t read_virt(uint8_t *buf, uint32_t len) override {
const uint32_t result = len;
if (len > transport_remaining - ParquetCrypto::TAG_BYTES + read_buffer_size - read_buffer_offset) {
throw InvalidInputException("Too many bytes requested from crypto buffer");
}
while (len != 0) {
if (read_buffer_offset == read_buffer_size) {
ReadBlock(buf);
}
const auto next = MinValue(read_buffer_size - read_buffer_offset, len);
read_buffer_offset += next;
buf += next;
len -= next;
}
return result;
}
uint32_t Finalize() {
if (read_buffer_offset != read_buffer_size) {
throw InternalException("DecryptionTransport::Finalize was called with bytes remaining in read buffer: \n"
"read buffer offset: %d, read buffer size: %d",
read_buffer_offset, read_buffer_size);
}
data_t computed_tag[ParquetCrypto::TAG_BYTES];
transport_remaining -= trans.read(computed_tag, ParquetCrypto::TAG_BYTES);
aes->Finalize(read_buffer, 0, computed_tag, ParquetCrypto::TAG_BYTES);
if (transport_remaining != 0) {
throw InvalidInputException("Encoded ciphertext length differs from actual ciphertext length");
}
return ParquetCrypto::LENGTH_BYTES + total_bytes;
}
AllocatedData ReadAll() {
D_ASSERT(transport_remaining == total_bytes - ParquetCrypto::NONCE_BYTES);
auto result = Allocator::DefaultAllocator().Allocate(transport_remaining - ParquetCrypto::TAG_BYTES);
read_virt(result.get(), transport_remaining - ParquetCrypto::TAG_BYTES);
Finalize();
return result;
}
private:
void Initialize(const string &key) {
// Read encoded length (don't add to read_bytes)
data_t length_buf[ParquetCrypto::LENGTH_BYTES];
trans.read(length_buf, ParquetCrypto::LENGTH_BYTES);
total_bytes = Load<uint32_t>(length_buf);
transport_remaining = total_bytes;
// Read nonce and initialize AES
transport_remaining -= trans.read(nonce, ParquetCrypto::NONCE_BYTES);
// check whether context is initialized
aes->InitializeDecryption(nonce, ParquetCrypto::NONCE_BYTES, reinterpret_cast<const_data_ptr_t>(key.data()),
key.size());
}
void ReadBlock(uint8_t *buf) {
// Read from transport into read_buffer at one AES block size offset (up to the tag)
read_buffer_size = MinValue(ParquetCrypto::CRYPTO_BLOCK_SIZE, transport_remaining - ParquetCrypto::TAG_BYTES);
transport_remaining -= trans.read(read_buffer + ParquetCrypto::BLOCK_SIZE, read_buffer_size);
// Decrypt from read_buffer + block size into read_buffer start (decryption can trail behind in same buffer)
#ifdef DEBUG
auto size = aes->Process(read_buffer + ParquetCrypto::BLOCK_SIZE, read_buffer_size, buf,
ParquetCrypto::CRYPTO_BLOCK_SIZE + ParquetCrypto::BLOCK_SIZE);
D_ASSERT(size == read_buffer_size);
#else
aes->Process(read_buffer + ParquetCrypto::BLOCK_SIZE, read_buffer_size, buf,
ParquetCrypto::CRYPTO_BLOCK_SIZE + ParquetCrypto::BLOCK_SIZE);
#endif
read_buffer_offset = 0;
}
private:
//! Protocol and corresponding transport that we're wrapping
TProtocol &prot;
TTransport &trans;
//! AES context and buffers
shared_ptr<EncryptionState> aes;
//! We read/decrypt big blocks at a time
data_t read_buffer[ParquetCrypto::CRYPTO_BLOCK_SIZE + ParquetCrypto::BLOCK_SIZE];
uint32_t read_buffer_size;
uint32_t read_buffer_offset;
//! Remaining bytes to read, set by Initialize(), decremented by ReadBlock()
uint32_t total_bytes;
uint32_t transport_remaining;
//! Nonce read by Initialize()
data_t nonce[ParquetCrypto::NONCE_BYTES];
};
class SimpleReadTransport : public TTransport {
public:
explicit SimpleReadTransport(data_ptr_t read_buffer_p, uint32_t read_buffer_size_p)
: read_buffer(read_buffer_p), read_buffer_size(read_buffer_size_p), read_buffer_offset(0) {
}
uint32_t read_virt(uint8_t *buf, uint32_t len) override {
const auto remaining = read_buffer_size - read_buffer_offset;
if (len > remaining) {
return remaining;
}
memcpy(buf, read_buffer + read_buffer_offset, len);
read_buffer_offset += len;
return len;
}
private:
const data_ptr_t read_buffer;
const uint32_t read_buffer_size;
uint32_t read_buffer_offset;
};
uint32_t ParquetCrypto::Read(TBase &object, TProtocol &iprot, const string &key,
const EncryptionUtil &encryption_util_p) {
TCompactProtocolFactoryT<DecryptionTransport> tproto_factory;
auto dprot =
tproto_factory.getProtocol(duckdb_base_std::make_shared<DecryptionTransport>(iprot, key, encryption_util_p));
auto &dtrans = reinterpret_cast<DecryptionTransport &>(*dprot->getTransport());
// We have to read the whole thing, otherwise thrift throws an error before we realize the decryption is wrong
auto all = dtrans.ReadAll();
TCompactProtocolFactoryT<SimpleReadTransport> tsimple_proto_factory;
auto simple_prot =
tsimple_proto_factory.getProtocol(duckdb_base_std::make_shared<SimpleReadTransport>(all.get(), all.GetSize()));
// Read the object
object.read(simple_prot.get());
return ParquetCrypto::LENGTH_BYTES + ParquetCrypto::NONCE_BYTES + all.GetSize() + ParquetCrypto::TAG_BYTES;
}
uint32_t ParquetCrypto::Write(const TBase &object, TProtocol &oprot, const string &key,
const EncryptionUtil &encryption_util_p) {
// Create encryption protocol
TCompactProtocolFactoryT<EncryptionTransport> tproto_factory;
auto eprot =
tproto_factory.getProtocol(duckdb_base_std::make_shared<EncryptionTransport>(oprot, key, encryption_util_p));
auto &etrans = reinterpret_cast<EncryptionTransport &>(*eprot->getTransport());
// Write the object in memory
object.write(eprot.get());
// Encrypt and write to oprot
return etrans.Finalize();
}
uint32_t ParquetCrypto::ReadData(TProtocol &iprot, const data_ptr_t buffer, const uint32_t buffer_size,
const string &key, const EncryptionUtil &encryption_util_p) {
// Create decryption protocol
TCompactProtocolFactoryT<DecryptionTransport> tproto_factory;
auto dprot =
tproto_factory.getProtocol(duckdb_base_std::make_shared<DecryptionTransport>(iprot, key, encryption_util_p));
auto &dtrans = reinterpret_cast<DecryptionTransport &>(*dprot->getTransport());
// Read buffer
dtrans.read(buffer, buffer_size);
// Verify AES tag and read length
return dtrans.Finalize();
}
uint32_t ParquetCrypto::WriteData(TProtocol &oprot, const const_data_ptr_t buffer, const uint32_t buffer_size,
const string &key, const EncryptionUtil &encryption_util_p) {
// FIXME: we know the size upfront so we could do a streaming write instead of this
// Create encryption protocol
TCompactProtocolFactoryT<EncryptionTransport> tproto_factory;
auto eprot =
tproto_factory.getProtocol(duckdb_base_std::make_shared<EncryptionTransport>(oprot, key, encryption_util_p));
auto &etrans = reinterpret_cast<EncryptionTransport &>(*eprot->getTransport());
// Write the data in memory
etrans.write(buffer, buffer_size);
// Encrypt and write to oprot
return etrans.Finalize();
}
bool ParquetCrypto::ValidKey(const std::string &key) {
switch (key.size()) {
case 16:
case 24:
case 32:
return true;
default:
return false;
}
}
static string Base64Decode(const string &key) {
auto result_size = Blob::FromBase64Size(key);
auto output = duckdb::unique_ptr<unsigned char[]>(new unsigned char[result_size]);
Blob::FromBase64(key, output.get(), result_size);
string decoded_key(reinterpret_cast<const char *>(output.get()), result_size);
return decoded_key;
}
void ParquetCrypto::AddKey(ClientContext &context, const FunctionParameters &parameters) {
const auto &key_name = StringValue::Get(parameters.values[0]);
const auto &key = StringValue::Get(parameters.values[1]);
auto &keys = ParquetKeys::Get(context);
if (ValidKey(key)) {
keys.AddKey(key_name, key);
} else {
string decoded_key;
try {
decoded_key = Base64Decode(key);
} catch (const ConversionException &e) {
throw InvalidInputException("Invalid AES key. Not a plain AES key NOR a base64 encoded string");
}
if (!ValidKey(decoded_key)) {
throw InvalidInputException(
"Invalid AES key. Must have a length of 128, 192, or 256 bits (16, 24, or 32 bytes)");
}
keys.AddKey(key_name, decoded_key);
}
}
} // namespace duckdb
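For orientation, a hedged sketch of how the pieces above are typically exercised end to end through DuckDB's C++ API: PRAGMA add_parquet_key lands in ParquetCrypto::AddKey/ValidKey, and the footer_key entry of an encryption_config is resolved against ParquetKeys in ParquetEncryptionConfig. The key value and file names below are placeholders, and the exact option spelling is an assumption rather than something confirmed by this excerpt.

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// Register a named key; it must be 16, 24, or 32 bytes (or base64 of such a key),
	// matching ParquetCrypto::ValidKey above. The key here is a 32-byte placeholder.
	con.Query("PRAGMA add_parquet_key('key256', '01234567891123456789212345678931')");

	// Write and read an encrypted file; footer_key names the registered key and is
	// looked up via ParquetKeys in ParquetEncryptionConfig.
	con.Query("COPY (SELECT 42 AS i) TO 'encrypted.parquet' "
	          "(ENCRYPTION_CONFIG {footer_key: 'key256'})");
	auto result = con.Query("SELECT * FROM read_parquet('encrypted.parquet', "
	                        "encryption_config = {footer_key: 'key256'})");
	result->Print();
	return 0;
}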

File diff suppressed because it is too large

View File

@@ -0,0 +1,180 @@
#include "parquet_field_id.hpp"
#include "duckdb/common/exception/binder_exception.hpp"
namespace duckdb {
constexpr const char *FieldID::DUCKDB_FIELD_ID;
ChildFieldIDs::ChildFieldIDs() : ids(make_uniq<case_insensitive_map_t<FieldID>>()) {
}
ChildFieldIDs ChildFieldIDs::Copy() const {
ChildFieldIDs result;
for (const auto &id : *ids) {
result.ids->emplace(id.first, id.second.Copy());
}
return result;
}
FieldID::FieldID() : set(false) {
}
FieldID::FieldID(int32_t field_id_p) : set(true), field_id(field_id_p) {
}
FieldID FieldID::Copy() const {
auto result = set ? FieldID(field_id) : FieldID();
result.child_field_ids = child_field_ids.Copy();
return result;
}
static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
case_insensitive_map_t<LogicalType> name_to_type_map;
switch (type.id()) {
case LogicalTypeId::LIST:
name_to_type_map.emplace("element", ListType::GetChildType(type));
break;
case LogicalTypeId::MAP:
name_to_type_map.emplace("key", MapType::KeyType(type));
name_to_type_map.emplace("value", MapType::ValueType(type));
break;
case LogicalTypeId::STRUCT:
for (auto &child_type : StructType::GetChildTypes(type)) {
if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
}
name_to_type_map.emplace(child_type);
}
break;
default: // LCOV_EXCL_START
throw InternalException("Unexpected type in GetChildNameToTypeMap");
} // LCOV_EXCL_STOP
return name_to_type_map;
}
static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
vector<LogicalType> &child_types) {
switch (type.id()) {
case LogicalTypeId::LIST:
child_names.emplace_back("element");
child_types.emplace_back(ListType::GetChildType(type));
break;
case LogicalTypeId::MAP:
child_names.emplace_back("key");
child_names.emplace_back("value");
child_types.emplace_back(MapType::KeyType(type));
child_types.emplace_back(MapType::ValueType(type));
break;
case LogicalTypeId::STRUCT:
for (auto &child_type : StructType::GetChildTypes(type)) {
child_names.emplace_back(child_type.first);
child_types.emplace_back(child_type.second);
}
break;
default: // LCOV_EXCL_START
throw InternalException("Unexpected type in GetChildNamesAndTypes");
} // LCOV_EXCL_STOP
}
void FieldID::GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
const vector<LogicalType> &sql_types) {
D_ASSERT(names.size() == sql_types.size());
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
const auto &col_name = names[col_idx];
auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(UnsafeNumericCast<int32_t>(field_id++))));
D_ASSERT(inserted.second);
const auto &col_type = sql_types[col_idx];
if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
col_type.id() != LogicalTypeId::STRUCT) {
continue;
}
// Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
vector<string> child_names;
vector<LogicalType> child_types;
GetChildNamesAndTypes(col_type, child_names, child_types);
GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
}
}
void FieldID::GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
unordered_set<uint32_t> &unique_field_ids,
const case_insensitive_map_t<LogicalType> &name_to_type_map) {
const auto &struct_type = field_ids_value.type();
if (struct_type.id() != LogicalTypeId::STRUCT) {
throw BinderException(
"Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}",
FieldID::DUCKDB_FIELD_ID);
}
const auto &struct_children = StructValue::GetChildren(field_ids_value);
D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
for (idx_t i = 0; i < struct_children.size(); i++) {
const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
if (col_name == FieldID::DUCKDB_FIELD_ID) {
continue;
}
auto it = name_to_type_map.find(col_name);
if (it == name_to_type_map.end()) {
string names;
for (const auto &name : name_to_type_map) {
if (!names.empty()) {
names += ", ";
}
names += name.first;
}
throw BinderException(
"Column name \"%s\" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this "
"column is a partition column. Available column names: [%s]",
col_name, names);
}
D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys
const auto &child_value = struct_children[i];
const auto &child_type = child_value.type();
optional_ptr<const Value> field_id_value;
optional_ptr<const Value> child_field_ids_value;
if (child_type.id() == LogicalTypeId::STRUCT) {
const auto &nested_children = StructValue::GetChildren(child_value);
D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i);
if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
field_id_value = &nested_children[nested_i];
} else {
child_field_ids_value = &child_value;
}
}
} else {
field_id_value = &child_value;
}
FieldID field_id;
if (field_id_value) {
Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
if (!unique_field_ids.insert(field_id_int).second) {
throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
}
field_id = FieldID(UnsafeNumericCast<int32_t>(field_id_int));
}
auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
D_ASSERT(inserted.second);
if (child_field_ids_value) {
const auto &col_type = it->second;
if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
col_type.id() != LogicalTypeId::STRUCT) {
throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
col_name, LogicalTypeIdToString(col_type.id()));
}
GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
GetChildNameToTypeMap(col_type));
}
}
}
} // namespace duckdb
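The struct shapes accepted by GetFieldIDs/GenerateFieldIDs above correspond to the FIELD_IDS option of Parquet COPY. Below is a hedged usage sketch through the C++ API; it assumes the reserved per-struct key is the value of FieldID::DUCKDB_FIELD_ID (spelled here as __duckdb_field_id, defined in a header not shown in this diff) and that FIELD_IDS 'auto' routes to GenerateFieldIDs.

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// Explicit field IDs, mirroring the struct shape from the binder error message above:
	// top-level columns map to an ID, nested structs carry their own ID under the reserved key.
	con.Query("COPY (SELECT 1 AS col1, {'nested_col': 2} AS col2) TO 'ids.parquet' "
	          "(FIELD_IDS {col1: 42, col2: {__duckdb_field_id: 43, nested_col: 44}})");

	// Automatic depth-first assignment (handled by FieldID::GenerateFieldIDs).
	con.Query("COPY (SELECT 1 AS col1, {'nested_col': 2} AS col2) TO 'auto_ids.parquet' "
	          "(FIELD_IDS 'auto')");
	return 0;
}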

View File

@@ -0,0 +1,55 @@
#include "parquet_file_metadata_cache.hpp"
#include "duckdb/storage/external_file_cache.hpp"
#include "duckdb/storage/caching_file_system.hpp"
namespace duckdb {
ParquetFileMetadataCache::ParquetFileMetadataCache(unique_ptr<duckdb_parquet::FileMetaData> file_metadata,
CachingFileHandle &handle,
unique_ptr<GeoParquetFileMetadata> geo_metadata, idx_t footer_size)
: metadata(std::move(file_metadata)), geo_metadata(std::move(geo_metadata)), footer_size(footer_size),
validate(handle.Validate()), last_modified(handle.GetLastModifiedTime()), version_tag(handle.GetVersionTag()) {
}
string ParquetFileMetadataCache::ObjectType() {
return "parquet_metadata";
}
string ParquetFileMetadataCache::GetObjectType() {
return ObjectType();
}
bool ParquetFileMetadataCache::IsValid(CachingFileHandle &new_handle) const {
return ExternalFileCache::IsValid(validate, version_tag, last_modified, new_handle.GetVersionTag(),
new_handle.GetLastModifiedTime());
}
ParquetCacheValidity ParquetFileMetadataCache::IsValid(const OpenFileInfo &info) const {
if (!info.extended_info) {
return ParquetCacheValidity::UNKNOWN;
}
auto &open_options = info.extended_info->options;
const auto validate_entry = open_options.find("validate_external_file_cache");
if (validate_entry != open_options.end()) {
// check if always valid - if so just return valid
if (BooleanValue::Get(validate_entry->second)) {
return ParquetCacheValidity::VALID;
}
}
const auto lm_entry = open_options.find("last_modified");
if (lm_entry == open_options.end()) {
return ParquetCacheValidity::UNKNOWN;
}
auto new_last_modified = lm_entry->second.GetValue<timestamp_t>();
string new_etag;
const auto etag_entry = open_options.find("etag");
if (etag_entry != open_options.end()) {
new_etag = StringValue::Get(etag_entry->second);
}
if (ExternalFileCache::IsValid(false, version_tag, last_modified, new_etag, new_last_modified)) {
return ParquetCacheValidity::VALID;
}
return ParquetCacheValidity::INVALID;
}
} // namespace duckdb

View File

@@ -0,0 +1,44 @@
#include "parquet_float16.hpp"
#include "duckdb.hpp"
namespace duckdb {
float Float16ToFloat32(const uint16_t &float16_value) {
uint32_t sign = float16_value >> 15;
uint32_t exponent = (float16_value >> 10) & 0x1F;
uint32_t fraction = (float16_value & 0x3FF);
// Avoid strict aliasing issues and compiler warnings
uint32_t float32_value = 0;
if (exponent == 0) {
if (fraction == 0) {
// zero
float32_value = (sign << 31);
} else {
// can be represented as ordinary value in float32
// 2 ** -14 * 0.0101
// => 2 ** -16 * 1.0100
// int int_exponent = -14;
exponent = 127 - 14;
while ((fraction & (1 << 10)) == 0) {
// int_exponent--;
exponent--;
fraction <<= 1;
}
fraction &= 0x3FF;
// int_exponent += 127;
float32_value = (sign << 31) | (exponent << 23) | (fraction << 13);
}
} else if (exponent == 0x1F) {
/* Inf or NaN */
float32_value = (sign << 31) | (0xFF << 23) | (fraction << 13);
} else {
/* ordinary number */
float32_value = (sign << 31) | ((exponent + (127 - 15)) << 23) | (fraction << 13);
}
return Load<float>(const_data_ptr_cast(&float32_value));
}
} // namespace duckdb
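A quick worked check of the conversion above, assuming the header path used by this file ('parquet_float16.hpp'): 0x3C00 is half-precision 1.0 (sign 0, exponent 15, fraction 0) and 0xC000 is -2.0 (sign 1, exponent 16, fraction 0).

#include "parquet_float16.hpp"

#include <cstdint>
#include <cstdio>

int main() {
	const uint16_t one = 0x3C00;       // exponent 15 - bias 15 -> 2^0, so 1.0f
	const uint16_t minus_two = 0xC000; // sign bit set, exponent 16 - bias 15 -> 2^1, so -2.0f
	std::printf("%f %f\n", duckdb::Float16ToFloat32(one), duckdb::Float16ToFloat32(minus_two));
	return 0;
}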

View File

@@ -0,0 +1,969 @@
#include "parquet_metadata.hpp"
#include "parquet_statistics.hpp"
#include <sstream>
#include "duckdb/common/multi_file/multi_file_reader.hpp"
#include "duckdb/common/types/blob.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "duckdb/main/config.hpp"
#include "duckdb/common/multi_file/multi_file_list.hpp"
#include "parquet_reader.hpp"
#include "duckdb/common/numeric_utils.hpp"
namespace duckdb {
struct ParquetMetadataFilePaths {
MultiFileListScanData scan_data;
shared_ptr<MultiFileList> file_list;
mutex file_lock;
bool NextFile(OpenFileInfo &result) {
D_ASSERT(file_list);
unique_lock<mutex> lock(file_lock);
return file_list->Scan(scan_data, result);
}
FileExpandResult GetExpandResult() {
D_ASSERT(file_list);
unique_lock<mutex> lock(file_lock);
return file_list->GetExpandResult();
}
};
struct ParquetMetaDataBindData : public TableFunctionData {
unique_ptr<ParquetMetadataFilePaths> file_paths;
};
struct ParquetBloomProbeBindData : public ParquetMetaDataBindData {
string probe_column_name;
Value probe_constant;
};
enum class ParquetMetadataOperatorType : uint8_t {
META_DATA,
SCHEMA,
KEY_VALUE_META_DATA,
FILE_META_DATA,
BLOOM_PROBE
};
class ParquetMetadataFileProcessor {
public:
ParquetMetadataFileProcessor() = default;
virtual ~ParquetMetadataFileProcessor() = default;
void Initialize(ClientContext &context, OpenFileInfo &file_info) {
ParquetOptions parquet_options(context);
reader = make_uniq<ParquetReader>(context, file_info, parquet_options);
}
virtual void InitializeInternal(ClientContext &context) {};
virtual idx_t TotalRowCount() = 0;
virtual void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) = 0;
protected:
unique_ptr<ParquetReader> reader;
};
struct ParquetMetaDataBindData;
class ParquetMetaDataOperator {
public:
template <ParquetMetadataOperatorType OP_TYPE>
static unique_ptr<FunctionData> Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names);
static unique_ptr<GlobalTableFunctionState> InitGlobal(ClientContext &context, TableFunctionInitInput &input);
template <ParquetMetadataOperatorType OP_TYPE>
static unique_ptr<LocalTableFunctionState> InitLocal(ExecutionContext &context, TableFunctionInitInput &input,
GlobalTableFunctionState *global_state);
template <ParquetMetadataOperatorType OP_TYPE>
static void Function(ClientContext &context, TableFunctionInput &data_p, DataChunk &output);
static double Progress(ClientContext &context, const FunctionData *bind_data_p,
const GlobalTableFunctionState *global_state);
template <ParquetMetadataOperatorType OP_TYPE>
static void BindSchema(vector<LogicalType> &return_types, vector<string> &names);
};
struct ParquetMetadataGlobalState : public GlobalTableFunctionState {
ParquetMetadataGlobalState(unique_ptr<ParquetMetadataFilePaths> file_paths_p, ClientContext &context)
: file_paths(std::move(file_paths_p)) {
auto expand_result = file_paths->GetExpandResult();
if (expand_result == FileExpandResult::MULTIPLE_FILES) {
max_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
} else {
max_threads = 1;
}
}
idx_t MaxThreads() const override {
return max_threads;
}
bool NextFile(ClientContext &context, OpenFileInfo &result) {
return file_paths->NextFile(result);
}
double GetProgress() const {
// Not the most accurate: treats every file as equal in size and counts a file as done as soon as it has been handed out
unique_lock<mutex> lock(file_paths->file_lock);
return static_cast<double>(file_paths->scan_data.current_file_idx) / file_paths->file_list->GetTotalFileCount();
}
unique_ptr<ParquetMetadataFilePaths> file_paths;
idx_t max_threads;
};
struct ParquetMetadataLocalState : public LocalTableFunctionState {
unique_ptr<ParquetMetadataFileProcessor> processor;
bool file_exhausted = true;
idx_t row_idx = 0;
idx_t total_rows = 0;
};
template <class T>
static string ConvertParquetElementToString(T &&entry) {
duckdb::stringstream ss;
ss << entry;
return ss.str();
}
template <class T>
static string PrintParquetElementToString(T &&entry) {
duckdb::stringstream ss;
entry.printTo(ss);
return ss.str();
}
template <class T>
static Value ParquetElementString(T &&value, bool is_set) {
if (!is_set) {
return Value();
}
return Value(ConvertParquetElementToString(value));
}
static Value ParquetElementStringVal(const string &value, bool is_set) {
if (!is_set) {
return Value();
}
return Value(value);
}
template <class T>
static Value ParquetElementInteger(T &&value, bool is_set) {
if (!is_set) {
return Value();
}
return Value::INTEGER(value);
}
template <class T>
static Value ParquetElementBigint(T &&value, bool is_set) {
if (!is_set) {
return Value();
}
return Value::BIGINT(value);
}
static Value ParquetElementBoolean(bool value, bool is_set) {
if (!is_set) {
return Value();
}
return Value::BOOLEAN(value);
}
//===--------------------------------------------------------------------===//
// Row Group Meta Data
//===--------------------------------------------------------------------===//
class ParquetRowGroupMetadataProcessor : public ParquetMetadataFileProcessor {
public:
void InitializeInternal(ClientContext &context) override;
idx_t TotalRowCount() override;
void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) override;
private:
vector<ParquetColumnSchema> column_schemas;
};
template <>
void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::META_DATA>(vector<LogicalType> &return_types,
vector<string> &names) {
names.emplace_back("file_name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("row_group_id");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("row_group_num_rows");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("row_group_num_columns");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("row_group_bytes");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("column_id");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("file_offset");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("num_values");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("path_in_schema");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("stats_min");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("stats_max");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("stats_null_count");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("stats_distinct_count");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("stats_min_value");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("stats_max_value");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("compression");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("encodings");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("index_page_offset");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("dictionary_page_offset");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("data_page_offset");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("total_compressed_size");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("total_uncompressed_size");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("key_value_metadata");
return_types.emplace_back(LogicalType::MAP(LogicalType::BLOB, LogicalType::BLOB));
names.emplace_back("bloom_filter_offset");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("bloom_filter_length");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("min_is_exact");
return_types.emplace_back(LogicalType::BOOLEAN);
names.emplace_back("max_is_exact");
return_types.emplace_back(LogicalType::BOOLEAN);
names.emplace_back("row_group_compressed_bytes");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("geo_bbox");
return_types.emplace_back(LogicalType::STRUCT({
{"xmin", LogicalType::DOUBLE},
{"xmax", LogicalType::DOUBLE},
{"ymin", LogicalType::DOUBLE},
{"ymax", LogicalType::DOUBLE},
{"zmin", LogicalType::DOUBLE},
{"zmax", LogicalType::DOUBLE},
{"mmin", LogicalType::DOUBLE},
{"mmax", LogicalType::DOUBLE},
}));
names.emplace_back("geo_types");
return_types.emplace_back(LogicalType::LIST(LogicalType::VARCHAR));
}
static Value ConvertParquetStats(const LogicalType &type, const ParquetColumnSchema &schema_ele, bool stats_is_set,
const std::string &stats) {
if (!stats_is_set) {
return Value(LogicalType::VARCHAR);
}
return ParquetStatisticsUtils::ConvertValue(type, schema_ele, stats).DefaultCastAs(LogicalType::VARCHAR);
}
static Value ConvertParquetGeoStatsBBOX(const duckdb_parquet::GeospatialStatistics &stats) {
if (!stats.__isset.bbox) {
return Value(LogicalType::STRUCT({
{"xmin", LogicalType::DOUBLE},
{"xmax", LogicalType::DOUBLE},
{"ymin", LogicalType::DOUBLE},
{"ymax", LogicalType::DOUBLE},
{"zmin", LogicalType::DOUBLE},
{"zmax", LogicalType::DOUBLE},
{"mmin", LogicalType::DOUBLE},
{"mmax", LogicalType::DOUBLE},
}));
}
return Value::STRUCT({
{"xmin", Value::DOUBLE(stats.bbox.xmin)},
{"xmax", Value::DOUBLE(stats.bbox.xmax)},
{"ymin", Value::DOUBLE(stats.bbox.ymin)},
{"ymax", Value::DOUBLE(stats.bbox.ymax)},
{"zmin", stats.bbox.__isset.zmin ? Value::DOUBLE(stats.bbox.zmin) : Value(LogicalTypeId::DOUBLE)},
{"zmax", stats.bbox.__isset.zmax ? Value::DOUBLE(stats.bbox.zmax) : Value(LogicalTypeId::DOUBLE)},
{"mmin", stats.bbox.__isset.mmin ? Value::DOUBLE(stats.bbox.mmin) : Value(LogicalTypeId::DOUBLE)},
{"mmax", stats.bbox.__isset.mmax ? Value::DOUBLE(stats.bbox.mmax) : Value(LogicalTypeId::DOUBLE)},
});
}
static Value ConvertParquetGeoStatsTypes(const duckdb_parquet::GeospatialStatistics &stats) {
if (!stats.__isset.geospatial_types) {
return Value(LogicalType::LIST(LogicalType::VARCHAR));
}
vector<Value> types;
types.reserve(stats.geospatial_types.size());
GeometryTypeSet type_set;
for (auto &type : stats.geospatial_types) {
const auto geom_type = (type % 1000);
const auto vert_type = (type / 1000);
if (geom_type < 1 || geom_type > 7) {
throw InvalidInputException("Unsupported geometry type in Parquet geo metadata");
}
if (vert_type < 0 || vert_type > 3) {
throw InvalidInputException("Unsupported geometry vertex type in Parquet geo metadata");
}
type_set.Add(static_cast<GeometryType>(geom_type), static_cast<VertexType>(vert_type));
}
for (auto &type_name : type_set.ToString(true)) {
types.push_back(Value(type_name));
}
return Value::LIST(LogicalType::VARCHAR, types);
}
void ParquetRowGroupMetadataProcessor::InitializeInternal(ClientContext &context) {
auto meta_data = reader->GetFileMetadata();
column_schemas.clear();
for (idx_t schema_idx = 0; schema_idx < meta_data->schema.size(); schema_idx++) {
auto &schema_element = meta_data->schema[schema_idx];
if (schema_element.num_children > 0) {
continue;
}
ParquetColumnSchema column_schema;
column_schema.type = reader->DeriveLogicalType(schema_element, column_schema);
column_schemas.push_back(std::move(column_schema));
}
}
idx_t ParquetRowGroupMetadataProcessor::TotalRowCount() {
auto meta_data = reader->GetFileMetadata();
return meta_data->row_groups.size() * column_schemas.size();
}
void ParquetRowGroupMetadataProcessor::ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) {
auto meta_data = reader->GetFileMetadata();
idx_t col_idx = row_idx % column_schemas.size();
idx_t row_group_idx = row_idx / column_schemas.size();
auto &row_group = meta_data->row_groups[row_group_idx];
auto &column = row_group.columns[col_idx];
auto &column_schema = column_schemas[col_idx];
auto &col_meta = column.meta_data;
auto &stats = col_meta.statistics;
auto &column_type = column_schema.type;
// file_name
output.SetValue(0, output_idx, reader->file.path);
// row_group_id
output.SetValue(1, output_idx, Value::BIGINT(UnsafeNumericCast<int64_t>(row_group_idx)));
// row_group_num_rows
output.SetValue(2, output_idx, Value::BIGINT(row_group.num_rows));
// row_group_num_columns
output.SetValue(3, output_idx, Value::BIGINT(UnsafeNumericCast<int64_t>(row_group.columns.size())));
// row_group_bytes
output.SetValue(4, output_idx, Value::BIGINT(row_group.total_byte_size));
// column_id
output.SetValue(5, output_idx, Value::BIGINT(UnsafeNumericCast<int64_t>(col_idx)));
// file_offset
output.SetValue(6, output_idx, ParquetElementBigint(column.file_offset, row_group.__isset.file_offset));
// num_values
output.SetValue(7, output_idx, Value::BIGINT(col_meta.num_values));
// path_in_schema
output.SetValue(8, output_idx, StringUtil::Join(col_meta.path_in_schema, ", "));
// type
output.SetValue(9, output_idx, ConvertParquetElementToString(col_meta.type));
// stats_min
output.SetValue(10, output_idx, ConvertParquetStats(column_type, column_schema, stats.__isset.min, stats.min));
// stats_max
output.SetValue(11, output_idx, ConvertParquetStats(column_type, column_schema, stats.__isset.max, stats.max));
// stats_null_count
output.SetValue(12, output_idx, ParquetElementBigint(stats.null_count, stats.__isset.null_count));
// stats_distinct_count
output.SetValue(13, output_idx, ParquetElementBigint(stats.distinct_count, stats.__isset.distinct_count));
// stats_min_value
output.SetValue(14, output_idx,
ConvertParquetStats(column_type, column_schema, stats.__isset.min_value, stats.min_value));
// stats_max_value
output.SetValue(15, output_idx,
ConvertParquetStats(column_type, column_schema, stats.__isset.max_value, stats.max_value));
// compression
output.SetValue(16, output_idx, ConvertParquetElementToString(col_meta.codec));
// encodings
vector<string> encoding_string;
encoding_string.reserve(col_meta.encodings.size());
for (auto &encoding : col_meta.encodings) {
encoding_string.push_back(ConvertParquetElementToString(encoding));
}
output.SetValue(17, output_idx, Value(StringUtil::Join(encoding_string, ", ")));
// index_page_offset
output.SetValue(18, output_idx,
ParquetElementBigint(col_meta.index_page_offset, col_meta.__isset.index_page_offset));
// dictionary_page_offset
output.SetValue(19, output_idx,
ParquetElementBigint(col_meta.dictionary_page_offset, col_meta.__isset.dictionary_page_offset));
// data_page_offset
output.SetValue(20, output_idx, Value::BIGINT(col_meta.data_page_offset));
// total_compressed_size
output.SetValue(21, output_idx, Value::BIGINT(col_meta.total_compressed_size));
// total_uncompressed_size
output.SetValue(22, output_idx, Value::BIGINT(col_meta.total_uncompressed_size));
// key_value_metadata
vector<Value> map_keys, map_values;
for (auto &entry : col_meta.key_value_metadata) {
map_keys.push_back(Value::BLOB_RAW(entry.key));
map_values.push_back(Value::BLOB_RAW(entry.value));
}
output.SetValue(23, output_idx,
Value::MAP(LogicalType::BLOB, LogicalType::BLOB, std::move(map_keys), std::move(map_values)));
// bloom_filter_offset
output.SetValue(24, output_idx,
ParquetElementBigint(col_meta.bloom_filter_offset, col_meta.__isset.bloom_filter_offset));
// bloom_filter_length
output.SetValue(25, output_idx,
ParquetElementBigint(col_meta.bloom_filter_length, col_meta.__isset.bloom_filter_length));
// min_is_exact
output.SetValue(26, output_idx, ParquetElementBoolean(stats.is_min_value_exact, stats.__isset.is_min_value_exact));
// max_is_exact
output.SetValue(27, output_idx, ParquetElementBoolean(stats.is_max_value_exact, stats.__isset.is_max_value_exact));
// row_group_compressed_bytes
output.SetValue(28, output_idx,
ParquetElementBigint(row_group.total_compressed_size, row_group.__isset.total_compressed_size));
// geo_stats_bbox, LogicalType::STRUCT(...)
output.SetValue(29, output_idx, ConvertParquetGeoStatsBBOX(col_meta.geospatial_statistics));
// geo_stats_types, LogicalType::LIST(LogicalType::VARCHAR)
output.SetValue(30, output_idx, ConvertParquetGeoStatsTypes(col_meta.geospatial_statistics));
}
//===--------------------------------------------------------------------===//
// Schema Data
//===--------------------------------------------------------------------===//
class ParquetSchemaProcessor : public ParquetMetadataFileProcessor {
public:
idx_t TotalRowCount() override;
void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) override;
};
template <>
void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::SCHEMA>(vector<LogicalType> &return_types,
vector<string> &names) {
names.emplace_back("file_name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("type_length");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("repetition_type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("num_children");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("converted_type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("scale");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("precision");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("field_id");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("logical_type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("duckdb_type");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("column_id");
return_types.emplace_back(LogicalType::BIGINT);
}
static Value ParquetLogicalTypeToString(const duckdb_parquet::LogicalType &type, bool is_set) {
if (!is_set) {
return Value();
}
if (type.__isset.STRING) {
return Value(PrintParquetElementToString(type.STRING));
}
if (type.__isset.MAP) {
return Value(PrintParquetElementToString(type.MAP));
}
if (type.__isset.LIST) {
return Value(PrintParquetElementToString(type.LIST));
}
if (type.__isset.ENUM) {
return Value(PrintParquetElementToString(type.ENUM));
}
if (type.__isset.DECIMAL) {
return Value(PrintParquetElementToString(type.DECIMAL));
}
if (type.__isset.DATE) {
return Value(PrintParquetElementToString(type.DATE));
}
if (type.__isset.TIME) {
return Value(PrintParquetElementToString(type.TIME));
}
if (type.__isset.TIMESTAMP) {
return Value(PrintParquetElementToString(type.TIMESTAMP));
}
if (type.__isset.INTEGER) {
return Value(PrintParquetElementToString(type.INTEGER));
}
if (type.__isset.UNKNOWN) {
return Value(PrintParquetElementToString(type.UNKNOWN));
}
if (type.__isset.JSON) {
return Value(PrintParquetElementToString(type.JSON));
}
if (type.__isset.BSON) {
return Value(PrintParquetElementToString(type.BSON));
}
if (type.__isset.UUID) {
return Value(PrintParquetElementToString(type.UUID));
}
if (type.__isset.FLOAT16) {
return Value(PrintParquetElementToString(type.FLOAT16));
}
if (type.__isset.GEOMETRY) {
return Value(PrintParquetElementToString(type.GEOMETRY));
}
if (type.__isset.GEOGRAPHY) {
return Value(PrintParquetElementToString(type.GEOGRAPHY));
}
return Value();
}
idx_t ParquetSchemaProcessor::TotalRowCount() {
return reader->GetFileMetadata()->schema.size();
}
void ParquetSchemaProcessor::ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) {
auto meta_data = reader->GetFileMetadata();
const auto &column = meta_data->schema[row_idx];
// file_name
output.SetValue(0, output_idx, reader->file.path);
// name
output.SetValue(1, output_idx, column.name);
// type
output.SetValue(2, output_idx, ParquetElementString(column.type, column.__isset.type));
// type_length
output.SetValue(3, output_idx, ParquetElementInteger(column.type_length, column.__isset.type_length));
// repetition_type
output.SetValue(4, output_idx, ParquetElementString(column.repetition_type, column.__isset.repetition_type));
// num_children
output.SetValue(5, output_idx, ParquetElementBigint(column.num_children, column.__isset.num_children));
// converted_type
output.SetValue(6, output_idx, ParquetElementString(column.converted_type, column.__isset.converted_type));
// scale
output.SetValue(7, output_idx, ParquetElementBigint(column.scale, column.__isset.scale));
// precision
output.SetValue(8, output_idx, ParquetElementBigint(column.precision, column.__isset.precision));
// field_id
output.SetValue(9, output_idx, ParquetElementBigint(column.field_id, column.__isset.field_id));
// logical_type
output.SetValue(10, output_idx, ParquetLogicalTypeToString(column.logicalType, column.__isset.logicalType));
// duckdb_type
ParquetColumnSchema column_schema;
Value duckdb_type;
if (column.__isset.type) {
duckdb_type = reader->DeriveLogicalType(column, column_schema).ToString();
}
output.SetValue(11, output_idx, duckdb_type);
// column_id
output.SetValue(12, output_idx, Value::BIGINT(UnsafeNumericCast<int64_t>(row_idx)));
}
//===--------------------------------------------------------------------===//
// KV Meta Data
//===--------------------------------------------------------------------===//
class ParquetKeyValueMetadataProcessor : public ParquetMetadataFileProcessor {
public:
idx_t TotalRowCount() override;
void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) override;
};
template <>
void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::KEY_VALUE_META_DATA>(
vector<LogicalType> &return_types, vector<string> &names) {
names.emplace_back("file_name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("key");
return_types.emplace_back(LogicalType::BLOB);
names.emplace_back("value");
return_types.emplace_back(LogicalType::BLOB);
}
idx_t ParquetKeyValueMetadataProcessor::TotalRowCount() {
return reader->GetFileMetadata()->key_value_metadata.size();
}
void ParquetKeyValueMetadataProcessor::ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) {
auto meta_data = reader->GetFileMetadata();
auto &entry = meta_data->key_value_metadata[row_idx];
output.SetValue(0, output_idx, Value(reader->file.path));
output.SetValue(1, output_idx, Value::BLOB_RAW(entry.key));
output.SetValue(2, output_idx, Value::BLOB_RAW(entry.value));
}
//===--------------------------------------------------------------------===//
// File Meta Data
//===--------------------------------------------------------------------===//
class ParquetFileMetadataProcessor : public ParquetMetadataFileProcessor {
public:
idx_t TotalRowCount() override;
void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) override;
};
template <>
void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::FILE_META_DATA>(vector<LogicalType> &return_types,
vector<string> &names) {
names.emplace_back("file_name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("created_by");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("num_rows");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("num_row_groups");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("format_version");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("encryption_algorithm");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("footer_signing_key_metadata");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("file_size_bytes");
return_types.emplace_back(LogicalType::UBIGINT);
names.emplace_back("footer_size");
return_types.emplace_back(LogicalType::UBIGINT);
}
idx_t ParquetFileMetadataProcessor::TotalRowCount() {
return 1;
}
void ParquetFileMetadataProcessor::ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) {
auto meta_data = reader->GetFileMetadata();
// file_name
output.SetValue(0, output_idx, Value(reader->file.path));
// created_by
output.SetValue(1, output_idx, ParquetElementStringVal(meta_data->created_by, meta_data->__isset.created_by));
// num_rows
output.SetValue(2, output_idx, Value::BIGINT(meta_data->num_rows));
// num_row_groups
output.SetValue(3, output_idx, Value::BIGINT(UnsafeNumericCast<int64_t>(meta_data->row_groups.size())));
// format_version
output.SetValue(4, output_idx, Value::BIGINT(meta_data->version));
// encryption_algorithm
output.SetValue(5, output_idx,
ParquetElementString(meta_data->encryption_algorithm, meta_data->__isset.encryption_algorithm));
// footer_signing_key_metadata
output.SetValue(6, output_idx,
ParquetElementStringVal(meta_data->footer_signing_key_metadata,
meta_data->__isset.footer_signing_key_metadata));
// file_size_bytes
output.SetValue(7, output_idx, Value::UBIGINT(reader->GetHandle().GetFileSize()));
// footer_size
output.SetValue(8, output_idx, Value::UBIGINT(reader->metadata->footer_size));
}
//===--------------------------------------------------------------------===//
// Bloom Probe
//===--------------------------------------------------------------------===//
class ParquetBloomProbeProcessor : public ParquetMetadataFileProcessor {
public:
ParquetBloomProbeProcessor(const string &probe_column, const Value &probe_value);
void InitializeInternal(ClientContext &context) override;
idx_t TotalRowCount() override;
void ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) override;
private:
string probe_column_name;
Value probe_constant;
optional_idx probe_column_idx;
unique_ptr<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>> protocol;
optional_ptr<Allocator> allocator;
unique_ptr<ConstantFilter> filter;
};
template <>
void ParquetMetaDataOperator::BindSchema<ParquetMetadataOperatorType::BLOOM_PROBE>(vector<LogicalType> &return_types,
vector<string> &names) {
names.emplace_back("file_name");
return_types.emplace_back(LogicalType::VARCHAR);
names.emplace_back("row_group_id");
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("bloom_filter_excludes");
return_types.emplace_back(LogicalType::BOOLEAN);
}
ParquetBloomProbeProcessor::ParquetBloomProbeProcessor(const string &probe_column, const Value &probe_value)
: probe_column_name(probe_column), probe_constant(probe_value) {
}
void ParquetBloomProbeProcessor::InitializeInternal(ClientContext &context) {
probe_column_idx = optional_idx::Invalid();
for (idx_t column_idx = 0; column_idx < reader->columns.size(); column_idx++) {
if (reader->columns[column_idx].name == probe_column_name) {
probe_column_idx = column_idx;
break;
}
}
if (!probe_column_idx.IsValid()) {
throw InvalidInputException("Column %s not found in %s", probe_column_name, reader->file.path);
}
auto transport = duckdb_base_std::make_shared<ThriftFileTransport>(reader->GetHandle(), false);
protocol = make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
allocator = &BufferAllocator::Get(context);
filter = make_uniq<ConstantFilter>(
ExpressionType::COMPARE_EQUAL,
probe_constant.CastAs(context, reader->GetColumns()[probe_column_idx.GetIndex()].type));
}
idx_t ParquetBloomProbeProcessor::TotalRowCount() {
return reader->GetFileMetadata()->row_groups.size();
}
void ParquetBloomProbeProcessor::ReadRow(DataChunk &output, idx_t output_idx, idx_t row_idx) {
auto meta_data = reader->GetFileMetadata();
auto &row_group = meta_data->row_groups[row_idx];
auto &column = row_group.columns[probe_column_idx.GetIndex()];
D_ASSERT(!probe_constant.IsNull());
auto bloom_excludes = ParquetStatisticsUtils::BloomFilterExcludes(*filter, column.meta_data, *protocol, *allocator);
output.SetValue(0, output_idx, Value(reader->file.path));
output.SetValue(1, output_idx, Value::BIGINT(NumericCast<int64_t>(row_idx)));
output.SetValue(2, output_idx, Value::BOOLEAN(bloom_excludes));
}
//===--------------------------------------------------------------------===//
// Template Function Implementation
//===--------------------------------------------------------------------===//
template <ParquetMetadataOperatorType OP_TYPE>
unique_ptr<FunctionData> ParquetMetaDataOperator::Bind(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names) {
// Extract file paths from input using MultiFileReader (handles both single files and arrays)
auto multi_file_reader = MultiFileReader::CreateDefault("ParquetMetadata");
auto glob_input = FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "parquet");
auto result = make_uniq<ParquetMetaDataBindData>();
// Bind schema based on operation type
if (OP_TYPE == ParquetMetadataOperatorType::BLOOM_PROBE) {
auto probe_bind_data = make_uniq<ParquetBloomProbeBindData>();
D_ASSERT(input.inputs.size() == 3);
if (input.inputs[1].IsNull() || input.inputs[2].IsNull()) {
throw InvalidInputException("Can't have NULL parameters for parquet_bloom_probe");
}
probe_bind_data->probe_column_name = input.inputs[1].CastAs(context, LogicalType::VARCHAR).GetValue<string>();
probe_bind_data->probe_constant = input.inputs[2];
result = std::move(probe_bind_data);
}
result->file_paths = make_uniq<ParquetMetadataFilePaths>();
result->file_paths->file_list = multi_file_reader->CreateFileList(context, input.inputs[0], glob_input);
D_ASSERT(!result->file_paths->file_list->IsEmpty());
result->file_paths->file_list->InitializeScan(result->file_paths->scan_data);
BindSchema<OP_TYPE>(return_types, names);
return std::move(result);
}
unique_ptr<GlobalTableFunctionState> ParquetMetaDataOperator::InitGlobal(ClientContext &context,
TableFunctionInitInput &input) {
auto &bind_data = input.bind_data->CastNoConst<ParquetMetaDataBindData>();
return make_uniq<ParquetMetadataGlobalState>(std::move(bind_data.file_paths), context);
}
template <ParquetMetadataOperatorType OP_TYPE>
unique_ptr<LocalTableFunctionState> ParquetMetaDataOperator::InitLocal(ExecutionContext &context,
TableFunctionInitInput &input,
GlobalTableFunctionState *global_state) {
auto &bind_data = input.bind_data->Cast<ParquetMetaDataBindData>();
auto res = make_uniq<ParquetMetadataLocalState>();
switch (OP_TYPE) {
case ParquetMetadataOperatorType::META_DATA:
res->processor = make_uniq<ParquetRowGroupMetadataProcessor>();
break;
case ParquetMetadataOperatorType::SCHEMA:
res->processor = make_uniq<ParquetSchemaProcessor>();
break;
case ParquetMetadataOperatorType::KEY_VALUE_META_DATA:
res->processor = make_uniq<ParquetKeyValueMetadataProcessor>();
break;
case ParquetMetadataOperatorType::FILE_META_DATA:
res->processor = make_uniq<ParquetFileMetadataProcessor>();
break;
case ParquetMetadataOperatorType::BLOOM_PROBE: {
const auto &probe_bind_data = static_cast<const ParquetBloomProbeBindData &>(bind_data);
res->processor =
make_uniq<ParquetBloomProbeProcessor>(probe_bind_data.probe_column_name, probe_bind_data.probe_constant);
break;
}
default:
throw InternalException("Unsupported ParquetMetadataOperatorType");
}
return unique_ptr_cast<LocalTableFunctionState, ParquetMetadataLocalState>(std::move(res));
}
template <ParquetMetadataOperatorType OP_TYPE>
void ParquetMetaDataOperator::Function(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
auto &global_state = data_p.global_state->Cast<ParquetMetadataGlobalState>();
auto &local_state = data_p.local_state->Cast<ParquetMetadataLocalState>();
idx_t output_count = 0;
while (output_count < STANDARD_VECTOR_SIZE) {
// Check if we need a new file
if (local_state.file_exhausted) {
OpenFileInfo next_file;
if (!global_state.file_paths->NextFile(next_file)) {
break; // No more files to process
}
local_state.processor->Initialize(context, next_file);
local_state.processor->InitializeInternal(context);
local_state.file_exhausted = false;
local_state.row_idx = 0;
local_state.total_rows = local_state.processor->TotalRowCount();
}
idx_t left_in_vector = STANDARD_VECTOR_SIZE - output_count;
idx_t left_in_file = local_state.total_rows - local_state.row_idx;
idx_t rows_to_output = 0;
if (left_in_file <= left_in_vector) {
local_state.file_exhausted = true;
rows_to_output = left_in_file;
} else {
rows_to_output = left_in_vector;
}
for (idx_t i = 0; i < rows_to_output; ++i) {
local_state.processor->ReadRow(output, output_count + i, local_state.row_idx + i);
}
output_count += rows_to_output;
local_state.row_idx += rows_to_output;
}
output.SetCardinality(output_count);
}
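// The loop above emits at most STANDARD_VECTOR_SIZE rows per call, pulling the next file from the shared
// global state whenever the current file's rows run out, so metadata for an arbitrary number of files is
// streamed one vector at a time rather than materialized up front.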
double ParquetMetaDataOperator::Progress(ClientContext &context, const FunctionData *bind_data_p,
const GlobalTableFunctionState *global_state) {
auto &global_data = global_state->Cast<ParquetMetadataGlobalState>();
return global_data.GetProgress() * 100.0;
}
ParquetMetaDataFunction::ParquetMetaDataFunction()
: TableFunction("parquet_metadata", {LogicalType::VARCHAR},
ParquetMetaDataOperator::Function<ParquetMetadataOperatorType::META_DATA>,
ParquetMetaDataOperator::Bind<ParquetMetadataOperatorType::META_DATA>,
ParquetMetaDataOperator::InitGlobal,
ParquetMetaDataOperator::InitLocal<ParquetMetadataOperatorType::META_DATA>) {
table_scan_progress = ParquetMetaDataOperator::Progress;
}
ParquetSchemaFunction::ParquetSchemaFunction()
: TableFunction("parquet_schema", {LogicalType::VARCHAR},
ParquetMetaDataOperator::Function<ParquetMetadataOperatorType::SCHEMA>,
ParquetMetaDataOperator::Bind<ParquetMetadataOperatorType::SCHEMA>,
ParquetMetaDataOperator::InitGlobal,
ParquetMetaDataOperator::InitLocal<ParquetMetadataOperatorType::SCHEMA>) {
table_scan_progress = ParquetMetaDataOperator::Progress;
}
ParquetKeyValueMetadataFunction::ParquetKeyValueMetadataFunction()
: TableFunction("parquet_kv_metadata", {LogicalType::VARCHAR},
ParquetMetaDataOperator::Function<ParquetMetadataOperatorType::KEY_VALUE_META_DATA>,
ParquetMetaDataOperator::Bind<ParquetMetadataOperatorType::KEY_VALUE_META_DATA>,
ParquetMetaDataOperator::InitGlobal,
ParquetMetaDataOperator::InitLocal<ParquetMetadataOperatorType::KEY_VALUE_META_DATA>) {
table_scan_progress = ParquetMetaDataOperator::Progress;
}
ParquetFileMetadataFunction::ParquetFileMetadataFunction()
: TableFunction("parquet_file_metadata", {LogicalType::VARCHAR},
ParquetMetaDataOperator::Function<ParquetMetadataOperatorType::FILE_META_DATA>,
ParquetMetaDataOperator::Bind<ParquetMetadataOperatorType::FILE_META_DATA>,
ParquetMetaDataOperator::InitGlobal,
ParquetMetaDataOperator::InitLocal<ParquetMetadataOperatorType::FILE_META_DATA>) {
table_scan_progress = ParquetMetaDataOperator::Progress;
}
ParquetBloomProbeFunction::ParquetBloomProbeFunction()
: TableFunction("parquet_bloom_probe", {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::ANY},
ParquetMetaDataOperator::Function<ParquetMetadataOperatorType::BLOOM_PROBE>,
ParquetMetaDataOperator::Bind<ParquetMetadataOperatorType::BLOOM_PROBE>,
ParquetMetaDataOperator::InitGlobal,
ParquetMetaDataOperator::InitLocal<ParquetMetadataOperatorType::BLOOM_PROBE>) {
table_scan_progress = ParquetMetaDataOperator::Progress;
}
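// Illustrative SQL usage of the functions registered above (assuming the standard DuckDB invocation of
// table functions; the argument shapes follow the signatures declared in the constructors):
//   SELECT * FROM parquet_metadata('data.parquet');
//   SELECT * FROM parquet_schema('data/*.parquet');
//   SELECT * FROM parquet_kv_metadata('data.parquet');
//   SELECT * FROM parquet_file_metadata('data.parquet');
//   SELECT * FROM parquet_bloom_probe('data.parquet', 'some_column', 42);
// The first argument is expanded through MultiFileReader in Bind, so globs are accepted as well.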
} // namespace duckdb


@@ -0,0 +1,594 @@
#include "parquet_multi_file_info.hpp"
#include "duckdb/common/multi_file/multi_file_function.hpp"
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "parquet_crypto.hpp"
#include "duckdb/function/table_function.hpp"
namespace duckdb {
struct ParquetReadBindData : public TableFunctionData {
// These come from the initial_reader, but need to be stored in case the initial_reader is removed by a filter
idx_t initial_file_cardinality;
idx_t initial_file_row_groups;
idx_t explicit_cardinality = 0; // can be set to inject exterior cardinality knowledge (e.g. from a data lake)
unique_ptr<ParquetFileReaderOptions> options;
ParquetOptions &GetParquetOptions() {
return options->options;
}
const ParquetOptions &GetParquetOptions() const {
return options->options;
}
unique_ptr<FunctionData> Copy() const override {
auto result = make_uniq<ParquetReadBindData>();
result->initial_file_cardinality = initial_file_cardinality;
result->initial_file_row_groups = initial_file_row_groups;
result->explicit_cardinality = explicit_cardinality;
result->options = make_uniq<ParquetFileReaderOptions>(options->options);
return std::move(result);
}
};
struct ParquetReadGlobalState : public GlobalTableFunctionState {
explicit ParquetReadGlobalState(optional_ptr<const PhysicalOperator> op_p)
: row_group_index(0), batch_index(0), op(op_p) {
}
//! Index of row group within file currently up for scanning
idx_t row_group_index;
//! Batch index of the next row group to be scanned
idx_t batch_index;
//! (Optional) pointer to physical operator performing the scan
optional_ptr<const PhysicalOperator> op;
};
struct ParquetReadLocalState : public LocalTableFunctionState {
ParquetReaderScanState scan_state;
};
static void ParseFileRowNumberOption(MultiFileReaderBindData &bind_data, ParquetOptions &options,
vector<LogicalType> &return_types, vector<string> &names) {
if (options.file_row_number) {
if (StringUtil::CIFind(names, "file_row_number") != DConstants::INVALID_INDEX) {
throw BinderException(
"Using file_row_number option on file with column named file_row_number is not supported");
}
return_types.emplace_back(LogicalType::BIGINT);
names.emplace_back("file_row_number");
}
}
static void BindSchema(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
MultiFileBindData &bind_data) {
auto &parquet_bind = bind_data.bind_data->Cast<ParquetReadBindData>();
auto &options = parquet_bind.GetParquetOptions();
D_ASSERT(!options.schema.empty());
auto &file_options = bind_data.file_options;
if (file_options.union_by_name || file_options.hive_partitioning) {
throw BinderException("Parquet schema cannot be combined with union_by_name=true or hive_partitioning=true");
}
auto &reader_bind = bind_data.reader_bind;
vector<string> schema_col_names;
vector<LogicalType> schema_col_types;
schema_col_names.reserve(options.schema.size());
schema_col_types.reserve(options.schema.size());
bool match_by_field_id;
if (!options.schema.empty()) {
auto &column = options.schema[0];
if (column.identifier.type().id() == LogicalTypeId::INTEGER) {
match_by_field_id = true;
} else {
match_by_field_id = false;
}
} else {
match_by_field_id = false;
}
for (idx_t i = 0; i < options.schema.size(); i++) {
const auto &column = options.schema[i];
schema_col_names.push_back(column.name);
schema_col_types.push_back(column.type);
auto res = MultiFileColumnDefinition(column.name, column.type);
res.identifier = column.identifier;
#ifdef DEBUG
if (match_by_field_id) {
D_ASSERT(res.identifier.type().id() == LogicalTypeId::INTEGER);
} else {
D_ASSERT(res.identifier.type().id() == LogicalTypeId::VARCHAR);
}
#endif
res.default_expression = make_uniq<ConstantExpression>(column.default_value);
reader_bind.schema.emplace_back(res);
}
ParseFileRowNumberOption(reader_bind, options, return_types, names);
if (options.file_row_number) {
MultiFileColumnDefinition res("file_row_number", LogicalType::BIGINT);
res.identifier = Value::INTEGER(MultiFileReader::ORDINAL_FIELD_ID);
schema_col_names.push_back(res.name);
schema_col_types.push_back(res.type);
reader_bind.schema.emplace_back(res);
}
if (match_by_field_id) {
reader_bind.mapping = MultiFileColumnMappingMode::BY_FIELD_ID;
} else {
reader_bind.mapping = MultiFileColumnMappingMode::BY_NAME;
}
// perform the binding on the obtained set of names + types
bind_data.multi_file_reader->BindOptions(file_options, *bind_data.file_list, schema_col_types, schema_col_names,
reader_bind);
names = schema_col_names;
return_types = schema_col_types;
D_ASSERT(names.size() == return_types.size());
}
unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::CreateInterface(ClientContext &context) {
return make_uniq<ParquetMultiFileInfo>();
}
void ParquetMultiFileInfo::BindReader(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
MultiFileBindData &bind_data) {
auto &parquet_bind = bind_data.bind_data->Cast<ParquetReadBindData>();
auto &options = parquet_bind.GetParquetOptions();
if (!options.schema.empty()) {
BindSchema(context, return_types, names, bind_data);
} else {
bind_data.reader_bind =
bind_data.multi_file_reader->BindReader(context, return_types, names, *bind_data.file_list, bind_data,
*parquet_bind.options, bind_data.file_options);
}
}
static bool GetBooleanArgument(const string &key, const vector<Value> &option_values) {
if (option_values.empty()) {
return true;
}
Value boolean_value;
string error_message;
if (!option_values[0].DefaultTryCastAs(LogicalType::BOOLEAN, boolean_value, &error_message)) {
throw InvalidInputException("Unable to cast \"%s\" to BOOLEAN for Parquet option \"%s\"",
option_values[0].ToString(), key);
}
return BooleanValue::Get(boolean_value);
}
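// An option supplied without a value (an empty value list) is treated as TRUE by the helper above, so
// e.g. "(file_row_number)" and "(file_row_number true)" in a COPY option list are expected to behave the
// same (assumption about the COPY syntax; the helper itself only sees the parsed value list).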
static bool ParquetScanPushdownExpression(ClientContext &context, const LogicalGet &get, Expression &expr) {
return true;
}
static void VerifyParquetSchemaParameter(const Value &schema) {
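// The MAP(...) expression below only spells out the expected shape of the 'schema' argument; its result
// is discarded and the explicit checks that follow do the actual validation.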
LogicalType::MAP(LogicalType::BLOB, LogicalType::STRUCT({{{"name", LogicalType::VARCHAR},
{"type", LogicalType::VARCHAR},
{"default_value", LogicalType::VARCHAR}}}));
auto &map_type = schema.type();
if (map_type.id() != LogicalTypeId::MAP) {
throw InvalidInputException("'schema' expects a value of type MAP, not %s",
LogicalTypeIdToString(map_type.id()));
}
auto &key_type = MapType::KeyType(map_type);
auto &value_type = MapType::ValueType(map_type);
if (value_type.id() != LogicalTypeId::STRUCT) {
throw InvalidInputException("'schema' expects a STRUCT as the value type of the map");
}
auto &children = StructType::GetChildTypes(value_type);
if (children.size() < 3) {
throw InvalidInputException(
"'schema' expects the STRUCT to have 3 children, 'name', 'type' and 'default_value");
}
if (!StringUtil::CIEquals(children[0].first, "name")) {
throw InvalidInputException("'schema' expects the first field of the struct to be called 'name'");
}
if (children[0].second.id() != LogicalTypeId::VARCHAR) {
throw InvalidInputException("'schema' expects the 'name' field to be of type VARCHAR, not %s",
LogicalTypeIdToString(children[0].second.id()));
}
if (!StringUtil::CIEquals(children[1].first, "type")) {
throw InvalidInputException("'schema' expects the second field of the struct to be called 'type'");
}
if (children[1].second.id() != LogicalTypeId::VARCHAR) {
throw InvalidInputException("'schema' expects the 'type' field to be of type VARCHAR, not %s",
LogicalTypeIdToString(children[1].second.id()));
}
if (!StringUtil::CIEquals(children[2].first, "default_value")) {
throw InvalidInputException("'schema' expects the third field of the struct to be called 'default_value'");
}
//! NOTE: default_value can be any type
if (key_type.id() != LogicalTypeId::INTEGER && key_type.id() != LogicalTypeId::VARCHAR) {
throw InvalidInputException(
"'schema' expects the value type of the map to be either INTEGER or VARCHAR, not %s",
LogicalTypeIdToString(key_type.id()));
}
}
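// A hypothetical example of a 'schema' value that passes the checks above (the exact literal syntax is
// whatever DuckDB's MAP/STRUCT constructors accept; per-column parsing happens later in
// ParquetColumnDefinition::FromSchemaValue):
//   schema = MAP {
//       1: {name: 'id',    type: 'INTEGER', default_value: NULL},
//       2: {name: 'label', type: 'VARCHAR', default_value: 'unknown'}
//   }
// Keys are either INTEGER field ids or VARCHAR column names; the struct must expose 'name', 'type' and
// 'default_value' in that order.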
static void ParquetScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
const TableFunction &function) {
auto &bind_data = bind_data_p->Cast<MultiFileBindData>();
auto &parquet_data = bind_data.bind_data->Cast<ParquetReadBindData>();
vector<string> files;
for (auto &file : bind_data.file_list->GetAllFiles()) {
files.emplace_back(file.path);
}
serializer.WriteProperty(100, "files", files);
serializer.WriteProperty(101, "types", bind_data.types);
serializer.WriteProperty(102, "names", bind_data.names);
ParquetOptionsSerialization serialization(parquet_data.GetParquetOptions(), bind_data.file_options);
serializer.WriteProperty(103, "parquet_options", serialization);
if (serializer.ShouldSerialize(3)) {
serializer.WriteProperty(104, "table_columns", bind_data.table_columns);
}
}
static unique_ptr<FunctionData> ParquetScanDeserialize(Deserializer &deserializer, TableFunction &function) {
auto &context = deserializer.Get<ClientContext &>();
auto files = deserializer.ReadProperty<vector<string>>(100, "files");
auto types = deserializer.ReadProperty<vector<LogicalType>>(101, "types");
auto names = deserializer.ReadProperty<vector<string>>(102, "names");
auto serialization = deserializer.ReadProperty<ParquetOptionsSerialization>(103, "parquet_options");
auto table_columns =
deserializer.ReadPropertyWithExplicitDefault<vector<string>>(104, "table_columns", vector<string> {});
vector<Value> file_path;
for (auto &path : files) {
file_path.emplace_back(path);
}
FileGlobInput input(FileGlobOptions::FALLBACK_GLOB, "parquet");
auto multi_file_reader = MultiFileReader::Create(function);
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), input);
auto parquet_options = make_uniq<ParquetFileReaderOptions>(std::move(serialization.parquet_options));
auto interface = make_uniq<ParquetMultiFileInfo>();
auto bind_data = MultiFileFunction<ParquetMultiFileInfo>::MultiFileBindInternal(
context, std::move(multi_file_reader), std::move(file_list), types, names,
std::move(serialization.file_options), std::move(parquet_options), std::move(interface));
bind_data->Cast<MultiFileBindData>().table_columns = std::move(table_columns);
return bind_data;
}
static vector<column_t> ParquetGetRowIdColumns(ClientContext &context, optional_ptr<FunctionData> bind_data) {
vector<column_t> result;
result.emplace_back(MultiFileReader::COLUMN_IDENTIFIER_FILE_INDEX);
result.emplace_back(MultiFileReader::COLUMN_IDENTIFIER_FILE_ROW_NUMBER);
return result;
}
static vector<PartitionStatistics> ParquetGetPartitionStats(ClientContext &context, GetPartitionStatsInput &input) {
auto &bind_data = input.bind_data->Cast<MultiFileBindData>();
vector<PartitionStatistics> result;
if (bind_data.file_list->GetExpandResult() == FileExpandResult::SINGLE_FILE && bind_data.initial_reader) {
// we have read the metadata - get the partitions for this reader
auto &reader = bind_data.initial_reader->Cast<ParquetReader>();
reader.GetPartitionStats(result);
return result;
}
// if we are reading multiple files - we check if we have caching enabled
if (!ParquetReader::MetadataCacheEnabled(context)) {
// no caching - bail
return result;
}
// caching is enabled - check if we have ALL of the metadata cached
vector<shared_ptr<ParquetFileMetadataCache>> caches;
for (auto &file : bind_data.file_list->Files()) {
auto metadata_entry = ParquetReader::GetMetadataCacheEntry(context, file);
if (!metadata_entry) {
// no cache entry found
return result;
}
// check if the file has any deletes
if (file.extended_info) {
auto entry = file.extended_info->options.find("has_deletes");
if (entry != file.extended_info->options.end()) {
if (BooleanValue::Get(entry->second)) {
// the file has deletes - skip emitting partition stats
// FIXME: we could emit partition stats but set count to `COUNT_APPROXIMATE` instead of
// `COUNT_EXACT`
return result;
}
}
}
// check if the cache is valid based ONLY on the OpenFileInfo (do not do any file system requests here)
auto is_valid = metadata_entry->IsValid(file);
if (is_valid != ParquetCacheValidity::VALID) {
return result;
}
caches.push_back(std::move(metadata_entry));
}
// all caches are valid! we can return the partition stats
for (auto &cache : caches) {
ParquetReader::GetPartitionStats(*cache->metadata, result);
}
return result;
}
TableFunctionSet ParquetScanFunction::GetFunctionSet() {
MultiFileFunction<ParquetMultiFileInfo> table_function("parquet_scan");
table_function.named_parameters["binary_as_string"] = LogicalType::BOOLEAN;
table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN;
table_function.named_parameters["debug_use_openssl"] = LogicalType::BOOLEAN;
table_function.named_parameters["compression"] = LogicalType::VARCHAR;
table_function.named_parameters["explicit_cardinality"] = LogicalType::UBIGINT;
table_function.named_parameters["schema"] = LogicalTypeId::ANY;
table_function.named_parameters["encryption_config"] = LogicalTypeId::ANY;
table_function.named_parameters["parquet_version"] = LogicalType::VARCHAR;
table_function.named_parameters["can_have_nan"] = LogicalType::BOOLEAN;
table_function.statistics = MultiFileFunction<ParquetMultiFileInfo>::MultiFileScanStats;
table_function.serialize = ParquetScanSerialize;
table_function.deserialize = ParquetScanDeserialize;
table_function.get_row_id_columns = ParquetGetRowIdColumns;
table_function.pushdown_expression = ParquetScanPushdownExpression;
table_function.get_partition_stats = ParquetGetPartitionStats;
table_function.filter_pushdown = true;
table_function.filter_prune = true;
table_function.late_materialization = true;
return MultiFileReader::CreateFunctionSet(static_cast<TableFunction>(table_function));
}
unique_ptr<BaseFileReaderOptions> ParquetMultiFileInfo::InitializeOptions(ClientContext &context,
optional_ptr<TableFunctionInfo> info) {
return make_uniq<ParquetFileReaderOptions>(context);
}
bool ParquetMultiFileInfo::ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
BaseFileReaderOptions &file_options, vector<string> &expected_names,
vector<LogicalType> &expected_types) {
auto &parquet_options = file_options.Cast<ParquetFileReaderOptions>();
auto &options = parquet_options.options;
if (key == "compression" || key == "codec" || key == "row_group_size") {
// CODEC/COMPRESSION and ROW_GROUP_SIZE options have no effect on parquet read.
// These options are determined from the file.
return true;
}
if (key == "binary_as_string") {
options.binary_as_string = GetBooleanArgument(key, values);
return true;
}
if (key == "file_row_number") {
options.file_row_number = GetBooleanArgument(key, values);
return true;
}
if (key == "debug_use_openssl") {
options.debug_use_openssl = GetBooleanArgument(key, values);
return true;
}
if (key == "encryption_config") {
if (values.size() != 1) {
throw BinderException("Parquet encryption_config cannot be empty!");
}
options.encryption_config = ParquetEncryptionConfig::Create(context, values[0]);
return true;
}
if (key == "can_have_nan") {
if (values.size() != 1) {
throw BinderException("Parquet can_have_nan cannot be empty!");
}
options.can_have_nan = GetBooleanArgument(key, values);
return true;
}
return false;
}
bool ParquetMultiFileInfo::ParseOption(ClientContext &context, const string &original_key, const Value &val,
MultiFileOptions &file_options, BaseFileReaderOptions &base_options) {
auto &parquet_options = base_options.Cast<ParquetFileReaderOptions>();
auto &options = parquet_options.options;
auto key = StringUtil::Lower(original_key);
if (val.IsNull()) {
throw BinderException("Cannot use NULL as argument to %s", original_key);
}
if (key == "compression") {
// COMPRESSION has no effect on parquet read.
// These options are determined from the file.
return true;
}
if (key == "binary_as_string") {
options.binary_as_string = BooleanValue::Get(val);
return true;
}
if (key == "variant_legacy_encoding") {
options.variant_legacy_encoding = BooleanValue::Get(val);
return true;
}
if (key == "file_row_number") {
options.file_row_number = BooleanValue::Get(val);
return true;
}
if (key == "debug_use_openssl") {
options.debug_use_openssl = BooleanValue::Get(val);
return true;
}
if (key == "can_have_nan") {
options.can_have_nan = BooleanValue::Get(val);
return true;
}
if (key == "schema") {
// Argument is a map that defines the schema
const auto &schema_value = val;
VerifyParquetSchemaParameter(schema_value);
const auto column_values = ListValue::GetChildren(schema_value);
if (column_values.empty()) {
throw BinderException("Parquet schema cannot be empty");
}
options.schema.reserve(column_values.size());
for (idx_t i = 0; i < column_values.size(); i++) {
options.schema.emplace_back(ParquetColumnDefinition::FromSchemaValue(context, column_values[i]));
}
file_options.auto_detect_hive_partitioning = false;
return true;
}
if (key == "explicit_cardinality") {
options.explicit_cardinality = UBigIntValue::Get(val);
return true;
}
if (key == "encryption_config") {
options.encryption_config = ParquetEncryptionConfig::Create(context, val);
return true;
}
return false;
}
unique_ptr<TableFunctionData> ParquetMultiFileInfo::InitializeBindData(MultiFileBindData &multi_file_data,
unique_ptr<BaseFileReaderOptions> options_p) {
auto result = make_uniq<ParquetReadBindData>();
// Set the explicit cardinality if requested
result->options = unique_ptr_cast<BaseFileReaderOptions, ParquetFileReaderOptions>(std::move(options_p));
auto &parquet_options = result->GetParquetOptions();
if (parquet_options.explicit_cardinality) {
auto file_count = multi_file_data.file_list->GetTotalFileCount();
result->explicit_cardinality = parquet_options.explicit_cardinality;
result->initial_file_cardinality = result->explicit_cardinality / (file_count ? file_count : 1);
}
return std::move(result);
}
void ParquetMultiFileInfo::GetBindInfo(const TableFunctionData &bind_data_p, BindInfo &info) {
auto &bind_data = bind_data_p.Cast<ParquetReadBindData>();
auto &parquet_options = bind_data.GetParquetOptions();
info.type = ScanType::PARQUET;
info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_options.binary_as_string));
info.InsertOption("file_row_number", Value::BOOLEAN(parquet_options.file_row_number));
info.InsertOption("debug_use_openssl", Value::BOOLEAN(parquet_options.debug_use_openssl));
}
optional_idx ParquetMultiFileInfo::MaxThreads(const MultiFileBindData &bind_data_p,
const MultiFileGlobalState &global_state,
FileExpandResult expand_result) {
if (expand_result == FileExpandResult::MULTIPLE_FILES) {
// always launch max threads if we are reading multiple files
return optional_idx();
}
auto &bind_data = bind_data_p.bind_data->Cast<ParquetReadBindData>();
return MaxValue(bind_data.initial_file_row_groups, static_cast<idx_t>(1));
}
void ParquetMultiFileInfo::FinalizeBindData(MultiFileBindData &multi_file_data) {
auto &bind_data = multi_file_data.bind_data->Cast<ParquetReadBindData>();
if (multi_file_data.initial_reader) {
auto &initial_reader = multi_file_data.initial_reader->Cast<ParquetReader>();
bind_data.initial_file_cardinality = initial_reader.NumRows();
bind_data.initial_file_row_groups = initial_reader.NumRowGroups();
bind_data.options->options = initial_reader.parquet_options;
}
}
unique_ptr<NodeStatistics> ParquetMultiFileInfo::GetCardinality(const MultiFileBindData &bind_data_p,
idx_t file_count) {
auto &bind_data = bind_data_p.bind_data->Cast<ParquetReadBindData>();
if (bind_data.explicit_cardinality) {
return make_uniq<NodeStatistics>(bind_data.explicit_cardinality);
}
return make_uniq<NodeStatistics>(MaxValue(bind_data.initial_file_cardinality, (idx_t)1) * file_count);
}
unique_ptr<BaseStatistics> ParquetReader::GetStatistics(ClientContext &context, const string &name) {
return ReadStatistics(name);
}
double ParquetReader::GetProgressInFile(ClientContext &context) {
auto read_rows = rows_read.load();
return 100.0 * (static_cast<double>(read_rows) / static_cast<double>(NumRows()));
}
void ParquetMultiFileInfo::GetVirtualColumns(ClientContext &, MultiFileBindData &, virtual_column_map_t &result) {
result.insert(make_pair(MultiFileReader::COLUMN_IDENTIFIER_FILE_ROW_NUMBER,
TableColumn("file_row_number", LogicalType::BIGINT)));
}
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &,
BaseUnionData &union_data_p,
const MultiFileBindData &bind_data_p) {
auto &union_data = union_data_p.Cast<ParquetUnionData>();
return make_shared_ptr<ParquetReader>(context, union_data.file, union_data.options, union_data.metadata);
}
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, GlobalTableFunctionState &,
const OpenFileInfo &file, idx_t file_idx,
const MultiFileBindData &multi_bind_data) {
auto &bind_data = multi_bind_data.bind_data->Cast<ParquetReadBindData>();
return make_shared_ptr<ParquetReader>(context, file, bind_data.GetParquetOptions());
}
shared_ptr<BaseFileReader> ParquetMultiFileInfo::CreateReader(ClientContext &context, const OpenFileInfo &file,
BaseFileReaderOptions &options_p,
const MultiFileOptions &) {
auto &options = options_p.Cast<ParquetFileReaderOptions>();
return make_shared_ptr<ParquetReader>(context, file, options.options);
}
shared_ptr<BaseUnionData> ParquetReader::GetUnionData(idx_t file_idx) {
auto result = make_uniq<ParquetUnionData>(file);
for (auto &column : columns) {
result->names.push_back(column.name);
result->types.push_back(column.type);
}
if (file_idx == 0) {
result->options = parquet_options;
result->metadata = metadata;
result->reader = shared_from_this();
} else {
result->options = std::move(parquet_options);
result->metadata = std::move(metadata);
result->root_schema = std::move(root_schema);
}
return std::move(result);
}
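// For the first file (file_idx == 0) the union data keeps the reader itself alive via shared_from_this(),
// presumably so its already-parsed metadata can be reused by the subsequent scan; for the remaining files
// only options, metadata and root schema are moved out and the temporary reader can be dropped.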
unique_ptr<GlobalTableFunctionState> ParquetMultiFileInfo::InitializeGlobalState(ClientContext &, MultiFileBindData &,
MultiFileGlobalState &global_state) {
return make_uniq<ParquetReadGlobalState>(global_state.op);
}
unique_ptr<LocalTableFunctionState> ParquetMultiFileInfo::InitializeLocalState(ExecutionContext &,
GlobalTableFunctionState &) {
return make_uniq<ParquetReadLocalState>();
}
bool ParquetReader::TryInitializeScan(ClientContext &context, GlobalTableFunctionState &gstate_p,
LocalTableFunctionState &lstate_p) {
auto &gstate = gstate_p.Cast<ParquetReadGlobalState>();
auto &lstate = lstate_p.Cast<ParquetReadLocalState>();
if (gstate.row_group_index >= NumRowGroups()) {
// scanned all row groups in this file
return false;
}
// The current reader has rowgroups left to be scanned
vector<idx_t> group_indexes {gstate.row_group_index};
InitializeScan(context, lstate.scan_state, group_indexes);
gstate.row_group_index++;
return true;
}
void ParquetReader::FinishFile(ClientContext &context, GlobalTableFunctionState &gstate_p) {
auto &gstate = gstate_p.Cast<ParquetReadGlobalState>();
gstate.row_group_index = 0;
}
void ParquetReader::Scan(ClientContext &context, GlobalTableFunctionState &gstate_p,
LocalTableFunctionState &local_state_p, DataChunk &chunk) {
auto &gstate = gstate_p.Cast<ParquetReadGlobalState>();
auto &local_state = local_state_p.Cast<ParquetReadLocalState>();
local_state.scan_state.op = gstate.op;
Scan(context, local_state.scan_state, chunk);
}
unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::Copy() {
return make_uniq<ParquetMultiFileInfo>();
}
FileGlobInput ParquetMultiFileInfo::GetGlobInput() {
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "parquet");
}
} // namespace duckdb

File diff suppressed because it is too large


@@ -0,0 +1,81 @@
#include "parquet_shredding.hpp"
#include "duckdb/common/exception/binder_exception.hpp"
#include "duckdb/common/type_visitor.hpp"
namespace duckdb {
ChildShreddingTypes::ChildShreddingTypes() : types(make_uniq<case_insensitive_map_t<ShreddingType>>()) {
}
ChildShreddingTypes ChildShreddingTypes::Copy() const {
ChildShreddingTypes result;
for (const auto &type : *types) {
result.types->emplace(type.first, type.second.Copy());
}
return result;
}
ShreddingType::ShreddingType() : set(false) {
}
ShreddingType::ShreddingType(const LogicalType &type) : set(true), type(type) {
}
ShreddingType ShreddingType::Copy() const {
auto result = set ? ShreddingType(type) : ShreddingType();
result.children = children.Copy();
return result;
}
static ShreddingType ConvertShreddingTypeRecursive(const LogicalType &type) {
if (type.id() == LogicalTypeId::VARIANT) {
return ShreddingType(LogicalType(LogicalTypeId::ANY));
}
if (!type.IsNested()) {
return ShreddingType(type);
}
switch (type.id()) {
case LogicalTypeId::STRUCT: {
ShreddingType res(type);
auto &children = StructType::GetChildTypes(type);
for (auto &entry : children) {
res.AddChild(entry.first, ConvertShreddingTypeRecursive(entry.second));
}
return res;
}
case LogicalTypeId::LIST: {
ShreddingType res(type);
const auto &child = ListType::GetChildType(type);
res.AddChild("element", ConvertShreddingTypeRecursive(child));
return res;
}
default:
break;
}
throw BinderException("VARIANT can only be shredded on LIST/STRUCT/ANY/non-nested type, not %s", type.ToString());
}
void ShreddingType::AddChild(const string &name, ShreddingType &&child) {
children.types->emplace(name, std::move(child));
}
optional_ptr<const ShreddingType> ShreddingType::GetChild(const string &name) const {
auto it = children.types->find(name);
if (it == children.types->end()) {
return nullptr;
}
return it->second;
}
ShreddingType ShreddingType::GetShreddingTypes(const Value &val) {
if (val.type().id() != LogicalTypeId::VARCHAR) {
throw BinderException("SHREDDING value should be of type VARCHAR, a stringified type to use for the column");
}
auto type_str = val.GetValue<string>();
auto logical_type = TransformStringToLogicalType(type_str);
return ConvertShreddingTypeRecursive(logical_type);
}
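// Hypothetical example of the conversion above: GetShreddingTypes(Value("STRUCT(a INTEGER, b VARCHAR[])"))
// parses the string with TransformStringToLogicalType and expands it recursively into a ShreddingType for
// the struct with children "a" (INTEGER) and "b" (LIST whose "element" child is VARCHAR); a nested VARIANT
// would instead become ANY, and any other nested type raises the BinderException above.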
} // namespace duckdb


@@ -0,0 +1,640 @@
#include "parquet_statistics.hpp"
#include "duckdb.hpp"
#include "parquet_decimal_utils.hpp"
#include "parquet_timestamp.hpp"
#include "parquet_float16.hpp"
#include "parquet_reader.hpp"
#include "reader/string_column_reader.hpp"
#include "reader/struct_column_reader.hpp"
#include "zstd/common/xxhash.hpp"
#include "duckdb/common/types/blob.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/value.hpp"
#include "duckdb/storage/statistics/struct_stats.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "reader/uuid_column_reader.hpp"
namespace duckdb {
using duckdb_parquet::ConvertedType;
using duckdb_parquet::Type;
unique_ptr<BaseStatistics> ParquetStatisticsUtils::CreateNumericStats(const LogicalType &type,
const ParquetColumnSchema &schema_ele,
const duckdb_parquet::Statistics &parquet_stats) {
auto stats = NumericStats::CreateUnknown(type);
// for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and
// `max_value`. All are optional. such elegance.
Value min;
Value max;
if (parquet_stats.__isset.min_value) {
min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min_value);
} else if (parquet_stats.__isset.min) {
min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min);
} else {
min = Value(type);
}
if (parquet_stats.__isset.max_value) {
max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max_value);
} else if (parquet_stats.__isset.max) {
max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max);
} else {
max = Value(type);
}
NumericStats::SetMin(stats, min);
NumericStats::SetMax(stats, max);
return stats.ToUnique();
}
static unique_ptr<BaseStatistics> CreateFloatingPointStats(const LogicalType &type,
const ParquetColumnSchema &schema_ele,
const duckdb_parquet::Statistics &parquet_stats) {
auto stats = NumericStats::CreateUnknown(type);
// floating point values can always have NaN values - hence we cannot use the max value from the file
Value min;
Value max;
if (parquet_stats.__isset.min_value) {
min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min_value);
} else if (parquet_stats.__isset.min) {
min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min);
} else {
min = Value(type);
}
max = Value("nan").DefaultCastAs(type);
NumericStats::SetMin(stats, min);
NumericStats::SetMax(stats, max);
return stats.ToUnique();
}
Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type, const ParquetColumnSchema &schema_ele,
const std::string &stats) {
Value result;
string error;
auto stats_val = ConvertValueInternal(type, schema_ele, stats);
if (!stats_val.DefaultTryCastAs(type, result, &error)) {
return Value(type);
}
return result;
}
Value ParquetStatisticsUtils::ConvertValueInternal(const LogicalType &type, const ParquetColumnSchema &schema_ele,
const std::string &stats) {
auto stats_data = const_data_ptr_cast(stats.c_str());
switch (type.id()) {
case LogicalTypeId::BOOLEAN: {
if (stats.size() != sizeof(bool)) {
throw InvalidInputException("Incorrect stats size for type BOOLEAN");
}
return Value::BOOLEAN(Load<bool>(stats_data));
}
case LogicalTypeId::UTINYINT:
case LogicalTypeId::USMALLINT:
case LogicalTypeId::UINTEGER:
if (stats.size() != sizeof(uint32_t)) {
throw InvalidInputException("Incorrect stats size for type UINTEGER");
}
return Value::UINTEGER(Load<uint32_t>(stats_data));
case LogicalTypeId::UBIGINT:
if (stats.size() != sizeof(uint64_t)) {
throw InvalidInputException("Incorrect stats size for type UBIGINT");
}
return Value::UBIGINT(Load<uint64_t>(stats_data));
case LogicalTypeId::TINYINT:
case LogicalTypeId::SMALLINT:
case LogicalTypeId::INTEGER:
if (stats.size() != sizeof(int32_t)) {
throw InvalidInputException("Incorrect stats size for type INTEGER");
}
return Value::INTEGER(Load<int32_t>(stats_data));
case LogicalTypeId::BIGINT:
if (stats.size() != sizeof(int64_t)) {
throw InvalidInputException("Incorrect stats size for type BIGINT");
}
return Value::BIGINT(Load<int64_t>(stats_data));
case LogicalTypeId::FLOAT: {
float val;
if (schema_ele.type_info == ParquetExtraTypeInfo::FLOAT16) {
if (stats.size() != sizeof(uint16_t)) {
throw InvalidInputException("Incorrect stats size for type FLOAT16");
}
val = Float16ToFloat32(Load<uint16_t>(stats_data));
} else {
if (stats.size() != sizeof(float)) {
throw InvalidInputException("Incorrect stats size for type FLOAT");
}
val = Load<float>(stats_data);
}
if (!Value::FloatIsFinite(val)) {
return Value();
}
return Value::FLOAT(val);
}
case LogicalTypeId::DOUBLE: {
if (schema_ele.type_info == ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY) {
// decimals cast to double
return Value::DOUBLE(ParquetDecimalUtils::ReadDecimalValue<double>(stats_data, stats.size(), schema_ele));
}
if (stats.size() != sizeof(double)) {
throw InvalidInputException("Incorrect stats size for type DOUBLE");
}
auto val = Load<double>(stats_data);
if (!Value::DoubleIsFinite(val)) {
return Value();
}
return Value::DOUBLE(val);
}
case LogicalTypeId::DECIMAL: {
auto width = DecimalType::GetWidth(type);
auto scale = DecimalType::GetScale(type);
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::DECIMAL_INT32:
if (stats.size() != sizeof(int32_t)) {
throw InvalidInputException("Incorrect stats size for type %s", type.ToString());
}
return Value::DECIMAL(Load<int32_t>(stats_data), width, scale);
case ParquetExtraTypeInfo::DECIMAL_INT64:
if (stats.size() != sizeof(int64_t)) {
throw InvalidInputException("Incorrect stats size for type %s", type.ToString());
}
return Value::DECIMAL(Load<int64_t>(stats_data), width, scale);
case ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY:
switch (type.InternalType()) {
case PhysicalType::INT16:
return Value::DECIMAL(
ParquetDecimalUtils::ReadDecimalValue<int16_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT32:
return Value::DECIMAL(
ParquetDecimalUtils::ReadDecimalValue<int32_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT64:
return Value::DECIMAL(
ParquetDecimalUtils::ReadDecimalValue<int64_t>(stats_data, stats.size(), schema_ele), width, scale);
case PhysicalType::INT128:
return Value::DECIMAL(
ParquetDecimalUtils::ReadDecimalValue<hugeint_t>(stats_data, stats.size(), schema_ele), width,
scale);
default:
throw InvalidInputException("Unsupported internal type for decimal");
}
default:
throw NotImplementedException("Unrecognized Parquet type for Decimal");
}
}
case LogicalTypeId::VARCHAR:
case LogicalTypeId::BLOB:
if (type.id() == LogicalTypeId::BLOB || !Value::StringIsValid(stats)) {
return Value(Blob::ToString(string_t(stats)));
}
return Value(stats);
case LogicalTypeId::DATE:
if (stats.size() != sizeof(int32_t)) {
throw InvalidInputException("Incorrect stats size for type DATE");
}
return Value::DATE(date_t(Load<int32_t>(stats_data)));
case LogicalTypeId::TIME: {
int64_t val;
if (stats.size() == sizeof(int32_t)) {
val = Load<int32_t>(stats_data);
} else if (stats.size() == sizeof(int64_t)) {
val = Load<int64_t>(stats_data);
} else {
throw InvalidInputException("Incorrect stats size for type TIME");
}
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return Value::TIME(Time::FromTimeMs(val));
case ParquetExtraTypeInfo::UNIT_NS:
return Value::TIME(Time::FromTimeNs(val));
case ParquetExtraTypeInfo::UNIT_MICROS:
default:
return Value::TIME(dtime_t(val));
}
}
case LogicalTypeId::TIME_NS: {
int64_t val;
if (stats.size() == sizeof(int32_t)) {
val = Load<int32_t>(stats_data);
} else if (stats.size() == sizeof(int64_t)) {
val = Load<int64_t>(stats_data);
} else {
throw InvalidInputException("Incorrect stats size for type TIME");
}
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return Value::TIME_NS(ParquetMsIntToTimeNs(val));
case ParquetExtraTypeInfo::UNIT_NS:
return Value::TIME_NS(ParquetIntToTimeNs(val));
case ParquetExtraTypeInfo::UNIT_MICROS:
default:
return Value::TIME_NS(dtime_ns_t(val));
}
}
case LogicalTypeId::TIME_TZ: {
int64_t val;
if (stats.size() == sizeof(int32_t)) {
val = Load<int32_t>(stats_data);
} else if (stats.size() == sizeof(int64_t)) {
val = Load<int64_t>(stats_data);
} else {
throw InvalidInputException("Incorrect stats size for type TIMETZ");
}
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
return Value::TIMETZ(ParquetIntToTimeMsTZ(NumericCast<int32_t>(val)));
case ParquetExtraTypeInfo::UNIT_NS:
return Value::TIMETZ(ParquetIntToTimeNsTZ(val));
case ParquetExtraTypeInfo::UNIT_MICROS:
default:
return Value::TIMETZ(ParquetIntToTimeTZ(val));
}
}
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ: {
timestamp_t timestamp_value;
if (schema_ele.type_info == ParquetExtraTypeInfo::IMPALA_TIMESTAMP) {
if (stats.size() != sizeof(Int96)) {
throw InvalidInputException("Incorrect stats size for type TIMESTAMP");
}
timestamp_value = ImpalaTimestampToTimestamp(Load<Int96>(stats_data));
} else {
if (stats.size() != sizeof(int64_t)) {
throw InvalidInputException("Incorrect stats size for type TIMESTAMP");
}
auto val = Load<int64_t>(stats_data);
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
timestamp_value = Timestamp::FromEpochMs(val);
break;
case ParquetExtraTypeInfo::UNIT_NS:
timestamp_value = Timestamp::FromEpochNanoSeconds(val);
break;
case ParquetExtraTypeInfo::UNIT_MICROS:
default:
timestamp_value = timestamp_t(val);
break;
}
}
if (type.id() == LogicalTypeId::TIMESTAMP_TZ) {
return Value::TIMESTAMPTZ(timestamp_tz_t(timestamp_value));
}
return Value::TIMESTAMP(timestamp_value);
}
case LogicalTypeId::TIMESTAMP_NS: {
timestamp_ns_t timestamp_value;
if (schema_ele.type_info == ParquetExtraTypeInfo::IMPALA_TIMESTAMP) {
if (stats.size() != sizeof(Int96)) {
throw InvalidInputException("Incorrect stats size for type TIMESTAMP_NS");
}
timestamp_value = ImpalaTimestampToTimestampNS(Load<Int96>(stats_data));
} else {
if (stats.size() != sizeof(int64_t)) {
throw InvalidInputException("Incorrect stats size for type TIMESTAMP_NS");
}
auto val = Load<int64_t>(stats_data);
switch (schema_ele.type_info) {
case ParquetExtraTypeInfo::UNIT_MS:
timestamp_value = ParquetTimestampMsToTimestampNs(val);
break;
case ParquetExtraTypeInfo::UNIT_NS:
timestamp_value = ParquetTimestampNsToTimestampNs(val);
break;
case ParquetExtraTypeInfo::UNIT_MICROS:
default:
timestamp_value = ParquetTimestampUsToTimestampNs(val);
break;
}
}
return Value::TIMESTAMPNS(timestamp_value);
}
case LogicalTypeId::UUID: {
if (stats.size() != 16) {
throw InvalidInputException("Incorrect stats size for type UUID");
}
auto uuid_val = UUIDValueConversion::ReadParquetUUID(const_data_ptr_cast(stats.c_str()));
return Value::UUID(uuid_val);
}
default:
throw InternalException("Unsupported type for stats %s", type.ToString());
}
}
unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(const ParquetColumnSchema &schema,
const vector<ColumnChunk> &columns,
bool can_have_nan) {
// Not supported types
auto &type = schema.type;
if (type.id() == LogicalTypeId::ARRAY || type.id() == LogicalTypeId::MAP || type.id() == LogicalTypeId::LIST) {
return nullptr;
}
unique_ptr<BaseStatistics> row_group_stats;
// Structs are handled differently (they don't have stats)
if (type.id() == LogicalTypeId::STRUCT) {
auto struct_stats = StructStats::CreateUnknown(type);
// Recurse into child readers
for (idx_t i = 0; i < schema.children.size(); i++) {
auto &child_schema = schema.children[i];
auto child_stats = ParquetStatisticsUtils::TransformColumnStatistics(child_schema, columns, can_have_nan);
StructStats::SetChildStats(struct_stats, i, std::move(child_stats));
}
row_group_stats = struct_stats.ToUnique();
// null count is generic
if (row_group_stats) {
row_group_stats->Set(StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES);
}
return row_group_stats;
} else if (schema.schema_type == ParquetColumnSchemaType::VARIANT) {
//! FIXME: there are situations where VARIANT columns can have stats
return nullptr;
}
// Otherwise, it's a standard column with stats
auto &column_chunk = columns[schema.column_index];
if (!column_chunk.__isset.meta_data || !column_chunk.meta_data.__isset.statistics) {
// no stats present for row group
return nullptr;
}
auto &parquet_stats = column_chunk.meta_data.statistics;
switch (type.id()) {
case LogicalTypeId::UTINYINT:
case LogicalTypeId::USMALLINT:
case LogicalTypeId::UINTEGER:
case LogicalTypeId::UBIGINT:
case LogicalTypeId::TINYINT:
case LogicalTypeId::SMALLINT:
case LogicalTypeId::INTEGER:
case LogicalTypeId::BIGINT:
case LogicalTypeId::DATE:
case LogicalTypeId::TIME:
case LogicalTypeId::TIME_TZ:
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
case LogicalTypeId::TIMESTAMP_SEC:
case LogicalTypeId::TIMESTAMP_MS:
case LogicalTypeId::TIMESTAMP_NS:
case LogicalTypeId::DECIMAL:
row_group_stats = CreateNumericStats(type, schema, parquet_stats);
break;
case LogicalTypeId::FLOAT:
case LogicalTypeId::DOUBLE:
if (can_have_nan) {
// Since parquet doesn't tell us if the column has NaN values, if the user has explicitly declared that it
// does, we create stats without an upper max value, as NaN compares larger than anything else.
row_group_stats = CreateFloatingPointStats(type, schema, parquet_stats);
} else {
// Otherwise we use the numeric stats as usual, which might lead to "wrong" pruning if the column contains
// NaN values. The parquet spec is not clear on how to handle NaN values in statistics, and so this is
// probably the best we can do for now.
row_group_stats = CreateNumericStats(type, schema, parquet_stats);
}
break;
case LogicalTypeId::VARCHAR: {
auto string_stats = StringStats::CreateUnknown(type);
if (parquet_stats.__isset.min_value) {
StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
StringStats::SetMin(string_stats, parquet_stats.min_value);
} else if (parquet_stats.__isset.min) {
StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
StringStats::SetMin(string_stats, parquet_stats.min);
}
if (parquet_stats.__isset.max_value) {
StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
StringStats::SetMax(string_stats, parquet_stats.max_value);
} else if (parquet_stats.__isset.max) {
StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
StringStats::SetMax(string_stats, parquet_stats.max);
}
row_group_stats = string_stats.ToUnique();
break;
}
default:
// no stats for you
break;
} // end of type switch
// null count is generic
if (row_group_stats) {
row_group_stats->Set(StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES);
if (parquet_stats.__isset.null_count && parquet_stats.null_count == 0) {
row_group_stats->Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
}
if (parquet_stats.__isset.null_count && parquet_stats.null_count == column_chunk.meta_data.num_values) {
row_group_stats->Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
}
}
return row_group_stats;
}
static bool HasFilterConstants(const TableFilter &duckdb_filter) {
switch (duckdb_filter.filter_type) {
case TableFilterType::CONSTANT_COMPARISON: {
auto &constant_filter = duckdb_filter.Cast<ConstantFilter>();
return (constant_filter.comparison_type == ExpressionType::COMPARE_EQUAL && !constant_filter.constant.IsNull());
}
case TableFilterType::CONJUNCTION_AND: {
auto &conjunction_and_filter = duckdb_filter.Cast<ConjunctionAndFilter>();
bool child_has_constant = false;
for (auto &child_filter : conjunction_and_filter.child_filters) {
child_has_constant |= HasFilterConstants(*child_filter);
}
return child_has_constant;
}
case TableFilterType::CONJUNCTION_OR: {
auto &conjunction_or_filter = duckdb_filter.Cast<ConjunctionOrFilter>();
bool child_has_constant = false;
for (auto &child_filter : conjunction_or_filter.child_filters) {
child_has_constant |= HasFilterConstants(*child_filter);
}
return child_has_constant;
}
default:
return false;
}
}
template <class T>
static uint64_t ValueXH64FixedWidth(const Value &constant) {
T val = constant.GetValue<T>();
return duckdb_zstd::XXH64(&val, sizeof(val), 0);
}
// TODO we can only do this if the parquet representation of the type exactly matches the duckdb rep!
// TODO TEST THIS!
// TODO perhaps we can re-use some writer infra here
static uint64_t ValueXXH64(const Value &constant) {
switch (constant.type().InternalType()) {
case PhysicalType::UINT8:
return ValueXH64FixedWidth<int32_t>(constant);
case PhysicalType::INT8:
return ValueXH64FixedWidth<int32_t>(constant);
case PhysicalType::UINT16:
return ValueXH64FixedWidth<int32_t>(constant);
case PhysicalType::INT16:
return ValueXH64FixedWidth<int32_t>(constant);
case PhysicalType::UINT32:
return ValueXH64FixedWidth<uint32_t>(constant);
case PhysicalType::INT32:
return ValueXH64FixedWidth<int32_t>(constant);
case PhysicalType::UINT64:
return ValueXH64FixedWidth<uint64_t>(constant);
case PhysicalType::INT64:
return ValueXH64FixedWidth<int64_t>(constant);
case PhysicalType::FLOAT:
return ValueXH64FixedWidth<float>(constant);
case PhysicalType::DOUBLE:
return ValueXH64FixedWidth<double>(constant);
case PhysicalType::VARCHAR: {
auto val = constant.GetValue<string>();
return duckdb_zstd::XXH64(val.c_str(), val.length(), 0);
}
default:
return 0;
}
}
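// The narrow integer cases above hash the value widened to 32 bits: Parquet stores (U)INT8/(U)INT16
// columns with a 32-bit physical type, so hashing the widened representation is presumably what makes the
// probe hash line up with what the writer hashed (see also the TODO about matching representations).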
static bool ApplyBloomFilter(const TableFilter &duckdb_filter, ParquetBloomFilter &bloom_filter) {
switch (duckdb_filter.filter_type) {
case TableFilterType::CONSTANT_COMPARISON: {
auto &constant_filter = duckdb_filter.Cast<ConstantFilter>();
auto is_compare_equal = constant_filter.comparison_type == ExpressionType::COMPARE_EQUAL;
D_ASSERT(!constant_filter.constant.IsNull());
auto hash = ValueXXH64(constant_filter.constant);
return hash > 0 && !bloom_filter.FilterCheck(hash) && is_compare_equal;
}
case TableFilterType::CONJUNCTION_AND: {
auto &conjunction_and_filter = duckdb_filter.Cast<ConjunctionAndFilter>();
bool any_children_true = false;
for (auto &child_filter : conjunction_and_filter.child_filters) {
any_children_true |= ApplyBloomFilter(*child_filter, bloom_filter);
}
return any_children_true;
}
case TableFilterType::CONJUNCTION_OR: {
auto &conjunction_or_filter = duckdb_filter.Cast<ConjunctionOrFilter>();
bool all_children_true = true;
for (auto &child_filter : conjunction_or_filter.child_filters) {
all_children_true &= ApplyBloomFilter(*child_filter, bloom_filter);
}
return all_children_true;
}
default:
return false;
}
}
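// Return-value semantics of ApplyBloomFilter: true means the bloom filter proves the predicate cannot
// match anything in this row group. For a CONJUNCTION_AND it suffices that one child is excluded; for a
// CONJUNCTION_OR every child has to be excluded before the whole row group can be skipped.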
bool ParquetStatisticsUtils::BloomFilterSupported(const LogicalTypeId &type_id) {
switch (type_id) {
case LogicalTypeId::TINYINT:
case LogicalTypeId::UTINYINT:
case LogicalTypeId::SMALLINT:
case LogicalTypeId::USMALLINT:
case LogicalTypeId::INTEGER:
case LogicalTypeId::UINTEGER:
case LogicalTypeId::BIGINT:
case LogicalTypeId::UBIGINT:
case LogicalTypeId::FLOAT:
case LogicalTypeId::DOUBLE:
case LogicalTypeId::VARCHAR:
case LogicalTypeId::BLOB:
return true;
default:
return false;
}
}
bool ParquetStatisticsUtils::BloomFilterExcludes(const TableFilter &duckdb_filter,
const duckdb_parquet::ColumnMetaData &column_meta_data,
TProtocol &file_proto, Allocator &allocator) {
if (!HasFilterConstants(duckdb_filter) || !column_meta_data.__isset.bloom_filter_offset ||
column_meta_data.bloom_filter_offset <= 0) {
return false;
}
// TODO check length against file length!
auto &transport = reinterpret_cast<ThriftFileTransport &>(*file_proto.getTransport());
transport.SetLocation(column_meta_data.bloom_filter_offset);
if (column_meta_data.__isset.bloom_filter_length && column_meta_data.bloom_filter_length > 0) {
transport.Prefetch(column_meta_data.bloom_filter_offset, column_meta_data.bloom_filter_length);
}
duckdb_parquet::BloomFilterHeader filter_header;
// TODO the bloom filter could be encrypted, too, so need to double check that this is NOT the case
filter_header.read(&file_proto);
if (!filter_header.algorithm.__isset.BLOCK || !filter_header.compression.__isset.UNCOMPRESSED ||
!filter_header.hash.__isset.XXHASH) {
return false;
}
auto new_buffer = make_uniq<ResizeableBuffer>(allocator, filter_header.numBytes);
transport.read(new_buffer->ptr, filter_header.numBytes);
ParquetBloomFilter bloom_filter(std::move(new_buffer));
return ApplyBloomFilter(duckdb_filter, bloom_filter);
}
ParquetBloomFilter::ParquetBloomFilter(idx_t num_entries, double bloom_filter_false_positive_ratio) {
// aim for the requested false-positive ratio (bloom_filter_false_positive_ratio)
// see http://tfk.mit.edu/pdf/bloom.pdf
double f = bloom_filter_false_positive_ratio;
double k = 8.0;
double n = LossyNumericCast<double>(num_entries);
double m = -k * n / std::log(1 - std::pow(f, 1 / k));
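// rough illustration (not from the original source): with f = 0.01 and k = 8,
// pow(f, 1 / k) is roughly 0.56, so m comes out at about 9.7 * n bits, i.e. on
// the order of 10 bits per distinct value before rounding up to a power-of-two
// number of blocks below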
auto b = MaxValue<idx_t>(NextPowerOfTwo(LossyNumericCast<idx_t>(m / k)) / 32, 1);
D_ASSERT(b > 0 && IsPowerOfTwo(b));
data = make_uniq<ResizeableBuffer>(Allocator::DefaultAllocator(), sizeof(ParquetBloomBlock) * b);
data->zero();
block_count = data->len / sizeof(ParquetBloomBlock);
D_ASSERT(data->len % sizeof(ParquetBloomBlock) == 0);
}
ParquetBloomFilter::ParquetBloomFilter(unique_ptr<ResizeableBuffer> data_p) {
D_ASSERT(data_p->len % sizeof(ParquetBloomBlock) == 0);
data = std::move(data_p);
block_count = data->len / sizeof(ParquetBloomBlock);
D_ASSERT(data->len % sizeof(ParquetBloomBlock) == 0);
}
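// FilterInsert/FilterCheck pick a block by mapping the upper 32 bits of the hash
// onto [0, block_count) with a multiply-and-shift rather than a modulo.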
void ParquetBloomFilter::FilterInsert(uint64_t x) {
auto blocks = reinterpret_cast<ParquetBloomBlock *>(data->ptr);
uint64_t i = ((x >> 32) * block_count) >> 32;
auto &b = blocks[i];
ParquetBloomBlock::BlockInsert(b, x);
}
bool ParquetBloomFilter::FilterCheck(uint64_t x) {
auto blocks = reinterpret_cast<ParquetBloomBlock *>(data->ptr);
auto i = ((x >> 32) * block_count) >> 32;
return ParquetBloomBlock::BlockCheck(blocks[i], x);
}
// compiler optimizes this into a single instruction (popcnt)
static uint8_t PopCnt64(uint64_t n) {
uint8_t c = 0;
for (; n; ++c) {
n &= n - 1;
}
return c;
}
double ParquetBloomFilter::OneRatio() {
auto bloom_ptr = reinterpret_cast<uint64_t *>(data->ptr);
idx_t one_count = 0;
for (idx_t b_idx = 0; b_idx < data->len / sizeof(uint64_t); ++b_idx) {
one_count += PopCnt64(bloom_ptr[b_idx]);
}
return LossyNumericCast<double>(one_count) / (LossyNumericCast<double>(data->len) * 8.0);
}
ResizeableBuffer *ParquetBloomFilter::Get() {
return data.get();
}
} // namespace duckdb

View File

@@ -0,0 +1,156 @@
#include "parquet_timestamp.hpp"
#include "duckdb.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
namespace duckdb {
// surely they are joking
static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL;
static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL;
static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL;
static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL;
static constexpr int64_t NANOSECONDS_PER_DAY = MICROSECONDS_PER_DAY * 1000LL;
static inline int64_t ImpalaTimestampToDays(const Int96 &impala_timestamp) {
return impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS;
}
static int64_t ImpalaTimestampToMicroseconds(const Int96 &impala_timestamp) {
int64_t days_since_epoch = ImpalaTimestampToDays(impala_timestamp);
auto nanoseconds = Load<int64_t>(const_data_ptr_cast(impala_timestamp.value));
auto microseconds = nanoseconds / NANOSECONDS_PER_MICRO;
return days_since_epoch * MICROSECONDS_PER_DAY + microseconds;
}
static int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp) {
int64_t days_since_epoch = ImpalaTimestampToDays(impala_timestamp);
auto nanoseconds = Load<int64_t>(const_data_ptr_cast(impala_timestamp.value));
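// illustration: value[2] = 2440589 with a stored nanosecond field of 0 decodes
// to exactly one day past the Unix epoch, i.e. 86,400,000,000,000 ns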
return days_since_epoch * NANOSECONDS_PER_DAY + nanoseconds;
}
timestamp_ns_t ImpalaTimestampToTimestampNS(const Int96 &raw_ts) {
timestamp_ns_t result;
result.value = ImpalaTimestampToNanoseconds(raw_ts);
return result;
}
timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) {
auto impala_us = ImpalaTimestampToMicroseconds(raw_ts);
return Timestamp::FromEpochMicroSeconds(impala_us);
}
Int96 TimestampToImpalaTimestamp(timestamp_t &ts) {
int32_t hour, min, sec, msec;
Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec);
uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec;
auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / int64_t(24 * 60 * 60);
// first two uint32 in Int96 are nanoseconds since midnight
// last uint32 is the number of days since year 4713 BC ("Julian date")
Int96 impala_ts;
Store<uint64_t>(ms_since_midnight * 1000000, data_ptr_cast(impala_ts.value));
impala_ts.value[2] = days_since_epoch + JULIAN_TO_UNIX_EPOCH_DAYS;
return impala_ts;
}
timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts) {
return Timestamp::FromEpochMicroSeconds(raw_ts);
}
timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts) {
timestamp_t input(raw_ts);
if (!Timestamp::IsFinite(input)) {
return input;
}
return Timestamp::FromEpochMs(raw_ts);
}
timestamp_ns_t ParquetTimestampMsToTimestampNs(const int64_t &raw_ms) {
timestamp_ns_t input;
input.value = raw_ms;
if (!Timestamp::IsFinite(input)) {
return input;
}
return Timestamp::TimestampNsFromEpochMillis(raw_ms);
}
timestamp_ns_t ParquetTimestampUsToTimestampNs(const int64_t &raw_us) {
timestamp_ns_t input;
input.value = raw_us;
if (!Timestamp::IsFinite(input)) {
return input;
}
return Timestamp::TimestampNsFromEpochMicros(raw_us);
}
timestamp_ns_t ParquetTimestampNsToTimestampNs(const int64_t &raw_ns) {
timestamp_ns_t result;
result.value = raw_ns;
return result;
}
timestamp_t ParquetTimestampNsToTimestamp(const int64_t &raw_ts) {
timestamp_t input(raw_ts);
if (!Timestamp::IsFinite(input)) {
return input;
}
return Timestamp::FromEpochNanoSeconds(raw_ts);
}
date_t ParquetIntToDate(const int32_t &raw_date) {
return date_t(raw_date);
}
template <typename T>
static T ParquetWrapTime(const T &raw, const T day) {
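// Wraps a possibly negative or >= one-day value into [0, day]; e.g. a raw value
// of -1 microsecond with day = MICROS_PER_DAY becomes 23:59:59.999999.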
// Special case 24:00:00
if (raw == day) {
return raw;
}
const auto modulus = raw % day;
return modulus + (modulus < 0) * day;
}
dtime_t ParquetMsIntToTime(const int32_t &raw_millis) {
return Time::FromTimeMs(raw_millis);
}
dtime_t ParquetIntToTime(const int64_t &raw_micros) {
return dtime_t(raw_micros);
}
dtime_t ParquetNsIntToTime(const int64_t &raw_nanos) {
return Time::FromTimeNs(raw_nanos);
}
dtime_ns_t ParquetMsIntToTimeNs(const int32_t &raw_millis) {
return dtime_ns_t(Interval::NANOS_PER_MSEC * raw_millis);
}
dtime_ns_t ParquetUsIntToTimeNs(const int64_t &raw_micros) {
return dtime_ns_t(raw_micros * Interval::NANOS_PER_MICRO);
}
dtime_ns_t ParquetIntToTimeNs(const int64_t &raw_nanos) {
return dtime_ns_t(raw_nanos);
}
dtime_tz_t ParquetIntToTimeMsTZ(const int32_t &raw_millis) {
const int32_t MSECS_PER_DAY = Interval::MSECS_PER_SEC * Interval::SECS_PER_DAY;
const auto millis = ParquetWrapTime(raw_millis, MSECS_PER_DAY);
return dtime_tz_t(Time::FromTimeMs(millis), 0);
}
dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_micros) {
const auto micros = ParquetWrapTime(raw_micros, Interval::MICROS_PER_DAY);
return dtime_tz_t(dtime_t(micros), 0);
}
dtime_tz_t ParquetIntToTimeNsTZ(const int64_t &raw_nanos) {
const auto nanos = ParquetWrapTime(raw_nanos, Interval::NANOS_PER_DAY);
return dtime_tz_t(Time::FromTimeNs(nanos), 0);
}
} // namespace duckdb

File diff suppressed because it is too large

View File

@@ -0,0 +1,16 @@
add_library_unity(
duckdb_parquet_readers
OBJECT
decimal_column_reader.cpp
expression_column_reader.cpp
list_column_reader.cpp
row_number_column_reader.cpp
string_column_reader.cpp
struct_column_reader.cpp
variant_column_reader.cpp)
add_subdirectory(variant)
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_parquet_readers>
PARENT_SCOPE)

View File

@@ -0,0 +1,56 @@
#include "reader/decimal_column_reader.hpp"
namespace duckdb {
template <bool FIXED>
static unique_ptr<ColumnReader> CreateDecimalReaderInternal(ParquetReader &reader, const ParquetColumnSchema &schema) {
switch (schema.type.InternalType()) {
case PhysicalType::INT16:
return make_uniq<DecimalColumnReader<int16_t, FIXED>>(reader, schema);
case PhysicalType::INT32:
return make_uniq<DecimalColumnReader<int32_t, FIXED>>(reader, schema);
case PhysicalType::INT64:
return make_uniq<DecimalColumnReader<int64_t, FIXED>>(reader, schema);
case PhysicalType::INT128:
return make_uniq<DecimalColumnReader<hugeint_t, FIXED>>(reader, schema);
case PhysicalType::DOUBLE:
return make_uniq<DecimalColumnReader<double, FIXED>>(reader, schema);
default:
throw InternalException("Unrecognized type for Decimal");
}
}
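// Reads a big-endian two's-complement decimal of arbitrary byte length into a
// double: bytes are folded in 64-bit chunks (negative values are bit-flipped
// first and the +1 of the two's complement added at the end), after which the
// stored scale is divided out.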
template <>
double ParquetDecimalUtils::ReadDecimalValue(const_data_ptr_t pointer, idx_t size,
const ParquetColumnSchema &schema_ele) {
double res = 0;
bool positive = (*pointer & 0x80) == 0;
for (idx_t i = 0; i < size; i += 8) {
auto byte_size = MinValue<idx_t>(sizeof(uint64_t), size - i);
uint64_t input = 0;
auto res_ptr = reinterpret_cast<uint8_t *>(&input);
for (idx_t k = 0; k < byte_size; k++) {
auto byte = pointer[i + k];
res_ptr[sizeof(uint64_t) - k - 1] = positive ? byte : byte ^ 0xFF;
}
res *= double(NumericLimits<uint64_t>::Maximum()) + 1;
res += static_cast<double>(input);
}
if (!positive) {
res += 1;
res /= pow(10, schema_ele.type_scale);
return -res;
}
res /= pow(10, schema_ele.type_scale);
return res;
}
unique_ptr<ColumnReader> ParquetDecimalUtils::CreateReader(ParquetReader &reader, const ParquetColumnSchema &schema) {
if (schema.parquet_type == Type::FIXED_LEN_BYTE_ARRAY) {
return CreateDecimalReaderInternal<true>(reader, schema);
} else {
return CreateDecimalReaderInternal<false>(reader, schema);
}
}
} // namespace duckdb

View File

@@ -0,0 +1,50 @@
#include "reader/expression_column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// Expression Column Reader
//===--------------------------------------------------------------------===//
ExpressionColumnReader::ExpressionColumnReader(ClientContext &context, unique_ptr<ColumnReader> child_reader_p,
unique_ptr<Expression> expr_p, const ParquetColumnSchema &schema_p)
: ColumnReader(child_reader_p->Reader(), schema_p), child_reader(std::move(child_reader_p)),
expr(std::move(expr_p)), executor(context, expr.get()) {
vector<LogicalType> intermediate_types {child_reader->Type()};
intermediate_chunk.Initialize(reader.allocator, intermediate_types);
}
ExpressionColumnReader::ExpressionColumnReader(ClientContext &context, unique_ptr<ColumnReader> child_reader_p,
unique_ptr<Expression> expr_p,
unique_ptr<ParquetColumnSchema> owned_schema_p)
: ColumnReader(child_reader_p->Reader(), *owned_schema_p), child_reader(std::move(child_reader_p)),
expr(std::move(expr_p)), executor(context, expr.get()), owned_schema(std::move(owned_schema_p)) {
vector<LogicalType> intermediate_types {child_reader->Type()};
intermediate_chunk.Initialize(reader.allocator, intermediate_types);
}
void ExpressionColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns,
TProtocol &protocol_p) {
child_reader->InitializeRead(row_group_idx_p, columns, protocol_p);
}
idx_t ExpressionColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
intermediate_chunk.Reset();
auto &intermediate_vector = intermediate_chunk.data[0];
auto amount = child_reader->Read(num_values, define_out, repeat_out, intermediate_vector);
// Execute the expression
intermediate_chunk.SetCardinality(amount);
executor.ExecuteExpression(intermediate_chunk, result);
return amount;
}
void ExpressionColumnReader::Skip(idx_t num_values) {
child_reader->Skip(num_values);
}
idx_t ExpressionColumnReader::GroupRowsAvailable() {
return child_reader->GroupRowsAvailable();
}
} // namespace duckdb

View File

@@ -0,0 +1,190 @@
#include "reader/list_column_reader.hpp"
#include "parquet_reader.hpp"
namespace duckdb {
struct ListReaderData {
ListReaderData(list_entry_t *result_ptr, ValidityMask &result_mask)
: result_ptr(result_ptr), result_mask(result_mask) {
}
list_entry_t *result_ptr;
ValidityMask &result_mask;
};
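// ReadInternal is parameterized over one of two policies: TemplatedListReader
// materializes list entries and child values into the result vector, while
// TemplatedListSkipper walks the same repetition/definition stream and discards
// everything (used to apply pending skips).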
struct TemplatedListReader {
using DATA = ListReaderData;
static DATA Initialize(optional_ptr<Vector> result_out) {
D_ASSERT(ListVector::GetListSize(*result_out) == 0);
auto result_ptr = FlatVector::GetData<list_entry_t>(*result_out);
auto &result_mask = FlatVector::Validity(*result_out);
return ListReaderData(result_ptr, result_mask);
}
static idx_t GetOffset(optional_ptr<Vector> result_out) {
return ListVector::GetListSize(*result_out);
}
static void HandleRepeat(DATA &data, idx_t offset) {
data.result_ptr[offset].length++;
}
static void HandleListStart(DATA &data, idx_t offset, idx_t offset_in_child, idx_t length) {
data.result_ptr[offset].offset = offset_in_child;
data.result_ptr[offset].length = length;
}
static void HandleNull(DATA &data, idx_t offset) {
data.result_mask.SetInvalid(offset);
data.result_ptr[offset].offset = 0;
data.result_ptr[offset].length = 0;
}
static void AppendVector(optional_ptr<Vector> result_out, Vector &read_vector, idx_t child_idx) {
ListVector::Append(*result_out, read_vector, child_idx);
}
};
struct TemplatedListSkipper {
using DATA = bool;
static DATA Initialize(optional_ptr<Vector>) {
return false;
}
static idx_t GetOffset(optional_ptr<Vector>) {
return 0;
}
static void HandleRepeat(DATA &, idx_t) {
}
static void HandleListStart(DATA &, idx_t, idx_t, idx_t) {
}
static void HandleNull(DATA &, idx_t) {
}
static void AppendVector(optional_ptr<Vector>, Vector &, idx_t) {
}
};
template <class OP>
idx_t ListColumnReader::ReadInternal(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out,
optional_ptr<Vector> result_out) {
idx_t result_offset = 0;
auto data = OP::Initialize(result_out);
// if an individual list is longer than STANDARD_VECTOR_SIZE we actually have to loop the child read to fill it
bool finished = false;
while (!finished) {
idx_t child_actual_num_values = 0;
// check if we have any overflow from a previous read
if (overflow_child_count == 0) {
// we don't: read elements from the child reader
child_defines.zero();
child_repeats.zero();
// we don't know in advance how many values to read because of the beautiful repetition/definition setup
// we just read (up to) a vector from the child column, and see if we have read enough
// if we have not read enough, we read another vector
// if we have read enough, we leave any unhandled elements in the overflow vector for a subsequent read
auto child_req_num_values =
MinValue<idx_t>(STANDARD_VECTOR_SIZE, child_column_reader->GroupRowsAvailable());
read_vector.ResetFromCache(read_cache);
child_actual_num_values =
child_column_reader->Read(child_req_num_values, child_defines_ptr, child_repeats_ptr, read_vector);
} else {
// we do: use the overflow values
child_actual_num_values = overflow_child_count;
overflow_child_count = 0;
}
if (child_actual_num_values == 0) {
// no more elements available: we are done
break;
}
read_vector.Verify(child_actual_num_values);
idx_t current_chunk_offset = OP::GetOffset(result_out);
// hard-won piece of code this, modify at your own risk
// the intuition is that we have to only collapse values into lists that are repeated *on this level*
// the rest is pretty much handed up as-is as a single-valued list or NULL
idx_t child_idx;
for (child_idx = 0; child_idx < child_actual_num_values; child_idx++) {
if (child_repeats_ptr[child_idx] == MaxRepeat()) {
// value repeats on this level, append
D_ASSERT(result_offset > 0);
OP::HandleRepeat(data, result_offset - 1);
continue;
}
if (result_offset >= num_values) {
// we ran out of output space
finished = true;
break;
}
if (child_defines_ptr[child_idx] >= MaxDefine()) {
// value has been defined down the stack, hence it is NOT NULL
OP::HandleListStart(data, result_offset, child_idx + current_chunk_offset, 1);
} else if (child_defines_ptr[child_idx] == MaxDefine() - 1) {
// empty list
OP::HandleListStart(data, result_offset, child_idx + current_chunk_offset, 0);
} else {
// value is NULL somewhere up the stack
OP::HandleNull(data, result_offset);
}
if (repeat_out) {
repeat_out[result_offset] = child_repeats_ptr[child_idx];
}
if (define_out) {
define_out[result_offset] = child_defines_ptr[child_idx];
}
result_offset++;
}
// actually append the required elements to the child list
OP::AppendVector(result_out, read_vector, child_idx);
// we have read more values from the child reader than we can fit into the result for this read
// we have to pass everything from child_idx to child_actual_num_values into the next call
if (child_idx < child_actual_num_values && result_offset == num_values) {
read_vector.Slice(read_vector, child_idx, child_actual_num_values);
overflow_child_count = child_actual_num_values - child_idx;
read_vector.Verify(overflow_child_count);
// move values in the child repeats and defines *backward* by child_idx
for (idx_t repdef_idx = 0; repdef_idx < overflow_child_count; repdef_idx++) {
child_defines_ptr[repdef_idx] = child_defines_ptr[child_idx + repdef_idx];
child_repeats_ptr[repdef_idx] = child_repeats_ptr[child_idx + repdef_idx];
}
}
}
return result_offset;
}
idx_t ListColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result_out) {
ApplyPendingSkips(define_out, repeat_out);
return ReadInternal<TemplatedListReader>(num_values, define_out, repeat_out, result_out);
}
ListColumnReader::ListColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
unique_ptr<ColumnReader> child_column_reader_p)
: ColumnReader(reader, schema), child_column_reader(std::move(child_column_reader_p)),
read_cache(reader.allocator, ListType::GetChildType(Type())), read_vector(read_cache), overflow_child_count(0) {
child_defines.resize(reader.allocator, STANDARD_VECTOR_SIZE);
child_repeats.resize(reader.allocator, STANDARD_VECTOR_SIZE);
child_defines_ptr = (uint8_t *)child_defines.ptr;
child_repeats_ptr = (uint8_t *)child_repeats.ptr;
}
void ListColumnReader::ApplyPendingSkips(data_ptr_t define_out, data_ptr_t repeat_out) {
ReadInternal<TemplatedListSkipper>(pending_skips, nullptr, nullptr, nullptr);
pending_skips = 0;
}
} // namespace duckdb

View File

@@ -0,0 +1,46 @@
#include "reader/row_number_column_reader.hpp"
#include "parquet_reader.hpp"
#include "duckdb/storage/table/row_group.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// Row Number Column Reader
//===--------------------------------------------------------------------===//
RowNumberColumnReader::RowNumberColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: ColumnReader(reader, schema) {
}
void RowNumberColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns,
TProtocol &protocol_p) {
row_group_offset = 0;
auto &row_groups = reader.GetFileMetadata()->row_groups;
for (idx_t i = 0; i < row_group_idx_p; i++) {
row_group_offset += row_groups[i].num_rows;
}
}
void RowNumberColumnReader::Filter(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out,
Vector &result_out, const TableFilter &filter, TableFilterState &filter_state,
SelectionVector &sel, idx_t &approved_tuple_count, bool is_first_filter) {
// check the row id stats if this filter has any chance of passing
auto prune_result = RowGroup::CheckRowIdFilter(filter, row_group_offset, row_group_offset + num_values);
if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) {
// filter is always false - don't read anything
approved_tuple_count = 0;
Skip(num_values);
return;
}
ColumnReader::Filter(num_values, define_out, repeat_out, result_out, filter, filter_state, sel,
approved_tuple_count, is_first_filter);
}
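// Emits consecutive file-level row numbers, continuing from the cumulative row
// count of the preceding row groups computed in InitializeRead.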
idx_t RowNumberColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
auto data_ptr = FlatVector::GetData<int64_t>(result);
for (idx_t i = 0; i < num_values; i++) {
data_ptr[i] = UnsafeNumericCast<int64_t>(row_group_offset++);
}
return num_values;
}
} // namespace duckdb

View File

@@ -0,0 +1,81 @@
#include "reader/string_column_reader.hpp"
#include "utf8proc_wrapper.hpp"
#include "parquet_reader.hpp"
#include "duckdb/common/types/blob.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// String Column Reader
//===--------------------------------------------------------------------===//
StringColumnReader::StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema)
: ColumnReader(reader, schema), string_column_type(GetStringColumnType(Type())) {
fixed_width_string_length = 0;
if (schema.parquet_type == Type::FIXED_LEN_BYTE_ARRAY) {
fixed_width_string_length = schema.type_length;
}
}
void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
if (!is_varchar) {
return;
}
// verify if a string is actually UTF8, and if there are no null bytes in the middle of the string
// technically Parquet should guarantee this, but reality is often disappointing
UnicodeInvalidReason reason;
size_t pos;
auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos);
if (utf_type == UnicodeType::INVALID) {
throw InvalidInputException("Invalid string encoding found in Parquet file: value \"%s\" is not valid UTF8!",
Blob::ToString(string_t(str_data, str_len)));
}
}
void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
switch (string_column_type) {
case StringColumnType::VARCHAR:
VerifyString(str_data, str_len, true);
break;
case StringColumnType::JSON: {
const auto error = StringUtil::ValidateJSON(str_data, str_len);
if (!error.empty()) {
throw InvalidInputException("Invalid JSON found in Parquet file: %s", error);
}
break;
}
default:
break;
}
}
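// Keeps the decompressed page buffer alive for as long as the result vector
// references it, so string_t values can point straight into the Parquet page
// without copying.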
class ParquetStringVectorBuffer : public VectorBuffer {
public:
explicit ParquetStringVectorBuffer(shared_ptr<ResizeableBuffer> buffer_p)
: VectorBuffer(VectorBufferType::OPAQUE_BUFFER), buffer(std::move(buffer_p)) {
}
private:
shared_ptr<ResizeableBuffer> buffer;
};
void StringColumnReader::ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block) {
StringVector::AddBuffer(result, make_buffer<ParquetStringVectorBuffer>(block));
}
void StringColumnReader::Plain(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
idx_t result_offset, Vector &result) {
ReferenceBlock(result, plain_data);
PlainTemplated<string_t, StringParquetValueConversion>(*plain_data, defines, num_values, result_offset, result);
}
void StringColumnReader::PlainSkip(ByteBuffer &plain_data, uint8_t *defines, idx_t num_values) {
PlainSkipTemplated<StringParquetValueConversion>(plain_data, defines, num_values);
}
void StringColumnReader::PlainSelect(shared_ptr<ResizeableBuffer> &plain_data, uint8_t *defines, idx_t num_values,
Vector &result, const SelectionVector &sel, idx_t count) {
ReferenceBlock(result, plain_data);
PlainSelectTemplated<string_t, StringParquetValueConversion>(*plain_data, defines, num_values, result, sel, count);
}
} // namespace duckdb

View File

@@ -0,0 +1,138 @@
#include "reader/struct_column_reader.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// Struct Column Reader
//===--------------------------------------------------------------------===//
StructColumnReader::StructColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
vector<unique_ptr<ColumnReader>> child_readers_p)
: ColumnReader(reader, schema), child_readers(std::move(child_readers_p)) {
D_ASSERT(Type().InternalType() == PhysicalType::STRUCT);
}
ColumnReader &StructColumnReader::GetChildReader(idx_t child_idx) {
if (!child_readers[child_idx]) {
throw InternalException("StructColumnReader::GetChildReader(%d) - but this child reader is not set", child_idx);
}
return *child_readers[child_idx].get();
}
void StructColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns,
TProtocol &protocol_p) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->InitializeRead(row_group_idx_p, columns, protocol_p);
}
}
idx_t StructColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
auto &struct_entries = StructVector::GetEntries(result);
D_ASSERT(StructType::GetChildTypes(Type()).size() == struct_entries.size());
if (pending_skips > 0) {
throw InternalException("StructColumnReader cannot have pending skips");
}
// If the child reader values are all valid, "define_out" may not be initialized at all
// So, we just initialize them to all be valid beforehand
std::fill_n(define_out, num_values, MaxDefine());
optional_idx read_count;
for (idx_t i = 0; i < child_readers.size(); i++) {
auto &child = child_readers[i];
auto &target_vector = *struct_entries[i];
if (!child) {
// if we are not scanning this vector - set it to NULL
target_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::SetNull(target_vector, true);
continue;
}
auto child_num_values = child->Read(num_values, define_out, repeat_out, target_vector);
if (!read_count.IsValid()) {
read_count = child_num_values;
} else if (read_count.GetIndex() != child_num_values) {
throw std::runtime_error("Struct child row count mismatch");
}
}
if (!read_count.IsValid()) {
read_count = num_values;
}
// set the validity mask for this level
auto &validity = FlatVector::Validity(result);
for (idx_t i = 0; i < read_count.GetIndex(); i++) {
if (define_out[i] < MaxDefine()) {
validity.SetInvalid(i);
}
}
return read_count.GetIndex();
}
void StructColumnReader::Skip(idx_t num_values) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->Skip(num_values);
}
}
void StructColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->RegisterPrefetch(transport, allow_merge);
}
}
uint64_t StructColumnReader::TotalCompressedSize() {
uint64_t size = 0;
for (auto &child : child_readers) {
if (!child) {
continue;
}
size += child->TotalCompressedSize();
}
return size;
}
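// Prefer a child whose value count maps 1:1 onto the row count: list and map
// children report counts in terms of their (repeated) elements, so they cannot
// answer GroupRowsAvailable exactly.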
static bool TypeHasExactRowCount(const LogicalType &type) {
switch (type.id()) {
case LogicalTypeId::LIST:
case LogicalTypeId::MAP:
return false;
case LogicalTypeId::STRUCT:
for (auto &kv : StructType::GetChildTypes(type)) {
if (TypeHasExactRowCount(kv.second)) {
return true;
}
}
return false;
default:
return true;
}
}
idx_t StructColumnReader::GroupRowsAvailable() {
for (auto &child : child_readers) {
if (!child) {
continue;
}
if (TypeHasExactRowCount(child->Type())) {
return child->GroupRowsAvailable();
}
}
for (auto &child : child_readers) {
if (!child) {
continue;
}
return child->GroupRowsAvailable();
}
throw InternalException("No projected columns in struct?");
}
} // namespace duckdb

View File

@@ -0,0 +1,7 @@
add_library_unity(
duckdb_parquet_reader_variant OBJECT variant_binary_decoder.cpp
variant_value.cpp variant_shredded_conversion.cpp)
set(PARQUET_EXTENSION_FILES
${PARQUET_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_parquet_reader_variant>
PARENT_SCOPE)

View File

@@ -0,0 +1,365 @@
#include "reader/variant/variant_binary_decoder.hpp"
#include "duckdb/common/printer.hpp"
#include "utf8proc_wrapper.hpp"
#include "reader/uuid_column_reader.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/types/decimal.hpp"
#include "duckdb/common/types/uuid.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/blob.hpp"
static constexpr uint8_t VERSION_MASK = 0xF;
static constexpr uint8_t SORTED_STRINGS_MASK = 0x1;
static constexpr uint8_t SORTED_STRINGS_SHIFT = 4;
static constexpr uint8_t OFFSET_SIZE_MINUS_ONE_MASK = 0x3;
static constexpr uint8_t OFFSET_SIZE_MINUS_ONE_SHIFT = 6;
static constexpr uint8_t BASIC_TYPE_MASK = 0x3;
static constexpr uint8_t VALUE_HEADER_SHIFT = 2;
//! Object and Array header
static constexpr uint8_t FIELD_OFFSET_SIZE_MINUS_ONE_MASK = 0x3;
//! Object header
static constexpr uint8_t FIELD_ID_SIZE_MINUS_ONE_MASK = 0x3;
static constexpr uint8_t FIELD_ID_SIZE_MINUS_ONE_SHIFT = 2;
static constexpr uint8_t OBJECT_IS_LARGE_MASK = 0x1;
static constexpr uint8_t OBJECT_IS_LARGE_SHIFT = 4;
//! Array header
static constexpr uint8_t ARRAY_IS_LARGE_MASK = 0x1;
static constexpr uint8_t ARRAY_IS_LARGE_SHIFT = 2;
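// Bit layout implied by the masks above: the metadata header byte stores the
// version in bits 0-3, the sorted_strings flag in bit 4 and (offset_size - 1)
// in bits 6-7; a value header byte stores the basic type in bits 0-1, with the
// remaining six bits interpreted per basic type (sizes and is_large flags).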
using namespace duckdb_yyjson;
namespace duckdb {
namespace {
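// Reads an unsigned little-endian integer of 1 to sizeof(idx_t) bytes and
// advances the pointer past it; used for the variable-width offsets and field
// ids of the Variant encoding.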
static idx_t ReadVariableLengthLittleEndian(idx_t length_in_bytes, const_data_ptr_t &ptr) {
if (length_in_bytes > sizeof(idx_t)) {
throw NotImplementedException("Can't read little-endian value of %d bytes", length_in_bytes);
}
idx_t result = 0;
memcpy(reinterpret_cast<uint8_t *>(&result), ptr, length_in_bytes);
ptr += length_in_bytes;
return result;
}
} // namespace
VariantMetadataHeader VariantMetadataHeader::FromHeaderByte(uint8_t byte) {
VariantMetadataHeader header;
header.version = byte & VERSION_MASK;
header.sorted_strings = (byte >> SORTED_STRINGS_SHIFT) & SORTED_STRINGS_MASK;
header.offset_size = ((byte >> OFFSET_SIZE_MINUS_ONE_SHIFT) & OFFSET_SIZE_MINUS_ONE_MASK) + 1;
if (header.version != 1) {
throw NotImplementedException("Only version 1 of the Variant encoding scheme is supported, found version: %d",
header.version);
}
return header;
}
VariantMetadata::VariantMetadata(const string_t &metadata) : metadata(metadata) {
auto metadata_data = metadata.GetData();
header = VariantMetadataHeader::FromHeaderByte(metadata_data[0]);
const_data_ptr_t ptr = reinterpret_cast<const_data_ptr_t>(metadata_data + sizeof(uint8_t));
idx_t dictionary_size = ReadVariableLengthLittleEndian(header.offset_size, ptr);
auto offsets = ptr;
auto bytes = offsets + ((dictionary_size + 1) * header.offset_size);
idx_t last_offset = ReadVariableLengthLittleEndian(header.offset_size, ptr);
for (idx_t i = 0; i < dictionary_size; i++) {
auto next_offset = ReadVariableLengthLittleEndian(header.offset_size, ptr);
strings.emplace_back(reinterpret_cast<const char *>(bytes + last_offset), next_offset - last_offset);
last_offset = next_offset;
}
}
VariantValueMetadata VariantValueMetadata::FromHeaderByte(uint8_t byte) {
VariantValueMetadata result;
result.basic_type = VariantBasicTypeFromByte(byte & BASIC_TYPE_MASK);
uint8_t value_header = byte >> VALUE_HEADER_SHIFT;
switch (result.basic_type) {
case VariantBasicType::PRIMITIVE: {
result.primitive_type = VariantPrimitiveTypeFromByte(value_header);
break;
}
case VariantBasicType::SHORT_STRING: {
result.string_size = value_header;
break;
}
case VariantBasicType::OBJECT: {
result.field_offset_size = (value_header & FIELD_OFFSET_SIZE_MINUS_ONE_MASK) + 1;
result.field_id_size = ((value_header >> FIELD_ID_SIZE_MINUS_ONE_SHIFT) & FIELD_ID_SIZE_MINUS_ONE_MASK) + 1;
result.is_large = (value_header >> OBJECT_IS_LARGE_SHIFT) & OBJECT_IS_LARGE_MASK;
break;
}
case VariantBasicType::ARRAY: {
result.field_offset_size = (value_header & FIELD_OFFSET_SIZE_MINUS_ONE_MASK) + 1;
result.is_large = (value_header >> ARRAY_IS_LARGE_SHIFT) & ARRAY_IS_LARGE_MASK;
break;
}
default:
throw InternalException("VariantBasicType (%d) not handled", static_cast<uint8_t>(result.basic_type));
}
return result;
}
template <class T>
static T DecodeDecimal(const_data_ptr_t data, uint8_t &scale, uint8_t &width) {
scale = Load<uint8_t>(data);
data++;
auto result = Load<T>(data);
//! FIXME: The spec says:
//! The implied precision of a decimal value is `floor(log_10(val)) + 1`
width = DecimalWidth<T>::max;
return result;
}
template <>
hugeint_t DecodeDecimal(const_data_ptr_t data, uint8_t &scale, uint8_t &width) {
scale = Load<uint8_t>(data);
data++;
hugeint_t result;
result.lower = Load<uint64_t>(data);
result.upper = Load<int64_t>(data + sizeof(uint64_t));
//! FIXME: The spec says:
//! The implied precision of a decimal value is `floor(log_10(val)) + 1`
width = DecimalWidth<hugeint_t>::max;
return result;
}
VariantValue VariantBinaryDecoder::PrimitiveTypeDecode(const VariantValueMetadata &value_metadata,
const_data_ptr_t data) {
switch (value_metadata.primitive_type) {
case VariantPrimitiveType::NULL_TYPE: {
return VariantValue(Value());
}
case VariantPrimitiveType::BOOLEAN_TRUE: {
return VariantValue(Value::BOOLEAN(true));
}
case VariantPrimitiveType::BOOLEAN_FALSE: {
return VariantValue(Value::BOOLEAN(false));
}
case VariantPrimitiveType::INT8: {
auto value = Load<int8_t>(data);
return VariantValue(Value::TINYINT(value));
}
case VariantPrimitiveType::INT16: {
auto value = Load<int16_t>(data);
return VariantValue(Value::SMALLINT(value));
}
case VariantPrimitiveType::INT32: {
auto value = Load<int32_t>(data);
return VariantValue(Value::INTEGER(value));
}
case VariantPrimitiveType::INT64: {
auto value = Load<int64_t>(data);
return VariantValue(Value::BIGINT(value));
}
case VariantPrimitiveType::DOUBLE: {
double value = Load<double>(data);
return VariantValue(Value::DOUBLE(value));
}
case VariantPrimitiveType::FLOAT: {
float value = Load<float>(data);
return VariantValue(Value::FLOAT(value));
}
case VariantPrimitiveType::DECIMAL4: {
uint8_t scale;
uint8_t width;
auto value = DecodeDecimal<int32_t>(data, scale, width);
auto value_str = Decimal::ToString(value, width, scale);
return VariantValue(Value(value_str));
}
case VariantPrimitiveType::DECIMAL8: {
uint8_t scale;
uint8_t width;
auto value = DecodeDecimal<int64_t>(data, scale, width);
auto value_str = Decimal::ToString(value, width, scale);
return VariantValue(Value(value_str));
}
case VariantPrimitiveType::DECIMAL16: {
uint8_t scale;
uint8_t width;
auto value = DecodeDecimal<hugeint_t>(data, scale, width);
auto value_str = Decimal::ToString(value, width, scale);
return VariantValue(Value(value_str));
}
case VariantPrimitiveType::DATE: {
date_t value;
value.days = Load<int32_t>(data);
return VariantValue(Value::DATE(value));
}
case VariantPrimitiveType::TIMESTAMP_MICROS: {
timestamp_tz_t micros_ts_tz;
micros_ts_tz.value = Load<int64_t>(data);
return VariantValue(Value::TIMESTAMPTZ(micros_ts_tz));
}
case VariantPrimitiveType::TIMESTAMP_NTZ_MICROS: {
timestamp_t micros_ts;
micros_ts.value = Load<int64_t>(data);
auto value = Value::TIMESTAMP(micros_ts);
auto value_str = value.ToString();
return VariantValue(Value(value_str));
}
case VariantPrimitiveType::BINARY: {
//! Follow the JSON serialization guide by converting BINARY to Base64:
//! For example: `"dmFyaWFudAo="`
auto size = Load<uint32_t>(data);
auto string_data = reinterpret_cast<const char *>(data + sizeof(uint32_t));
auto base64_string = Blob::ToBase64(string_t(string_data, size));
return VariantValue(Value(base64_string));
}
case VariantPrimitiveType::STRING: {
auto size = Load<uint32_t>(data);
auto string_data = reinterpret_cast<const char *>(data + sizeof(uint32_t));
if (!Utf8Proc::IsValid(string_data, size)) {
throw InternalException("Can't decode Variant short-string, string isn't valid UTF8");
}
return VariantValue(Value(string(string_data, size)));
}
case VariantPrimitiveType::TIME_NTZ_MICROS: {
dtime_t micros_time;
micros_time.micros = Load<int64_t>(data);
return VariantValue(Value::TIME(micros_time));
}
case VariantPrimitiveType::TIMESTAMP_NANOS: {
timestamp_ns_t nanos_ts;
nanos_ts.value = Load<int64_t>(data);
//! Convert the nanos timestamp to a micros timestamp (not lossless)
auto micros_ts = Timestamp::FromEpochNanoSeconds(nanos_ts.value);
return VariantValue(Value::TIMESTAMPTZ(timestamp_tz_t(micros_ts)));
}
case VariantPrimitiveType::TIMESTAMP_NTZ_NANOS: {
timestamp_ns_t nanos_ts;
nanos_ts.value = Load<int64_t>(data);
auto value = Value::TIMESTAMPNS(nanos_ts);
auto value_str = value.ToString();
return VariantValue(Value(value_str));
}
case VariantPrimitiveType::UUID: {
auto uuid_value = UUIDValueConversion::ReadParquetUUID(data);
auto value_str = UUID::ToString(uuid_value);
return VariantValue(Value(value_str));
}
default:
throw NotImplementedException("Variant PrimitiveTypeDecode not implemented for type (%d)",
static_cast<uint8_t>(value_metadata.primitive_type));
}
}
VariantValue VariantBinaryDecoder::ShortStringDecode(const VariantValueMetadata &value_metadata,
const_data_ptr_t data) {
D_ASSERT(value_metadata.string_size < 64);
auto string_data = reinterpret_cast<const char *>(data);
if (!Utf8Proc::IsValid(string_data, value_metadata.string_size)) {
throw InternalException("Can't decode Variant short-string, string isn't valid UTF8");
}
return VariantValue(Value(string(string_data, value_metadata.string_size)));
}
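// Object layout as decoded below: [num_elements][field_ids ...][field_offsets
// ... (num_elements + 1 entries)][values], with ids and offsets using the
// variable widths announced in the value header.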
VariantValue VariantBinaryDecoder::ObjectDecode(const VariantMetadata &metadata,
const VariantValueMetadata &value_metadata, const_data_ptr_t data) {
VariantValue ret(VariantValueType::OBJECT);
auto field_offset_size = value_metadata.field_offset_size;
auto field_id_size = value_metadata.field_id_size;
auto is_large = value_metadata.is_large;
idx_t num_elements;
if (is_large) {
num_elements = Load<uint32_t>(data);
data += sizeof(uint32_t);
} else {
num_elements = Load<uint8_t>(data);
data += sizeof(uint8_t);
}
auto field_ids = data;
auto field_offsets = data + (num_elements * field_id_size);
auto values = field_offsets + ((num_elements + 1) * field_offset_size);
idx_t last_offset = ReadVariableLengthLittleEndian(field_offset_size, field_offsets);
for (idx_t i = 0; i < num_elements; i++) {
auto field_id = ReadVariableLengthLittleEndian(field_id_size, field_ids);
auto next_offset = ReadVariableLengthLittleEndian(field_offset_size, field_offsets);
auto value = Decode(metadata, values + last_offset);
auto &key = metadata.strings[field_id];
ret.AddChild(key, std::move(value));
last_offset = next_offset;
}
return ret;
}
VariantValue VariantBinaryDecoder::ArrayDecode(const VariantMetadata &metadata,
const VariantValueMetadata &value_metadata, const_data_ptr_t data) {
VariantValue ret(VariantValueType::ARRAY);
auto field_offset_size = value_metadata.field_offset_size;
auto is_large = value_metadata.is_large;
uint32_t num_elements;
if (is_large) {
num_elements = Load<uint32_t>(data);
data += sizeof(uint32_t);
} else {
num_elements = Load<uint8_t>(data);
data += sizeof(uint8_t);
}
auto field_offsets = data;
auto values = field_offsets + ((num_elements + 1) * field_offset_size);
idx_t last_offset = ReadVariableLengthLittleEndian(field_offset_size, field_offsets);
for (idx_t i = 0; i < num_elements; i++) {
auto next_offset = ReadVariableLengthLittleEndian(field_offset_size, field_offsets);
ret.AddItem(Decode(metadata, values + last_offset));
last_offset = next_offset;
}
return ret;
}
VariantValue VariantBinaryDecoder::Decode(const VariantMetadata &variant_metadata, const_data_ptr_t data) {
auto value_metadata = VariantValueMetadata::FromHeaderByte(data[0]);
data++;
switch (value_metadata.basic_type) {
case VariantBasicType::PRIMITIVE: {
return PrimitiveTypeDecode(value_metadata, data);
}
case VariantBasicType::SHORT_STRING: {
return ShortStringDecode(value_metadata, data);
}
case VariantBasicType::OBJECT: {
return ObjectDecode(variant_metadata, value_metadata, data);
}
case VariantBasicType::ARRAY: {
return ArrayDecode(variant_metadata, value_metadata, data);
}
default:
throw InternalException("Unexpected value for VariantBasicType");
}
}
} // namespace duckdb

View File

@@ -0,0 +1,577 @@
#include "reader/variant/variant_shredded_conversion.hpp"
#include "column_reader.hpp"
#include "utf8proc_wrapper.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/types/decimal.hpp"
#include "duckdb/common/types/uuid.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/blob.hpp"
namespace duckdb {
template <class T>
struct ConvertShreddedValue {
static VariantValue Convert(T val);
static VariantValue ConvertDecimal(T val, uint8_t width, uint8_t scale) {
throw InternalException("ConvertShreddedValue::ConvertDecimal not implemented for type");
}
static VariantValue ConvertBlob(T val) {
throw InternalException("ConvertShreddedValue::ConvertBlob not implemented for type");
}
};
//! boolean
template <>
VariantValue ConvertShreddedValue<bool>::Convert(bool val) {
return VariantValue(Value::BOOLEAN(val));
}
//! int8
template <>
VariantValue ConvertShreddedValue<int8_t>::Convert(int8_t val) {
return VariantValue(Value::TINYINT(val));
}
//! int16
template <>
VariantValue ConvertShreddedValue<int16_t>::Convert(int16_t val) {
return VariantValue(Value::SMALLINT(val));
}
//! int32
template <>
VariantValue ConvertShreddedValue<int32_t>::Convert(int32_t val) {
return VariantValue(Value::INTEGER(val));
}
//! int64
template <>
VariantValue ConvertShreddedValue<int64_t>::Convert(int64_t val) {
return VariantValue(Value::BIGINT(val));
}
//! float
template <>
VariantValue ConvertShreddedValue<float>::Convert(float val) {
return VariantValue(Value::FLOAT(val));
}
//! double
template <>
VariantValue ConvertShreddedValue<double>::Convert(double val) {
return VariantValue(Value::DOUBLE(val));
}
//! decimal4/decimal8/decimal16
template <>
VariantValue ConvertShreddedValue<int32_t>::ConvertDecimal(int32_t val, uint8_t width, uint8_t scale) {
auto value_str = Decimal::ToString(val, width, scale);
return VariantValue(Value(value_str));
}
template <>
VariantValue ConvertShreddedValue<int64_t>::ConvertDecimal(int64_t val, uint8_t width, uint8_t scale) {
auto value_str = Decimal::ToString(val, width, scale);
return VariantValue(Value(value_str));
}
template <>
VariantValue ConvertShreddedValue<hugeint_t>::ConvertDecimal(hugeint_t val, uint8_t width, uint8_t scale) {
auto value_str = Decimal::ToString(val, width, scale);
return VariantValue(Value(value_str));
}
//! date
template <>
VariantValue ConvertShreddedValue<date_t>::Convert(date_t val) {
return VariantValue(Value::DATE(val));
}
//! time
template <>
VariantValue ConvertShreddedValue<dtime_t>::Convert(dtime_t val) {
return VariantValue(Value::TIME(val));
}
//! timestamptz(6)
template <>
VariantValue ConvertShreddedValue<timestamp_tz_t>::Convert(timestamp_tz_t val) {
return VariantValue(Value::TIMESTAMPTZ(val));
}
////! timestamptz(9)
// template <>
// VariantValue ConvertShreddedValue<timestamp_ns_tz_t>::Convert(timestamp_ns_tz_t val) {
// return VariantValue(Value::TIMESTAMPNS_TZ(val));
//}
//! timestampntz(6)
template <>
VariantValue ConvertShreddedValue<timestamp_t>::Convert(timestamp_t val) {
return VariantValue(Value::TIMESTAMP(val));
}
//! timestampntz(9)
template <>
VariantValue ConvertShreddedValue<timestamp_ns_t>::Convert(timestamp_ns_t val) {
return VariantValue(Value::TIMESTAMPNS(val));
}
//! binary
template <>
VariantValue ConvertShreddedValue<string_t>::ConvertBlob(string_t val) {
return VariantValue(Value(Blob::ToBase64(val)));
}
//! string
template <>
VariantValue ConvertShreddedValue<string_t>::Convert(string_t val) {
if (!Utf8Proc::IsValid(val.GetData(), val.GetSize())) {
throw InternalException("Can't decode Variant string, it isn't valid UTF8");
}
return VariantValue(Value(val.GetString()));
}
//! uuid
template <>
VariantValue ConvertShreddedValue<hugeint_t>::Convert(hugeint_t val) {
return VariantValue(Value(UUID::ToString(val)));
}
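// Converts one shredded leaf column: rows where 'typed_value' is valid use the
// shredded value directly (with dedicated paths for decimals and blobs), rows
// where it is NULL fall back to binary-decoding 'value'; for object fields a
// row where both are NULL means the field is missing entirely.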
template <class T, class OP, LogicalTypeId TYPE_ID>
vector<VariantValue> ConvertTypedValues(Vector &vec, Vector &metadata, Vector &blob, idx_t offset, idx_t length,
idx_t total_size, const bool is_field) {
UnifiedVectorFormat metadata_format;
metadata.ToUnifiedFormat(length, metadata_format);
auto metadata_data = metadata_format.GetData<string_t>(metadata_format);
UnifiedVectorFormat typed_format;
vec.ToUnifiedFormat(total_size, typed_format);
auto data = typed_format.GetData<T>(typed_format);
UnifiedVectorFormat value_format;
blob.ToUnifiedFormat(total_size, value_format);
auto value_data = value_format.GetData<string_t>(value_format);
auto &validity = typed_format.validity;
auto &value_validity = value_format.validity;
auto &type = vec.GetType();
//! Values only used for Decimal conversion
uint8_t width;
uint8_t scale;
if (TYPE_ID == LogicalTypeId::DECIMAL) {
type.GetDecimalProperties(width, scale);
}
vector<VariantValue> ret(length);
if (validity.AllValid()) {
for (idx_t i = 0; i < length; i++) {
auto index = typed_format.sel->get_index(i + offset);
if (TYPE_ID == LogicalTypeId::DECIMAL) {
ret[i] = OP::ConvertDecimal(data[index], width, scale);
} else if (TYPE_ID == LogicalTypeId::BLOB) {
ret[i] = OP::ConvertBlob(data[index]);
} else {
ret[i] = OP::Convert(data[index]);
}
}
} else {
for (idx_t i = 0; i < length; i++) {
auto typed_index = typed_format.sel->get_index(i + offset);
auto value_index = value_format.sel->get_index(i + offset);
if (validity.RowIsValid(typed_index)) {
//! This is a leaf, partially shredded values aren't possible here
D_ASSERT(!value_validity.RowIsValid(value_index));
if (TYPE_ID == LogicalTypeId::DECIMAL) {
ret[i] = OP::ConvertDecimal(data[typed_index], width, scale);
} else if (TYPE_ID == LogicalTypeId::BLOB) {
ret[i] = OP::ConvertBlob(data[typed_index]);
} else {
ret[i] = OP::Convert(data[typed_index]);
}
} else {
if (is_field && !value_validity.RowIsValid(value_index)) {
//! Value is missing for this field
continue;
}
D_ASSERT(value_validity.RowIsValid(value_index));
auto metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
ret[i] = VariantBinaryDecoder::Decode(variant_metadata,
const_data_ptr_cast(value_data[value_index].GetData()));
}
}
}
return ret;
}
vector<VariantValue> VariantShreddedConversion::ConvertShreddedLeaf(Vector &metadata, Vector &value,
Vector &typed_value, idx_t offset, idx_t length,
idx_t total_size, const bool is_field) {
D_ASSERT(!typed_value.GetType().IsNested());
vector<VariantValue> result;
auto &type = typed_value.GetType();
switch (type.id()) {
//! boolean
case LogicalTypeId::BOOLEAN: {
return ConvertTypedValues<bool, ConvertShreddedValue<bool>, LogicalTypeId::BOOLEAN>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! int8
case LogicalTypeId::TINYINT: {
return ConvertTypedValues<int8_t, ConvertShreddedValue<int8_t>, LogicalTypeId::TINYINT>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! int16
case LogicalTypeId::SMALLINT: {
return ConvertTypedValues<int16_t, ConvertShreddedValue<int16_t>, LogicalTypeId::SMALLINT>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! int32
case LogicalTypeId::INTEGER: {
return ConvertTypedValues<int32_t, ConvertShreddedValue<int32_t>, LogicalTypeId::INTEGER>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! int64
case LogicalTypeId::BIGINT: {
return ConvertTypedValues<int64_t, ConvertShreddedValue<int64_t>, LogicalTypeId::BIGINT>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! float
case LogicalTypeId::FLOAT: {
return ConvertTypedValues<float, ConvertShreddedValue<float>, LogicalTypeId::FLOAT>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! double
case LogicalTypeId::DOUBLE: {
return ConvertTypedValues<double, ConvertShreddedValue<double>, LogicalTypeId::DOUBLE>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! decimal4/decimal8/decimal16
case LogicalTypeId::DECIMAL: {
auto physical_type = type.InternalType();
switch (physical_type) {
case PhysicalType::INT32: {
return ConvertTypedValues<int32_t, ConvertShreddedValue<int32_t>, LogicalTypeId::DECIMAL>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
case PhysicalType::INT64: {
return ConvertTypedValues<int64_t, ConvertShreddedValue<int64_t>, LogicalTypeId::DECIMAL>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
case PhysicalType::INT128: {
return ConvertTypedValues<hugeint_t, ConvertShreddedValue<hugeint_t>, LogicalTypeId::DECIMAL>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
default:
throw NotImplementedException("Decimal with PhysicalType (%s) not implemented for shredded Variant",
EnumUtil::ToString(physical_type));
}
}
//! date
case LogicalTypeId::DATE: {
return ConvertTypedValues<date_t, ConvertShreddedValue<date_t>, LogicalTypeId::DATE>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! time
case LogicalTypeId::TIME: {
return ConvertTypedValues<dtime_t, ConvertShreddedValue<dtime_t>, LogicalTypeId::TIME>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! timestamptz(6) (timestamptz(9) not implemented in DuckDB)
case LogicalTypeId::TIMESTAMP_TZ: {
return ConvertTypedValues<timestamp_tz_t, ConvertShreddedValue<timestamp_tz_t>, LogicalTypeId::TIMESTAMP_TZ>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! timestampntz(6)
case LogicalTypeId::TIMESTAMP: {
return ConvertTypedValues<timestamp_t, ConvertShreddedValue<timestamp_t>, LogicalTypeId::TIMESTAMP>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! timestampntz(9)
case LogicalTypeId::TIMESTAMP_NS: {
return ConvertTypedValues<timestamp_ns_t, ConvertShreddedValue<timestamp_ns_t>, LogicalTypeId::TIMESTAMP_NS>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! binary
case LogicalTypeId::BLOB: {
return ConvertTypedValues<string_t, ConvertShreddedValue<string_t>, LogicalTypeId::BLOB>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! string
case LogicalTypeId::VARCHAR: {
return ConvertTypedValues<string_t, ConvertShreddedValue<string_t>, LogicalTypeId::VARCHAR>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
//! uuid
case LogicalTypeId::UUID: {
return ConvertTypedValues<hugeint_t, ConvertShreddedValue<hugeint_t>, LogicalTypeId::UUID>(
typed_value, metadata, value, offset, length, total_size, is_field);
}
default:
throw NotImplementedException("Variant shredding on type: '%s' is not implemented", type.ToString());
}
}
namespace {
struct ShreddedVariantField {
public:
explicit ShreddedVariantField(const string &field_name) : field_name(field_name) {
}
public:
string field_name;
//! Values for the field, for all rows
vector<VariantValue> values;
};
} // namespace
template <bool IS_REQUIRED>
static vector<VariantValue> ConvertBinaryEncoding(Vector &metadata, Vector &value, idx_t offset, idx_t length,
idx_t total_size) {
UnifiedVectorFormat value_format;
value.ToUnifiedFormat(total_size, value_format);
auto value_data = value_format.GetData<string_t>(value_format);
auto &validity = value_format.validity;
UnifiedVectorFormat metadata_format;
metadata.ToUnifiedFormat(length, metadata_format);
auto metadata_data = metadata_format.GetData<string_t>(metadata_format);
auto metadata_validity = metadata_format.validity;
vector<VariantValue> ret(length);
if (IS_REQUIRED) {
for (idx_t i = 0; i < length; i++) {
auto index = value_format.sel->get_index(i + offset);
// Variant itself is NULL
if (!validity.RowIsValid(index) && !metadata_validity.RowIsValid(metadata_format.sel->get_index(i))) {
ret[i] = VariantValue(Value());
continue;
}
D_ASSERT(validity.RowIsValid(index));
auto &metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
auto binary_value = value_data[index].GetData();
ret[i] = VariantBinaryDecoder::Decode(variant_metadata, const_data_ptr_cast(binary_value));
}
} else {
//! Even though 'typed_value' is not present, 'value' is allowed to contain NULLs because we're scanning an
//! Object's shredded field.
//! When 'value' is null for a row, that means the Object does not contain this field
//! for that row.
for (idx_t i = 0; i < length; i++) {
auto index = value_format.sel->get_index(i + offset);
if (validity.RowIsValid(index)) {
auto &metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
auto binary_value = value_data[index].GetData();
ret[i] = VariantBinaryDecoder::Decode(variant_metadata, const_data_ptr_cast(binary_value));
}
}
}
return ret;
}
static VariantValue ConvertPartiallyShreddedObject(vector<ShreddedVariantField> &shredded_fields,
const UnifiedVectorFormat &metadata_format,
const UnifiedVectorFormat &value_format, idx_t i, idx_t offset) {
auto ret = VariantValue(VariantValueType::OBJECT);
auto index = value_format.sel->get_index(i + offset);
auto value_data = value_format.GetData<string_t>(value_format);
auto metadata_data = metadata_format.GetData<string_t>(metadata_format);
auto &value_validity = value_format.validity;
for (idx_t field_index = 0; field_index < shredded_fields.size(); field_index++) {
auto &shredded_field = shredded_fields[field_index];
auto &field_value = shredded_field.values[i];
if (field_value.IsMissing()) {
//! This field is missing from the value, skip it
continue;
}
ret.AddChild(shredded_field.field_name, std::move(field_value));
}
if (value_validity.RowIsValid(index)) {
//! Object is partially shredded, decode the object and merge the values
auto &metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
auto binary_value = value_data[index].GetData();
auto unshredded = VariantBinaryDecoder::Decode(variant_metadata, const_data_ptr_cast(binary_value));
if (unshredded.value_type != VariantValueType::OBJECT) {
throw InvalidInputException("Partially shredded objects have to encode Object Variants in the 'value'");
}
for (auto &item : unshredded.object_children) {
ret.AddChild(item.first, std::move(item.second));
}
}
return ret;
}
vector<VariantValue> VariantShreddedConversion::ConvertShreddedObject(Vector &metadata, Vector &value,
Vector &typed_value, idx_t offset, idx_t length,
idx_t total_size, const bool is_field) {
auto &type = typed_value.GetType();
D_ASSERT(type.id() == LogicalTypeId::STRUCT);
auto &fields = StructType::GetChildTypes(type);
auto &entries = StructVector::GetEntries(typed_value);
D_ASSERT(entries.size() == fields.size());
//! 'value'
UnifiedVectorFormat value_format;
value.ToUnifiedFormat(total_size, value_format);
auto value_data = value_format.GetData<string_t>(value_format);
auto &validity = value_format.validity;
(void)validity;
//! 'metadata'
UnifiedVectorFormat metadata_format;
metadata.ToUnifiedFormat(length, metadata_format);
auto metadata_data = metadata_format.GetData<string_t>(metadata_format);
//! 'typed_value'
UnifiedVectorFormat typed_format;
typed_value.ToUnifiedFormat(total_size, typed_format);
auto &typed_validity = typed_format.validity;
//! Process all fields to get the shredded field values
vector<ShreddedVariantField> shredded_fields;
shredded_fields.reserve(fields.size());
for (idx_t i = 0; i < fields.size(); i++) {
auto &field = fields[i];
auto &field_name = field.first;
auto &field_vec = *entries[i];
shredded_fields.emplace_back(field_name);
auto &shredded_field = shredded_fields.back();
shredded_field.values = Convert(metadata, field_vec, offset, length, total_size, true);
}
vector<VariantValue> ret(length);
if (typed_validity.AllValid()) {
for (idx_t i = 0; i < length; i++) {
ret[i] = ConvertPartiallyShreddedObject(shredded_fields, metadata_format, value_format, i, offset);
}
} else {
//! For some of the rows, the value is not an object
for (idx_t i = 0; i < length; i++) {
auto typed_index = typed_format.sel->get_index(i + offset);
auto value_index = value_format.sel->get_index(i + offset);
if (typed_validity.RowIsValid(typed_index)) {
ret[i] = ConvertPartiallyShreddedObject(shredded_fields, metadata_format, value_format, i, offset);
} else {
if (is_field && !validity.RowIsValid(value_index)) {
//! This object is a field in the parent object, the value is missing, skip it
continue;
}
D_ASSERT(validity.RowIsValid(value_index));
auto &metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
auto binary_value = value_data[value_index].GetData();
ret[i] = VariantBinaryDecoder::Decode(variant_metadata, const_data_ptr_cast(binary_value));
if (ret[i].value_type == VariantValueType::OBJECT) {
throw InvalidInputException(
"When 'typed_value' for a shredded Object is NULL, 'value' can not contain an Object value");
}
}
}
}
return ret;
}
vector<VariantValue> VariantShreddedConversion::ConvertShreddedArray(Vector &metadata, Vector &value,
Vector &typed_value, idx_t offset, idx_t length,
idx_t total_size, const bool is_field) {
auto &child = ListVector::GetEntry(typed_value);
auto list_size = ListVector::GetListSize(typed_value);
//! 'value'
UnifiedVectorFormat value_format;
value.ToUnifiedFormat(total_size, value_format);
auto value_data = value_format.GetData<string_t>(value_format);
//! 'metadata'
UnifiedVectorFormat metadata_format;
metadata.ToUnifiedFormat(length, metadata_format);
auto metadata_data = metadata_format.GetData<string_t>(metadata_format);
//! 'typed_value'
UnifiedVectorFormat list_format;
typed_value.ToUnifiedFormat(total_size, list_format);
auto list_data = list_format.GetData<list_entry_t>(list_format);
auto &validity = list_format.validity;
auto &value_validity = value_format.validity;
vector<VariantValue> ret(length);
if (validity.AllValid()) {
//! We can be sure that none of the values are binary encoded
for (idx_t i = 0; i < length; i++) {
auto typed_index = list_format.sel->get_index(i + offset);
auto entry = list_data[typed_index];
Vector child_metadata(metadata.GetValue(i));
ret[i] = VariantValue(VariantValueType::ARRAY);
ret[i].array_items = Convert(child_metadata, child, entry.offset, entry.length, list_size, false);
}
} else {
for (idx_t i = 0; i < length; i++) {
auto typed_index = list_format.sel->get_index(i + offset);
auto value_index = value_format.sel->get_index(i + offset);
if (validity.RowIsValid(typed_index)) {
auto entry = list_data[typed_index];
Vector child_metadata(metadata.GetValue(i));
ret[i] = VariantValue(VariantValueType::ARRAY);
ret[i].array_items = Convert(child_metadata, child, entry.offset, entry.length, list_size, false);
} else {
if (is_field && !value_validity.RowIsValid(value_index)) {
//! Value is missing for this field
continue;
}
D_ASSERT(value_validity.RowIsValid(value_index));
auto metadata_value = metadata_data[metadata_format.sel->get_index(i)];
VariantMetadata variant_metadata(metadata_value);
ret[i] = VariantBinaryDecoder::Decode(variant_metadata,
const_data_ptr_cast(value_data[value_index].GetData()));
}
}
}
return ret;
}
vector<VariantValue> VariantShreddedConversion::Convert(Vector &metadata, Vector &group, idx_t offset, idx_t length,
idx_t total_size, bool is_field) {
D_ASSERT(group.GetType().id() == LogicalTypeId::STRUCT);
auto &group_entries = StructVector::GetEntries(group);
auto &group_type_children = StructType::GetChildTypes(group.GetType());
D_ASSERT(group_type_children.size() == group_entries.size());
//! From the spec:
//! The Parquet columns used to store variant metadata and values must be accessed by name, not by position.
optional_ptr<Vector> value;
optional_ptr<Vector> typed_value;
for (idx_t i = 0; i < group_entries.size(); i++) {
auto &name = group_type_children[i].first;
auto &vec = group_entries[i];
if (name == "value") {
value = vec.get();
} else if (name == "typed_value") {
typed_value = vec.get();
} else {
throw InvalidInputException("Variant group can only contain 'value'/'typed_value', not: %s", name);
}
}
if (!value) {
throw InvalidInputException("Required column 'value' not found in Variant group");
}
if (typed_value) {
auto &type = typed_value->GetType();
vector<VariantValue> ret;
if (type.id() == LogicalTypeId::STRUCT) {
return ConvertShreddedObject(metadata, *value, *typed_value, offset, length, total_size, is_field);
} else if (type.id() == LogicalTypeId::LIST) {
return ConvertShreddedArray(metadata, *value, *typed_value, offset, length, total_size, is_field);
} else {
return ConvertShreddedLeaf(metadata, *value, *typed_value, offset, length, total_size, is_field);
}
} else {
if (is_field) {
return ConvertBinaryEncoding<false>(metadata, *value, offset, length, total_size);
} else {
//! Only 'value' is present, we can assume this to be 'required', so it can't contain NULLs
return ConvertBinaryEncoding<true>(metadata, *value, offset, length, total_size);
}
}
}
} // namespace duckdb
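
A brief note on the layout this conversion operates on (illustrative sketch, not part of the commit): every variant group is a STRUCT whose children are looked up by name, with 'value' always holding the binary-encoded fallback and an optional 'typed_value' holding the shredded representation. The helper name below is hypothetical; the types and calls mirror VariantShreddedConversion::Convert above.

#include "duckdb.hpp"

namespace duckdb {

// Hypothetical helper: builds the STRUCT type that Convert() expects for a variant group.
static LogicalType MakeVariantGroupType(optional_ptr<const LogicalType> typed_value_type) {
	child_list_t<LogicalType> children;
	// 'value' always carries the binary-encoded fallback representation
	children.emplace_back("value", LogicalType::BLOB);
	if (typed_value_type) {
		// 'typed_value' is only present for shredded columns; Convert dispatches on its type:
		//   STRUCT -> ConvertShreddedObject
		//   LIST   -> ConvertShreddedArray
		//   other  -> ConvertShreddedLeaf
		children.emplace_back("typed_value", *typed_value_type);
	}
	// Without 'typed_value', Convert falls back to pure binary decoding of 'value'.
	return LogicalType::STRUCT(std::move(children));
}

} // namespace duckdb

The 'is_field' flag passed to Convert distinguishes object fields (where a NULL 'value' alongside a NULL 'typed_value' simply means the field is absent for that row) from top-level values and array elements, for which 'value' is treated as required.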

View File

@@ -0,0 +1,85 @@
#include "reader/variant/variant_value.hpp"
namespace duckdb {
void VariantValue::AddChild(const string &key, VariantValue &&val) {
D_ASSERT(value_type == VariantValueType::OBJECT);
object_children.emplace(key, std::move(val));
}
void VariantValue::AddItem(VariantValue &&val) {
D_ASSERT(value_type == VariantValueType::ARRAY);
array_items.push_back(std::move(val));
}
yyjson_mut_val *VariantValue::ToJSON(ClientContext &context, yyjson_mut_doc *doc) const {
switch (value_type) {
case VariantValueType::PRIMITIVE: {
if (primitive_value.IsNull()) {
return yyjson_mut_null(doc);
}
switch (primitive_value.type().id()) {
case LogicalTypeId::BOOLEAN: {
if (primitive_value.GetValue<bool>()) {
return yyjson_mut_true(doc);
} else {
return yyjson_mut_false(doc);
}
}
case LogicalTypeId::TINYINT:
return yyjson_mut_int(doc, primitive_value.GetValue<int8_t>());
case LogicalTypeId::SMALLINT:
return yyjson_mut_int(doc, primitive_value.GetValue<int16_t>());
case LogicalTypeId::INTEGER:
return yyjson_mut_int(doc, primitive_value.GetValue<int32_t>());
case LogicalTypeId::BIGINT:
return yyjson_mut_int(doc, primitive_value.GetValue<int64_t>());
case LogicalTypeId::FLOAT:
return yyjson_mut_real(doc, primitive_value.GetValue<float>());
case LogicalTypeId::DOUBLE:
return yyjson_mut_real(doc, primitive_value.GetValue<double>());
case LogicalTypeId::DATE:
case LogicalTypeId::TIME:
case LogicalTypeId::VARCHAR: {
auto value_str = primitive_value.ToString();
return yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size());
}
case LogicalTypeId::TIMESTAMP: {
auto value_str = primitive_value.ToString();
return yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size());
}
case LogicalTypeId::TIMESTAMP_TZ: {
auto value_str = primitive_value.CastAs(context, LogicalType::VARCHAR).GetValue<string>();
return yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size());
}
case LogicalTypeId::TIMESTAMP_NS: {
auto value_str = primitive_value.CastAs(context, LogicalType::VARCHAR).GetValue<string>();
return yyjson_mut_strncpy(doc, value_str.c_str(), value_str.size());
}
default:
throw InternalException("Unexpected primitive type: %s", primitive_value.type().ToString());
}
}
case VariantValueType::OBJECT: {
auto obj = yyjson_mut_obj(doc);
for (const auto &it : object_children) {
auto &key = it.first;
auto value = it.second.ToJSON(context, doc);
yyjson_mut_obj_add_val(doc, obj, key.c_str(), value);
}
return obj;
}
case VariantValueType::ARRAY: {
auto arr = yyjson_mut_arr(doc);
for (auto &item : array_items) {
auto value = item.ToJSON(context, doc);
yyjson_mut_arr_add_val(arr, value);
}
return arr;
}
default:
throw InternalException("Can't serialize this VariantValue type to JSON");
}
}
} // namespace duckdb
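
For reference, a minimal end-to-end sketch of rendering a VariantValue tree to JSON text, mirroring how the column reader in the next file serializes each row. Only the VariantValue(VariantValueType) constructor, AddChild, and ToJSON calls from the file above are used; the include paths, any namespace wrapping of the bundled yyjson symbols, and the use of an in-memory database purely to obtain a ClientContext are assumptions.

#include <cstdio>
#include <cstdlib>

#include "duckdb.hpp"
#include "reader/variant/variant_value.hpp" // assumed in-tree include path
#include "yyjson.hpp"                        // assumed; symbols may live in a bundled namespace

using namespace duckdb;

int main() {
	// An in-memory database, only to obtain a ClientContext for ToJSON
	DuckDB db(nullptr);
	Connection con(db);

	// Build {"tags": []}: an OBJECT with a single ARRAY child
	VariantValue obj(VariantValueType::OBJECT);
	VariantValue tags(VariantValueType::ARRAY);
	obj.AddChild("tags", std::move(tags));

	yyjson_mut_doc *doc = yyjson_mut_doc_new(nullptr);
	yyjson_mut_val *root = obj.ToJSON(*con.context, doc);

	size_t len = 0;
	char *json = yyjson_mut_val_write_opts(root, YYJSON_WRITE_ALLOW_INF_AND_NAN, nullptr, &len, nullptr);
	printf("%.*s\n", static_cast<int>(len), json);

	free(json); // the default yyjson allocator uses malloc/free
	yyjson_mut_doc_free(doc);
	return 0;
}

Primitive leaves follow the PRIMITIVE branch of ToJSON above: integers and floats are emitted as JSON numbers, while dates, times, timestamps and strings are emitted as JSON strings.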

View File

@@ -0,0 +1,161 @@
#include "reader/variant_column_reader.hpp"
#include "reader/variant/variant_binary_decoder.hpp"
#include "reader/variant/variant_shredded_conversion.hpp"
namespace duckdb {
//===--------------------------------------------------------------------===//
// Variant Column Reader
//===--------------------------------------------------------------------===//
VariantColumnReader::VariantColumnReader(ClientContext &context, ParquetReader &reader,
const ParquetColumnSchema &schema,
vector<unique_ptr<ColumnReader>> child_readers_p)
: ColumnReader(reader, schema), context(context), child_readers(std::move(child_readers_p)) {
D_ASSERT(Type().InternalType() == PhysicalType::VARCHAR);
if (child_readers[0]->Schema().name == "metadata" && child_readers[1]->Schema().name == "value") {
metadata_reader_idx = 0;
value_reader_idx = 1;
} else if (child_readers[1]->Schema().name == "metadata" && child_readers[0]->Schema().name == "value") {
metadata_reader_idx = 1;
value_reader_idx = 0;
} else {
throw InternalException("The Variant column must have 'metadata' and 'value' as the first two columns");
}
}
ColumnReader &VariantColumnReader::GetChildReader(idx_t child_idx) {
if (!child_readers[child_idx]) {
throw InternalException("VariantColumnReader::GetChildReader(%d) - but this child reader is not set",
child_idx);
}
return *child_readers[child_idx].get();
}
void VariantColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChunk> &columns,
TProtocol &protocol_p) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->InitializeRead(row_group_idx_p, columns, protocol_p);
}
}
static LogicalType GetIntermediateGroupType(optional_ptr<ColumnReader> typed_value) {
child_list_t<LogicalType> children;
children.emplace_back("value", LogicalType::BLOB);
if (typed_value) {
children.emplace_back("typed_value", typed_value->Type());
}
return LogicalType::STRUCT(std::move(children));
}
idx_t VariantColumnReader::Read(uint64_t num_values, data_ptr_t define_out, data_ptr_t repeat_out, Vector &result) {
if (pending_skips > 0) {
throw InternalException("VariantColumnReader cannot have pending skips");
}
optional_ptr<ColumnReader> typed_value_reader = child_readers.size() == 3 ? child_readers[2].get() : nullptr;
// If the child reader values are all valid, "define_out" may not be initialized at all
// So, we just initialize them to all be valid beforehand
std::fill_n(define_out, num_values, MaxDefine());
optional_idx read_count;
Vector metadata_intermediate(LogicalType::BLOB, num_values);
Vector intermediate_group(GetIntermediateGroupType(typed_value_reader), num_values);
auto &group_entries = StructVector::GetEntries(intermediate_group);
auto &value_intermediate = *group_entries[0];
auto metadata_values =
child_readers[metadata_reader_idx]->Read(num_values, define_out, repeat_out, metadata_intermediate);
auto value_values = child_readers[value_reader_idx]->Read(num_values, define_out, repeat_out, value_intermediate);
D_ASSERT(child_readers[metadata_reader_idx]->Schema().name == "metadata");
D_ASSERT(child_readers[value_reader_idx]->Schema().name == "value");
if (metadata_values != value_values) {
throw InvalidInputException(
"The Variant column did not contain the same amount of values for 'metadata' and 'value'");
}
auto result_data = FlatVector::GetData<string_t>(result);
auto &result_validity = FlatVector::Validity(result);
vector<VariantValue> conversion_result;
if (typed_value_reader) {
auto typed_values = typed_value_reader->Read(num_values, define_out, repeat_out, *group_entries[1]);
if (typed_values != value_values) {
throw InvalidInputException(
"The shredded Variant column did not contain the same amount of values for 'typed_value' and 'value'");
}
}
conversion_result =
VariantShreddedConversion::Convert(metadata_intermediate, intermediate_group, 0, num_values, num_values, false);
for (idx_t i = 0; i < conversion_result.size(); i++) {
auto &variant = conversion_result[i];
if (variant.IsNull()) {
result_validity.SetInvalid(i);
continue;
}
//! Write the result to a string
VariantDecodeResult decode_result;
decode_result.doc = yyjson_mut_doc_new(nullptr);
auto json_val = variant.ToJSON(context, decode_result.doc);
size_t len;
decode_result.data =
yyjson_mut_val_write_opts(json_val, YYJSON_WRITE_ALLOW_INF_AND_NAN, nullptr, &len, nullptr);
if (!decode_result.data) {
throw InvalidInputException("Could not serialize the JSON to string, yyjson failed");
}
result_data[i] = StringVector::AddString(result, decode_result.data, static_cast<idx_t>(len));
}
read_count = value_values;
return read_count.GetIndex();
}
void VariantColumnReader::Skip(idx_t num_values) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->Skip(num_values);
}
}
void VariantColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) {
for (auto &child : child_readers) {
if (!child) {
continue;
}
child->RegisterPrefetch(transport, allow_merge);
}
}
uint64_t VariantColumnReader::TotalCompressedSize() {
uint64_t size = 0;
for (auto &child : child_readers) {
if (!child) {
continue;
}
size += child->TotalCompressedSize();
}
return size;
}
idx_t VariantColumnReader::GroupRowsAvailable() {
for (auto &child : child_readers) {
if (!child) {
continue;
}
return child->GroupRowsAvailable();
}
throw InternalException("No projected columns in struct?");
}
} // namespace duckdb
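
The constructor and Read() above resolve the reader's children by name rather than by position: the first two children must be 'metadata' and 'value' in either order, and a third child, when present, is treated as 'typed_value'. A standalone restatement of that rule (the struct and helper names are hypothetical, for illustration only):

#include <stdexcept>
#include <string>
#include <vector>

struct VariantChildLayout {
	size_t metadata_idx = 0;
	size_t value_idx = 1;
	bool has_typed_value = false;
};

inline VariantChildLayout ResolveVariantChildren(const std::vector<std::string> &names) {
	VariantChildLayout layout;
	if (names.size() >= 2 && names[0] == "metadata" && names[1] == "value") {
		layout.metadata_idx = 0;
		layout.value_idx = 1;
	} else if (names.size() >= 2 && names[0] == "value" && names[1] == "metadata") {
		layout.metadata_idx = 1;
		layout.value_idx = 0;
	} else {
		throw std::runtime_error("Variant column must have 'metadata' and 'value' as its first two children");
	}
	layout.has_typed_value = names.size() == 3;
	return layout;
}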

View File

@@ -0,0 +1,117 @@
//===----------------------------------------------------------------------===//
// This file is automatically generated by scripts/generate_serialization.py
// Do not edit this file manually, your changes will be overwritten
//===----------------------------------------------------------------------===//
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "parquet_reader.hpp"
#include "parquet_crypto.hpp"
#include "parquet_field_id.hpp"
#include "parquet_shredding.hpp"
namespace duckdb {
void ChildFieldIDs::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<case_insensitive_map_t<FieldID>>(100, "ids", ids.operator*());
}
ChildFieldIDs ChildFieldIDs::Deserialize(Deserializer &deserializer) {
ChildFieldIDs result;
deserializer.ReadPropertyWithDefault<case_insensitive_map_t<FieldID>>(100, "ids", result.ids.operator*());
return result;
}
void ChildShreddingTypes::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<case_insensitive_map_t<ShreddingType>>(100, "types", types.operator*());
}
ChildShreddingTypes ChildShreddingTypes::Deserialize(Deserializer &deserializer) {
ChildShreddingTypes result;
deserializer.ReadPropertyWithDefault<case_insensitive_map_t<ShreddingType>>(100, "types", result.types.operator*());
return result;
}
void FieldID::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<bool>(100, "set", set);
serializer.WritePropertyWithDefault<int32_t>(101, "field_id", field_id);
serializer.WriteProperty<ChildFieldIDs>(102, "child_field_ids", child_field_ids);
}
FieldID FieldID::Deserialize(Deserializer &deserializer) {
FieldID result;
deserializer.ReadPropertyWithDefault<bool>(100, "set", result.set);
deserializer.ReadPropertyWithDefault<int32_t>(101, "field_id", result.field_id);
deserializer.ReadProperty<ChildFieldIDs>(102, "child_field_ids", result.child_field_ids);
return result;
}
void ParquetColumnDefinition::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<int32_t>(100, "field_id", field_id);
serializer.WritePropertyWithDefault<string>(101, "name", name);
serializer.WriteProperty<LogicalType>(103, "type", type);
serializer.WriteProperty<Value>(104, "default_value", default_value);
serializer.WritePropertyWithDefault<Value>(105, "identifier", identifier, Value());
}
ParquetColumnDefinition ParquetColumnDefinition::Deserialize(Deserializer &deserializer) {
ParquetColumnDefinition result;
deserializer.ReadPropertyWithDefault<int32_t>(100, "field_id", result.field_id);
deserializer.ReadPropertyWithDefault<string>(101, "name", result.name);
deserializer.ReadProperty<LogicalType>(103, "type", result.type);
deserializer.ReadProperty<Value>(104, "default_value", result.default_value);
deserializer.ReadPropertyWithExplicitDefault<Value>(105, "identifier", result.identifier, Value());
return result;
}
void ParquetEncryptionConfig::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<string>(100, "footer_key", footer_key);
serializer.WritePropertyWithDefault<unordered_map<string, string>>(101, "column_keys", column_keys);
}
shared_ptr<ParquetEncryptionConfig> ParquetEncryptionConfig::Deserialize(Deserializer &deserializer) {
auto result = duckdb::shared_ptr<ParquetEncryptionConfig>(new ParquetEncryptionConfig());
deserializer.ReadPropertyWithDefault<string>(100, "footer_key", result->footer_key);
deserializer.ReadPropertyWithDefault<unordered_map<string, string>>(101, "column_keys", result->column_keys);
return result;
}
void ParquetOptionsSerialization::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<bool>(100, "binary_as_string", parquet_options.binary_as_string);
serializer.WritePropertyWithDefault<bool>(101, "file_row_number", parquet_options.file_row_number);
serializer.WriteProperty<MultiFileOptions>(102, "file_options", file_options);
serializer.WritePropertyWithDefault<vector<ParquetColumnDefinition>>(103, "schema", parquet_options.schema);
serializer.WritePropertyWithDefault<shared_ptr<ParquetEncryptionConfig>>(104, "encryption_config", parquet_options.encryption_config, nullptr);
serializer.WritePropertyWithDefault<bool>(105, "debug_use_openssl", parquet_options.debug_use_openssl, true);
serializer.WritePropertyWithDefault<idx_t>(106, "explicit_cardinality", parquet_options.explicit_cardinality, 0);
serializer.WritePropertyWithDefault<bool>(107, "can_have_nan", parquet_options.can_have_nan, false);
}
ParquetOptionsSerialization ParquetOptionsSerialization::Deserialize(Deserializer &deserializer) {
ParquetOptionsSerialization result;
deserializer.ReadPropertyWithDefault<bool>(100, "binary_as_string", result.parquet_options.binary_as_string);
deserializer.ReadPropertyWithDefault<bool>(101, "file_row_number", result.parquet_options.file_row_number);
deserializer.ReadProperty<MultiFileOptions>(102, "file_options", result.file_options);
deserializer.ReadPropertyWithDefault<vector<ParquetColumnDefinition>>(103, "schema", result.parquet_options.schema);
deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(104, "encryption_config", result.parquet_options.encryption_config, nullptr);
deserializer.ReadPropertyWithExplicitDefault<bool>(105, "debug_use_openssl", result.parquet_options.debug_use_openssl, true);
deserializer.ReadPropertyWithExplicitDefault<idx_t>(106, "explicit_cardinality", result.parquet_options.explicit_cardinality, 0);
deserializer.ReadPropertyWithExplicitDefault<bool>(107, "can_have_nan", result.parquet_options.can_have_nan, false);
return result;
}
void ShreddingType::Serialize(Serializer &serializer) const {
serializer.WritePropertyWithDefault<bool>(100, "set", set);
serializer.WriteProperty<LogicalType>(101, "type", type);
serializer.WriteProperty<ChildShreddingTypes>(102, "children", children);
}
ShreddingType ShreddingType::Deserialize(Deserializer &deserializer) {
ShreddingType result;
deserializer.ReadPropertyWithDefault<bool>(100, "set", result.set);
deserializer.ReadProperty<LogicalType>(101, "type", result.type);
deserializer.ReadProperty<ChildShreddingTypes>(102, "children", result.children);
return result;
}
} // namespace duckdb
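
The generated serializers above follow a fixed convention: each field gets a stable numeric ID starting at 100, and optional fields are written with WritePropertyWithDefault and read back with ReadPropertyWithDefault / ReadPropertyWithExplicitDefault, so a reader can fall back to a default when the field was omitted. A hand-written sketch (hypothetical struct, not generated, not part of the commit) following the same convention and using only the Serializer/Deserializer calls that appear in the file above:

#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"

namespace duckdb {

struct ExampleOptions { // hypothetical struct, for illustration only
	bool enabled = false;
	idx_t limit = 0;

	void Serialize(Serializer &serializer) const {
		serializer.WritePropertyWithDefault<bool>(100, "enabled", enabled);
		serializer.WritePropertyWithDefault<idx_t>(101, "limit", limit, 0);
	}
	static ExampleOptions Deserialize(Deserializer &deserializer) {
		ExampleOptions result;
		deserializer.ReadPropertyWithDefault<bool>(100, "enabled", result.enabled);
		deserializer.ReadPropertyWithExplicitDefault<idx_t>(101, "limit", result.limit, 0);
		return result;
	}
};

} // namespace duckdb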

Some files were not shown because too many files have changed in this diff.