#include "parquet_extension.hpp"
#include "duckdb.hpp"
#include "duckdb/parser/expression/positional_reference_expression.hpp"
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/tableref/subqueryref.hpp"
#include "duckdb/planner/operator/logical_projection.hpp"
#include "duckdb/planner/query_node/bound_select_node.hpp"
#include "geo_parquet.hpp"
#include "parquet_crypto.hpp"
#include "parquet_metadata.hpp"
#include "parquet_reader.hpp"
#include "parquet_writer.hpp"
#include "parquet_shredding.hpp"
#include "reader/struct_column_reader.hpp"
#include "zstd_file_system.hpp"
#include "writer/primitive_column_writer.hpp"
#include "writer/variant_column_writer.hpp"
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
#include "duckdb/common/constants.hpp"
#include "duckdb/common/enums/file_compression_type.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/multi_file/multi_file_reader.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/type_visitor.hpp"
#include "duckdb/function/copy_function.hpp"
#include "duckdb/function/pragma_function.hpp"
#include "duckdb/function/table_function.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/main/config.hpp"
#include "duckdb/main/extension/extension_loader.hpp"
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
#include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
#include "duckdb/parser/tableref/table_function_ref.hpp"
#include "duckdb/planner/expression/bound_cast_expression.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/storage/statistics/base_statistics.hpp"
#include "duckdb/storage/table/row_group.hpp"
#include "duckdb/common/multi_file/multi_file_function.hpp"
#include "duckdb/common/primitive_dictionary.hpp"
#include "duckdb/logging/log_manager.hpp"
#include "duckdb/main/settings.hpp"
#include "parquet_multi_file_info.hpp"
namespace duckdb {
struct ParquetWriteBindData : public TableFunctionData {
vector<LogicalType> sql_types;
vector<string> column_names;
duckdb_parquet::CompressionCodec::type codec = duckdb_parquet::CompressionCodec::SNAPPY;
vector<pair<string, string>> kv_metadata;
idx_t row_group_size = DEFAULT_ROW_GROUP_SIZE;
idx_t row_group_size_bytes = NumericLimits<idx_t>::Maximum();
//! How/Whether to encrypt the data
shared_ptr<ParquetEncryptionConfig> encryption_config;
bool debug_use_openssl = true;
//! After how many distinct values should we abandon dictionary compression and bloom filters?
//! Defaults to 1/5th of the row group size if unset (in templated_column_writer.hpp)
//! This needs to be set dynamically because row groups can be much smaller than "row_group_size" set here,
//! e.g., due to less data or row_group_size_bytes
optional_idx dictionary_size_limit;
//! This is huge but we grow it starting from 1 MB
idx_t string_dictionary_page_size_limit = PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE;
bool enable_bloom_filters = true;
//! What false positive rate are we willing to accept for bloom filters
double bloom_filter_false_positive_ratio = 0.01;
//! After how many row groups to rotate to a new file
optional_idx row_groups_per_file;
ChildFieldIDs field_ids;
ShreddingType shredding_types;
//! The compression level; a higher value means stronger (and slower) compression
int64_t compression_level = ZStdFileSystem::DefaultCompressionLevel();
//! Which encodings to include when writing
ParquetVersion parquet_version = ParquetVersion::V1;
//! Which geo-parquet version to use when writing
GeoParquetVersion geoparquet_version = GeoParquetVersion::V1;
};
struct ParquetWriteGlobalState : public GlobalFunctionData {
unique_ptr<ParquetWriter> writer;
optional_ptr<const PhysicalOperator> op;
void LogFlushingRowGroup(const ColumnDataCollection &buffer, const string &reason) {
if (!op) {
return;
}
DUCKDB_LOG(writer->GetContext(), PhysicalOperatorLogType, *op, "ParquetWriter", "FlushRowGroup",
{{"file", writer->GetFileName()},
{"rows", to_string(buffer.Count())},
{"size", to_string(buffer.SizeInBytes())},
{"reason", reason}});
}
mutex lock;
unique_ptr<ColumnDataCollection> combine_buffer;
};
struct ParquetWriteLocalState : public LocalFunctionData {
explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types) : buffer(context, types) {
buffer.SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data
buffer.InitializeAppend(append_state);
}
ColumnDataCollection buffer;
ColumnDataAppendState append_state;
};
static void ParquetListCopyOptions(ClientContext &context, CopyOptionsInput &input) {
auto &copy_options = input.options;
copy_options["row_group_size"] = CopyOption(LogicalType::UBIGINT, CopyOptionMode::READ_WRITE);
copy_options["chunk_size"] = CopyOption(LogicalType::UBIGINT, CopyOptionMode::WRITE_ONLY);
copy_options["row_group_size_bytes"] = CopyOption(LogicalType::ANY, CopyOptionMode::WRITE_ONLY);
copy_options["row_groups_per_file"] = CopyOption(LogicalType::UBIGINT, CopyOptionMode::WRITE_ONLY);
copy_options["compression"] = CopyOption(LogicalType::VARCHAR, CopyOptionMode::READ_WRITE);
copy_options["codec"] = CopyOption(LogicalType::VARCHAR, CopyOptionMode::READ_WRITE);
copy_options["field_ids"] = CopyOption(LogicalType::ANY, CopyOptionMode::WRITE_ONLY);
copy_options["kv_metadata"] = CopyOption(LogicalType::ANY, CopyOptionMode::WRITE_ONLY);
copy_options["encryption_config"] = CopyOption(LogicalType::ANY, CopyOptionMode::READ_WRITE);
copy_options["dictionary_compression_ratio_threshold"] = CopyOption(LogicalType::ANY, CopyOptionMode::WRITE_ONLY);
copy_options["dictionary_size_limit"] = CopyOption(LogicalType::BIGINT, CopyOptionMode::WRITE_ONLY);
copy_options["string_dictionary_page_size_limit"] = CopyOption(LogicalType::UBIGINT, CopyOptionMode::WRITE_ONLY);
copy_options["bloom_filter_false_positive_ratio"] = CopyOption(LogicalType::DOUBLE, CopyOptionMode::WRITE_ONLY);
copy_options["write_bloom_filter"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::WRITE_ONLY);
copy_options["debug_use_openssl"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_WRITE);
copy_options["compression_level"] = CopyOption(LogicalType::BIGINT, CopyOptionMode::WRITE_ONLY);
copy_options["parquet_version"] = CopyOption(LogicalType::VARCHAR, CopyOptionMode::WRITE_ONLY);
copy_options["binary_as_string"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
copy_options["file_row_number"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
copy_options["can_have_nan"] = CopyOption(LogicalType::BOOLEAN, CopyOptionMode::READ_ONLY);
copy_options["geoparquet_version"] = CopyOption(LogicalType::VARCHAR, CopyOptionMode::WRITE_ONLY);
copy_options["shredding"] = CopyOption(LogicalType::ANY, CopyOptionMode::WRITE_ONLY);
}
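// Illustrative sketch (not taken from this file) of how some of the options registered above surface in SQL;
// the table name, file name, and chosen values are placeholders:
//   COPY tbl TO 'out.parquet'
//       (FORMAT parquet, COMPRESSION zstd, COMPRESSION_LEVEL 5, ROW_GROUP_SIZE 100000, ROW_GROUPS_PER_FILE 10);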
static unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBindInput &input,
const vector<string> &names, const vector<LogicalType> &sql_types) {
D_ASSERT(names.size() == sql_types.size());
bool row_group_size_bytes_set = false;
bool compression_level_set = false;
auto bind_data = make_uniq<ParquetWriteBindData>();
for (auto &option : input.info.options) {
const auto loption = StringUtil::Lower(option.first);
if (option.second.size() != 1) {
// All parquet write options require exactly one argument
throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
}
if (loption == "row_group_size" || loption == "chunk_size") {
bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
} else if (loption == "row_group_size_bytes") {
auto roption = option.second[0];
if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
} else {
bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
}
row_group_size_bytes_set = true;
} else if (loption == "row_groups_per_file") {
bind_data->row_groups_per_file = option.second[0].GetValue<uint64_t>();
} else if (loption == "compression" || loption == "codec") {
const auto roption = StringUtil::Lower(option.second[0].ToString());
if (roption == "uncompressed") {
bind_data->codec = duckdb_parquet::CompressionCodec::UNCOMPRESSED;
} else if (roption == "snappy") {
bind_data->codec = duckdb_parquet::CompressionCodec::SNAPPY;
} else if (roption == "gzip") {
bind_data->codec = duckdb_parquet::CompressionCodec::GZIP;
} else if (roption == "zstd") {
bind_data->codec = duckdb_parquet::CompressionCodec::ZSTD;
} else if (roption == "brotli") {
bind_data->codec = duckdb_parquet::CompressionCodec::BROTLI;
} else if (roption == "lz4" || roption == "lz4_raw") {
/* LZ4 is technically a different compression scheme than LZ4_RAW, but it is deprecated and Arrow also
* treats the two interchangeably */
bind_data->codec = duckdb_parquet::CompressionCodec::LZ4_RAW;
} else {
throw BinderException(
"Expected %s argument to be any of [uncompressed, brotli, gzip, snappy, lz4, lz4_raw or zstd]",
loption);
}
} else if (loption == "field_ids") {
if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
idx_t field_id = 0;
FieldID::GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types);
} else {
unordered_set<uint32_t> unique_field_ids;
case_insensitive_map_t<LogicalType> name_to_type_map;
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) {
throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS",
FieldID::DUCKDB_FIELD_ID);
}
name_to_type_map.emplace(names[col_idx], sql_types[col_idx]);
}
FieldID::GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map);
}
} else if (loption == "shredding") {
if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
throw NotImplementedException("The 'auto' option is not yet implemented for 'shredding'");
} else {
case_insensitive_set_t variant_names;
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
if (sql_types[col_idx].id() != LogicalTypeId::STRUCT) {
continue;
}
if (sql_types[col_idx].GetAlias() != "PARQUET_VARIANT") {
continue;
}
variant_names.emplace(names[col_idx]);
}
auto &shredding_types_value = option.second[0];
if (shredding_types_value.type().id() != LogicalTypeId::STRUCT) {
BinderException("SHREDDING value should be a STRUCT of column names to types, i.e: {col1: "
"'INTEGER[]', col2: 'BOOLEAN'}");
}
const auto &struct_type = shredding_types_value.type();
const auto &struct_children = StructValue::GetChildren(shredding_types_value);
D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
for (idx_t i = 0; i < struct_children.size(); i++) {
const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
auto it = variant_names.find(col_name);
if (it == variant_names.end()) {
string names;
for (const auto &entry : variant_names) {
if (!names.empty()) {
names += ", ";
}
names += entry;
}
if (names.empty()) {
throw BinderException("VARIANT by name \"%s\" specified in SHREDDING not found. There are "
"no VARIANT columns present.",
col_name);
} else {
throw BinderException(
"VARIANT by name \"%s\" specified in SHREDDING not found. Consider using "
"WRITE_PARTITION_COLUMNS if this "
"column is a partition column. Available names of VARIANT columns: [%s]",
col_name, names);
}
}
const auto &child_value = struct_children[i];
bind_data->shredding_types.AddChild(col_name, ShreddingType::GetShreddingTypes(child_value));
}
}
} else if (loption == "kv_metadata") {
auto &kv_struct = option.second[0];
auto &kv_struct_type = kv_struct.type();
if (kv_struct_type.id() != LogicalTypeId::STRUCT) {
throw BinderException("Expected kv_metadata argument to be a STRUCT");
}
auto values = StructValue::GetChildren(kv_struct);
for (idx_t i = 0; i < values.size(); i++) {
auto &value = values[i];
auto key = StructType::GetChildName(kv_struct_type, i);
// If the value is a blob, write the raw blob bytes
// otherwise, cast to string
if (value.type().id() == LogicalTypeId::BLOB) {
bind_data->kv_metadata.emplace_back(key, StringValue::Get(value));
} else {
bind_data->kv_metadata.emplace_back(key, value.ToString());
}
}
} else if (loption == "encryption_config") {
bind_data->encryption_config = ParquetEncryptionConfig::Create(context, option.second[0]);
} else if (loption == "dictionary_compression_ratio_threshold") {
// deprecated, ignore setting
} else if (loption == "dictionary_size_limit") {
auto val = option.second[0].GetValue<int64_t>();
if (val < 0) {
throw BinderException("dictionary_size_limit must be greater than 0 or 0 to disable");
}
bind_data->dictionary_size_limit = val;
} else if (loption == "string_dictionary_page_size_limit") {
auto val = option.second[0].GetValue<uint64_t>();
if (val > PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE || val == 0) {
throw BinderException(
"string_dictionary_page_size_limit cannot be 0 and must be less than or equal to %llu",
PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE);
}
bind_data->string_dictionary_page_size_limit = val;
} else if (loption == "write_bloom_filter") {
bind_data->enable_bloom_filters = BooleanValue::Get(option.second[0].DefaultCastAs(LogicalType::BOOLEAN));
} else if (loption == "bloom_filter_false_positive_ratio") {
auto val = option.second[0].GetValue<double>();
if (val <= 0) {
throw BinderException("bloom_filter_false_positive_ratio must be greater than 0");
}
bind_data->bloom_filter_false_positive_ratio = val;
} else if (loption == "debug_use_openssl") {
auto val = StringUtil::Lower(option.second[0].GetValue<std::string>());
if (val == "false") {
bind_data->debug_use_openssl = false;
} else if (val == "true") {
bind_data->debug_use_openssl = true;
} else {
throw BinderException("Expected debug_use_openssl to be a BOOLEAN");
}
} else if (loption == "compression_level") {
const auto val = option.second[0].GetValue<int64_t>();
if (val < ZStdFileSystem::MinimumCompressionLevel() || val > ZStdFileSystem::MaximumCompressionLevel()) {
throw BinderException("Compression level must be between %lld and %lld",
ZStdFileSystem::MinimumCompressionLevel(),
ZStdFileSystem::MaximumCompressionLevel());
}
bind_data->compression_level = val;
compression_level_set = true;
} else if (loption == "parquet_version") {
const auto roption = StringUtil::Upper(option.second[0].ToString());
if (roption == "V1") {
bind_data->parquet_version = ParquetVersion::V1;
} else if (roption == "V2") {
bind_data->parquet_version = ParquetVersion::V2;
} else {
throw BinderException("Expected parquet_version 'V1' or 'V2'");
}
} else if (loption == "geoparquet_version") {
const auto roption = StringUtil::Upper(option.second[0].ToString());
if (roption == "NONE") {
bind_data->geoparquet_version = GeoParquetVersion::NONE;
} else if (roption == "V1") {
bind_data->geoparquet_version = GeoParquetVersion::V1;
} else if (roption == "V2") {
bind_data->geoparquet_version = GeoParquetVersion::V2;
} else if (roption == "BOTH") {
bind_data->geoparquet_version = GeoParquetVersion::BOTH;
} else {
throw BinderException("Expected geoparquet_version 'NONE', 'V1' or 'BOTH'");
}
} else {
throw InternalException("Unrecognized option for PARQUET: %s", option.first.c_str());
}
}
if (row_group_size_bytes_set) {
if (DBConfig::GetSetting<PreserveInsertionOrderSetting>(context)) {
throw BinderException("ROW_GROUP_SIZE_BYTES does not work while preserving insertion order. Use \"SET "
"preserve_insertion_order=false;\" to disable preserving insertion order.");
}
}
if (compression_level_set && bind_data->codec != CompressionCodec::ZSTD) {
throw BinderException("Compression level is only supported for the ZSTD compression codec");
}
bind_data->sql_types = sql_types;
bind_data->column_names = names;
return std::move(bind_data);
}
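// Rough usage sketch for the struct-valued options handled above; column and key names are invented, the shapes
// follow the FIELD_IDS and KV_METADATA branches of ParquetWriteBind:
//   COPY tbl TO 'out.parquet' (FIELD_IDS {col1: 42}, KV_METADATA {origin: 'some-job'});
//   COPY tbl TO 'out.parquet' (FIELD_IDS 'auto');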
static unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data,
const string &file_path) {
auto global_state = make_uniq<ParquetWriteGlobalState>();
auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
auto &fs = FileSystem::GetFileSystem(context);
global_state->writer = make_uniq<ParquetWriter>(
context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
parquet_bind.field_ids.Copy(), parquet_bind.shredding_types.Copy(), parquet_bind.kv_metadata,
parquet_bind.encryption_config, parquet_bind.dictionary_size_limit,
parquet_bind.string_dictionary_page_size_limit, parquet_bind.enable_bloom_filters,
parquet_bind.bloom_filter_false_positive_ratio, parquet_bind.compression_level, parquet_bind.debug_use_openssl,
parquet_bind.parquet_version, parquet_bind.geoparquet_version);
return std::move(global_state);
}
static void ParquetWriteGetWrittenStatistics(ClientContext &context, FunctionData &bind_data,
GlobalFunctionData &gstate, CopyFunctionFileStatistics &statistics) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
global_state.writer->SetWrittenStatistics(statistics);
}
static void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate,
LocalFunctionData &lstate, DataChunk &input) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
auto &local_state = lstate.Cast<ParquetWriteLocalState>();
// append data to the local (buffered) chunk collection
local_state.buffer.Append(local_state.append_state, input);
if (local_state.buffer.Count() >= bind_data.row_group_size ||
local_state.buffer.SizeInBytes() >= bind_data.row_group_size_bytes) {
const string reason =
local_state.buffer.Count() >= bind_data.row_group_size ? "ROW_GROUP_SIZE" : "ROW_GROUP_SIZE_BYTES";
global_state.LogFlushingRowGroup(local_state.buffer, reason);
// if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
local_state.append_state.current_chunk_state.handles.clear();
global_state.writer->Flush(local_state.buffer);
local_state.buffer.InitializeAppend(local_state.append_state);
}
}
static void ParquetWriteCombine(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate,
LocalFunctionData &lstate) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
auto &local_state = lstate.Cast<ParquetWriteLocalState>();
if (local_state.buffer.Count() >= bind_data.row_group_size / 2 ||
local_state.buffer.SizeInBytes() >= bind_data.row_group_size_bytes / 2) {
// local state buffer is more than half of the row_group_size(_bytes), just flush it
global_state.LogFlushingRowGroup(local_state.buffer, "Combine");
global_state.writer->Flush(local_state.buffer);
return;
}
unique_lock<mutex> guard(global_state.lock);
if (global_state.combine_buffer) {
// There is still some data, combine it
global_state.combine_buffer->Combine(local_state.buffer);
if (global_state.combine_buffer->Count() >= bind_data.row_group_size / 2 ||
global_state.combine_buffer->SizeInBytes() >= bind_data.row_group_size_bytes / 2) {
// After combining, the combine buffer is more than half of the row_group_size(_bytes), so we flush
auto owned_combine_buffer = std::move(global_state.combine_buffer);
guard.unlock();
global_state.LogFlushingRowGroup(*owned_combine_buffer, "Combine");
// Flush without holding the global lock
global_state.writer->Flush(*owned_combine_buffer);
}
return;
}
global_state.combine_buffer = make_uniq<ColumnDataCollection>(context.client, local_state.buffer.Types());
global_state.combine_buffer->Combine(local_state.buffer);
}
static void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
// flush the combine buffer (if it's there)
if (global_state.combine_buffer) {
global_state.LogFlushingRowGroup(*global_state.combine_buffer, "Finalize");
global_state.writer->Flush(*global_state.combine_buffer);
}
// finalize: write any additional metadata to the file here
global_state.writer->Finalize();
}
static unique_ptr<LocalFunctionData> ParquetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
return make_uniq<ParquetWriteLocalState>(context.client, bind_data.sql_types);
}
// LCOV_EXCL_START
// FIXME: Have these be generated instead
template <>
const char *EnumUtil::ToChars<duckdb_parquet::CompressionCodec::type>(duckdb_parquet::CompressionCodec::type value) {
switch (value) {
case CompressionCodec::UNCOMPRESSED:
return "UNCOMPRESSED";
case CompressionCodec::SNAPPY:
return "SNAPPY";
case CompressionCodec::GZIP:
return "GZIP";
case CompressionCodec::LZO:
return "LZO";
case CompressionCodec::BROTLI:
return "BROTLI";
case CompressionCodec::LZ4:
return "LZ4";
case CompressionCodec::LZ4_RAW:
return "LZ4_RAW";
case CompressionCodec::ZSTD:
return "ZSTD";
default:
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
}
template <>
duckdb_parquet::CompressionCodec::type EnumUtil::FromString<duckdb_parquet::CompressionCodec::type>(const char *value) {
if (StringUtil::Equals(value, "UNCOMPRESSED")) {
return CompressionCodec::UNCOMPRESSED;
}
if (StringUtil::Equals(value, "SNAPPY")) {
return CompressionCodec::SNAPPY;
}
if (StringUtil::Equals(value, "GZIP")) {
return CompressionCodec::GZIP;
}
if (StringUtil::Equals(value, "LZO")) {
return CompressionCodec::LZO;
}
if (StringUtil::Equals(value, "BROTLI")) {
return CompressionCodec::BROTLI;
}
if (StringUtil::Equals(value, "LZ4")) {
return CompressionCodec::LZ4;
}
if (StringUtil::Equals(value, "LZ4_RAW")) {
return CompressionCodec::LZ4_RAW;
}
if (StringUtil::Equals(value, "ZSTD")) {
return CompressionCodec::ZSTD;
}
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
template <>
const char *EnumUtil::ToChars<ParquetVersion>(ParquetVersion value) {
switch (value) {
case ParquetVersion::V1:
return "V1";
case ParquetVersion::V2:
return "V2";
default:
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
}
template <>
ParquetVersion EnumUtil::FromString<ParquetVersion>(const char *value) {
if (StringUtil::Equals(value, "V1")) {
return ParquetVersion::V1;
}
if (StringUtil::Equals(value, "V2")) {
return ParquetVersion::V2;
}
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
template <>
const char *EnumUtil::ToChars<GeoParquetVersion>(GeoParquetVersion value) {
switch (value) {
case GeoParquetVersion::NONE:
return "NONE";
case GeoParquetVersion::V1:
return "V1";
case GeoParquetVersion::V2:
return "V2";
case GeoParquetVersion::BOTH:
return "BOTH";
default:
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
}
template <>
GeoParquetVersion EnumUtil::FromString<GeoParquetVersion>(const char *value) {
if (StringUtil::Equals(value, "NONE")) {
return GeoParquetVersion::NONE;
}
if (StringUtil::Equals(value, "V1")) {
return GeoParquetVersion::V1;
}
if (StringUtil::Equals(value, "V2")) {
return GeoParquetVersion::V2;
}
if (StringUtil::Equals(value, "BOTH")) {
return GeoParquetVersion::BOTH;
}
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}
static optional_idx SerializeCompressionLevel(const int64_t compression_level) {
return compression_level < 0 ? NumericLimits<idx_t>::Maximum() - NumericCast<idx_t>(AbsValue(compression_level))
: NumericCast<idx_t>(compression_level);
}
static int64_t DeserializeCompressionLevel(const optional_idx compression_level) {
// Was originally an optional_idx, now int64_t, so we still serialize as such
if (!compression_level.IsValid()) {
return ZStdFileSystem::DefaultCompressionLevel();
}
if (compression_level.GetIndex() > NumericCast<idx_t>(ZStdFileSystem::MaximumCompressionLevel())) {
// restore the negative compression level
return -NumericCast<int64_t>(NumericLimits<idx_t>::Maximum() - compression_level.GetIndex());
}
return NumericCast<int64_t>(compression_level.GetIndex());
}
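// Worked example of the round-trip above: a negative ZSTD level such as -5 is serialized as
// NumericLimits<idx_t>::Maximum() - 5, which exceeds MaximumCompressionLevel() and is therefore mapped back to
// -5 on deserialization; a non-negative level such as 5 is stored and restored verbatim.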
static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bind_data_p,
const CopyFunction &function) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
serializer.WriteProperty(100, "sql_types", bind_data.sql_types);
serializer.WriteProperty(101, "column_names", bind_data.column_names);
serializer.WriteProperty(102, "codec", bind_data.codec);
serializer.WriteProperty(103, "row_group_size", bind_data.row_group_size);
serializer.WriteProperty(104, "row_group_size_bytes", bind_data.row_group_size_bytes);
serializer.WriteProperty(105, "kv_metadata", bind_data.kv_metadata);
serializer.WriteProperty(106, "field_ids", bind_data.field_ids);
serializer.WritePropertyWithDefault<shared_ptr<ParquetEncryptionConfig>>(107, "encryption_config",
bind_data.encryption_config, nullptr);
// 108 was dictionary_compression_ratio_threshold, but was deleted
// To avoid doubly defining the default values in both ParquetWriteBindData and here,
// and possibly making a mistake, we just get the values from ParquetWriteBindData.
// We have to std::move them, otherwise MSVC will complain that it's not a "const T &&"
const auto compression_level = SerializeCompressionLevel(bind_data.compression_level);
D_ASSERT(DeserializeCompressionLevel(compression_level) == bind_data.compression_level);
ParquetWriteBindData default_value;
serializer.WritePropertyWithDefault(109, "compression_level", compression_level);
serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file,
default_value.row_groups_per_file);
serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl,
default_value.debug_use_openssl);
serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit,
default_value.dictionary_size_limit);
serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio",
bind_data.bloom_filter_false_positive_ratio,
default_value.bloom_filter_false_positive_ratio);
serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version,
default_value.parquet_version);
serializer.WritePropertyWithDefault(115, "string_dictionary_page_size_limit",
bind_data.string_dictionary_page_size_limit,
default_value.string_dictionary_page_size_limit);
serializer.WritePropertyWithDefault(116, "geoparquet_version", bind_data.geoparquet_version,
default_value.geoparquet_version);
serializer.WriteProperty(117, "shredding_types", bind_data.shredding_types);
}
static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
auto data = make_uniq<ParquetWriteBindData>();
data->sql_types = deserializer.ReadProperty<vector<LogicalType>>(100, "sql_types");
data->column_names = deserializer.ReadProperty<vector<string>>(101, "column_names");
data->codec = deserializer.ReadProperty<duckdb_parquet::CompressionCodec::type>(102, "codec");
data->row_group_size = deserializer.ReadProperty<idx_t>(103, "row_group_size");
data->row_group_size_bytes = deserializer.ReadProperty<idx_t>(104, "row_group_size_bytes");
data->kv_metadata = deserializer.ReadProperty<vector<pair<string, string>>>(105, "kv_metadata");
data->field_ids = deserializer.ReadProperty<ChildFieldIDs>(106, "field_ids");
deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(
107, "encryption_config", data->encryption_config, std::move(ParquetWriteBindData().encryption_config));
deserializer.ReadDeletedProperty<double>(108, "dictionary_compression_ratio_threshold");
optional_idx compression_level;
deserializer.ReadPropertyWithDefault<optional_idx>(109, "compression_level", compression_level);
data->compression_level = DeserializeCompressionLevel(compression_level);
D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level);
ParquetWriteBindData default_value;
data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault<optional_idx>(
110, "row_groups_per_file", default_value.row_groups_per_file);
data->debug_use_openssl =
deserializer.ReadPropertyWithExplicitDefault<bool>(111, "debug_use_openssl", default_value.debug_use_openssl);
data->dictionary_size_limit =
deserializer.ReadPropertyWithExplicitDefault<optional_idx>(112, "dictionary_size_limit", optional_idx());
data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault<double>(
113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio);
data->parquet_version =
deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version);
data->string_dictionary_page_size_limit = deserializer.ReadPropertyWithExplicitDefault(
115, "string_dictionary_page_size_limit", default_value.string_dictionary_page_size_limit);
data->geoparquet_version =
deserializer.ReadPropertyWithExplicitDefault(116, "geoparquet_version", default_value.geoparquet_version);
data->shredding_types = deserializer.ReadProperty<ShreddingType>(117, "shredding_types");
return std::move(data);
}
// LCOV_EXCL_STOP
//===--------------------------------------------------------------------===//
// Execution Mode
//===--------------------------------------------------------------------===//
static CopyFunctionExecutionMode ParquetWriteExecutionMode(bool preserve_insertion_order, bool supports_batch_index) {
if (!preserve_insertion_order) {
return CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE;
}
if (supports_batch_index) {
return CopyFunctionExecutionMode::BATCH_COPY_TO_FILE;
}
return CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE;
}
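// Note: the fully parallel path is only chosen when insertion order is not preserved, e.g. after
// "SET preserve_insertion_order = false;" (the same setting checked in ParquetWriteBind); otherwise the batch
// or regular single-threaded copy path is used depending on batch index support.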
//===--------------------------------------------------------------------===//
// Initialize Logger
//===--------------------------------------------------------------------===//
static void ParquetWriteInitializeOperator(GlobalFunctionData &gstate, const PhysicalOperator &op) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
global_state.op = &op;
}
//===--------------------------------------------------------------------===//
// Prepare Batch
//===--------------------------------------------------------------------===//
struct ParquetWriteBatchData : public PreparedBatchData {
PreparedRowGroup prepared_row_group;
};
static unique_ptr<PreparedBatchData> ParquetWritePrepareBatch(ClientContext &context, FunctionData &bind_data,
GlobalFunctionData &gstate,
unique_ptr<ColumnDataCollection> collection) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
auto result = make_uniq<ParquetWriteBatchData>();
global_state.writer->PrepareRowGroup(*collection, result->prepared_row_group);
return std::move(result);
}
//===--------------------------------------------------------------------===//
// Flush Batch
//===--------------------------------------------------------------------===//
static void ParquetWriteFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
PreparedBatchData &batch_p) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
auto &batch = batch_p.Cast<ParquetWriteBatchData>();
global_state.writer->FlushRowGroup(batch.prepared_row_group);
}
//===--------------------------------------------------------------------===//
// Desired Batch Size
//===--------------------------------------------------------------------===//
static idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_data_p) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
return bind_data.row_group_size;
}
//===--------------------------------------------------------------------===//
// File rotation
//===--------------------------------------------------------------------===//
static bool ParquetWriteRotateFiles(FunctionData &bind_data_p, const optional_idx &file_size_bytes) {
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
return file_size_bytes.IsValid() || bind_data.row_groups_per_file.IsValid();
}
static bool ParquetWriteRotateNextFile(GlobalFunctionData &gstate, FunctionData &bind_data_p,
const optional_idx &file_size_bytes) {
auto &global_state = gstate.Cast<ParquetWriteGlobalState>();
auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
if (file_size_bytes.IsValid() && global_state.writer->FileSize() > file_size_bytes.GetIndex()) {
return true;
}
if (bind_data.row_groups_per_file.IsValid() &&
global_state.writer->NumberOfRowGroups() >= bind_data.row_groups_per_file.GetIndex()) {
return true;
}
return false;
}
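// Hypothetical example of row-group-based rotation using the option parsed in ParquetWriteBind (target path and
// value are placeholders):
//   COPY tbl TO 'out' (FORMAT parquet, ROW_GROUPS_PER_FILE 2);
// starts a new file once the current writer holds two row groups; size-based rotation is driven by the
// file_size_bytes limit supplied by the copy operator.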
//===--------------------------------------------------------------------===//
// Scan Replacement
//===--------------------------------------------------------------------===//
static unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, ReplacementScanInput &input,
optional_ptr<ReplacementScanData> data) {
auto table_name = ReplacementScan::GetFullPath(input);
if (!ReplacementScan::CanReplace(table_name, {"parquet"})) {
return nullptr;
}
auto table_function = make_uniq<TableFunctionRef>();
vector<unique_ptr<ParsedExpression>> children;
children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
if (!FileSystem::HasGlob(table_name)) {
auto &fs = FileSystem::GetFileSystem(context);
table_function->alias = fs.ExtractBaseName(table_name);
}
return std::move(table_function);
}
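// Sketch of the replacement scan in action (the path is arbitrary): a query such as
//   SELECT * FROM 'data/my_file.parquet';
// is rewritten into parquet_scan('data/my_file.parquet'), with the file's base name used as the alias when the
// path contains no glob characters.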
//===--------------------------------------------------------------------===//
// Select
//===--------------------------------------------------------------------===//
// Helper predicates for ParquetWriteSelect
static bool IsTypeNotSupported(const LogicalType &type) {
if (type.IsNested()) {
return false;
}
return !ParquetWriter::TryGetParquetType(type);
}
static bool IsTypeLossy(const LogicalType &type) {
return type.id() == LogicalTypeId::HUGEINT || type.id() == LogicalTypeId::UHUGEINT;
}
static bool IsGeometryType(const LogicalType &type, ClientContext &context) {
if (type.id() != LogicalTypeId::BLOB) {
return false;
}
if (!type.HasAlias()) {
return false;
}
if (type.GetAlias() != "GEOMETRY") {
return false;
}
return GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context);
}
static string GetShredding(case_insensitive_map_t<vector<Value>> &options, const string &col_name) {
//! At this point, the options haven't been parsed yet, so we have to parse them ourselves.
auto it = options.find("shredding");
if (it == options.end()) {
return string();
}
auto &shredding = it->second;
if (shredding.empty()) {
return string();
}
auto &shredding_val = shredding[0];
if (shredding_val.type().id() != LogicalTypeId::STRUCT) {
return string();
}
auto &shredded_variants = StructType::GetChildTypes(shredding_val.type());
auto &values = StructValue::GetChildren(shredding_val);
for (idx_t i = 0; i < shredded_variants.size(); i++) {
auto &shredded_variant = shredded_variants[i];
if (shredded_variant.first != col_name) {
continue;
}
auto &shredded_val = values[i];
if (shredded_val.type().id() != LogicalTypeId::VARCHAR) {
return string();
}
return shredded_val.GetValue<string>();
}
return string();
}
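// Hedged example of the SHREDDING option this helper inspects (column name and shredded type are illustrative,
// the struct-of-type-names shape matches the error message in ParquetWriteBind):
//   COPY tbl TO 'out.parquet' (FORMAT parquet, SHREDDING {variant_col: 'BOOLEAN'});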
static vector<unique_ptr<Expression>> ParquetWriteSelect(CopyToSelectInput &input) {
auto &context = input.context;
vector<unique_ptr<Expression>> result;
bool any_change = false;
for (auto &expr : input.select_list) {
const auto &type = expr->return_type;
const auto &name = expr->GetAlias();
// Spatial types need to be encoded into WKB when writing GeoParquet.
// But don't perform this conversion if this is an EXPORT DATABASE statement
if (input.copy_to_type == CopyToType::COPY_TO_FILE && IsGeometryType(type, context)) {
LogicalType wkb_blob_type(LogicalTypeId::BLOB);
wkb_blob_type.SetAlias("WKB_BLOB");
auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), wkb_blob_type, false);
cast_expr->SetAlias(name);
result.push_back(std::move(cast_expr));
any_change = true;
} else if (input.copy_to_type == CopyToType::COPY_TO_FILE && type.id() == LogicalTypeId::VARIANT) {
vector<unique_ptr<Expression>> arguments;
arguments.push_back(std::move(expr));
auto shredded_type_str = GetShredding(input.options, name);
if (!shredded_type_str.empty()) {
arguments.push_back(make_uniq<BoundConstantExpression>(Value(shredded_type_str)));
}
auto transform_func = VariantColumnWriter::GetTransformFunction();
transform_func.bind(context, transform_func, arguments);
auto func_expr = make_uniq<BoundFunctionExpression>(transform_func.return_type, transform_func,
std::move(arguments), nullptr, false);
func_expr->SetAlias(name);
result.push_back(std::move(func_expr));
any_change = true;
}
// If this is an EXPORT DATABASE statement, we don't want to write "lossy" types; instead we cast them to VARCHAR
else if (input.copy_to_type == CopyToType::EXPORT_DATABASE && TypeVisitor::Contains(type, IsTypeLossy)) {
// Replace all lossy types with VARCHAR
auto new_type = TypeVisitor::VisitReplace(
type, [](const LogicalType &ty) -> LogicalType { return IsTypeLossy(ty) ? LogicalType::VARCHAR : ty; });
// Cast the column to the new type
auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), new_type, false);
cast_expr->SetAlias(name);
result.push_back(std::move(cast_expr));
any_change = true;
}
// Else look if there is any unsupported type
else if (TypeVisitor::Contains(type, IsTypeNotSupported)) {
// If there is at least one unsupported type, replace all unsupported types with varchar
// and perform a CAST
auto new_type = TypeVisitor::VisitReplace(type, [](const LogicalType &ty) -> LogicalType {
return IsTypeNotSupported(ty) ? LogicalType::VARCHAR : ty;
});
auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), new_type, false);
cast_expr->SetAlias(name);
result.push_back(std::move(cast_expr));
any_change = true;
}
// Otherwise, just reference the input column
else {
result.push_back(std::move(expr));
}
}
// If any change was made, return the new expressions
// otherwise, return an empty vector to indicate no change and avoid pushing another projection onto the plan
if (any_change) {
return result;
}
return {};
}
static void LoadInternal(ExtensionLoader &loader) {
auto &db_instance = loader.GetDatabaseInstance();
auto &fs = db_instance.GetFileSystem();
fs.RegisterSubSystem(FileCompressionType::ZSTD, make_uniq<ZStdFileSystem>());
auto scan_fun = ParquetScanFunction::GetFunctionSet();
scan_fun.name = "read_parquet";
loader.RegisterFunction(scan_fun);
scan_fun.name = "parquet_scan";
loader.RegisterFunction(scan_fun);
// parquet_metadata
ParquetMetaDataFunction meta_fun;
loader.RegisterFunction(MultiFileReader::CreateFunctionSet(meta_fun));
// parquet_schema
ParquetSchemaFunction schema_fun;
loader.RegisterFunction(MultiFileReader::CreateFunctionSet(schema_fun));
// parquet_key_value_metadata
ParquetKeyValueMetadataFunction kv_meta_fun;
loader.RegisterFunction(MultiFileReader::CreateFunctionSet(kv_meta_fun));
// parquet_file_metadata
ParquetFileMetadataFunction file_meta_fun;
loader.RegisterFunction(MultiFileReader::CreateFunctionSet(file_meta_fun));
// parquet_bloom_probe
ParquetBloomProbeFunction bloom_probe_fun;
loader.RegisterFunction(MultiFileReader::CreateFunctionSet(bloom_probe_fun));
// variant_to_parquet_variant
loader.RegisterFunction(VariantColumnWriter::GetTransformFunction());
CopyFunction function("parquet");
function.copy_to_select = ParquetWriteSelect;
function.copy_to_bind = ParquetWriteBind;
function.copy_options = ParquetListCopyOptions;
function.copy_to_initialize_global = ParquetWriteInitializeGlobal;
function.copy_to_initialize_local = ParquetWriteInitializeLocal;
function.copy_to_get_written_statistics = ParquetWriteGetWrittenStatistics;
function.copy_to_sink = ParquetWriteSink;
function.copy_to_combine = ParquetWriteCombine;
function.copy_to_finalize = ParquetWriteFinalize;
function.execution_mode = ParquetWriteExecutionMode;
function.initialize_operator = ParquetWriteInitializeOperator;
function.copy_from_bind = MultiFileFunction<ParquetMultiFileInfo>::MultiFileBindCopy;
function.copy_from_function = scan_fun.functions[0];
function.prepare_batch = ParquetWritePrepareBatch;
function.flush_batch = ParquetWriteFlushBatch;
function.desired_batch_size = ParquetWriteDesiredBatchSize;
function.rotate_files = ParquetWriteRotateFiles;
function.rotate_next_file = ParquetWriteRotateNextFile;
function.serialize = ParquetCopySerialize;
function.deserialize = ParquetCopyDeserialize;
function.extension = "parquet";
loader.RegisterFunction(function);
// parquet_key
auto parquet_key_fun = PragmaFunction::PragmaCall("add_parquet_key", ParquetCrypto::AddKey,
{LogicalType::VARCHAR, LogicalType::VARCHAR});
loader.RegisterFunction(parquet_key_fun);
auto &config = DBConfig::GetConfig(db_instance);
config.replacement_scans.emplace_back(ParquetScanReplacement);
config.AddExtensionOption("binary_as_string", "In Parquet files, interpret binary data as a string.",
LogicalType::BOOLEAN, Value(false));
config.AddExtensionOption("disable_parquet_prefetching", "Disable the prefetching mechanism in Parquet",
LogicalType::BOOLEAN, Value(false));
config.AddExtensionOption("prefetch_all_parquet_files",
"Use the prefetching mechanism for all types of parquet files", LogicalType::BOOLEAN,
Value(false));
config.AddExtensionOption("parquet_metadata_cache",
"Cache Parquet metadata - useful when reading the same files multiple times",
LogicalType::BOOLEAN, Value(false));
config.AddExtensionOption(
"enable_geoparquet_conversion",
"Attempt to decode/encode geometry data in/as GeoParquet files if the spatial extension is present.",
LogicalType::BOOLEAN, Value::BOOLEAN(true));
config.AddExtensionOption("variant_legacy_encoding",
"Enables the Parquet reader to identify a Variant structurally.", LogicalType::BOOLEAN,
Value::BOOLEAN(false));
}
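// Illustrative use of two of the entry points registered above (the key name and key material are placeholders;
// the key itself must be one that ParquetCrypto::AddKey accepts):
//   PRAGMA add_parquet_key('key_name', '0123456789112345');
//   SET binary_as_string = true;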
void ParquetExtension::Load(ExtensionLoader &loader) {
LoadInternal(loader);
}
std::string ParquetExtension::Name() {
return "parquet";
}
std::string ParquetExtension::Version() const {
#ifdef EXT_VERSION_PARQUET
return EXT_VERSION_PARQUET;
#else
return "";
#endif
}
} // namespace duckdb
#ifdef DUCKDB_BUILD_LOADABLE_EXTENSION
extern "C" {
DUCKDB_CPP_EXTENSION_ENTRY(parquet, loader) { // NOLINT
duckdb::LoadInternal(loader);
}
}
#endif