commit f09560c7b1 (parent a4b23fc57c)
Date: 2025-10-24 19:21:19 -05:00

    should be it

14047 changed files with 3161551 additions and 1 deletion

@@ -0,0 +1,669 @@
#include "column_writer.hpp"
#include "duckdb.hpp"
#include "geo_parquet.hpp"
#include "parquet_rle_bp_decoder.hpp"
#include "parquet_bss_encoder.hpp"
#include "parquet_statistics.hpp"
#include "parquet_writer.hpp"
#include "writer/array_column_writer.hpp"
#include "writer/boolean_column_writer.hpp"
#include "writer/decimal_column_writer.hpp"
#include "writer/enum_column_writer.hpp"
#include "writer/list_column_writer.hpp"
#include "writer/primitive_column_writer.hpp"
#include "writer/struct_column_writer.hpp"
#include "writer/variant_column_writer.hpp"
#include "writer/templated_column_writer.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/operator/comparison_operators.hpp"
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_map_set.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "brotli/encode.h"
#include "lz4.hpp"
#include "miniz_wrapper.hpp"
#include "snappy.h"
#include "zstd.h"
#include <cmath>
namespace duckdb {
using namespace duckdb_parquet; // NOLINT
using namespace duckdb_miniz; // NOLINT
using duckdb_parquet::CompressionCodec;
using duckdb_parquet::ConvertedType;
using duckdb_parquet::Encoding;
using duckdb_parquet::FieldRepetitionType;
using duckdb_parquet::FileMetaData;
using duckdb_parquet::PageHeader;
using duckdb_parquet::PageType;
using ParquetRowGroup = duckdb_parquet::RowGroup;
using duckdb_parquet::Type;
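// out-of-line definition for the ODR-used static constexpr member (required prior to C++17)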
constexpr uint16_t ColumnWriter::PARQUET_DEFINE_VALID;
//===--------------------------------------------------------------------===//
// ColumnWriterStatistics
//===--------------------------------------------------------------------===//
ColumnWriterStatistics::~ColumnWriterStatistics() {
}
bool ColumnWriterStatistics::HasStats() {
return false;
}
string ColumnWriterStatistics::GetMin() {
return string();
}
string ColumnWriterStatistics::GetMax() {
return string();
}
string ColumnWriterStatistics::GetMinValue() {
return string();
}
string ColumnWriterStatistics::GetMaxValue() {
return string();
}
bool ColumnWriterStatistics::CanHaveNaN() {
return false;
}
bool ColumnWriterStatistics::HasNaN() {
return false;
}
bool ColumnWriterStatistics::MinIsExact() {
return true;
}
bool ColumnWriterStatistics::MaxIsExact() {
return true;
}
bool ColumnWriterStatistics::HasGeoStats() {
return false;
}
optional_ptr<GeometryStatsData> ColumnWriterStatistics::GetGeoStats() {
return nullptr;
}
void ColumnWriterStatistics::WriteGeoStats(duckdb_parquet::GeospatialStatistics &stats) {
D_ASSERT(false); // this should never be called
}
//===--------------------------------------------------------------------===//
// ColumnWriter
//===--------------------------------------------------------------------===//
ColumnWriter::ColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
vector<string> schema_path_p, bool can_have_nulls)
: writer(writer), column_schema(column_schema), schema_path(std::move(schema_path_p)),
can_have_nulls(can_have_nulls) {
}
ColumnWriter::~ColumnWriter() {
}
ColumnWriterState::~ColumnWriterState() {
}
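// Compresses the page held in "temp_writer" with the codec configured on the writer.
// On return, "compressed_data" points either into "temp_writer" (UNCOMPRESSED) or into
// "compressed_buf", which then owns the newly allocated compressed page.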
void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
AllocatedData &compressed_buf) {
switch (writer.GetCodec()) {
case CompressionCodec::UNCOMPRESSED:
compressed_size = temp_writer.GetPosition();
compressed_data = temp_writer.GetData();
break;
case CompressionCodec::SNAPPY: {
compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(),
char_ptr_cast(compressed_buf.get()), &compressed_size);
compressed_data = compressed_buf.get();
D_ASSERT(compressed_size <= duckdb_snappy::MaxCompressedLength(temp_writer.GetPosition()));
break;
}
case CompressionCodec::LZ4_RAW: {
compressed_size = duckdb_lz4::LZ4_compressBound(UnsafeNumericCast<int32_t>(temp_writer.GetPosition()));
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
compressed_size = duckdb_lz4::LZ4_compress_default(
const_char_ptr_cast(temp_writer.GetData()), char_ptr_cast(compressed_buf.get()),
UnsafeNumericCast<int32_t>(temp_writer.GetPosition()), UnsafeNumericCast<int32_t>(compressed_size));
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::GZIP: {
MiniZStream s;
compressed_size = s.MaxCompressedLength(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
s.Compress(const_char_ptr_cast(temp_writer.GetData()), temp_writer.GetPosition(),
char_ptr_cast(compressed_buf.get()), &compressed_size);
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::ZSTD: {
compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
(const void *)temp_writer.GetData(), temp_writer.GetPosition(),
UnsafeNumericCast<int32_t>(writer.CompressionLevel()));
compressed_data = compressed_buf.get();
break;
}
case CompressionCodec::BROTLI: {
compressed_size = duckdb_brotli::BrotliEncoderMaxCompressedSize(temp_writer.GetPosition());
compressed_buf = BufferAllocator::Get(writer.GetContext()).Allocate(compressed_size);
duckdb_brotli::BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
temp_writer.GetPosition(), temp_writer.GetData(), &compressed_size,
compressed_buf.get());
compressed_data = compressed_buf.get();
break;
}
default:
throw InternalException("Unsupported codec for Parquet Writer");
}
if (compressed_size > idx_t(NumericLimits<int32_t>::Maximum())) {
throw InternalException("Parquet writer: %d compressed page size out of range for type integer",
temp_writer.GetPosition());
}
}
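// Repetition levels are produced by the repeated (list) ancestors; a child simply copies
// any levels the parent has emitted beyond what the child has recorded so far.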
void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterState *parent, idx_t count) const {
if (!parent) {
// no repeat levels without a parent node
return;
}
if (state.repetition_levels.size() >= parent->repetition_levels.size()) {
return;
}
state.repetition_levels.insert(state.repetition_levels.end(),
parent->repetition_levels.begin() + state.repetition_levels.size(),
parent->repetition_levels.end());
}
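// Definition levels follow the Dremel encoding: each value records how many of its
// optional/repeated ancestors (including the column itself) are defined. For a top-level
// nullable column with max_define = 1, define_value = 1 marks a non-NULL row and
// null_value = 0 marks a NULL row.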
void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
const idx_t count, const uint16_t define_value, const uint16_t null_value) const {
if (parent) {
// parent node: inherit definition level from the parent
idx_t vector_index = 0;
while (state.definition_levels.size() < parent->definition_levels.size()) {
idx_t current_index = state.definition_levels.size();
if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) {
//! Inherit nulls from parent
state.definition_levels.push_back(parent->definition_levels[current_index]);
state.parent_null_count++;
} else if (validity.RowIsValid(vector_index)) {
//! Produce a non-null define
state.definition_levels.push_back(define_value);
} else {
//! Produce a null define
if (!can_have_nulls) {
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
}
state.null_count++;
state.definition_levels.push_back(null_value);
}
D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size());
if (parent->is_empty.empty() || !parent->is_empty[current_index]) {
vector_index++;
}
}
return;
}
// no parent: set definition levels only from this validity mask
if (validity.AllValid()) {
state.definition_levels.insert(state.definition_levels.end(), count, define_value);
} else {
for (idx_t i = 0; i < count; i++) {
const auto is_null = !validity.RowIsValid(i);
state.definition_levels.emplace_back(is_null ? null_value : define_value);
state.null_count += is_null;
}
}
if (!can_have_nulls && state.null_count != 0) {
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
}
}
//===--------------------------------------------------------------------===//
// Create Column Writer
//===--------------------------------------------------------------------===//
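// Recursively appends the Thrift schema elements for "type" to "schemas" (depth-first,
// parents before children) and returns a ParquetColumnSchema tree that mirrors them.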
ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
const LogicalType &type, const string &name, bool allow_geometry,
optional_ptr<const ChildFieldIDs> field_ids,
optional_ptr<const ShreddingType> shredding_types, idx_t max_repeat,
idx_t max_define, bool can_have_nulls) {
auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
if (!can_have_nulls) {
max_define--;
}
idx_t schema_idx = schemas.size();
optional_ptr<const FieldID> field_id;
optional_ptr<const ChildFieldIDs> child_field_ids;
if (field_ids) {
auto field_id_it = field_ids->ids->find(name);
if (field_id_it != field_ids->ids->end()) {
field_id = &field_id_it->second;
child_field_ids = &field_id->child_field_ids;
}
}
optional_ptr<const ShreddingType> shredding_type;
if (shredding_types) {
shredding_type = shredding_types->GetChild(name);
}
if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
// variant type
// variants are stored as follows:
// group <name> VARIANT {
// metadata BYTE_ARRAY,
// value BYTE_ARRAY,
// [<typed_value>]
// }
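// when a shredding type is provided, an additional strongly typed "typed_value" child is
// emitted and "value" becomes optional (it then only holds values that do not match the
// shredded type)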
const bool is_shredded = shredding_type != nullptr;
child_list_t<LogicalType> child_types;
child_types.emplace_back("metadata", LogicalType::BLOB);
child_types.emplace_back("value", LogicalType::BLOB);
if (is_shredded) {
auto &typed_value_type = shredding_type->type;
if (typed_value_type.id() != LogicalTypeId::ANY) {
child_types.emplace_back("typed_value",
VariantColumnWriter::TransformTypedValueRecursive(typed_value_type));
}
}
// variant group
duckdb_parquet::SchemaElement top_element;
top_element.repetition_type = null_type;
top_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
top_element.logicalType.__isset.VARIANT = true;
top_element.logicalType.VARIANT.__isset.specification_version = true;
top_element.logicalType.VARIANT.specification_version = 1;
top_element.__isset.logicalType = true;
top_element.__isset.num_children = true;
top_element.__isset.repetition_type = true;
top_element.name = name;
schemas.push_back(std::move(top_element));
ParquetColumnSchema variant_column(name, type, max_define, max_repeat, schema_idx, 0);
variant_column.children.reserve(child_types.size());
for (auto &child_type : child_types) {
auto &child_name = child_type.first;
bool is_optional;
if (child_name == "metadata") {
is_optional = false;
} else if (child_name == "value") {
if (is_shredded) {
//! When shredding the variant, the 'value' becomes optional
is_optional = true;
} else {
is_optional = false;
}
} else {
D_ASSERT(child_name == "typed_value");
is_optional = true;
}
variant_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
allow_geometry, child_field_ids, shredding_type,
max_repeat, max_define + 1, is_optional));
}
return variant_column;
}
if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
auto &child_types = StructType::GetChildTypes(type);
// set up the schema element for this struct
duckdb_parquet::SchemaElement schema_element;
schema_element.repetition_type = null_type;
schema_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
schema_element.__isset.num_children = true;
schema_element.__isset.type = false;
schema_element.__isset.repetition_type = true;
schema_element.name = name;
if (field_id && field_id->set) {
schema_element.__isset.field_id = true;
schema_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(schema_element));
ParquetColumnSchema struct_column(name, type, max_define, max_repeat, schema_idx, 0);
// construct the child schemas recursively
struct_column.children.reserve(child_types.size());
for (auto &child_type : child_types) {
struct_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
allow_geometry, child_field_ids, shredding_type,
max_repeat, max_define + 1, true));
}
return struct_column;
}
if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
auto is_list = type.id() == LogicalTypeId::LIST;
auto &child_type = is_list ? ListType::GetChildType(type) : ArrayType::GetChildType(type);
// set up the two schema elements for the list
// the LIST converted type is set only on the outer element, per the Parquet spec
// first an OPTIONAL element
duckdb_parquet::SchemaElement optional_element;
optional_element.repetition_type = null_type;
optional_element.num_children = 1;
optional_element.converted_type = ConvertedType::LIST;
optional_element.__isset.num_children = true;
optional_element.__isset.type = false;
optional_element.__isset.repetition_type = true;
optional_element.__isset.converted_type = true;
optional_element.name = name;
if (field_id && field_id->set) {
optional_element.__isset.field_id = true;
optional_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(optional_element));
// then a REPEATED element
duckdb_parquet::SchemaElement repeated_element;
repeated_element.repetition_type = FieldRepetitionType::REPEATED;
repeated_element.num_children = 1;
repeated_element.__isset.num_children = true;
repeated_element.__isset.type = false;
repeated_element.__isset.repetition_type = true;
repeated_element.name = "list";
schemas.push_back(std::move(repeated_element));
ParquetColumnSchema list_column(name, type, max_define, max_repeat, schema_idx, 0);
list_column.children.push_back(FillParquetSchema(schemas, child_type, "element", allow_geometry,
child_field_ids, shredding_type, max_repeat + 1,
max_define + 2, true));
return list_column;
}
if (type.id() == LogicalTypeId::MAP) {
// map type
// maps are stored as follows:
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
// top map element
duckdb_parquet::SchemaElement top_element;
top_element.repetition_type = null_type;
top_element.num_children = 1;
top_element.converted_type = ConvertedType::MAP;
top_element.__isset.repetition_type = true;
top_element.__isset.num_children = true;
top_element.__isset.converted_type = true;
top_element.__isset.type = false;
top_element.name = name;
if (field_id && field_id->set) {
top_element.__isset.field_id = true;
top_element.field_id = field_id->field_id;
}
schemas.push_back(std::move(top_element));
// key_value element
duckdb_parquet::SchemaElement kv_element;
kv_element.repetition_type = FieldRepetitionType::REPEATED;
kv_element.num_children = 2;
kv_element.__isset.repetition_type = true;
kv_element.__isset.num_children = true;
kv_element.__isset.type = false;
kv_element.name = "key_value";
schemas.push_back(std::move(kv_element));
// construct the child types recursively
vector<LogicalType> kv_types {MapType::KeyType(type), MapType::ValueType(type)};
vector<string> kv_names {"key", "value"};
ParquetColumnSchema map_column(name, type, max_define, max_repeat, schema_idx, 0);
map_column.children.reserve(2);
for (idx_t i = 0; i < 2; i++) {
// key needs to be marked as REQUIRED
bool is_key = i == 0;
auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], allow_geometry, child_field_ids,
shredding_type, max_repeat + 1, max_define + 2, !is_key);
map_column.children.push_back(std::move(child_schema));
}
return map_column;
}
duckdb_parquet::SchemaElement schema_element;
schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
schema_element.repetition_type = null_type;
schema_element.__isset.num_children = false;
schema_element.__isset.type = true;
schema_element.__isset.repetition_type = true;
schema_element.name = name;
if (field_id && field_id->set) {
schema_element.__isset.field_id = true;
schema_element.field_id = field_id->field_id;
}
ParquetWriter::SetSchemaProperties(type, schema_element, allow_geometry);
schemas.push_back(std::move(schema_element));
return ParquetColumnSchema(name, type, max_define, max_repeat, schema_idx, 0);
}
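// Builds the tree of ColumnWriter instances matching a schema produced by FillParquetSchema.
// A minimal usage sketch (argument values are hypothetical, shown for illustration only):
//   vector<duckdb_parquet::SchemaElement> schemas;
//   auto schema = ColumnWriter::FillParquetSchema(schemas, LogicalType::INTEGER, "i", false,
//                                                 nullptr, nullptr, 0, 1, true);
//   auto col_writer = ColumnWriter::CreateWriterRecursive(context, writer, schemas, schema, {});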
unique_ptr<ColumnWriter>
ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
const vector<duckdb_parquet::SchemaElement> &parquet_schemas,
const ParquetColumnSchema &schema, vector<string> path_in_schema) {
auto &type = schema.type;
auto can_have_nulls = parquet_schemas[schema.schema_index].repetition_type == FieldRepetitionType::OPTIONAL;
path_in_schema.push_back(schema.name);
if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(schema.children.size());
for (idx_t i = 0; i < schema.children.size(); i++) {
child_writers.push_back(
CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema));
}
return make_uniq<VariantColumnWriter>(writer, schema, path_in_schema, std::move(child_writers), can_have_nulls);
}
if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
// construct the child writers recursively
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(schema.children.size());
for (auto &child_column : schema.children) {
child_writers.push_back(
CreateWriterRecursive(context, writer, parquet_schemas, child_column, path_in_schema));
}
return make_uniq<StructColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writers),
can_have_nulls);
}
if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
auto is_list = type.id() == LogicalTypeId::LIST;
path_in_schema.push_back("list");
auto child_writer = CreateWriterRecursive(context, writer, parquet_schemas, schema.children[0], path_in_schema);
if (is_list) {
return make_uniq<ListColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
can_have_nulls);
} else {
return make_uniq<ArrayColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
can_have_nulls);
}
}
if (type.id() == LogicalTypeId::MAP) {
path_in_schema.push_back("key_value");
// construct the child types recursively
vector<unique_ptr<ColumnWriter>> child_writers;
child_writers.reserve(2);
for (idx_t i = 0; i < 2; i++) {
// key needs to be marked as REQUIRED
auto child_writer =
CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema);
child_writers.push_back(std::move(child_writer));
}
auto struct_writer =
make_uniq<StructColumnWriter>(writer, schema, path_in_schema, std::move(child_writers), can_have_nulls);
return make_uniq<ListColumnWriter>(writer, schema, path_in_schema, std::move(struct_writer), can_have_nulls);
}
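// geometry columns arrive encoded as WKB blobs; a dedicated operator writes them as
// BYTE_ARRAY values so geometry-specific (GeoParquet) statistics can be collected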
if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB") {
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetGeometryOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
}
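// every remaining type maps onto a StandardColumnWriter instantiation: the first template
// argument is the DuckDB source type, the second the physical Parquet target type, and the
// optional third an operator that converts between the two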
switch (type.id()) {
case LogicalTypeId::BOOLEAN:
return make_uniq<BooleanColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TINYINT:
return make_uniq<StandardColumnWriter<int8_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::SMALLINT:
return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::INTEGER:
case LogicalTypeId::DATE:
return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::BIGINT:
case LogicalTypeId::TIME:
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
case LogicalTypeId::TIMESTAMP_MS:
return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::TIME_TZ:
return make_uniq<StandardColumnWriter<dtime_tz_t, int64_t, ParquetTimeTZOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::HUGEINT:
return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UHUGEINT:
return make_uniq<StandardColumnWriter<uhugeint_t, double, ParquetUhugeintOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TIMESTAMP_NS:
return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampNSOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::TIMESTAMP_SEC:
return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampSOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UTINYINT:
return make_uniq<StandardColumnWriter<uint8_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::USMALLINT:
return make_uniq<StandardColumnWriter<uint16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::UINTEGER:
return make_uniq<StandardColumnWriter<uint32_t, uint32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::UBIGINT:
return make_uniq<StandardColumnWriter<uint64_t, uint64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case LogicalTypeId::FLOAT:
return make_uniq<StandardColumnWriter<float_na_equal, float, FloatingPointOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::DOUBLE:
return make_uniq<StandardColumnWriter<double_na_equal, double, FloatingPointOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::DECIMAL:
switch (type.InternalType()) {
case PhysicalType::INT16:
return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case PhysicalType::INT32:
return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
case PhysicalType::INT64:
return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
can_have_nulls);
default:
return make_uniq<FixedDecimalColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
}
case LogicalTypeId::BLOB:
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetBlobOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::VARCHAR:
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::UUID:
return make_uniq<StandardColumnWriter<hugeint_t, ParquetUUIDTargetType, ParquetUUIDOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::INTERVAL:
return make_uniq<StandardColumnWriter<interval_t, ParquetIntervalTargetType, ParquetIntervalOperator>>(
writer, schema, std::move(path_in_schema), can_have_nulls);
case LogicalTypeId::ENUM:
return make_uniq<EnumColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
default:
throw InternalException("Unsupported type \"%s\" in Parquet writer", type.ToString());
}
}
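// float_na_equal / double_na_equal wrap float/double with NaN-tolerant equality; the
// NumericLimits specializations below forward to the underlying type so the templated
// writers can treat the wrappers like plain floating-point values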
template <>
struct NumericLimits<float_na_equal> {
static constexpr float Minimum() {
return std::numeric_limits<float>::lowest();
}
static constexpr float Maximum() {
return std::numeric_limits<float>::max();
}
static constexpr bool IsSigned() {
return std::is_signed<float>::value;
}
static constexpr bool IsIntegral() {
return std::is_integral<float>::value;
}
};
template <>
struct NumericLimits<double_na_equal> {
static constexpr double Minimum() {
return std::numeric_limits<double>::lowest();
}
static constexpr double Maximum() {
return std::numeric_limits<double>::max();
}
static constexpr bool IsSigned() {
return std::is_signed<double>::value;
}
static constexpr bool IsIntegral() {
return std::is_integral<double>::value;
}
};
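// Hash specializations allow these wrapper types to be used as hash-map keys (e.g. when
// building dictionary pages); every NaN is normalized to the quiet-NaN hash so that all
// NaNs, which compare equal under *_na_equal, also hash equally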
template <>
hash_t Hash(ParquetIntervalTargetType val) {
return Hash(const_char_ptr_cast(val.bytes), ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
}
template <>
hash_t Hash(ParquetUUIDTargetType val) {
return Hash(const_char_ptr_cast(val.bytes), ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}
template <>
hash_t Hash(float_na_equal val) {
if (std::isnan(val.val)) {
return Hash<float>(std::numeric_limits<float>::quiet_NaN());
}
return Hash<float>(val.val);
}
template <>
hash_t Hash(double_na_equal val) {
if (std::isnan(val.val)) {
return Hash<double>(std::numeric_limits<double>::quiet_NaN());
}
return Hash<double>(val.val);
}
} // namespace duckdb