#include "column_writer.hpp" #include "duckdb.hpp" #include "geo_parquet.hpp" #include "parquet_rle_bp_decoder.hpp" #include "parquet_bss_encoder.hpp" #include "parquet_statistics.hpp" #include "parquet_writer.hpp" #include "writer/array_column_writer.hpp" #include "writer/boolean_column_writer.hpp" #include "writer/decimal_column_writer.hpp" #include "writer/enum_column_writer.hpp" #include "writer/list_column_writer.hpp" #include "writer/primitive_column_writer.hpp" #include "writer/struct_column_writer.hpp" #include "writer/variant_column_writer.hpp" #include "writer/templated_column_writer.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/common/operator/comparison_operators.hpp" #include "duckdb/common/serializer/buffered_file_writer.hpp" #include "duckdb/common/serializer/memory_stream.hpp" #include "duckdb/common/serializer/write_stream.hpp" #include "duckdb/common/string_map_set.hpp" #include "duckdb/common/types/hugeint.hpp" #include "duckdb/common/types/time.hpp" #include "duckdb/common/types/timestamp.hpp" #include "duckdb/execution/expression_executor.hpp" #include "brotli/encode.h" #include "lz4.hpp" #include "miniz_wrapper.hpp" #include "snappy.h" #include "zstd.h" #include namespace duckdb { using namespace duckdb_parquet; // NOLINT using namespace duckdb_miniz; // NOLINT using duckdb_parquet::CompressionCodec; using duckdb_parquet::ConvertedType; using duckdb_parquet::Encoding; using duckdb_parquet::FieldRepetitionType; using duckdb_parquet::FileMetaData; using duckdb_parquet::PageHeader; using duckdb_parquet::PageType; using ParquetRowGroup = duckdb_parquet::RowGroup; using duckdb_parquet::Type; constexpr uint16_t ColumnWriter::PARQUET_DEFINE_VALID; //===--------------------------------------------------------------------===// // ColumnWriterStatistics //===--------------------------------------------------------------------===// ColumnWriterStatistics::~ColumnWriterStatistics() { } bool ColumnWriterStatistics::HasStats() { return false; } string ColumnWriterStatistics::GetMin() { return string(); } string ColumnWriterStatistics::GetMax() { return string(); } string ColumnWriterStatistics::GetMinValue() { return string(); } string ColumnWriterStatistics::GetMaxValue() { return string(); } bool ColumnWriterStatistics::CanHaveNaN() { return false; } bool ColumnWriterStatistics::HasNaN() { return false; } bool ColumnWriterStatistics::MinIsExact() { return true; } bool ColumnWriterStatistics::MaxIsExact() { return true; } bool ColumnWriterStatistics::HasGeoStats() { return false; } optional_ptr ColumnWriterStatistics::GetGeoStats() { return nullptr; } void ColumnWriterStatistics::WriteGeoStats(duckdb_parquet::GeospatialStatistics &stats) { D_ASSERT(false); // this should never be called } //===--------------------------------------------------------------------===// // ColumnWriter //===--------------------------------------------------------------------===// ColumnWriter::ColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema, vector schema_path_p, bool can_have_nulls) : writer(writer), column_schema(column_schema), schema_path(std::move(schema_path_p)), can_have_nulls(can_have_nulls) { } ColumnWriter::~ColumnWriter() { } ColumnWriterState::~ColumnWriterState() { } void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, AllocatedData &compressed_buf) { switch (writer.GetCodec()) { case CompressionCodec::UNCOMPRESSED: compressed_size = temp_writer.GetPosition(); compressed_data = 

void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterState *parent, idx_t count) const {
	if (!parent) {
		// no repeat levels without a parent node
		return;
	}
	if (state.repetition_levels.size() >= parent->repetition_levels.size()) {
		return;
	}
	state.repetition_levels.insert(state.repetition_levels.end(),
	                               parent->repetition_levels.begin() + state.repetition_levels.size(),
	                               parent->repetition_levels.end());
}

void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent,
                                      const ValidityMask &validity, const idx_t count, const uint16_t define_value,
                                      const uint16_t null_value) const {
	if (parent) {
		// parent node: inherit definition level from the parent
		idx_t vector_index = 0;
		while (state.definition_levels.size() < parent->definition_levels.size()) {
			idx_t current_index = state.definition_levels.size();
			if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) {
				//! Inherit nulls from parent
				state.definition_levels.push_back(parent->definition_levels[current_index]);
				state.parent_null_count++;
			} else if (validity.RowIsValid(vector_index)) {
				//! Produce a non-null define
				state.definition_levels.push_back(define_value);
			} else {
				//! Produce a null define
				if (!can_have_nulls) {
					throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
				}
				state.null_count++;
				state.definition_levels.push_back(null_value);
			}
			D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size());
			if (parent->is_empty.empty() || !parent->is_empty[current_index]) {
				vector_index++;
			}
		}
		return;
	}
	// no parent: set definition levels only from this validity mask
	if (validity.AllValid()) {
		state.definition_levels.insert(state.definition_levels.end(), count, define_value);
	} else {
		for (idx_t i = 0; i < count; i++) {
			const auto is_null = !validity.RowIsValid(i);
			state.definition_levels.emplace_back(is_null ? null_value : define_value);
			state.null_count += is_null;
		}
	}
	if (!can_have_nulls && state.null_count != 0) {
		throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
	}
}
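
// Worked example: for a root-level nullable column with values [42, NULL, 7],
// max_define is 1, and a leaf writer would call this with define_value = 1
// (max_define) and null_value = 0 (max_define - 1), producing definition
// levels [1, 0, 1]. The NULL row stores no value at all; readers reconstruct
// it purely from the level. With a parent (e.g. a struct that is itself NULL),
// the child copies the parent's lower definition level instead, and only
// advances vector_index for entries the parent actually produced.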

//===--------------------------------------------------------------------===//
// Create Column Writer
//===--------------------------------------------------------------------===//
ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::SchemaElement> &schemas,
                                                    const LogicalType &type, const string &name, bool allow_geometry,
                                                    optional_ptr<const ChildFieldIDs> field_ids,
                                                    optional_ptr<const ShreddingType> shredding_types,
                                                    idx_t max_repeat, idx_t max_define, bool can_have_nulls) {
	auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
	if (!can_have_nulls) {
		max_define--;
	}
	idx_t schema_idx = schemas.size();

	optional_ptr<const FieldID> field_id;
	optional_ptr<const ChildFieldIDs> child_field_ids;
	if (field_ids) {
		auto field_id_it = field_ids->ids->find(name);
		if (field_id_it != field_ids->ids->end()) {
			field_id = &field_id_it->second;
			child_field_ids = &field_id->child_field_ids;
		}
	}
	optional_ptr<const ShreddingType> shredding_type;
	if (shredding_types) {
		shredding_type = shredding_types->GetChild(name);
	}

	if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
		// variant type
		// variants are stored as follows:
		// group VARIANT {
		//   metadata BYTE_ARRAY,
		//   value BYTE_ARRAY,
		//   [typed_value <shredded type>]
		// }
		const bool is_shredded = shredding_type != nullptr;
		child_list_t<LogicalType> child_types;
		child_types.emplace_back("metadata", LogicalType::BLOB);
		child_types.emplace_back("value", LogicalType::BLOB);
		if (is_shredded) {
			auto &typed_value_type = shredding_type->type;
			if (typed_value_type.id() != LogicalTypeId::ANY) {
				child_types.emplace_back("typed_value",
				                         VariantColumnWriter::TransformTypedValueRecursive(typed_value_type));
			}
		}

		// variant group
		duckdb_parquet::SchemaElement top_element;
		top_element.repetition_type = null_type;
		top_element.num_children = child_types.size();
		top_element.logicalType.__isset.VARIANT = true;
		top_element.logicalType.VARIANT.__isset.specification_version = true;
		top_element.logicalType.VARIANT.specification_version = 1;
		top_element.__isset.logicalType = true;
		top_element.__isset.num_children = true;
		top_element.__isset.repetition_type = true;
		top_element.name = name;
		schemas.push_back(std::move(top_element));

		ParquetColumnSchema variant_column(name, type, max_define, max_repeat, schema_idx, 0);
		variant_column.children.reserve(child_types.size());
		for (auto &child_type : child_types) {
			auto &child_name = child_type.first;
			bool is_optional;
			if (child_name == "metadata") {
				is_optional = false;
			} else if (child_name == "value") {
				if (is_shredded) {
					//! When shredding the variant, the 'value' becomes optional
					is_optional = true;
				} else {
					is_optional = false;
				}
			} else {
				D_ASSERT(child_name == "typed_value");
				is_optional = true;
			}
			variant_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
			                                                       allow_geometry, child_field_ids, shredding_type,
			                                                       max_repeat, max_define + 1, is_optional));
		}
		return variant_column;
	}
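
	// For illustration: shredding a VARIANT column "v" as BIGINT yields the
	// following schema (names per the Parquet VARIANT spec):
	//
	//   optional group v (VARIANT) {
	//     required binary metadata;
	//     optional binary value;
	//     optional int64 typed_value;
	//   }
	//
	// Unshredded variants have no 'typed_value' child and a required 'value'.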
	if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
		auto &child_types = StructType::GetChildTypes(type);
		// set up the schema element for this struct
		duckdb_parquet::SchemaElement schema_element;
		schema_element.repetition_type = null_type;
		schema_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
		schema_element.__isset.num_children = true;
		schema_element.__isset.type = false;
		schema_element.__isset.repetition_type = true;
		schema_element.name = name;
		if (field_id && field_id->set) {
			schema_element.__isset.field_id = true;
			schema_element.field_id = field_id->field_id;
		}
		schemas.push_back(std::move(schema_element));

		ParquetColumnSchema struct_column(name, type, max_define, max_repeat, schema_idx, 0);
		// construct the child schemas recursively
		struct_column.children.reserve(child_types.size());
		for (auto &child_type : child_types) {
			struct_column.children.emplace_back(FillParquetSchema(schemas, child_type.second, child_type.first,
			                                                      allow_geometry, child_field_ids, shredding_type,
			                                                      max_repeat, max_define + 1, true));
		}
		return struct_column;
	}
	if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
		auto is_list = type.id() == LogicalTypeId::LIST;
		auto &child_type = is_list ? ListType::GetChildType(type) : ArrayType::GetChildType(type);
		// set up the two schema elements for the list
		// for some reason we only set the converted type in the OPTIONAL element
		// first an OPTIONAL element
		duckdb_parquet::SchemaElement optional_element;
		optional_element.repetition_type = null_type;
		optional_element.num_children = 1;
		optional_element.converted_type = ConvertedType::LIST;
		optional_element.__isset.num_children = true;
		optional_element.__isset.type = false;
		optional_element.__isset.repetition_type = true;
		optional_element.__isset.converted_type = true;
		optional_element.name = name;
		if (field_id && field_id->set) {
			optional_element.__isset.field_id = true;
			optional_element.field_id = field_id->field_id;
		}
		schemas.push_back(std::move(optional_element));

		// then a REPEATED element
		duckdb_parquet::SchemaElement repeated_element;
		repeated_element.repetition_type = FieldRepetitionType::REPEATED;
		repeated_element.num_children = 1;
		repeated_element.__isset.num_children = true;
		repeated_element.__isset.type = false;
		repeated_element.__isset.repetition_type = true;
		repeated_element.name = "list";
		schemas.push_back(std::move(repeated_element));

		ParquetColumnSchema list_column(name, type, max_define, max_repeat, schema_idx, 0);
		list_column.children.push_back(FillParquetSchema(schemas, child_type, "element", allow_geometry,
		                                                 child_field_ids, shredding_type, max_repeat + 1,
		                                                 max_define + 2, true));
		return list_column;
	}
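
	// For illustration: a nullable LIST(INTEGER) column "l" produces
	//
	//   optional group l (LIST) {
	//     repeated group list {
	//       optional int32 element;
	//     }
	//   }
	//
	// The child is created with max_repeat + 1 because the repeated group adds
	// a repetition level, and max_define + 2 because the repeated group and the
	// optional element each add a definition level (the recursion subtracts one
	// again for REQUIRED children).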
	if (type.id() == LogicalTypeId::MAP) {
		// map type
		// maps are stored as follows:
		// group <name> (MAP) {
		//   repeated group key_value {
		//     required <key-type> key;
		//     <value-type> value;
		//   }
		// }
		// top map element
		duckdb_parquet::SchemaElement top_element;
		top_element.repetition_type = null_type;
		top_element.num_children = 1;
		top_element.converted_type = ConvertedType::MAP;
		top_element.__isset.repetition_type = true;
		top_element.__isset.num_children = true;
		top_element.__isset.converted_type = true;
		top_element.__isset.type = false;
		top_element.name = name;
		if (field_id && field_id->set) {
			top_element.__isset.field_id = true;
			top_element.field_id = field_id->field_id;
		}
		schemas.push_back(std::move(top_element));

		// key_value element
		duckdb_parquet::SchemaElement kv_element;
		kv_element.repetition_type = FieldRepetitionType::REPEATED;
		kv_element.num_children = 2;
		kv_element.__isset.repetition_type = true;
		kv_element.__isset.num_children = true;
		kv_element.__isset.type = false;
		kv_element.name = "key_value";
		schemas.push_back(std::move(kv_element));

		// construct the child types recursively
		vector<LogicalType> kv_types {MapType::KeyType(type), MapType::ValueType(type)};
		vector<string> kv_names {"key", "value"};
		ParquetColumnSchema map_column(name, type, max_define, max_repeat, schema_idx, 0);
		map_column.children.reserve(2);
		for (idx_t i = 0; i < 2; i++) {
			// key needs to be marked as REQUIRED
			bool is_key = i == 0;
			auto child_schema = FillParquetSchema(schemas, kv_types[i], kv_names[i], allow_geometry, child_field_ids,
			                                      shredding_type, max_repeat + 1, max_define + 2, !is_key);
			map_column.children.push_back(std::move(child_schema));
		}
		return map_column;
	}
	duckdb_parquet::SchemaElement schema_element;
	schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
	schema_element.repetition_type = null_type;
	schema_element.__isset.num_children = false;
	schema_element.__isset.type = true;
	schema_element.__isset.repetition_type = true;
	schema_element.name = name;
	if (field_id && field_id->set) {
		schema_element.__isset.field_id = true;
		schema_element.field_id = field_id->field_id;
	}
	ParquetWriter::SetSchemaProperties(type, schema_element, allow_geometry);
	schemas.push_back(std::move(schema_element));
	return ParquetColumnSchema(name, type, max_define, max_repeat, schema_idx, 0);
}
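
// A minimal sketch of how the two halves cooperate (guarded out of the build;
// the real driver lives in parquet_writer.cpp): FillParquetSchema appends the
// flattened Thrift schema elements depth-first and returns the logical column
// tree, which CreateWriterRecursive walks again to build one writer per root
// column. The 0/1 root repeat/define levels and the helper itself are
// assumptions of this sketch.
#ifdef DUCKDB_PARQUET_WRITER_SKETCH
static vector<unique_ptr<ColumnWriter>> BuildColumnWriters(ClientContext &context, ParquetWriter &writer,
                                                           const vector<LogicalType> &types,
                                                           const vector<string> &names,
                                                           vector<duckdb_parquet::SchemaElement> &schemas,
                                                           vector<ParquetColumnSchema> &columns) {
	// the writers keep references into 'columns', so it must outlive them
	for (idx_t i = 0; i < types.size(); i++) {
		columns.push_back(ColumnWriter::FillParquetSchema(schemas, types[i], names[i], /* allow_geometry = */ false,
		                                                  nullptr, nullptr, /* max_repeat = */ 0,
		                                                  /* max_define = */ 1, /* can_have_nulls = */ true));
	}
	vector<unique_ptr<ColumnWriter>> writers;
	for (auto &column : columns) {
		writers.push_back(ColumnWriter::CreateWriterRecursive(context, writer, schemas, column, {}));
	}
	return writers;
}
#endif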

unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &context, ParquetWriter &writer,
                                                             const vector<duckdb_parquet::SchemaElement> &parquet_schemas,
                                                             const ParquetColumnSchema &schema,
                                                             vector<string> path_in_schema) {
	auto &type = schema.type;
	auto can_have_nulls = parquet_schemas[schema.schema_index].repetition_type == FieldRepetitionType::OPTIONAL;
	path_in_schema.push_back(schema.name);

	if (type.id() == LogicalTypeId::STRUCT && type.GetAlias() == "PARQUET_VARIANT") {
		vector<unique_ptr<ColumnWriter>> child_writers;
		child_writers.reserve(schema.children.size());
		for (idx_t i = 0; i < schema.children.size(); i++) {
			child_writers.push_back(
			    CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema));
		}
		return make_uniq<VariantColumnWriter>(writer, schema, path_in_schema, std::move(child_writers),
		                                      can_have_nulls);
	}
	if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
		// construct the child writers recursively
		vector<unique_ptr<ColumnWriter>> child_writers;
		child_writers.reserve(schema.children.size());
		for (auto &child_column : schema.children) {
			child_writers.push_back(
			    CreateWriterRecursive(context, writer, parquet_schemas, child_column, path_in_schema));
		}
		return make_uniq<StructColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writers),
		                                     can_have_nulls);
	}
	if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) {
		auto is_list = type.id() == LogicalTypeId::LIST;
		path_in_schema.push_back("list");
		auto child_writer =
		    CreateWriterRecursive(context, writer, parquet_schemas, schema.children[0], path_in_schema);
		if (is_list) {
			return make_uniq<ListColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
			                                   can_have_nulls);
		} else {
			return make_uniq<ArrayColumnWriter>(writer, schema, std::move(path_in_schema), std::move(child_writer),
			                                    can_have_nulls);
		}
	}
	if (type.id() == LogicalTypeId::MAP) {
		path_in_schema.push_back("key_value");
		// construct the child types recursively
		vector<unique_ptr<ColumnWriter>> child_writers;
		child_writers.reserve(2);
		for (idx_t i = 0; i < 2; i++) {
			// key needs to be marked as REQUIRED
			auto child_writer =
			    CreateWriterRecursive(context, writer, parquet_schemas, schema.children[i], path_in_schema);
			child_writers.push_back(std::move(child_writer));
		}
		auto struct_writer =
		    make_uniq<StructColumnWriter>(writer, schema, path_in_schema, std::move(child_writers), can_have_nulls);
		return make_uniq<ListColumnWriter>(writer, schema, path_in_schema, std::move(struct_writer), can_have_nulls);
	}
	if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB") {
		return make_uniq<StandardColumnWriter<string_t, string_t, ParquetGeometryOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	}
	switch (type.id()) {
	case LogicalTypeId::BOOLEAN:
		return make_uniq<BooleanColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::TINYINT:
		return make_uniq<StandardColumnWriter<int8_t, int32_t>>(writer, schema, std::move(path_in_schema),
		                                                        can_have_nulls);
	case LogicalTypeId::SMALLINT:
		return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
		                                                         can_have_nulls);
	case LogicalTypeId::INTEGER:
	case LogicalTypeId::DATE:
		return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
		                                                         can_have_nulls);
	case LogicalTypeId::BIGINT:
	case LogicalTypeId::TIME:
	case LogicalTypeId::TIMESTAMP:
	case LogicalTypeId::TIMESTAMP_TZ:
	case LogicalTypeId::TIMESTAMP_MS:
		return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
		                                                         can_have_nulls);
	case LogicalTypeId::TIME_TZ:
		return make_uniq<StandardColumnWriter<dtime_tz_t, int64_t, ParquetTimeTZOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::HUGEINT:
		return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::UHUGEINT:
		return make_uniq<StandardColumnWriter<uhugeint_t, double, ParquetUhugeintOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::TIMESTAMP_NS:
		return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampNSOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::TIMESTAMP_SEC:
		return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampSOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::UTINYINT:
		return make_uniq<StandardColumnWriter<uint8_t, int32_t>>(writer, schema, std::move(path_in_schema),
		                                                         can_have_nulls);
	case LogicalTypeId::USMALLINT:
		return make_uniq<StandardColumnWriter<uint16_t, int32_t>>(writer, schema, std::move(path_in_schema),
		                                                          can_have_nulls);
	case LogicalTypeId::UINTEGER:
		return make_uniq<StandardColumnWriter<uint32_t, uint32_t>>(writer, schema, std::move(path_in_schema),
		                                                           can_have_nulls);
	case LogicalTypeId::UBIGINT:
		return make_uniq<StandardColumnWriter<uint64_t, uint64_t>>(writer, schema, std::move(path_in_schema),
		                                                           can_have_nulls);
	case LogicalTypeId::FLOAT:
		return make_uniq<StandardColumnWriter<float_na_equal, float>>(writer, schema, std::move(path_in_schema),
		                                                              can_have_nulls);
	case LogicalTypeId::DOUBLE:
		return make_uniq<StandardColumnWriter<double_na_equal, double>>(writer, schema, std::move(path_in_schema),
		                                                                can_have_nulls);
	case LogicalTypeId::DECIMAL:
		switch (type.InternalType()) {
		case PhysicalType::INT16:
			return make_uniq<StandardColumnWriter<int16_t, int32_t>>(writer, schema, std::move(path_in_schema),
			                                                         can_have_nulls);
		case PhysicalType::INT32:
			return make_uniq<StandardColumnWriter<int32_t, int32_t>>(writer, schema, std::move(path_in_schema),
			                                                         can_have_nulls);
		case PhysicalType::INT64:
			return make_uniq<StandardColumnWriter<int64_t, int64_t>>(writer, schema, std::move(path_in_schema),
			                                                         can_have_nulls);
		default:
			return make_uniq<FixedDecimalColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
		}
	case LogicalTypeId::BLOB:
		return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::VARCHAR:
		return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::UUID:
		return make_uniq<StandardColumnWriter<hugeint_t, ParquetUUIDTargetType, ParquetUUIDOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::INTERVAL:
		return make_uniq<StandardColumnWriter<interval_t, ParquetIntervalTargetType, ParquetIntervalOperator>>(
		    writer, schema, std::move(path_in_schema), can_have_nulls);
	case LogicalTypeId::ENUM:
		return make_uniq<EnumColumnWriter>(writer, schema, std::move(path_in_schema), can_have_nulls);
	default:
		throw InternalException("Unsupported type \"%s\" in Parquet writer", type.ToString());
	}
}
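
// The specializations below support the templated writers above: dictionary
// encoding hashes source and target values, and statistics need numeric
// limits. float_na_equal and double_na_equal wrap float and double so that
// every NaN payload hashes and compares as equal, collapsing all NaNs into a
// single dictionary entry; their limits simply delegate to the underlying
// type.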

template <>
struct NumericLimits<float_na_equal> {
	static constexpr float Minimum() {
		return std::numeric_limits<float>::lowest();
	};
	static constexpr float Maximum() {
		return std::numeric_limits<float>::max();
	};
	static constexpr bool IsSigned() {
		return std::is_signed<float>::value;
	}
	static constexpr bool IsIntegral() {
		return std::is_integral<float>::value;
	}
};

template <>
struct NumericLimits<double_na_equal> {
	static constexpr double Minimum() {
		return std::numeric_limits<double>::lowest();
	};
	static constexpr double Maximum() {
		return std::numeric_limits<double>::max();
	};
	static constexpr bool IsSigned() {
		return std::is_signed<double>::value;
	}
	static constexpr bool IsIntegral() {
		return std::is_integral<double>::value;
	}
};

template <>
hash_t Hash(ParquetIntervalTargetType val) {
	return Hash(const_char_ptr_cast(val.bytes), ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
}

template <>
hash_t Hash(ParquetUUIDTargetType val) {
	return Hash(const_char_ptr_cast(val.bytes), ParquetUUIDTargetType::PARQUET_UUID_SIZE);
}

template <>
hash_t Hash(float_na_equal val) {
	if (std::isnan(val.val)) {
		return Hash(std::numeric_limits<float>::quiet_NaN());
	}
	return Hash(val.val);
}

template <>
hash_t Hash(double_na_equal val) {
	if (std::isnan(val.val)) {
		return Hash(std::numeric_limits<double>::quiet_NaN());
	}
	return Hash(val.val);
}

} // namespace duckdb