#include "parquet_field_id.hpp"

#include "duckdb/common/exception/binder_exception.hpp"

namespace duckdb {

// Out-of-class definition of the static constexpr member (required when it is ODR-used before C++17)
constexpr const char *FieldID::DUCKDB_FIELD_ID;

//! Creates an empty (but always allocated) name -> FieldID map
ChildFieldIDs::ChildFieldIDs() : ids(make_uniq<case_insensitive_map_t<FieldID>>()) {
}

//! Deep copy: every contained FieldID is copied through FieldID::Copy
ChildFieldIDs ChildFieldIDs::Copy() const {
	ChildFieldIDs result;
	for (const auto &id : *ids) {
		result.ids->emplace(id.first, id.second.Copy());
	}
	return result;
}

//! An unset FieldID: no id has been assigned
FieldID::FieldID() : set(false) {
}

//! A FieldID that carries the explicit id "field_id_p"
FieldID::FieldID(int32_t field_id_p) : set(true), field_id(field_id_p) {
}

//! Deep copy that preserves the set/unset state and copies the child field ids
FieldID FieldID::Copy() const {
	auto result = set ? FieldID(field_id) : FieldID();
	result.child_field_ids = child_field_ids.Copy();
	return result;
}

//! Returns the (case-insensitive) child name -> child type map of a nested type:
//! LIST exposes "element", MAP exposes "key" and "value", STRUCT exposes its own children.
//! Throws a BinderException if a STRUCT child is named exactly like the reserved key FieldID::DUCKDB_FIELD_ID,
//! and an InternalException for any other type.
static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
	case_insensitive_map_t<LogicalType> name_to_type_map;
	switch (type.id()) {
	case LogicalTypeId::LIST:
		name_to_type_map.emplace("element", ListType::GetChildType(type));
		break;
	case LogicalTypeId::MAP:
		name_to_type_map.emplace("key", MapType::KeyType(type));
		name_to_type_map.emplace("value", MapType::ValueType(type));
		break;
	case LogicalTypeId::STRUCT:
		for (auto &child_type : StructType::GetChildTypes(type)) {
			if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
				throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
			}
			name_to_type_map.emplace(child_type);
		}
		break;
	default: // LCOV_EXCL_START
		throw InternalException("Unexpected type in GetChildNameToTypeMap");
	} // LCOV_EXCL_STOP
	return name_to_type_map;
}

//! Appends the child names and child types of a nested type to the output vectors, in declaration order:
//! LIST -> "element"; MAP -> "key", "value"; STRUCT -> its children.
//! Throws an InternalException for any other type.
static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
                                  vector<LogicalType> &child_types) {
	switch (type.id()) {
	case LogicalTypeId::LIST:
		child_names.emplace_back("element");
		child_types.emplace_back(ListType::GetChildType(type));
		break;
	case LogicalTypeId::MAP:
		child_names.emplace_back("key");
		child_names.emplace_back("value");
		child_types.emplace_back(MapType::KeyType(type));
		child_types.emplace_back(MapType::ValueType(type));
		break;
	case LogicalTypeId::STRUCT:
		for (auto &child_type : StructType::GetChildTypes(type)) {
			child_names.emplace_back(child_type.first);
			child_types.emplace_back(child_type.second);
		}
		break;
	default: // LCOV_EXCL_START
		throw InternalException("Unexpected type in GetChildNamesAndTypes");
	} // LCOV_EXCL_STOP
}

//! Assigns consecutive field ids to the given columns, recursing depth-first into LIST/MAP/STRUCT columns.
//! "field_id" is the next id to hand out; it is advanced in place so the caller (and the recursion) continue
//! numbering where the previous column left off.
void FieldID::GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
                               const vector<LogicalType> &sql_types) {
	D_ASSERT(names.size() == sql_types.size());
	for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
		const auto &col_name = names[col_idx];
		auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(UnsafeNumericCast<int32_t>(field_id++))));
		D_ASSERT(inserted.second);

		const auto &col_type = sql_types[col_idx];
		if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
		    col_type.id() != LogicalTypeId::STRUCT) {
			continue;
		}

		// Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
		vector<string> child_names;
		vector<LogicalType> child_types;
		GetChildNamesAndTypes(col_type, child_names, child_types);

		GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
	}
}

//! Parses a user-supplied FIELD_IDS specification into "field_ids".
//!
//! "field_ids_value" must be a STRUCT whose keys are column names found in "name_to_type_map". Each entry is either
//! a plain value (cast to INTEGER and used as the field id), or a STRUCT that may hold the reserved key
//! FieldID::DUCKDB_FIELD_ID (the id of the column itself) and/or keys for nested children, which are parsed
//! recursively. "unique_field_ids" is shared across the whole recursion, so ids must be unique over the entire
//! specification, not just within one nesting level.
//!
//! Throws a BinderException if the specification is not a STRUCT, names an unknown column, repeats a field id,
//! or gives a nested specification for a column that is not a LIST, MAP or STRUCT.
void FieldID::GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
                          unordered_set<uint32_t> &unique_field_ids,
                          const case_insensitive_map_t<LogicalType> &name_to_type_map) {
	const auto &struct_type = field_ids_value.type();
	if (struct_type.id() != LogicalTypeId::STRUCT) {
		// The example must itself be a valid specification, i.e., it may not repeat a field id
		throw BinderException(
		    "Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 45}",
		    FieldID::DUCKDB_FIELD_ID);
	}
	const auto &struct_children = StructValue::GetChildren(field_ids_value);
	D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
	for (idx_t i = 0; i < struct_children.size(); i++) {
		const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
		if (col_name == FieldID::DUCKDB_FIELD_ID) {
			// The reserved key carries the id of the enclosing column, which the caller has already handled
			continue;
		}

		auto it = name_to_type_map.find(col_name);
		if (it == name_to_type_map.end()) {
			string names;
			for (const auto &name : name_to_type_map) {
				if (!names.empty()) {
					names += ", ";
				}
				names += name.first;
			}
			throw BinderException(
			    "Column name \"%s\" specified in FIELD_IDS not found. Consider using WRITE_PARTITION_COLUMNS if this "
			    "column is a partition column. Available column names: [%s]",
			    col_name, names);
		}
		D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys

		const auto &child_value = struct_children[i];
		const auto &child_type = child_value.type();
		optional_ptr<const Value> field_id_value;
		optional_ptr<const Value> child_field_ids_value;

		if (child_type.id() == LogicalTypeId::STRUCT) {
			const auto &nested_children = StructValue::GetChildren(child_value);
			D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
			for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
				// Lowercase before comparing, exactly like the loop above does for the same key one level down.
				// Otherwise a differently-cased reserved key is mistaken for a nested column here and then
				// silently skipped by the recursive call, dropping the field id without any error.
				const auto field_id_or_nested_col = StringUtil::Lower(StructType::GetChildName(child_type, nested_i));
				if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
					field_id_value = &nested_children[nested_i];
				} else {
					// Any other key describes a nested child: hand the whole STRUCT to the recursive call
					child_field_ids_value = &child_value;
				}
			}
		} else {
			field_id_value = &child_value;
		}

		FieldID field_id;
		if (field_id_value) {
			Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
			const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
			if (!unique_field_ids.insert(field_id_int).second) {
				throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
			}
			field_id = FieldID(UnsafeNumericCast<int32_t>(field_id_int));
		}
		auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
		D_ASSERT(inserted.second);

		if (child_field_ids_value) {
			const auto &col_type = it->second;
			if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
			    col_type.id() != LogicalTypeId::STRUCT) {
				throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
				                      col_name, LogicalTypeIdToString(col_type.id()));
			}

			GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
			            GetChildNameToTypeMap(col_type));
		}
	}
}

} // namespace duckdb