//===----------------------------------------------------------------------===// // DuckDB // // geo_parquet.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "column_writer.hpp" #include "duckdb/common/string.hpp" #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/common/unordered_map.hpp" #include "duckdb/common/unordered_set.hpp" #include "parquet_types.h" namespace duckdb { struct ParquetColumnSchema; class ParquetReader; class ColumnReader; class ClientContext; class ExpressionExecutor; enum class GeoParquetColumnEncoding : uint8_t { WKB = 1, POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, }; enum class GeoParquetVersion : uint8_t { // Write GeoParquet 1.0 metadata // GeoParquet 1.0 has the widest support among readers and writers V1, // Write GeoParquet 2.0 // The GeoParquet 2.0 options is identical to GeoParquet 1.0 except the underlying storage // of spatial columns is Parquet native geometry, where the Parquet writer will include // native statistics according to the underlying Parquet options. Compared to 'BOTH', this will // actually write the metadata as containing GeoParquet version 2.0.0 // However, V2 isnt standardized yet, so this option is still a bit experimental V2, // Write GeoParquet 1.0 metadata, with native Parquet geometry types // This is a bit of a hold-over option for compatibility with systems that // reject GeoParquet 2.0 metadata, but can read Parquet native geometry types as they simply ignore the extra // logical type. DuckDB v1.4.0 falls into this category. BOTH, // Do not write GeoParquet metadata // This option suppresses GeoParquet metadata; however, spatial types will be written as // Parquet native Geometry/Geography. NONE, }; struct GeoParquetColumnMetadata { // The encoding of the geometry column GeoParquetColumnEncoding geometry_encoding; // The statistics of the geometry column GeometryStatsData stats; // The crs of the geometry column (if any) in PROJJSON format string projjson; // Used to track the "primary" geometry column (if any) idx_t insertion_index = 0; }; class GeoParquetFileMetadata { public: explicit GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) { } void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStatsData &stats); void Write(duckdb_parquet::FileMetaData &file_meta_data); // Try to read GeoParquet metadata. Returns nullptr if not found, invalid or the required spatial extension is not // available. static unique_ptr TryRead(const duckdb_parquet::FileMetaData &file_meta_data, const ClientContext &context); const unordered_map &GetColumnMeta() const; static unique_ptr CreateColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema, ClientContext &context); bool IsGeometryColumn(const string &column_name) const; static bool IsGeoParquetConversionEnabled(const ClientContext &context); static LogicalType GeometryType(); private: mutex write_lock; unordered_map geometry_columns; GeoParquetVersion version; }; } // namespace duckdb