should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,102 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// geo_parquet.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "column_writer.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "parquet_types.h"
namespace duckdb {
// Forward declarations. In this header these types only appear in function declarations
// (by reference, or as the element type of a returned unique_ptr), so their full
// definitions are not needed here.
struct ParquetColumnSchema;
class ParquetReader;
class ColumnReader;
class ClientContext;
// NOTE(review): ExpressionExecutor is not referenced anywhere in the visible part of this
// header - confirm whether this forward declaration is still required.
class ExpressionExecutor;
// The encoding of a geometry column as tracked in the GeoParquet column metadata.
// Stored in a single byte; the numbering deliberately starts at 1, so 0 is not a valid encoding.
enum class GeoParquetColumnEncoding : uint8_t {
	WKB = 1,
	POINT = 2,
	LINESTRING = 3,
	POLYGON = 4,
	MULTIPOINT = 5,
	MULTILINESTRING = 6,
	MULTIPOLYGON = 7
};
// Selects which GeoParquet metadata (if any) is written, and how spatial columns are stored.
enum class GeoParquetVersion : uint8_t {
	// Write GeoParquet 1.0 metadata.
	// GeoParquet 1.0 has the widest support among readers and writers.
	V1 = 0,

	// Write GeoParquet 2.0.
	// This option is identical to GeoParquet 1.0, except that the underlying storage of spatial
	// columns is Parquet native geometry, for which the Parquet writer includes native statistics
	// according to the underlying Parquet options. Unlike 'BOTH', the metadata is actually written
	// as GeoParquet version 2.0.0.
	// Note that V2 isn't standardized yet, so this option is still somewhat experimental.
	V2 = 1,

	// Write GeoParquet 1.0 metadata together with native Parquet geometry types.
	// This is a hold-over option for compatibility with systems that reject GeoParquet 2.0
	// metadata but can still read Parquet native geometry types because they simply ignore the
	// extra logical type. DuckDB v1.4.0 falls into this category.
	BOTH = 2,

	// Do not write GeoParquet metadata.
	// GeoParquet metadata is suppressed; spatial types are nevertheless written as
	// Parquet native Geometry/Geography.
	NONE = 3
};
// Metadata kept for a single geometry column of a GeoParquet file.
struct GeoParquetColumnMetadata {
	// The encoding of the geometry column.
	// Explicitly defaulted to WKB: without an initializer a default-initialized instance would
	// hold an indeterminate value, and a value-initialized one would hold 0, which is not a
	// valid GeoParquetColumnEncoding (the enumerators start at WKB = 1).
	GeoParquetColumnEncoding geometry_encoding = GeoParquetColumnEncoding::WKB;

	// The statistics of the geometry column
	GeometryStatsData stats;

	// The crs of the geometry column (if any) in PROJJSON format
	string projjson;

	// Used to track the "primary" geometry column (if any)
	idx_t insertion_index = 0;
};
class GeoParquetFileMetadata {
public:
explicit GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) {
}
void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStatsData &stats);
void Write(duckdb_parquet::FileMetaData &file_meta_data);
// Try to read GeoParquet metadata. Returns nullptr if not found, invalid or the required spatial extension is not
// available.
static unique_ptr<GeoParquetFileMetadata> TryRead(const duckdb_parquet::FileMetaData &file_meta_data,
const ClientContext &context);
const unordered_map<string, GeoParquetColumnMetadata> &GetColumnMeta() const;
static unique_ptr<ColumnReader> CreateColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
ClientContext &context);
bool IsGeometryColumn(const string &column_name) const;
static bool IsGeoParquetConversionEnabled(const ClientContext &context);
static LogicalType GeometryType();
private:
mutex write_lock;
unordered_map<string, GeoParquetColumnMetadata> geometry_columns;
GeoParquetVersion version;
};
} // namespace duckdb