should be it
This commit is contained in:
102
external/duckdb/extension/parquet/include/geo_parquet.hpp
vendored
Normal file
102
external/duckdb/extension/parquet/include/geo_parquet.hpp
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DuckDB
|
||||
//
|
||||
// geo_parquet.hpp
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "column_writer.hpp"
|
||||
#include "duckdb/common/string.hpp"
|
||||
#include "duckdb/common/types/data_chunk.hpp"
|
||||
#include "duckdb/common/unordered_map.hpp"
|
||||
#include "duckdb/common/unordered_set.hpp"
|
||||
#include "parquet_types.h"
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
struct ParquetColumnSchema;
|
||||
class ParquetReader;
|
||||
class ColumnReader;
|
||||
class ClientContext;
|
||||
class ExpressionExecutor;
|
||||
|
||||
enum class GeoParquetColumnEncoding : uint8_t {
|
||||
WKB = 1,
|
||||
POINT,
|
||||
LINESTRING,
|
||||
POLYGON,
|
||||
MULTIPOINT,
|
||||
MULTILINESTRING,
|
||||
MULTIPOLYGON,
|
||||
};
|
||||
|
||||
enum class GeoParquetVersion : uint8_t {
|
||||
// Write GeoParquet 1.0 metadata
|
||||
// GeoParquet 1.0 has the widest support among readers and writers
|
||||
V1,
|
||||
|
||||
// Write GeoParquet 2.0
|
||||
// The GeoParquet 2.0 options is identical to GeoParquet 1.0 except the underlying storage
|
||||
// of spatial columns is Parquet native geometry, where the Parquet writer will include
|
||||
// native statistics according to the underlying Parquet options. Compared to 'BOTH', this will
|
||||
// actually write the metadata as containing GeoParquet version 2.0.0
|
||||
// However, V2 isnt standardized yet, so this option is still a bit experimental
|
||||
V2,
|
||||
|
||||
// Write GeoParquet 1.0 metadata, with native Parquet geometry types
|
||||
// This is a bit of a hold-over option for compatibility with systems that
|
||||
// reject GeoParquet 2.0 metadata, but can read Parquet native geometry types as they simply ignore the extra
|
||||
// logical type. DuckDB v1.4.0 falls into this category.
|
||||
BOTH,
|
||||
|
||||
// Do not write GeoParquet metadata
|
||||
// This option suppresses GeoParquet metadata; however, spatial types will be written as
|
||||
// Parquet native Geometry/Geography.
|
||||
NONE,
|
||||
};
|
||||
|
||||
struct GeoParquetColumnMetadata {
|
||||
// The encoding of the geometry column
|
||||
GeoParquetColumnEncoding geometry_encoding;
|
||||
|
||||
// The statistics of the geometry column
|
||||
GeometryStatsData stats;
|
||||
|
||||
// The crs of the geometry column (if any) in PROJJSON format
|
||||
string projjson;
|
||||
|
||||
// Used to track the "primary" geometry column (if any)
|
||||
idx_t insertion_index = 0;
|
||||
};
|
||||
|
||||
class GeoParquetFileMetadata {
|
||||
public:
|
||||
explicit GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) {
|
||||
}
|
||||
void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStatsData &stats);
|
||||
void Write(duckdb_parquet::FileMetaData &file_meta_data);
|
||||
|
||||
// Try to read GeoParquet metadata. Returns nullptr if not found, invalid or the required spatial extension is not
|
||||
// available.
|
||||
static unique_ptr<GeoParquetFileMetadata> TryRead(const duckdb_parquet::FileMetaData &file_meta_data,
|
||||
const ClientContext &context);
|
||||
const unordered_map<string, GeoParquetColumnMetadata> &GetColumnMeta() const;
|
||||
|
||||
static unique_ptr<ColumnReader> CreateColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema,
|
||||
ClientContext &context);
|
||||
|
||||
bool IsGeometryColumn(const string &column_name) const;
|
||||
|
||||
static bool IsGeoParquetConversionEnabled(const ClientContext &context);
|
||||
static LogicalType GeometryType();
|
||||
|
||||
private:
|
||||
mutex write_lock;
|
||||
unordered_map<string, GeoParquetColumnMetadata> geometry_columns;
|
||||
GeoParquetVersion version;
|
||||
};
|
||||
|
||||
} // namespace duckdb
|
||||
Reference in New Issue
Block a user