should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,99 @@
# name: test/geoparquet/disabled.test
# group: [geoparquet]
require spatial
require parquet
#------------------------------------------------------------------------------
# Reading GeoParquet: default behaviour first, then with conversion disabled
#------------------------------------------------------------------------------
# Baseline: with the setting untouched the geometry column can be handed
# straight to st_astext and is rendered as WKT.
query II rowsort
SELECT
    col,
    st_astext(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet'
----
0 POINT (30 10)
1 POINT EMPTY
2 NULL
3 POINT (40 40)
# Switch the conversion off for the following read
statement ok
SET enable_geoparquet_conversion = false;
# The same column now comes back as the raw WKB bytes stored in the file
query II rowsort
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet'
----
0 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@
1 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF8\x7F\x00\x00\x00\x00\x00\x00\xF8\x7F
2 NULL
3 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00D@\x00\x00\x00\x00\x00\x00D@
#------------------------------------------------------------------------------
# Writing GeoParquet: reference run with conversion enabled
#------------------------------------------------------------------------------
# Turn the conversion back on before writing the reference file
statement ok
SET enable_geoparquet_conversion = true;
statement ok
COPY (
    SELECT
        col,
        ST_GeomFromText(geometry) AS geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv'
)
TO '__TEST_DIR__/data-point-out-enabled.parquet' (FORMAT PARQUET)
# Reading the file back gives geometries that st_astext renders as WKT
query II rowsort
SELECT
    col,
    st_astext(geometry)
FROM '__TEST_DIR__/data-point-out-enabled.parquet'
----
0 POINT (30 10)
1 POINT EMPTY
2 NULL
3 POINT (40 40)
# The footer must contain the GeoParquet metadata document
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/data-point-out-enabled.parquet');
----
{"version":"1.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"bbox":[30.0,10.0,40.0,40.0]}}}
# Repeat the write with the conversion switched off
statement ok
SET enable_geoparquet_conversion = false;
statement ok
COPY (
    SELECT
        col,
        ST_GeomFromText(geometry) AS geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv'
)
TO '__TEST_DIR__/data-point-out-disabled.parquet' (FORMAT PARQUET)
# These bytes are the serialized GEOMETRY representation, not WKB,
# which is why they differ from the WKB rows checked in the read tests.
query II rowsort
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-point-out-disabled.parquet'
----
0 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@
1 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
2 NULL
3 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00D@\x00\x00\x00\x00\x00\x00D@
# No GeoParquet metadata may be present in the footer (empty result expected)
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/data-point-out-disabled.parquet');
----
# Same check, but the query converts to WKB itself via ST_AsWKB before writing
statement ok
COPY (
    SELECT
        col,
        ST_AsWKB(ST_GeomFromText(geometry)) AS geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv'
)
TO '__TEST_DIR__/data-point-out-disabled-wkb.parquet' (FORMAT PARQUET)
# The stored bytes are plain WKB
query II rowsort
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-point-out-disabled-wkb.parquet'
----
0 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@
1 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF8\x7F\x00\x00\x00\x00\x00\x00\xF8\x7F
2 NULL
3 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00D@\x00\x00\x00\x00\x00\x00D@
# Again no footer metadata is expected (empty result)
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/data-point-out-disabled-wkb.parquet');
----

View File

@@ -0,0 +1,40 @@
# name: test/geoparquet/geoarrow.test
# group: [geoparquet]
require spatial
require parquet
#------------------------------------------------------------------------------
# Reading geoarrow-encoded geometry columns
#------------------------------------------------------------------------------
# Each column of seattle.parquet is read on its own; the expected values are
# nested structs/lists of x/y coordinates, one nesting level per geometry level.
# point
query T
SELECT point
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
{'x': -122.3321, 'y': 47.6062}
# linestring
query T
SELECT linestring
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
[{'x': -122.3321, 'y': 47.6062}, {'x': -122.3493, 'y': 47.6205}, {'x': -122.354, 'y': 47.622}]
# polygon
query T
SELECT polygon
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
[[{'x': -122.3321, 'y': 47.6062}, {'x': -122.335, 'y': 47.608}, {'x': -122.32, 'y': 47.604}, {'x': -122.3321, 'y': 47.6062}]]
# multipoint
query T
SELECT multipoint
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
[{'x': -122.3321, 'y': 47.6062}, {'x': -122.3493, 'y': 47.6205}, {'x': -122.335, 'y': 47.608}]
# multilinestring
query T
SELECT multilinestring
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
[[{'x': -122.3321, 'y': 47.6062}, {'x': -122.3493, 'y': 47.6205}, {'x': -122.354, 'y': 47.622}], [{'x': -122.354, 'y': 47.622}, {'x': -122.3194, 'y': 47.6179}, {'x': -122.3366, 'y': 47.6276}]]
# multipolygon
query T
SELECT multipolygon
FROM '__WORKING_DIRECTORY__/test/geoparquet/seattle.parquet'
----
[[[{'x': -122.3321, 'y': 47.6062}, {'x': -122.335, 'y': 47.608}, {'x': -122.32, 'y': 47.604}, {'x': -122.3321, 'y': 47.6062}]], [[{'x': -122.354, 'y': 47.622}, {'x': -122.3194, 'y': 47.6179}, {'x': -122.3366, 'y': 47.6276}, {'x': -122.354, 'y': 47.622}]]]

View File

@@ -0,0 +1,61 @@
# name: test/geoparquet/mixed.test
# group: [geoparquet]
require spatial
require parquet
#------------------------------------------------------------------------------
# Build the source table
#------------------------------------------------------------------------------
# One row per geometry kind, including a single point with a Z coordinate
statement ok
CREATE TABLE t1 (
    col INT,
    geom GEOMETRY
);
statement ok
INSERT INTO t1 (col, geom) VALUES
    (1, 'POINT(0 0)'),
    (2, 'POINT Z(1 1 1)'),
    (3, 'LINESTRING(0 0, 1 1)'),
    (4, 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'),
    (5, 'MULTIPOINT(0 0, 1 1)'),
    (6, 'MULTILINESTRING((0 0, 1 1), (2 2, 3 3))'),
    (7, 'MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)))'),
    (8, 'GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0, 1 1))');
#------------------------------------------------------------------------------
# Test parquet output
#------------------------------------------------------------------------------
# Write the mixed-geometry table to parquet
statement ok
COPY (SELECT * FROM t1) TO '__TEST_DIR__/t1.parquet' (FORMAT 'parquet');
# Now read it back
statement ok
CREATE TABLE t2 AS SELECT * FROM '__TEST_DIR__/t1.parquet';
# Both queries must carry the SAME label: the runner only compares results of
# queries that share a label, so this is what actually asserts that the data
# read back from parquet (t2) equals the data that was written (t1).
query II rowsort result_1
SELECT * FROM t2;
query II rowsort result_1
SELECT * FROM t1;
#------------------------------------------------------------------------------
# Inspect the parquet footer
#------------------------------------------------------------------------------
# The metadata document must list every geometry type that was written and
# carry a six-value bbox.
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/t1.parquet');
----
{"version":"1.0.0","primary_column":"geom","columns":{"geom":{"encoding":"WKB","geometry_types":["Point","LineString","Polygon","MultiPoint","MultiLineString","MultiPolygon","GeometryCollection","Point Z"],"bbox":[0.0,0.0,1.0,3.0,3.0,1.0]}}}
#------------------------------------------------------------------------------
# Write with RETURN_STATS
#------------------------------------------------------------------------------
# The COPY returns a single row with six columns. Values that are not fixed
# (file path, the two numeric size columns, column_size_bytes) are matched with
# <REGEX> patterns; the per-column statistics are pinned: row count 8, col
# min/max 1..8, and for geom the X/Y bounds 0..3, Z bounds 1..1 and the list of
# geometry types. The last column is expected to be NULL.
query IIIIII
COPY (SELECT * FROM t1) TO '__TEST_DIR__/t1.parquet' (FORMAT 'parquet', RETURN_STATS);
----
<REGEX>:.*t1.parquet 8 <REGEX>:\d+ <REGEX>:\d+ <REGEX>:{'"col"'={column_size_bytes=\d+, max=8, min=1, null_count=0}, '"geom"'={bbox_xmax=3.0, bbox_xmin=0.0, bbox_ymax=3.0, bbox_ymin=0.0, bbox_zmax=1.0, bbox_zmin=1.0, column_size_bytes=\d+, geo_types='\[point, linestring, polygon, multipoint, multilinestring, multipolygon, geometrycollection, point_z\]', null_count=0}} NULL

View File

@@ -0,0 +1,113 @@
# name: test/geoparquet/no_spatial.test
# group: [geoparquet]
require parquet
#------------------------------------------------------------------------------
# Reading GeoParquet with only parquet required (spatial not loaded),
# first with the default setting and then with conversion disabled
#------------------------------------------------------------------------------
# Confirm the input really is a GeoParquet file: it has a 'geo' metadata entry
query II
SELECT
    key,
    decode(value) AS col
FROM parquet_kv_metadata('__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet')
WHERE key = 'geo'
----
geo {"version": "1.1.0", "primary_column": "geometry", "columns": {"geometry": {"encoding": "WKB", "geometry_types": ["Point"]}}}
# The geometry column is returned as raw WKB bytes
query II rowsort raw_wkb
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet'
----
0 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@
1 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF8\x7F\x00\x00\x00\x00\x00\x00\xF8\x7F
2 NULL
3 \x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00D@\x00\x00\x00\x00\x00\x00D@
# Disabling the conversion must give the same rows (same raw_wkb label)
statement ok
SET enable_geoparquet_conversion = false;
query II rowsort raw_wkb
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet'
----
#------------------------------------------------------------------------------
# Test writing geoparquet when conversion is enabled but spatial is not loaded
#------------------------------------------------------------------------------
# Switch to a database file in the test directory; the table t1 created below
# is queried again after the restart.
load __TEST_DIR__/geometry_db_test.db
statement ok
SET enable_geoparquet_conversion = true;
# spatial is required here so ST_GeomFromText can build the geometry column
require spatial
# Create a table containing geometries
statement ok
CREATE TABLE t1 AS SELECT col, ST_GeomFromText(geometry) as geometry FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv';
# Restart, but don't load spatial now
restart no_extension_load
require parquet
# Set the option again after the restart
statement ok
SET enable_geoparquet_conversion = true;
# Write some parquet containing geometry when spatial is not loaded
statement ok
COPY (FROM t1) TO '__TEST_DIR__/data-point-out-enabled.parquet' (FORMAT PARQUET)
# The bytes differ because this is serialized GEOMETRY, not WKB
query II rowsort raw_geo
SELECT col, geometry FROM '__TEST_DIR__/data-point-out-enabled.parquet'
----
0 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@
1 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00
2 NULL
3 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00D@\x00\x00\x00\x00\x00\x00D@
# Check that we didn't write GeoParquet metadata to the footer (spatial is not loaded);
# the expected result is empty.
query I
SELECT (decode(value)) as col
FROM parquet_kv_metadata('__TEST_DIR__/data-point-out-enabled.parquet');
----
#------------------------------------------------------------------------------
# Test writing geoparquet when conversion is disabled and spatial is not loaded
#------------------------------------------------------------------------------
# Now disable conversion
statement ok
SET enable_geoparquet_conversion = false;
# spatial is required again so ST_GeomFromText can build the geometry column
require spatial
statement ok
CREATE TABLE t2 AS SELECT col, ST_GeomFromText(geometry) as geometry FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv';
# Restart, but don't load spatial now
restart no_extension_load
require parquet
# Set the option explicitly after the restart so the COPY below really runs
# with conversion DISABLED (this section previously set it to true here, which
# only repeated the "enabled" case).
statement ok
SET enable_geoparquet_conversion = false;
# Write some parquet containing geometry when spatial is not loaded.
# Use t2, the table created for this section.
statement ok
COPY (FROM t2) TO '__TEST_DIR__/data-point-out-disabled.parquet' (FORMAT PARQUET)
# The bytes differ from WKB because this is serialized GEOMETRY; they must
# equal the rows recorded under the raw_geo label by the "enabled" section.
query II rowsort raw_geo
SELECT col, geometry FROM '__TEST_DIR__/data-point-out-disabled.parquet'
----
# Check that we didn't write any GeoParquet metadata to the footer (empty result)
query I
SELECT (decode(value)) as col
FROM parquet_kv_metadata('__TEST_DIR__/data-point-out-disabled.parquet');
----

View File

@@ -0,0 +1,141 @@
# name: test/geoparquet/roundtrip.test
# group: [geoparquet]
require spatial
require parquet
#------------------------------------------------------------------------------
# Test points
#------------------------------------------------------------------------------
# The WKT text in the CSV is the reference result for the point_result label;
# every later query with that label must produce the same rows.
query II nosort point_result
SELECT col, geometry FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-wkt.csv'
query II nosort point_result
SELECT col, st_astext(geometry) FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet'
# Now write it back to parquet. The source must be the GeoParquet file (as in
# the other sections of this test), not the CSV: copying the CSV would only
# write a text column and would not exercise the geometry roundtrip at all.
statement ok
COPY (SELECT col, geometry FROM '__WORKING_DIRECTORY__/data/geoparquet/data-point-encoding_wkb.parquet')
TO '__TEST_DIR__/data-point-out.parquet' (FORMAT PARQUET)
# Test that the data is the same; st_astext renders the geometries read back
# exactly like the input parquet query above.
query II nosort point_result
SELECT col, st_astext(geometry) FROM '__TEST_DIR__/data-point-out.parquet'
#------------------------------------------------------------------------------
# Linestring roundtrip
#------------------------------------------------------------------------------
# Reference rows: the WKT text stored in the CSV
query II nosort linestring_result
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-linestring-wkt.csv'
query II nosort linestring_result
SELECT
    col,
    st_astext(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-linestring-encoding_wkb.parquet'
# Re-write the GeoParquet input to a new parquet file
statement ok
COPY (
    SELECT
        col,
        geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-linestring-encoding_wkb.parquet'
)
TO '__TEST_DIR__/data-linestring-out.parquet' (FORMAT PARQUET)
# The written file must produce the same rows as the reference
query II nosort linestring_result
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-linestring-out.parquet'
#------------------------------------------------------------------------------
# Polygon roundtrip
#------------------------------------------------------------------------------
# Reference rows: the WKT text stored in the CSV
query II nosort polygon_result
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-polygon-wkt.csv'
query II nosort polygon_result
SELECT
    col,
    st_astext(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-polygon-encoding_wkb.parquet'
# Re-write the GeoParquet input to a new parquet file
statement ok
COPY (
    SELECT
        col,
        geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-polygon-encoding_wkb.parquet'
)
TO '__TEST_DIR__/data-polygon-out.parquet' (FORMAT PARQUET)
# The written file must produce the same rows as the reference
query II nosort polygon_result
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-polygon-out.parquet'
#------------------------------------------------------------------------------
# Multipoint roundtrip
#------------------------------------------------------------------------------
# Unlike the other sections, the CSV text is parsed with ST_GeomFromText here
# and the parquet geometry is selected directly, without st_astext.
query II nosort multipoint_result
SELECT
    col,
    ST_GeomFromText(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipoint-wkt.csv'
query II nosort multipoint_result
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipoint-encoding_wkb.parquet'
# Re-write the GeoParquet input to a new parquet file
statement ok
COPY (
    SELECT
        col,
        geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipoint-encoding_wkb.parquet'
)
TO '__TEST_DIR__/data-multipoint-out.parquet' (FORMAT PARQUET)
# The written file must produce the same rows as the reference
query II nosort multipoint_result
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-multipoint-out.parquet'
#------------------------------------------------------------------------------
# Multilinestring roundtrip
#------------------------------------------------------------------------------
# Reference rows: the WKT text stored in the CSV
query II nosort multilinestring_result
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multilinestring-wkt.csv'
query II nosort multilinestring_result
SELECT
    col,
    st_astext(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multilinestring-encoding_wkb.parquet'
# Re-write the GeoParquet input to a new parquet file
statement ok
COPY (
    SELECT
        col,
        geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multilinestring-encoding_wkb.parquet'
)
TO '__TEST_DIR__/data-multilinestring-out.parquet' (FORMAT PARQUET)
# The written file must produce the same rows as the reference
query II nosort multilinestring_result
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-multilinestring-out.parquet'
#------------------------------------------------------------------------------
# Multipolygon roundtrip
#------------------------------------------------------------------------------
# Reference rows: the WKT text stored in the CSV
query II nosort multipolygon_result
SELECT
    col,
    geometry
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipolygon-wkt.csv'
query II nosort multipolygon_result
SELECT
    col,
    st_astext(geometry)
FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipolygon-encoding_wkb.parquet'
# Re-write the GeoParquet input to a new parquet file
statement ok
COPY (
    SELECT
        col,
        geometry
    FROM '__WORKING_DIRECTORY__/data/geoparquet/data-multipolygon-encoding_wkb.parquet'
)
TO '__TEST_DIR__/data-multipolygon-out.parquet' (FORMAT PARQUET)
# The written file must produce the same rows as the reference
query II nosort multipolygon_result
SELECT
    col,
    geometry
FROM '__TEST_DIR__/data-multipolygon-out.parquet'
#------------------------------------------------------------------------------
# Footer metadata of the written multipolygon file
#------------------------------------------------------------------------------
# Only the 'geo' entry is inspected; it must describe a WKB-encoded
# MultiPolygon column together with its bbox.
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/data-multipolygon-out.parquet')
WHERE key = 'geo';
----
{"version":"1.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["MultiPolygon"],"bbox":[5.0,5.0,45.0,45.0]}}}

Binary file not shown.

View File

@@ -0,0 +1,40 @@
# name: test/geoparquet/unsupported.test
# group: [geoparquet]
require spatial
require parquet
#------------------------------------------------------------------------------
# Geometry types that get no GeoParquet metadata (ZM and M points)
#------------------------------------------------------------------------------
# Writing a ZM point succeeds, but no GeoParquet metadata is written
statement ok
COPY (
    SELECT 'POINT ZM (0 1 2 3)'::GEOMETRY AS geometry
)
TO '__TEST_DIR__/t1.parquet' (FORMAT 'parquet');
# Not a geoparquet file: the footer key/value metadata is empty
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/t1.parquet');
----
# Still a normal parquet file: the column parses with st_geomfromwkb
query I
SELECT st_astext(st_geomfromwkb(geometry))
FROM '__TEST_DIR__/t1.parquet';
----
POINT ZM (0 1 2 3)
# Same checks for an M point, overwriting the same output file
statement ok
COPY (
    SELECT 'POINT M (0 1 2)'::GEOMETRY AS geometry
)
TO '__TEST_DIR__/t1.parquet' (FORMAT 'parquet');
# Again no footer metadata
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/t1.parquet');
----
# And again the column parses with st_geomfromwkb
query I
SELECT st_astext(st_geomfromwkb(geometry))
FROM '__TEST_DIR__/t1.parquet';
----
POINT M (0 1 2)

View File

@@ -0,0 +1,90 @@
# name: test/geoparquet/versions.test
# group: [geoparquet]
require spatial
require parquet
# Each case below writes one point and then checks two things:
#   1. the footer key/value metadata (the GeoParquet JSON document, if any)
#   2. the geo_types column reported by parquet_metadata
# DEFAULT: no GEOPARQUET_VERSION option; expectations are identical to 'V1'
statement ok
COPY (
    SELECT st_point(1,2) AS geometry
)
TO '__TEST_DIR__/test_default.parquet' (FORMAT PARQUET);
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/test_default.parquet');
----
{"version":"1.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"bbox":[1.0,2.0,1.0,2.0]}}}
query I
SELECT geo_types
FROM parquet_metadata('__TEST_DIR__/test_default.parquet');
----
NULL
# V1: version 1.0.0 metadata document, geo_types is NULL
statement ok
COPY (
    SELECT st_point(1,2) AS geometry
)
TO '__TEST_DIR__/test_v1.parquet' (FORMAT PARQUET, GEOPARQUET_VERSION 'V1');
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/test_v1.parquet');
----
{"version":"1.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"bbox":[1.0,2.0,1.0,2.0]}}}
query I
SELECT geo_types
FROM parquet_metadata('__TEST_DIR__/test_v1.parquet');
----
NULL
# NONE: no metadata document at all, but geo_types is populated
statement ok
COPY (
    SELECT st_point(1,2) AS geometry
)
TO '__TEST_DIR__/test_none.parquet' (FORMAT PARQUET, GEOPARQUET_VERSION 'NONE');
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/test_none.parquet');
----
query I
SELECT geo_types
FROM parquet_metadata('__TEST_DIR__/test_none.parquet');
----
[point]
# BOTH: version 1.0.0 metadata document and geo_types populated
statement ok
COPY (
    SELECT st_point(1,2) AS geometry
)
TO '__TEST_DIR__/test_both.parquet' (FORMAT PARQUET, GEOPARQUET_VERSION 'BOTH');
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/test_both.parquet');
----
{"version":"1.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"bbox":[1.0,2.0,1.0,2.0]}}}
query I
SELECT geo_types
FROM parquet_metadata('__TEST_DIR__/test_both.parquet');
----
[point]
# V2: version 2.0.0 metadata document and geo_types populated
statement ok
COPY (
    SELECT st_point(1,2) AS geometry
)
TO '__TEST_DIR__/test_v2.parquet' (FORMAT PARQUET, GEOPARQUET_VERSION 'V2');
query I
SELECT decode(value) AS col
FROM parquet_kv_metadata('__TEST_DIR__/test_v2.parquet');
----
{"version":"2.0.0","primary_column":"geometry","columns":{"geometry":{"encoding":"WKB","geometry_types":["Point"],"bbox":[1.0,2.0,1.0,2.0]}}}
query I
SELECT geo_types
FROM parquet_metadata('__TEST_DIR__/test_v2.parquet');
----
[point]