should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions


@@ -0,0 +1,11 @@
# name: test/sql/json/issues/internal_issue2732.test
# description: Test internal issue 2732 - read_json('data.jsonl', map_inference_threshold=0) crashes
# group: [issues]
require json
statement ok
PRAGMA enable_verification
statement ok
select * from read_json('data/json/internal_2732.json', map_inference_threshold=0);


@@ -0,0 +1,22 @@
# name: test/sql/json/issues/internal_issue3146.test
# description: Test internal issue 3146 - JSON parsing exception: Arrow datatype Map(Field ... ) not supported by Polars
# group: [issues]
# https://github.com/duckdblabs/duckdb-internal/issues/3146#issuecomment-2396148122
require json
statement ok
COPY (SELECT '{}') TO '__TEST_DIR__/empty_object.json' (FORMAT CSV, HEADER FALSE)
# for empty JSON objects we default to MAP(VARCHAR, JSON) as this is generic enough to fit any object
query I
SELECT typeof(json) FROM '__TEST_DIR__/empty_object.json'
----
MAP(VARCHAR, JSON)
# we can disable it with the map_inference_threshold parameter
query I
SELECT typeof(json) FROM read_json('__TEST_DIR__/empty_object.json', map_inference_threshold=-1)
----
JSON


@@ -0,0 +1,10 @@
# name: test/sql/json/issues/internal_issue3197.test
# description: Test internal issue 3197 - AFL++ issue: crashes (segfault) in json reader
# group: [issues]
require json
statement error
from 'data/json/internal_3197.json'
----
Invalid Input Error


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue3813.test
# description: Test internal issue 3813 - AFL++ issue: map_inference_threshold causes internal exception in json reader
# group: [issues]
require json
statement ok
SELECT * FROM read_json('data/json/internal_3813.json', map_inference_threshold=10);


@@ -0,0 +1,19 @@
# name: test/sql/json/issues/internal_issue391.test
# description: Test internal issue 391 - SUMMARIZE for a JSON column will not work since min(JSON) is not well defined
# group: [issues]
require json
statement ok
PRAGMA enable_verification
statement ok
create table test as select {i: range}::JSON j from range(10)
query II
select min(j), max(j) from test
----
{"i":0} {"i":9}
statement ok
summarize test


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue4014.test
# description: Test internal issue 4014 - AFL++ issue: segfault in json reader
# group: [issues]
require json
statement ok
FROM read_json('data/json/internal_4014.json', map_inference_threshold=0);


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/internal_issue4389.test
# description: Test internal issue 4389 - auto_detect is false for COPY + JSON
# group: [issues]
require json
statement ok
pragma enable_verification
statement ok
CREATE TABLE todos (userId UBIGINT, id UBIGINT, title VARCHAR, completed BOOLEAN);
statement ok
insert into todos values (42, 42, 'duck', true)
statement ok
copy todos to '__TEST_DIR__/todos.json' (ARRAY)
statement ok
copy todos from '__TEST_DIR__/todos.json'
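# hedged verification (not part of the original repro): COPY FROM appends, so the row now appears twice
query IIII
select * from todos
----
42	42	duck	true
42	42	duck	true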


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/internal_issue4403.test
# description: Test internal issue 4403 - AFL fuzzer crash (NULL type specification)
# group: [issues]
require json
statement ok
pragma enable_verification
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: NULL::VARCHAR, name: NULL::VARCHAR})
----
Binder Error


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/internal_issue4794.test
# description: Test internal issue 4794 - AFL++ issue: internal exception due to format string json key
# group: [issues]
require json
statement ok
pragma enable_verification
statement error
FROM read_json('data/json/format_string_key.json');
----
Invalid Input Error


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue5288.test
# description: Test internal issue 5288 - zstd compression cannot be configured for JSON fields
# group: [issues]
require json
statement ok
create table foo2 (bar JSON using compression 'zstd');
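# hedged follow-up (assuming the clause is accepted in this setup): exercise the zstd-compressed column
statement ok
insert into foo2 values ('{"duck": 42}');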


@@ -0,0 +1,26 @@
# name: test/sql/json/issues/issue10751and11152.test
# description: Test issues 10751 and 11152 - duplicate keys in JSON objects with ignore_errors
# group: [issues]
require json
# issue 10751
statement error
create or replace table json_test as select * from read_json_auto('data/json/10751.json', format = 'newline_delimited');
----
Not implemented Error: Duplicate name
statement ok
create table json_test as select * from read_json_auto('data/json/10751.json', format = 'newline_delimited', ignore_errors=true);
statement ok
select * from json_test;
# issue 11152
statement error
FROM read_json_auto('data/json/11152.json');
----
Invalid Input Error: Malformed JSON
statement ok
FROM read_json_auto('data/json/11152.json', ignore_errors=true);


@@ -0,0 +1,25 @@
# name: test/sql/json/issues/issue10784.test
# description: Test issue 10784 - read_json_auto has some unexpected behavior
# group: [issues]
require json
# original query from the issue should just return an error because it's not an array of objects
statement error
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR','k':'VARCHAR'});
----
Invalid Input Error
# if we ignore errors we get NULLs because the array entries aren't objects
query II
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR','k':'VARCHAR'}, ignore_errors=true);
----
NULL NULL
NULL NULL
# if we read it as if it's one column we just get the array values as varchar
query I
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR'});
----
4
hello


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/issue10866.test
# description: Test issue 10866 - uhugeints are truncated when imported from json data
# group: [issues]
require json
statement ok
copy (select '{"col": 277447099861456945273576150847928801582}') to '__TEST_DIR__/10866.json' (format csv, quote '', header 0)
query II
select col, hex(col) from read_json('__TEST_DIR__/10866.json', columns={col: 'uhugeint'})
----
277447099861456945273576150847928801582 D0BA5E258FFCFEE8C4619BA0E21A192E


@@ -0,0 +1,36 @@
# name: test/sql/json/issues/issue11804.test
# description: Test issue 11804 - json_type(...) with path does not return "NULL"
# group: [issues]
require json
query I
select json_type(JSON 'null') = 'NULL';
----
true
query I
select json_type(JSON '{"a": null}', '/a') = 'NULL';
----
true
query I
select json_type(JSON '{"a": null}', '$.a') = 'NULL';
----
true
# Test issue 13436 - JSON_TYPE function produces wrong result if path is a column expression
query II
SELECT
json_type (json '{"a":1,"b":null}', p),
json_type (json '{"a":1,"b":null}', 'b')
FROM (VALUES ('b')) AS t (p);
----
NULL NULL
# let's also test the extract-many functionality (a list of paths)
query I
select unnest(json_type(JSON '{"a": null}', ['$.a', '$.a'])) = 'NULL';
----
true
true


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/issue12188.test
# description: Test issue 12188 - Issue with Parsing NDJSON File in DuckDB: Unexpected Quotation Marks
# group: [issues]
require parquet
query II
SELECT typeof(field1), typeof(field2) FROM 'data/parquet-testing/parquet_with_json.parquet' LIMIT 1
----
JSON JSON
require json
statement ok
COPY (SELECT * FROM read_ndjson('data/json/12188.ndjson', maximum_depth=1)) TO '__TEST_DIR__/my.parquet';
query II
SELECT typeof(field1), typeof(field2) FROM '__TEST_DIR__/my.parquet' LIMIT 1
----
JSON JSON


@@ -0,0 +1,30 @@
# name: test/sql/json/issues/issue12861.test
# description: Test issue 12861 - Autodetected type of nested JSON field in read_json_auto depends on the number of null values in the input
# group: [issues]
require json
statement ok
create table tbl (test struct(one bigint, two varchar));
statement ok
insert into tbl values ({'one': 1, 'two': 2}), (null)
statement ok
copy tbl to '__TEST_DIR__/fewnulls.json'
statement ok
insert into tbl select null from range(9)
statement ok
copy tbl to '__TEST_DIR__/manynulls.json'
query I
select typeof(test) from '__TEST_DIR__/fewnulls.json' limit 1
----
STRUCT(one BIGINT, two VARCHAR)
query I
select typeof(test) from '__TEST_DIR__/manynulls.json' limit 1
----
STRUCT(one BIGINT, two VARCHAR)


@@ -0,0 +1,23 @@
# name: test/sql/json/issues/issue13212.test
# description: Test issue 13212 - Trying to read an empty compressed JSON file deadlocks
# group: [issues]
require json
statement ok
copy (select range::int64 as id, range::varchar as name from range(0)) to '__TEST_DIR__/t1.json.gz' (format json, compression gzip)
statement ok
copy (select id, name from values (1, 'bob'), (2, 'tom') tbl(id, name)) to '__TEST_DIR__/t2.json.gz' (format json, compression gzip)
query II
SELECT * FROM read_ndjson_auto(['__TEST_DIR__/t1.json.gz', '__TEST_DIR__/t2.json.gz']);
----
1 bob
2 tom
query II
SELECT * FROM read_ndjson(['__TEST_DIR__/t1.json.gz', '__TEST_DIR__/t2.json.gz'], columns={id: 'int64', name: 'varchar'});
----
1 bob
2 tom


@@ -0,0 +1,36 @@
# name: test/sql/json/issues/issue13725.test
# description: Test issue 13725 - Using both hive_partitioning and hive_types in read_json_objects intermittently segfaults
# group: [issues]
require json
# the globs below use forward slashes in file paths, so skip this test on Windows
require notwindows
query III
select *
from read_json_objects('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
{"hello": "there"} data/json/13725/month=07/mytest.json 7
query I
select count(*)
from read_json_objects('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
1
query III
select *
from read_json('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
there data/json/13725/month=07/mytest.json 7
query I
select count(*)
from read_json('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
1


@@ -0,0 +1,79 @@
# name: test/sql/json/issues/issue13948.test
# description: Test issue 13948 - JSON property names with special characters produce inconsistent results with json -> 'propertyname' and json_extract
# group: [issues]
require json
statement ok
pragma enable_verification
query I
SELECT '{"Status / SubStatus": "test"}' -> 'Status / SubStatus';
----
"test"
query I
WITH path AS (
SELECT 'Status / SubStatus' p
)
SELECT '{"Status / SubStatus": "test"}' -> p
FROM path
----
"test"
# TODO at some point we should escape supplied JSON paths automatically so that this works
query I
SELECT '{"\"Status / SubStatus\"": "test"}' -> '"Status / SubStatus"';
----
NULL
query I
WITH path AS (
SELECT NULL p
)
SELECT '{"\"Status / SubStatus\"": "test"}' -> p
FROM path
----
NULL
query I
SELECT '{"Status / SubStatus": "test"}' -> '$."Status / SubStatus"';
----
"test"
query I
WITH path AS (
SELECT '$."Status / SubStatus"' p
)
SELECT '{"Status / SubStatus": "test"}' -> p
FROM path
----
"test"
query I
SELECT '[1, 2, 3]'->0
----
1
query I
WITH path AS (
SELECT 0 p
)
SELECT '[1, 2, 3]' -> p
FROM path
----
1
query I
SELECT '[1, 2, 3]'->'0'
----
NULL
query I
WITH path AS (
SELECT '0' p
)
SELECT '[1, 2, 3]' -> p
FROM path
----
NULL


@@ -0,0 +1,11 @@
# name: test/sql/json/issues/issue14167.test
# description: Test issue 14167 - Dot notation for json field extraction is no longer working in v1.1.*
# group: [issues]
require json
# the auto-detected type is a MAP, but we can still extract using the dot syntax because we rewrite to map_extract
query I
select columns.v4_c6 from read_ndjson_auto('data/json/14167.json');
----
{'statistics': {'nonNullCount': 0}}
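# a hedged companion sketch of the same rewrite on an inline MAP value (not from the issue's data)
query I
select m.duck from (select MAP {'duck': 42} m);
----
42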


@@ -0,0 +1,10 @@
# name: test/sql/json/issues/issue14245.test
# description: Test issue 14245 - The JSONPointer '/'
# group: [issues]
require json
query I
SELECT '{ "foo": ["bar", "baz"], "": 0 }' -> '/'
----
0
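# hedged companion per the JSON Pointer spec: the empty string addresses the whole document
query I
SELECT '{"foo":["bar","baz"],"":0}' -> ''
----
{"foo":["bar","baz"],"":0}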


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/issue14259.test
# description: Test issue 14259 - DuckDB v1.1.x and above JSON Schema Inference Error - leads to JSON transform error
# group: [issues]
require json
statement ok
from 'data/json/issue14259.json'


@@ -0,0 +1,71 @@
# name: test/sql/json/issues/issue15038.test
# description: Test issue 15038 - TO_JSON results in weird number translation
# group: [issues]
require json
# we support full precision in JSON - yyjson supports RAW values
query I
SELECT to_json(1::HUGEINT << 100)
----
1267650600228229401496703205376
query I
SELECT (1::HUGEINT << 100)::JSON
----
1267650600228229401496703205376
query I
SELECT to_json(1::UHUGEINT << 100)
----
1267650600228229401496703205376
query I
SELECT (1::UHUGEINT << 100)::JSON
----
1267650600228229401496703205376
query I
SELECT to_json((1::UHUGEINT << 100)::DECIMAL(38,0))
----
1267650600228229401496703205376
query I
SELECT (1::UHUGEINT << 100)::DECIMAL(38,0)::JSON
----
1267650600228229401496703205376
query I
SELECT to_json((1::HUGEINT << 100)::BIGNUM)
----
1267650600228229401496703205376
query I
SELECT (1::HUGEINT << 100)::BIGNUM::JSON
----
1267650600228229401496703205376
# original issue (#15038)
query I rowsort
WITH t1 AS (
SELECT 9007199254740993 AS id
UNION ALL
SELECT 1.2 AS id
)
SELECT to_json(id) AS json_objects
FROM t1 AS t;
----
1.2
9007199254740993.0
query I rowsort
WITH t1 AS (
SELECT 9007199254740993 AS id
UNION ALL
SELECT 1.2 AS id
)
SELECT id::JSON AS json_objects
FROM t1 AS t;
----
1.2
9007199254740993.0


@@ -0,0 +1,16 @@
# name: test/sql/json/issues/issue15601.test
# description: Test issue 15601 - JSON reader fails with duplicate column name when reading multiple JSON files of slightly different casing
# group: [issues]
require json
statement ok
PRAGMA enable_verification
# original from the issue
statement ok
FROM 'data/json/15601/fragment*.json'
# created an even worse example
statement ok
FROM 'data/json/15601/bunch_of_key_collisions.json'


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/issue16568.test
# description: Test issue 16568 - Error when loading JSON files with UTF-8 Byte Order Mark (BOM)
# group: [issues]
require json
statement ok
pragma enable_verification
query I
select count(*) from 'data/json/sample_utf8_bom.json'
----
1


@@ -0,0 +1,17 @@
# name: test/sql/json/issues/issue16570.test
# description: Test issue 16570 - JSON type: string slice operation results in result value with JSON type, expected VARCHAR
# group: [issues]
require json
statement ok
pragma enable_verification
query II
with cte as (
select '{"a":1}'::JSON as j
)
select typeof(j[2:3]), typeof(substring(j, 2, 3))
from cte
----
VARCHAR VARCHAR
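# hedged contrast: arrow extraction (json_extract) keeps the JSON type
query I
with cte as (
	select '{"a":1}'::JSON as j
)
select typeof(j->'a') from cte
----
JSON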


@@ -0,0 +1,26 @@
# name: test/sql/json/issues/issue16684.test
# description: Test issue 16684 - When using read_json to read data, it always converts the md5 string to uuid format.
# group: [issues]
require json
statement ok
PRAGMA enable_verification
statement ok
copy (select '00000000000000000000000000000000' md5) to '__TEST_DIR__/issue16684.json'
# should be varchar, not uuid (no hyphens)
query II
select md5, typeof(md5) from '__TEST_DIR__/issue16684.json'
----
00000000000000000000000000000000 VARCHAR
statement ok
copy (select '00000000-0000-0000-0000-000000000000' id) to '__TEST_DIR__/issue16684.json'
# if we add hyphens we get a uuid
query II
select id, typeof(id) from '__TEST_DIR__/issue16684.json'
----
00000000-0000-0000-0000-000000000000 UUID


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/issue16968.test
# description: Test issue 16968 - A not descriptive error message when value of BLOB is passed to JSON function
# group: [issues]
require json
statement ok
pragma enable_verification
query I
select from_hex('aa')::json;
----
"\\xAA"
query I
select json(from_hex('aa'));
----
"\\xAA"
query I
select '1101'::BIT::JSON
----
"1101"
query I
select json('1101'::BIT)
----
"1101"


@@ -0,0 +1,33 @@
# name: test/sql/json/issues/issue18301.test
# description: Test issue 18301 - DuckDB JSON Schema Inconsistency - V 1.3.2
# group: [issues]
require json
statement ok
pragma enable_verification
statement ok
SET threads = 2;
statement ok
CREATE OR REPLACE TABLE cricket_staging AS
SELECT * FROM read_json('data/json/18301/*.json', filename=true)
WHERE 1=0;
statement ok
TRUNCATE cricket_staging;
statement ok
INSERT INTO cricket_staging
SELECT * FROM read_json('data/json/18301/*.json',
union_by_name=true,
filename=true
);
query I
SELECT info->>'$.outcome.by' as outcome_by
FROM cricket_staging
WHERE info->>'$.city' = 'Colombo';
----
{"runs":175,"wickets":null,"innings":1}


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/issue19357.test
# description: Test issue 19357 - Expected unified vector format of type VARCHAR, but found type INT32
# group: [issues]
require json
query I
SELECT TO_JSON({'key_1': 'one'}) AS WITHOUT_KEEP_NULL
----
{"key_1":"one"}
query I
SELECT JSON_OBJECT('key_1', 'one', 'key_2', NULL) AS KEEP_NULL_1
----
{"key_1":"one","key_2":null}
statement error
SELECT JSON_OBJECT('key_1', 'one', NULL, 'two') AS KEEP_NULL_2
----
json_object() keys must be VARCHAR


@@ -0,0 +1,23 @@
# name: test/sql/json/issues/issue6722.test
# description: Test issue 6722 - INTERNAL Error: read_json_auto and read_json(auto_detect=true) fail to handle property name case sensitivities
# group: [issues]
require json
statement ok
PRAGMA enable_verification
# this file has 4 columns, named "id", "Id", "iD", and "ID"
query IIII
FROM 'data/json/duplicate_column_names.json'
----
42 43 44 45
# due to case-insensitivity these column names would cause an error, but we add a number to de-duplicate them
query IIIIII
DESCRIBE FROM 'data/json/duplicate_column_names.json'
----
id BIGINT YES NULL NULL NULL
Id_1 BIGINT YES NULL NULL NULL
iD_2 BIGINT YES NULL NULL NULL
ID_3 BIGINT YES NULL NULL NULL


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/issue8695.test
# description: Test issue 8695 - INTERNAL Error: Attempted to dereference unique_ptr that is NULL
# group: [issues]
require json
# these two succeeded
statement ok
SELECT MAX(JSON_ARRAY_LENGTH(filter_keystage))::int - 1 FROM read_json_auto('data/json/filter_keystage.ndjson');
statement ok
WITH RECURSIVE nums AS (
SELECT 0 AS n
UNION ALL
SELECT n + 1 FROM nums
WHERE n < 5
)
SELECT * FROM nums;
# but combining them used to fail with an internal error
statement ok
WITH RECURSIVE nums AS (
SELECT 0 AS n
UNION ALL
SELECT n + 1 FROM nums
WHERE n < (SELECT MAX(JSON_ARRAY_LENGTH(filter_keystage))::int - 1 FROM read_json_auto('data/json/filter_keystage.ndjson'))
)
SELECT * FROM nums;


@@ -0,0 +1,84 @@
# name: test/sql/json/issues/large_quoted_string_constant.test
# description: Issue #2986: Large string constant with quotes
# group: [issues]
statement ok
CREATE TABLE j2 (id INT, json VARCHAR, src VARCHAR);
statement ok
INSERT INTO j2(id,json,src)
VALUES(3,'[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
query I
SELECT len(json) FROM j2;
----
2115


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/read_json_memory_usage.test
# description: Test JSON memory usage (internal issue #1683)
# group: [issues]
require json
statement ok
SET threads=8;
statement ok
SET memory_limit='200MiB';
query I
SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd');
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
statement ok
SET memory_limit='50MiB';
statement error
SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd');
----
Out of Memory Error
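# hedged follow-up: restoring the larger limit lets the same scan succeed again
statement ok
SET memory_limit='200MiB';
statement ok
SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd');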


@@ -0,0 +1,14 @@
# name: test/sql/json/issues/test_json_temp_8062.test
# description: Test JSON fields in temporary tables for issue 8062
# group: [issues]
require json
statement ok
CREATE TEMP TABLE j1(x json);
statement ok
INSERT INTO j1(x) VALUES ('[1, 2, 3]');
statement ok
SELECT * FROM j1;
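# hedged readback (beyond the original repro): extract an element from the stored JSON
query I
SELECT x->1 FROM j1;
----
2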


@@ -0,0 +1,453 @@
# name: test/sql/json/scalar/json_nested_casts.test
# description: Casts to and from nested types with JSON
# group: [scalar]
require json
require notwindows
statement ok
PRAGMA enable_verification
# list with varchar to json
statement ok
create table t2(blobs json[])
statement ok
insert into t2 values(json('[1,2]'));
query I
SELECT * FROM t2
----
[1, 2]
# varchar to list of json
query I
select cast(json('[1,2]') as json[]);
----
[1, 2]
statement error
select cast(['boom'] as json[]);
----
Conversion Error
query I
select cast(['[1, 2]', '[3, 4]'] as json[]);
----
[[1, 2], [3, 4]]
# struct with varchar to json
query I
SELECT {'a': '[1, 2]'}::ROW(a JSON)
----
{'a': '[1, 2]'}
query I
SELECT {'a': 42, 'b': '[1, 2]'}::ROW(a JSON, b JSON)
----
{'a': 42, 'b': '[1, 2]'}
query I
SELECT {'a': 42, 'b': '[1, 2]'}::ROW(a JSON, b INT[])
----
{'a': 42, 'b': [1, 2]}
statement error
SELECT {'a': 'boom', 'b': '[1, 2]'}::ROW(a JSON, b INT[])
----
Conversion Error
# varchar to struct of json
query I
SELECT '{a: [1, 2]}'::ROW(a JSON)
----
{'a': '[1, 2]'}
# map with varchar to json
query I
SELECT MAP(['42'], ['88'])::MAP(JSON, JSON)
----
{42=88}
# varchar to map of json
query I
SELECT '{42=88}'::MAP(JSON, JSON)
----
{42=88}
# varchar to union with json
query I
SELECT '42'::UNION(u JSON)
----
42
# union with varchar to union with json
query I
SELECT '42'::UNION(u VARCHAR)::UNION(u JSON)
----
42
query I
SELECT ['42']::UNION(u JSON)[]
----
[42]
query I
SELECT '42'::UNION(u VARCHAR)::JSON
----
{"u":"42"}
# try_cast works too
query I
SELECT TRY_CAST('{"duck":42' AS JSON)
----
NULL
# we support casting our nested types to and from JSON (just calls to_json and from_json - well tested)
query I
SELECT {duck: 42}::JSON
----
{"duck":42}
query I
SELECT '{"duck":42}'::JSON::STRUCT(duck INTEGER)
----
{'duck': 42}
query I
SELECT ['duck']::JSON
----
["duck"]
query I
SELECT '["duck"]'::JSON::VARCHAR[]
----
[duck]
query I
SELECT MAP(['duck'], [42])::JSON
----
{"duck":42}
query I
SELECT '{"duck":42}'::JSON::MAP(VARCHAR, INTEGER)
----
{duck=42}
# casting should be strict
statement error
SELECT '{"duck":42}'::JSON::STRUCT(goose INTEGER)
----
Conversion Error
statement error
SELECT '["a", "b", "c"]'::JSON::INT[]
----
Conversion Error
statement error
SELECT '{"duck":42}'::JSON::MAP(INTEGER, INTEGER)
----
Conversion Error
# however, when we encounter a conversion error in a nested type with TRY, only that becomes NULL
query I
SELECT TRY_CAST('{"duck":42}'::JSON AS STRUCT(goose INTEGER))
----
{'goose': NULL}
query I
SELECT TRY_CAST('["a", "b", "c"]'::JSON AS INT[])
----
[NULL, NULL, NULL]
# map keys cannot be NULL
statement error
SELECT TRY_CAST('{"duck":42}'::JSON AS MAP(INTEGER, INTEGER))
----
Conversion Error
# but values can
query I
SELECT TRY_CAST('{"42":"duck"}'::JSON AS MAP(INTEGER, INTEGER))
----
{42=NULL}
# we can also cast implicitly to our types
statement ok
CREATE TABLE structs (v STRUCT(duck INTEGER))
statement ok
CREATE TABLE lists (v VARCHAR[])
statement ok
CREATE table maps (v MAP(VARCHAR, INTEGER))
statement ok
INSERT INTO structs VALUES ('{"duck":42}'::JSON)
statement ok
INSERT INTO lists VALUES ('["duck"]'::JSON)
statement ok
INSERT INTO maps VALUES ('{"duck":42}'::JSON)
query I
SELECT * FROM structs
----
{'duck': 42}
query I
SELECT * FROM lists
----
[duck]
query I
SELECT * FROM maps
----
{duck=42}
# and back to JSON
statement ok
CREATE TABLE jsons (j JSON)
statement ok
INSERT INTO jsons SELECT * FROM structs
statement ok
INSERT INTO jsons SELECT * FROM lists
statement ok
INSERT INTO jsons SELECT * FROM maps
query I
SELECT * FROM jsons
----
{"duck":42}
["duck"]
{"duck":42}
# test_all_types roundtrip, excludes:
# 1. varchar (special case - input is not valid JSON)
# 2. blob - contains '\0'
# 3. bit - contains '\0'
# 4. medium_enum - don't want to create this enum
# 5. large_enum - don't want to create this enum
# and casts DECIMALs with large precision to DOUBLE because JSON can only deal with DOUBLE, and we lose some precision
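# a hedged aside on that precision loss: DOUBLE keeps roughly 15-17 significant digits,
# so wide integers (and wide decimals) are rounded on the way through
query I
select 1234567890123456789::DOUBLE::BIGINT
----
1234567890123456768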
statement ok
create type small_enum as enum ('DUCK_DUCK_ENUM', 'GOOSE');
query I nosort q0
select bool,
tinyint,
smallint,
int,
bigint,
hugeint,
uhugeint,
utinyint,
usmallint,
uint,
ubigint,
date,
time,
timestamp,
timestamp_s,
timestamp_ms,
timestamp_ns,
time_tz,
timestamp_tz,
float,
double,
dec_4_1,
dec_9_4,
dec_18_6::DOUBLE as dec_18_6,
dec38_10::DOUBLE as dec38_10,
uuid,
interval,
small_enum,
int_array,
double_array,
date_array,
timestamp_array,
timestamptz_array,
varchar_array,
nested_int_array,
struct,
struct_of_arrays,
array_of_structs,
map,
from test_all_types()
----
statement ok
create table all_types_json as
select bool::JSON as bool,
tinyint::JSON as tinyint,
smallint::JSON as smallint,
int::JSON as int,
bigint::JSON as bigint,
hugeint::JSON as hugeint,
uhugeint::JSON as uhugeint,
utinyint::JSON as utinyint,
usmallint::JSON as usmallint,
uint::JSON as uint,
ubigint::JSON as ubigint,
date::JSON as date,
time::JSON as time,
timestamp::JSON as timestamp,
timestamp_s::JSON as timestamp_s,
timestamp_ms::JSON as timestamp_ms,
timestamp_ns::JSON as timestamp_ns,
time_tz::JSON as time_tz,
timestamp_tz::JSON as timestamp_tz,
float::JSON as float,
double::JSON as double,
dec_4_1::JSON as dec_4_1,
dec_9_4::JSON as dec_9_4,
dec_18_6::DOUBLE::JSON as dec_18_6,
dec38_10::DOUBLE::JSON as dec38_10,
uuid::JSON as uuid,
interval::JSON as interval,
small_enum::JSON as small_enum,
int_array::JSON as int_array,
double_array::JSON as double_array,
date_array::JSON as date_array,
timestamp_array::JSON as timestamp_array,
timestamptz_array::JSON as timestamptz_array,
varchar_array::JSON as varchar_array,
nested_int_array::JSON as nested_int_array,
struct::JSON as struct,
struct_of_arrays::JSON as struct_of_arrays,
array_of_structs::JSON as array_of_structs,
map::JSON as map,
from test_all_types()
statement ok
create table roundtrip as
select bool::BOOLEAN as bool,
tinyint::TINYINT as tinyint,
smallint::SMALLINT as smallint,
int::INTEGER as int,
bigint::BIGINT as bigint,
hugeint::HUGEINT as hugeint,
uhugeint::UHUGEINT as uhugeint,
utinyint::UTINYINT as utinyint,
usmallint::USMALLINT as usmallint,
uint::UINTEGER as uint,
ubigint::UBIGINT as ubigint,
date::DATE as date,
time::TIME as time,
timestamp::TIMESTAMP as timestamp,
timestamp_s::TIMESTAMP_S as timestamp_s,
timestamp_ms::TIMESTAMP_MS as timestamp_ms,
timestamp_ns::TIMESTAMP_NS as timestamp_ns,
time_tz::TIME WITH TIME ZONE as time_tz,
timestamp_tz::TIMESTAMP WITH TIME ZONE as timestamp_tz,
float::FLOAT as float,
double::DOUBLE as double,
dec_4_1::DECIMAL(4,1) as dec_4_1,
dec_9_4::DECIMAL(9,4) as dec_9_4,
dec_18_6::DOUBLE as dec_18_6,
dec38_10::DOUBLE as dec38_10,
uuid::UUID as uuid,
interval::INTERVAL as interval,
small_enum::small_enum as small_enum,
int_array::INTEGER[] as int_array,
double_array::DOUBLE[] as double_array,
date_array::DATE[] as date_array,
timestamp_array::TIMESTAMP[] as timestamp_array,
timestamptz_array::TIMESTAMP WITH TIME ZONE[] as timestamptz_array,
varchar_array::VARCHAR[] as varchar_array,
nested_int_array::INTEGER[][] as nested_int_array,
struct::STRUCT(a INTEGER, b VARCHAR) as struct,
struct_of_arrays::STRUCT(a INTEGER[], b VARCHAR[]) as struct_of_arrays,
array_of_structs::STRUCT(a INTEGER, b VARCHAR)[] as array_of_structs,
map::MAP(VARCHAR, VARCHAR) as map
from all_types_json
query I nosort q0
select * from roundtrip
----
# also check that we handle vector types properly
foreach all_flat true false
query I nosort q1
select v from test_vector_types(null::int[], all_flat=${all_flat}) tbl(v)
----
query I nosort q1
select v::JSON::INT[] from test_vector_types(null::int[], all_flat=${all_flat}) tbl(v)
----
query I nosort q2
select v from test_vector_types(NULL::ROW(i INTEGER, j VARCHAR), all_flat=${all_flat}) tbl(v)
----
query I nosort q2
select v::JSON::ROW(i INTEGER, j VARCHAR) from test_vector_types(NULL::ROW(i INTEGER, j VARCHAR), all_flat=${all_flat}) tbl(v)
----
endloop
# we only cast our strings to JSON if they start with an alphabetic character, so this won't work
statement error
select ''::JSON
----
Conversion Error
statement error
select ' '::JSON
----
Conversion Error
# JSON cannot contain the NULL byte
statement error
select chr(0)::JSON
----
Conversion Error
statement error
select ('duck' || chr(0))::JSON
----
Conversion Error
# some varchar -> json[] and json[] -> varchar tests
# this is needed because our varchar -> varchar[] casts (and vice versa) escape quotes or add them
# this tests the special-case implementation for these casts
# issue 17647
query I
select '[{"some_key":"some_v}alue"}]'::json[];
----
[{"some_key":"some_v}alue"}]
# internal issue 5498
query I
with cte1 as (select 'a' as a),
cte2 as (select array_agg(cte1::json) as value from cte1)
select value::json[] from cte2;
----
[{"a":"a"}]
# larger test
query II
with cte1 as (
select array_agg({duck:42}::json) json_list_value
from range(5_000)
), cte2 as (
select '[' || string_agg('{"duck":42}', ', ') || ']' string_value
from range(5_000)
)
select json_list_value::varchar = string_value, json_list_value = string_value::json[]
from cte1, cte2
----
true true


@@ -0,0 +1,59 @@
# name: test/sql/json/scalar/test_json_array_length.test
# description: Test JSON array length
# group: [scalar]
require json
statement ok
pragma enable_verification
statement ok
create table test (j varchar)
statement ok
insert into test values ('{}'), ('[]'), ('[1, 2, 3]')
query T
select json_array_length(j) from test
----
0
0
3
query T
select json_array_length(j, '$[1]') from test
----
NULL
NULL
0
# some SQLite json_array_length tests
query T
SELECT json_array_length('[1,2,3,4]');
----
4
query T
SELECT json_array_length('[1,2,3,4]', '$');
----
4
query T
SELECT json_array_length('[1,2,3,4]', '$[2]');
----
0
query T
SELECT json_array_length('{"one":[1,2,3]}');
----
0
query T
SELECT json_array_length('{"one":[1,2,3]}', '$.one');
----
3
query T
SELECT json_array_length('{"one":[1,2,3]}', '$.two');
----
NULL


@@ -0,0 +1,25 @@
# name: test/sql/json/scalar/test_json_arrow_expr.test
# description: Test subquery binding of partially bound arrow expressions
# group: [scalar]
require json
statement ok
CREATE TABLE testjson AS SELECT JSON '{ "key" : "value" }' AS example;
query I
SELECT (SELECT (example)->k AS v FROM (SELECT 'key' AS k) keys)
FROM testjson;
----
"value"
query I
SELECT (SELECT json_extract(example, k) AS v FROM (SELECT 'key' AS k) keys)
FROM testjson;
----
"value"
query I
SELECT (SELECT (JSON '{ "key" : "value" }')->k AS v FROM (SELECT 'key' AS k) keys);
----
"value"


@@ -0,0 +1,93 @@
# name: test/sql/json/scalar/test_json_contains.test
# description: Test JSON contains
# group: [scalar]
require json
statement ok
pragma enable_verification
# some examples copied from the first link I clicked when googling json_contains:
# https://database.guide/json_contains-examples-in-mysql/
query T
SELECT JSON_CONTAINS('{"a": 1, "b": 2, "c": {"d": 3}}', '{"c": {"d": 3}}') AS Result;
----
True
statement error
SELECT JSON_CONTAINS('{"a": 1, "b": 2, "c": {"d": 3}}', '"c": {"d": 3}') AS Result;
----
# some home-made tests
# empty list is contained in the list
query T
select json_contains('[1, 2]', '[]')
----
True
query T
select json_contains('[1, 2]', '2');
----
True
query T
select json_contains('[1, 2]', '[1, 2]');
----
True
query T
select json_contains('[1, 2, 3]', '[1, 2]');
----
True
# order-independent (follows MySQL behaviour)
query T
select json_contains('[1, 2]', '[2, 1]');
----
True
# {"d": 2} is contained in {"c": 1, "d": 2}, so it counts
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '{"d": 2}')
----
True
# same with {"c": 1}
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '{"c": 1}')
----
True
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '{"c": 1, "d": 2}')
----
True
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '[{"d": 2, "c": 1}]')
----
True
# same reasoning with the lists here as before, empty list is contained in the list
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '{"a": {"b": []}}')
----
True
query T
select json_contains('{"a": {"b": [{"c": 1, "d": 2}]}}', '[]')
----
True
# Issue 5960
query T
select json_contains(json('{"key":"value"}'),json('{"blah":"value"}'))
----
False
# Check if the recursion properly stops
query T
select json_contains('{"a": {"b": 42}}', '{"a": 42}')
----
False
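# hedged companion: containment is one-directional - the haystack comes first,
# and a scalar cannot contain an array
query T
select json_contains('2', '[1, 2]')
----
False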


@@ -0,0 +1,325 @@
# name: test/sql/json/scalar/test_json_create.test
# description: Test JSON create functions {json_object(), json_array(), json_quote()}
# group: [scalar]
require json
statement ok
pragma enable_verification
# issue #7727
query T
SELECT TRY_CAST('{{P{P{{{{ASD{AS{D{' AS JSON);
----
NULL
query T
select json_quote({n: 42})
----
{"n":42}
# alias
query T
select to_json({n: 42})
----
{"n":42}
statement error
select to_json({n: 42}, {extra: 'argument'})
----
to_json() takes exactly one argument
query T
select to_json(union_value(n := 42))
----
{"n":42}
query T
SELECT to_json(union_value(a := NULL)::UNION(a INTEGER, b VARCHAR))
----
{"a":null}
query T
SELECT to_json(union_value(b := 'abc')::UNION(a INTEGER, b VARCHAR, c FLOAT))
----
{"b":"abc"}
query I
select to_json(i::UNION(a varchar, b bool)) from (VALUES (null), ('test')) tbl(i);
----
{"a":null}
{"a":"test"}
query I
select to_json(null::UNION(a varchar, b bool));
----
NULL
query T
select json_object('duck', 42)
----
{"duck":42}
query T
select json_object('duck', '{"goose": [1, 2, 3, 4, 5, 6, 7]}'::JSON)
----
{"duck":{"goose":[1,2,3,4,5,6,7]}}
query T
select json_object('nested', {duck: 42})
----
{"nested":{"duck":42}}
query T
select json_object('nested', [{duck: 42}, NULL])
----
{"nested":[{"duck":42},null]}
query T
select json_object('nested', map(['duck'], [42]))
----
{"nested":{"duck":42}}
query T
select json_object('nested', map(['duck', 'goose'], [42, 7]))
----
{"nested":{"duck":42,"goose":7}}
query T
select json_object('nested', map(['0', 'goose'], [42, 7]))
----
{"nested":{"0":42,"goose":7}}
query T
select json_object('nested', map(['duck', 'goose'], [NULL, 7]))
----
{"nested":{"duck":null,"goose":7}}
query T
select json_object('nested', [1, 2, 3])
----
{"nested":[1,2,3]}
query T
select json_object('nested', {nested2: [1, 2, 3]})
----
{"nested":{"nested2":[1,2,3]}}
query T
select json_object('nested', [{nested2: 1}, {nested2: 2}, {nested2: 3}])
----
{"nested":[{"nested2":1},{"nested2":2},{"nested2":3}]}
statement ok
create table test (a int, b double, c varchar, d int[], e uinteger)
statement ok
insert into test values
(0, 0.5, 'short', [0, 1, 2, 3, 4, 5, 6, 7, 9], 33),
(42, 1, 'looooooooooooooong', [], 42),
(-42, 0.42, 2, [1, 2, 3], 1111),
(777, 19.96, 'duck', NULL, 1),
(-777, 4.2, 'goose', [4, 2], NULL)
# a b c d e
query T
select json_quote(struct_pack(a := a, b := b, c := c, d := d, e := e)) from test
----
{"a":0,"b":0.5,"c":"short","d":[0,1,2,3,4,5,6,7,9],"e":33}
{"a":42,"b":1.0,"c":"looooooooooooooong","d":[],"e":42}
{"a":-42,"b":0.42,"c":"2","d":[1,2,3],"e":1111}
{"a":777,"b":19.96,"c":"duck","d":null,"e":1}
{"a":-777,"b":4.2,"c":"goose","d":[4,2],"e":null}
query T
select json_array(a, b, c, d, e) from test
----
[0,0.5,"short",[0,1,2,3,4,5,6,7,9],33]
[42,1.0,"looooooooooooooong",[],42]
[-42,0.42,"2",[1,2,3],1111]
[777,19.96,"duck",null,1]
[-777,4.2,"goose",[4,2],null]
query T
select json_object(a::varchar, a, b::varchar, b, c, c, d::varchar, d, e::varchar, e) from test
----
{"0":0,"0.5":0.5,"short":"short","[0, 1, 2, 3, 4, 5, 6, 7, 9]":[0,1,2,3,4,5,6,7,9],"33":33}
{"42":42,"1.0":1.0,"looooooooooooooong":"looooooooooooooong","[]":[],"42":42}
{"-42":-42,"0.42":0.42,"2":"2","[1, 2, 3]":[1,2,3],"1111":1111}
{"777":777,"19.96":19.96,"duck":"duck","1":1}
{"-777":-777,"4.2":4.2,"goose":"goose","[4, 2]":[4,2]}
query T
select json_quote(map(list(a), list(b))) from test
----
{"0":0.5,"42":1.0,"-42":0.42,"777":19.96,"-777":4.2}
query T
select json_quote(map(list(b), list(c))) from test
----
{"0.5":"short","1.0":"looooooooooooooong","0.42":"2","19.96":"duck","4.2":"goose"}
query T
select json_quote(map(list(c), list(d))) from test
----
{"short":[0,1,2,3,4,5,6,7,9],"looooooooooooooong":[],"2":[1,2,3],"duck":null,"goose":[4,2]}
# Histogram not implemented for INT[]
statement error
select json_quote(map(list(d), list(e))) from test
----
query T
select json_quote(map(list(c), list(e))) from test
----
{"short":33,"looooooooooooooong":42,"2":1111,"duck":1,"goose":null}
# some postgres aliases
query T
select row_to_json({a: 42})
----
{"a":42}
statement error
select row_to_json({a: 42}, false)
----
row_to_json() takes exactly one argument
query T
select row_to_json(NULL)
----
NULL
statement error
select row_to_json([42])
----
query T
select array_to_json([42])
----
[42]
statement error
select array_to_json([42], [21])
----
array_to_json() takes exactly one argument
query T
select array_to_json(NULL)
----
NULL
statement error
select array_to_json({a: 42})
----
# Some SQLite tests
query T
SELECT json_array(1,json_object('abc',2.5,'def',null,'ghi','hello'),99);
----
[1,{"abc":2.5,"def":null,"ghi":"hello"},99]
query T
SELECT json_object('a',1,'b',2.5,'c',null,'d','String Test');
----
{"a":1,"b":2.5,"c":null,"d":"String Test"}
# must have even number of args
statement error
SELECT json_object('a',1,'b');
----
query T
SELECT json_array(1,2.5,null,'hello');
----
[1,2.5,null,"hello"]
query T
SELECT json_array(1,'{"abc":2.5,"def":null,"ghi":hello}',99);
----
[1,"{\"abc\":2.5,\"def\":null,\"ghi\":hello}",99]
query T
SELECT json_array(1,json_quote('{"abc":2.5,"def":null,"ghi":"hello"}'),99);
----
[1,"{\"abc\":2.5,\"def\":null,\"ghi\":\"hello\"}",99]
query T
SELECT json_array(1,json_object('abc',2.5,'def',null,'ghi','hello'),99);
----
[1,{"abc":2.5,"def":null,"ghi":"hello"},99]
query T
SELECT json_object('ex','[52,3.14159]');
----
{"ex":"[52,3.14159]"}
query T
SELECT json_object('ex','[52,3.14159]'::JSON);
----
{"ex":[52,3.14159]}
query T
SELECT json_object('ex',json_array(52,3.14159));
----
{"ex":[52,3.14159]}
query T
SELECT json_object('a',2,'c',4);
----
{"a":2,"c":4}
query T
SELECT json_object('a',2,'c','{e:5}');
----
{"a":2,"c":"{e:5}"}
query T
SELECT json_object('a',2,'c',json_object('e',5));
----
{"a":2,"c":{"e":5}}
query T
SELECT json_array(1,2,'3',4);
----
[1,2,"3",4]
query T
SELECT json_array('[1,2]');
----
["[1,2]"]
query T
SELECT json_array(json_array(1,2));
----
[[1,2]]
query T
SELECT json_array(1,null,'3','[4,5]','{"six":7.7}');
----
[1,null,"3","[4,5]","{\"six\":7.7}"]
query T
SELECT json_array(1,null,'3','[4,5]'::JSON,'{"six":7.7}'::JSON);
----
[1,null,"3",[4,5],{"six":7.7}]
query T
SELECT json_array(
-9223372036854775808,9223372036854775807,0,1,-1,
0.0, 1.0, -1.0, -1e99, +2e100,
'one','two','three',
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, NULL, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
'abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ',
'abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ',
'abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ',
99);
----
[-9223372036854775808,9223372036854775807,0,1,-1,0.0,1.0,-1.0,-1e99,2e100,"one","two","three",4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,null,21,22,23,24,25,26,27,28,29,30,31,"abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ",99]
# test issue 12002
query I
SELECT CAST((MAP([5, 3, 4], ['a', 'b', 'c']), 2) AS JSON);
----
{"":{"5":"a","3":"b","4":"c"},"":2}


@@ -0,0 +1,220 @@
# name: test/sql/json/scalar/test_json_dot_syntax.test
# description: Test JSON extract with dot syntax
# group: [scalar]
require json
statement ok
pragma enable_verification
# should work within other functions (no conflict with list Lambda functions)
query T
SELECT json_structure(json('{"duck":"goose"}').duck);
----
"VARCHAR"
# should go to our NULL
query T
select json('{"foo": null}').foo
----
null
query T
select json('{"foo": null}').foo.bar
----
NULL
# also supports this syntax
query T
select json('{"foo": null}')['foo']
----
null
query T
select json('{"foo": null}')['foo']['bar']
----
NULL
query T
select json('null')
----
null
query T
select json('[null]')."0"
----
NULL
# this doesn't work, we can't extract array elements using dots
query T
select json('{"my_field": {"my_nested_field": ["goose", "duck"]}}').my_field.my_nested_field."1"
----
NULL
# but we can using array extract syntax
query T
select json('{"my_field": {"my_nested_field": ["goose", "duck"]}}').my_field.my_nested_field[1]
----
"duck"
# again, this won't work
query T
select ('{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON).my_field.my_nested_field."1"
----
NULL
# but this will
query T
select ('{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON).my_field.my_nested_field[1]
----
"duck"
# can also access from back
query T
select ('{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON).my_field.my_nested_field[-1]
----
"duck"
# array slicing doesn't work (yet) for json extract
# this looks a bit weird, but this actually does a string slice
query T
select ('{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON).my_field.my_nested_field[0:1]
----
[
query T
select json('{"my_field": {"my_nested_field": ["goose", "duck"]}}').my_field.my_nested_field[1]
----
"duck"
query T
select json('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}').my_field.my_nested_field[1]
----
"duckduckduckduck"
query T
select ('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}'::JSON).my_field.my_nested_field[1]
----
"duckduckduckduck"
query T
select json('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}').my_field.my_nested_field[1]
----
"duckduckduckduck"
# doesn't work
query T
select json('[1, 2, 42]')."2"
----
NULL
# works!
query T
select json('[1, 2, 42]')[2]
----
42
query T
select json('[1, 2, 42]')[2]::text
----
42
# chained
query T
select ('{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON).my_field.my_nested_field[1]
----
"duck"
# some sqlite tests
query T
SELECT json('{"a":2,"c":[4,5,{"f":7}]}').c
----
[4,5,{"f":7}]
query T
SELECT json('{"a":2,"c":[4,5,{"f":7}]}').c[2]
----
{"f":7}
query T
SELECT json('{"a":2,"c":[4,5,{"f":7}]}').c[2].f
----
7
query T
SELECT json('{"a":2,"c":[4,5,{"f":7}]}').x
----
NULL
statement ok
CREATE TABLE obj(x varchar);
statement ok
INSERT INTO obj VALUES('{"a":1,"b":2}');
query T
SELECT json(x).b FROM obj;
----
2
query T
SELECT json(x)."b" FROM obj;
----
2
statement ok
CREATE TABLE t12(x varchar);
statement ok
INSERT INTO t12(x) VALUES(
'{"settings":
{"layer2":
{"hapax.legomenon":
{"forceDisplay":true,
"transliterate":true,
"add.footnote":true,
"summary.report":true},
"dis.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":true},
"tris.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":false}
}
}
}');
query T
SELECT json(x).settings.layer2."tris.legomenon"."summary.report" FROM t12;
----
false
query T
SELECT (x::JSON).settings.layer2."tris.legomenon"."summary.report" FROM t12;
----
false
# this will be upcast to JSON, cannot parse 'bar'
statement error
SELECT json('{"foo": "bar"}').foo = 'bar';
----
Conversion Error
query T
SELECT json('{"foo": "bar"}').foo = '"bar"';
----
true
query T
SELECT json('{"foo": 1}').foo = 1;
----
true
query T
SELECT json('{"foo": "bar"}') = {foo: 'bar'}
----
true


@@ -0,0 +1,39 @@
# name: test/sql/json/scalar/test_json_exists.test
# description: Test JSON exists
# group: [scalar]
require json
statement ok
pragma enable_verification
query I
SELECT json_exists('{"duck": null}', '$.duck')
----
true
query I
with path AS (
SELECT '$.duck' p
)
SELECT json_exists('{"duck": null}', p) FROM path
----
true
query I
SELECT json_exists('{"duck": null}', '$.goose')
----
false
query I
with path AS (
SELECT '$.goose' p
)
SELECT json_exists('{"duck": null}', p) FROM path
----
false
query I
SELECT json_exists('{"duck": null}', ['$.duck', '$.goose'])
----
[true, false]


@@ -0,0 +1,374 @@
# name: test/sql/json/scalar/test_json_extract.test
# description: Test JSON extract
# group: [scalar]
require json
statement ok
pragma enable_verification
# should work within other functions (no conflict with list Lambda functions)
query T
SELECT json_structure('{"duck":"goose"}'->'duck');
----
"VARCHAR"
# should go to our NULL
query T
select json_extract('{"foo": null}', '$.foo')
----
null
query T
select json_extract('{"foo": null}', '$.foo.bar')
----
NULL
query T
select json_extract('null', '$')
----
null
query T
select json_extract('[null]', '$[0]')
----
null
query T
select json_extract('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '/my_field/my_nested_field/1')
----
"duck"
query T
select json_extract_path('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '/my_field/my_nested_field/1')
----
"duck"
query T
select '{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON->'/my_field/my_nested_field/1'
----
"duck"
query T
select json_extract_string('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '/my_field/my_nested_field/1')
----
duck
query T
select json_extract_path_text('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '/my_field/my_nested_field/1')
----
duck
query T
select '{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON->>'/my_field/my_nested_field/1'
----
duck
query T
select json_extract('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}', '/my_field/my_nested_field/1')
----
"duckduckduckduck"
query T
select '{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}'::JSON->'/my_field/my_nested_field/1'
----
"duckduckduckduck"
query T
select json_extract_string('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}', '/my_field/my_nested_field/1')
----
duckduckduckduck
query T
select '{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}'::JSON->>'/my_field/my_nested_field/1'
----
duckduckduckduck
query T
select json_extract('[1, 2, 42]', 2)
----
42
query T
select json_extract_string('[1, 2, 42]', 2)
----
42
# chained
query T
select '{"my_field": {"my_nested_field": ["goose", "duck"]}}'::JSON->'my_field'->'my_nested_field'->>1
----
duck
# some sqlite tests
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$');
----
{"a":2,"c":[4,5,{"f":7}]}
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c');
----
[4,5,{"f":7}]
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2]');
----
{"f":7}
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2].f');
----
7
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.x');
----
NULL
# list of paths must be constant
statement error
with tbl as (
select '{"a":2,"c":[4,5],"f":7}' j, ['$.c','$.a'] p
)
SELECT json_extract(j, p);
----
Binder Error
query T
SELECT json_extract('{"a":2,"c":[4,5],"f":7}', ['$.c','$.a']);
----
[[4,5], 2]
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', ['$.x', '$.a']);
----
[NULL, 2]
query T
SELECT json_extract(NULL, ['$.x', '$.a']);
----
NULL
statement ok
CREATE TABLE t1(j varchar);
statement ok
INSERT INTO t1(j) VALUES('{"a":1,"b":[1,[2,3],4],"c":99}');
query T
SELECT json_extract(j, '$.b[#]') FROM t1;
----
NULL
# -0 is interpreted as just 0, just like in Python
query II
SELECT json_extract(j, '$.b[#-0]') a, a = json_extract(j, '$.b[-0]') FROM t1;
----
1 true
query II
SELECT json_extract(j, '$.b[#-1]') a, a = json_extract(j, '$.b[-1]') FROM t1;
----
4 true
query TT
SELECT json_extract(j, '$.b[#-2]') a, a = json_extract(j, '$.b[-2]') FROM t1;
----
[2,3] true
query TT
SELECT json_extract(j, '$.b[#-02]') a, a = json_extract(j, '$.b[-02]') FROM t1;
----
[2,3] true
query TT
SELECT json_extract(j, '$.b[#-3]') a, a = json_extract(j, '$.b[-3]') FROM t1;
----
1 true
query TT
SELECT json_extract(j, '$.b[#-4]'), json_extract(j, '$.b[-4]') FROM t1;
----
NULL NULL
query TT
SELECT json_extract(j, '$.b[#-2][#-1]') a, a = json_extract(j, '$.b[-2][-1]') FROM t1;
----
3 true
query TT
SELECT j::JSON->'$.b[#-2][#-1]' a, a = (j::JSON->'$.b[-2][-1]') FROM t1;
----
3 true
query TT
SELECT json_extract(j, ['$.b[0]', '$.b[#-1]']) a, a = json_extract(j, ['$.b[0]', '$.b[-1]']) FROM t1;
----
[1, 4] true
query TT
SELECT j::JSON->['$.b[0]', '$.b[#-1]'] a, a = (j::JSON->['$.b[0]', '$.b[-1]']) FROM t1;
----
[1, 4] true
query TT
SELECT json_extract(j, '$.a[#-1]'), json_extract(j, '$.a[-1]') FROM t1;
----
NULL NULL
query TT
SELECT json_extract(j, '$.b[#-000001]') a, a = json_extract(j, '$.b[-000001]') FROM t1;
----
4 true
query TT
SELECT j::JSON->'$.b[#-000001]' a, a = (j::JSON->'$.b[-000001]') FROM t1;
----
4 true
statement error
SELECT json_extract(j, '$.b[#-]') FROM t1;
----
statement error
SELECT json_extract(j, '$.b[-]') FROM t1;
----
statement error
SELECT json_extract(j, '$.b[#9]') FROM t1;
----
statement error
SELECT json_extract(j, '$.b[#+2]') FROM t1;
----
statement error
SELECT json_extract(j, '$.b[#-1') FROM t1;
----
statement error
SELECT json_extract(j, '$.b[#-1x]') FROM t1;
----
statement ok
CREATE TABLE obj(x varchar);
statement ok
INSERT INTO obj VALUES('{"a":1,"b":2}');
query T
SELECT json_extract(x, '$.b') FROM obj;
----
2
query T
SELECT json_extract(x, '$."b"') FROM obj;
----
2
statement ok
CREATE TABLE t12(x varchar);
statement ok
INSERT INTO t12(x) VALUES(
'{"settings":
{"layer2":
{"hapax.legomenon":
{"forceDisplay":true,
"transliterate":true,
"add.footnote":true,
"summary.report":true},
"dis.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":true},
"tris.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":false}
}
}
}');
query T
SELECT json_extract(x, '$.settings.layer2."tris.legomenon"."summary.report"') FROM t12;
----
false
query T
SELECT x::JSON->'$.settings.layer2."tris.legomenon"."summary.report"' FROM t12;
----
false
# test issue #5063
statement ok
create table test5063 as select '{"a": 1, "b": 2}' js
statement ok
prepare q1 as SELECT js->CAST(? AS STRING) FROM test5063
query T
execute q1('a')
----
1
# test issue 11997
query I
select json_extract_string(json('{"j[so]n_\"key": 67}'), '$."j[so]n_\"key"');
----
67
query I
select '{"\"duck\"": 42}'->'$."\"duck\""';
----
42
query I
select '{"\"du\\ck\"": 42}'->'$."\"du\\ck\""';
----
42
query I
select '{"\"du\\ck\"": 42}'->'$."\"du\ck\""';
----
42
query I
select '{"du\\ck": 42}'->'$.du\ck';
----
42
# characters other than \\ or \" get ignored (for now)
query I
select '{"\"du\nck\"": 42}'->'$."\"du\nck\""';
----
NULL
# need to use chr(10) for \n
query I
select '{"\"du\nck\"": 42}'->('$."\"du' || chr(10) || 'ck\""');
----
42
# json_extract gets the JSON null (PostgreSQL behavior)
query I
select '{"duck":null}'->'$.duck'
----
null
# json_extract_string gets a SQL NULL (PostgreSQL behavior)
query I
select '{"duck":null}'->>'$.duck'
----
NULL
# issue 15217 (we should be able to extract the root using JSONPointer)
# the issue is wrong - using '/' should extract the field with an empty string as key
# but we should still be able to extract the root using a completely empty string
query I
select json_extract('{"hello":1}', '')
----
{"hello":1}


@@ -0,0 +1,58 @@
# name: test/sql/json/scalar/test_json_keys.test
# description: Test JSON keys function
# group: [scalar]
require json
statement ok
pragma enable_verification
query T
select json_keys('{"duck": 42}');
----
[duck]
query T
select json_keys('{"duck": 42, "goose": 43}');
----
[duck, goose]
query T
select json_keys('["duck", "goose"]');
----
[]
query T
select json_keys(NULL)
----
NULL
query T
select json_keys('{"duck": {"key1": 42}, "goose": {"key1": 42, "key2": 43}}', ['duck', 'goose'])
----
[[key1], [key1, key2]]
statement ok
create table t1 as
select range, case when range % 2 = 0 then '{"duck": 42}' else '{"duck": 42, "goose": 43}' end j
from range(10000)
query TT
select range % 2 g, sum(length(json_keys(j))) c
from t1
group by g
order by all
----
0 5000
1 10000
statement ok
create table t2 as
select range, '{"duck": {"key1": 42}, "goose": {"key1": 42, "key2": 43}}' j
from range(10000)
query T
select sum(list_sum([length(l) for l in json_keys(j, ['duck', 'goose'])])) s
from t2
----
30000


@@ -0,0 +1,107 @@
# name: test/sql/json/scalar/test_json_merge_patch.test
# description: Test JSON merge patch
# group: [scalar]
require json
statement ok
pragma enable_verification
# from issue 4227
query T
SELECT json_merge_patch('{"a": 1}', '{"a": 2}')
----
{"a":2}
query T
SELECT json_merge_patch('{"a": 1}', '{"b": 2}')
----
{"a":1,"b":2}
query T
SELECT json_merge_patch('{"a": {"c": 1}}', '{"a": {"d": 2}}')
----
{"a":{"c":1,"d":2}}
query T
SELECT json_merge_patch('{"a": {"b": 1}}', '{"a": {"b": 2}}')
----
{"a":{"b":2}}
# from MySQL documentation
query T
SELECT JSON_MERGE_PATCH('[1, 2]', '[true, false]');
----
[true,false]
query T
SELECT JSON_MERGE_PATCH('{"name": "x"}', '{"id": 47}');
----
{"name":"x","id":47}
query T
SELECT JSON_MERGE_PATCH('1', 'true');
----
true
query T
SELECT JSON_MERGE_PATCH('[1, 2]', '{"id": 47}');
----
{"id":47}
query T
SELECT JSON_MERGE_PATCH('{ "a": 1, "b":2 }','{ "a": 3, "c":4 }');
----
{"b":2,"a":3,"c":4}
query T
SELECT JSON_MERGE_PATCH('{ "a": 1, "b":2 }','{ "a": 3, "c":4 }','{ "a": 5, "d":6 }');
----
{"b":2,"c":4,"a":5,"d":6}
query T
SELECT JSON_MERGE_PATCH('{"a":1, "b":2}', '{"b":null}');
----
{"a":1}
query T
SELECT JSON_MERGE_PATCH('{"a":{"x":1}}', '{"a":{"y":2}}');
----
{"a":{"x":1,"y":2}}
# test NULL behaviour
query T
select json_merge_patch(NULL, '3')
----
3
query T
select json_merge_patch('3', NULL)
----
NULL
query T
select json_merge_patch(NULL, NULL)
----
NULL
# test vararg behaviour
query T
select json_merge_patch('{"a":1}', '{"b":2}', '{"c":3}')
----
{"a":1,"b":2,"c":3}
query T
select json_merge_patch(NULL, '{"b":2}', '{"c":3}')
----
{"b":2,"c":3}
query T
select json_merge_patch('{"a":1}', NULL, '{"c":3}')
----
{"c":3}
query T
select json_merge_patch('{"a":1}', '{"b":2}', NULL)
----
NULL


@@ -0,0 +1,267 @@
# name: test/sql/json/scalar/test_json_path.test
# description: Test JSON extract (path notation)
# group: [scalar]
require json
statement ok
pragma enable_verification
# this used to error but the json pointer spec says we should be able to extract the root using the empty string
query I
select json_extract('{"my_field": "duck"}', '')
----
{"my_field":"duck"}
query T
select json_extract('{"my_field": "duck"}', '$.my_field')
----
"duck"
query T
select json_extract('[0, 1, 2]', '$[1]')
----
1
query T
select json_extract('[[1]]', '$[0][0]')
----
1
statement ok
create table test (j varchar, q varchar)
statement ok
insert into test values ('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '$.my_field.my_nested_field[0]')
# wrong syntax
statement error
select json_extract(j, '$.my_field.my_nested_field[]') from test
----
Binder Error
query T
select json_extract(j, '$.my_field.my_nested_field[0]') from test
----
"goose"
# SQLite's indexing from back of the list requires '#'
query T
select json_extract(j, '$.my_field.my_nested_field[#-1]') from test
----
"duck"
query T
select json_extract(j, '$.my_field.my_nested_field[#-2]') from test
----
"goose"
query T
select json_extract(j, '$.my_field.my_nested_field[#-3]') from test
----
NULL
query T
select json_extract(j, '$.my_field.my_nested_field[#]') from test
----
NULL
# we can also do without '#'
query T
select json_extract(j, '$.my_field.my_nested_field[-1]') from test
----
"duck"
query T
select json_extract(j, '$.my_field.my_nested_field[-2]') from test
----
"goose"
query T
select json_extract(j, '$.my_field.my_nested_field[-3]') from test
----
NULL
query T
select json_extract(j, q) from test
----
"goose"
query T
select json_extract('{"my_field": [{"my_nested_field": ["duck", "goose"]}]}', '$.my_field[0].my_nested_field[0]')
----
"duck"
query T
select json_extract('{"my_field": [{"my_nested_field": ["duck", "goose"]}]}', '$.my_field[#-1].my_nested_field[#-1]')
----
"goose"
# some NULLs
query T
select json_extract(j, '$.my_field.my_nested_field.3') from test
----
NULL
# invalid path error that happens during execution, not binding
statement error
with tbl as (
select '[{"duck":42},{"duck":43}]' j, '$duck' p
)
select json_extract(j, p) from tbl
----
Invalid Input Error
# path error
statement error
select json_extract('{"a": {"b": "c"}}', '$[]');
----
statement error
select json_extract('{"a": {"b": "c"}}', '$[#');
----
statement error
select json_extract(j, '$.my_field[my_nested_field[#-3]') from test
----
statement error
select json_extract(j, '$.my_field.my_nested_field[!]') from test
----
statement error
select json_extract('{"a": {"b": "c"}}', '$.a..');
----
statement error
select json_extract('{"a": {"b": "c"}}', '$[[');
----
statement error
select json_extract('{"a": {"b": "c"}}', '$[.');
----
statement error
select json_extract('{"a": {"b": "c"}}', '$]');
----
# with JSON path we support wildcards in arrays and objects; this results in a LIST
query T
select json_extract('[{"duck":42},{"duck":43}]', '$[*].duck')
----
[42, 43]
query T
select json_extract('{"duck":42, "goose":43}', '$.*');
----
[42, 43]
# check for invalid syntax
statement error
select json_extract('[{"duck":42},{"duck":43}]', '$[*.duck')
----
Binder Error: JSON path error
statement error
select json_extract('[{"duck":42},{"duck":43}]', '$*.duck')
----
Binder Error: JSON path error
statement error
select json_extract('{"duck":42, "goose":43}', '$.[*]');
----
Binder Error: JSON path error
# wildcards yield an empty list if no matches are found
query T
select json_extract('[{"duck":42},{"duck":43}]', '$.*')
----
[]
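# hedged contrast (not part of the original suite): without a wildcard, a missing path
# yields NULL rather than an empty list
query T
select json_extract('[{"duck":42}]', '$[0].goose')
----
NULL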
query T
select json_extract('{"duck":[42,43]}', '$.*[-1]')
----
[43]
# can have multiple wildcards
query T
select json_extract('[{"duck":42},{"duck":43}]', '$[*].*')
----
[42, 43]
# wildcards do not work when the path parameter is not a constant (foldable) expression
statement error
with tbl as (
select '[{"duck":42},{"duck":43}]' j, '$[*].duck' p
)
select json_extract(j, p) from tbl
----
Invalid Input Error
# wildcards do not work inside a multi-extract
statement error
select json_extract('[{"duck":42},{"goose":43}]', ['$[*].duck', '$[*].goose'])
----
Binder Error
# test with a larger input
query T
select sum((to_json({duck:range})->'$.*')[1]::int) = sum(range) from range(10000)
----
1
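# hedged illustration (not part of the original suite): the -> operator used above is
# shorthand for json_extract
query T
select ('{"duck":42}'::JSON -> '$.duck') = json_extract('{"duck":42}', '$.duck')
----
true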
# test some nulls (the JSON Path reference implementation returns NULLs for wildcards);
# if the path is not there, it's not added to the LIST
query T
select json_extract('[{"duck":null},{"duck":42},{"duck":null},{}]', '$[*].*')
----
[null, 42, null]
# test recursive wildcard path
query T
select json_extract('{"a":{"b":1,"c":2},"d":["g","h",[{"b":5},{"x":42}]]}', '$..*')
----
[{"b":1,"c":2}, ["g","h",[{"b":5},{"x":42}]], 1, 2, "g", "h", [{"b":5},{"x":42}], {"b":5}, {"x":42}, 5, 42]
# alternative syntax
query T
select json_extract('{"a":{"b":1,"c":2},"d":["g","h",[{"b":5},{"x":42}]]}', '$.**')
----
[{"b":1,"c":2}, ["g","h",[{"b":5},{"x":42}]], 1, 2, "g", "h", [{"b":5},{"x":42}], {"b":5}, {"x":42}, 5, 42]
# recursive non-wildcard path
query T
select json_extract('{"a":{"b":1,"c":2},"d":["g","h",[{"b":5},{"x":42}]]}', '$..b')
----
[1, 5]
query T
select json_extract('{"a":{"b":1,"c":2},"d":["g","h",[{"b":5},{"x":42}]]}', '$..[0]')
----
["g", {"b":5}]
query I
with tbl as (
select '[{"duck":42},{"duck":{"a":43,"b":null}}]' j
)
select json_extract(j, '$[*]..*') from tbl
----
[42, {"a":43,"b":null}, 43, null]
query T
with tbl as (
select '{"duck":[{"goose":42},{"goose":43}],"goose":null}' j
)
select json_extract(j, '$.duck[*]..goose') from tbl
----
[42, 43]
query T
select json_extract(j,'$..[*]') from test
----
["goose", "duck"]
query T
select json_extract('[[{"a":[1,2,3]}], [{"a":[4,5,6]}]]','$[*]..a[*]')
----
[1, 2, 3, 4, 5, 6]

View File

@@ -0,0 +1,46 @@
# name: test/sql/json/scalar/test_json_pretty.test
# description: Test JSON pretty printer
# group: [scalar]
require json
statement ok
pragma enable_verification
query I
SELECT json_pretty('[1,2,{"a":43, "g":[true, true]}]') = '[
1,
2,
{
"a": 43,
"g": [
true,
true
]
}
]'
----
true
query I
SELECT json_pretty(NULL)
----
NULL
query I
SELECT json_pretty('null'::json);
----
null
query I
SELECT json_pretty('[1,2,null, {"a": null}, 42]'::json) = '[
1,
2,
null,
{
"a": null
},
42
]'
----
true
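# hedged sanity check (not part of the original suite): pretty-printing only changes
# whitespace, so the result is still valid JSON
query I
SELECT json_valid(json_pretty('[1,2,{"a":43}]'))
----
true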

View File

@@ -0,0 +1,186 @@
# name: test/sql/json/scalar/test_json_structure.test
# description: Test json_structure() function
# group: [scalar]
require json
statement ok
pragma enable_verification
# singletons
query T
select json_structure('42')
----
"UBIGINT"
query T
select json_structure('4.2')
----
"DOUBLE"
query T
select json_structure('null')
----
"NULL"
query T
select json_structure('true')
----
"BOOLEAN"
query T
select json_structure('"duck"')
----
"VARCHAR"
# simple values
query T
select json_structure('{"a": 42}')
----
{"a":"UBIGINT"}
query T
select json_structure('{"a": 4.2}')
----
{"a":"DOUBLE"}
query T
select json_structure('{"a": "42"}')
----
{"a":"VARCHAR"}
query T
select json_structure('{"a": "looooooooooooooooong"}')
----
{"a":"VARCHAR"}
query T
select json_structure('{"a": null}')
----
{"a":"NULL"}
query T
select json_structure('{"a": true}')
----
{"a":"BOOLEAN"}
query T
select json_structure('{"a": []}')
----
{"a":["NULL"]}
query T
select json_structure('{"a": [42]}')
----
{"a":["UBIGINT"]}
query T
select json_structure('{"a": {"b": 42}}')
----
{"a":{"b":"UBIGINT"}}
# we want strong typing for JSON arrays. If we find inconsistent types, we just set the type to JSON
query T
select json_structure('[null,null]')
----
["NULL"]
query T
select json_structure('[true,null,false]')
----
["BOOLEAN"]
query T
select json_structure('[true,null,false,1]')
----
["JSON"]
query T
select json_structure('[true,null,false,1,-1]')
----
["JSON"]
query T
select json_structure('[true,null,false,1,-1,0.42]')
----
["JSON"]
query T
select json_structure('[true,null,false,1,-1,0.42,"42"]')
----
["JSON"]
# nested stuff
query T
select json_structure('[{"a": 42}, {"a": null}]')
----
[{"a":"UBIGINT"}]
query T
select json_structure('[{"a": 42}, {"b": 4.2}]')
----
[{"a":"UBIGINT","b":"DOUBLE"}]
query T
select json_structure('[{"a": [42, null]}, {"a": [7]}]')
----
[{"a":["UBIGINT"]}]
query T
select json_structure('[{"a": [{"b": 42}, {"b": null}]}, {"a": [{"b": 7}]}]')
----
[{"a":[{"b":"UBIGINT"}]}]
query T
select json_structure('[{"a": [{"b": 42}, {"b": null}]}, {"a": [{"c": 7}]}]')
----
[{"a":[{"b":"UBIGINT","c":"UBIGINT"}]}]
# inconsistent types
query T
select json_structure('[1, [1]]')
----
["JSON"]
query T
select json_structure('[1, {"a": 1}]')
----
["JSON"]
query T
select json_structure('[[1], {"a": 1}]')
----
["JSON"]
# duplicate key: this used to throw an error, but is now ignored
query I
select json_structure('{"a": 42, "a": 7}')
----
{"a":"UBIGINT"}
# from a table
statement ok
create table test (j json);
statement ok
insert into test values
('{"family": "anatidae", "species": ["duck", "goose", "swan", null], "coolness": 1000}'),
('{"family": "canidae", "species": ["labrador", null, "bulldog", "shepherd"], "hair": true, "coolness": 999}'),
(NULL),
('{"family": null, "species": null, "hair": null, "coolness": null}'),
('{"family": "felidae", "species": ["tiger", "lion", null, "british shorthair"], "hair": true, "coolness": 999}')
query T
select json_structure(j) from test
----
{"family":"VARCHAR","species":["VARCHAR"],"coolness":"UBIGINT"}
{"family":"VARCHAR","species":["VARCHAR"],"hair":"BOOLEAN","coolness":"UBIGINT"}
NULL
{"family":"NULL","species":"NULL","hair":"NULL","coolness":"NULL"}
{"family":"VARCHAR","species":["VARCHAR"],"hair":"BOOLEAN","coolness":"UBIGINT"}
# issue 11886
query I
select json_structure('{"a": 1, "A": 1}');
----
{"a":"UBIGINT","A":"UBIGINT"}

View File

@@ -0,0 +1,549 @@
# name: test/sql/json/scalar/test_json_transform.test
# description: Test json_transform function
# group: [scalar]
require json
statement ok
pragma enable_verification
# aliases
query T
select from_json('42', '"UBIGINT"')
----
42
query T
select from_json_strict('42', '"UBIGINT"')
----
42
statement error
select from_json_strict('42', '"DATE"')
----
<REGEX>:.*Invalid Input Error.*Unable to cast.*
# singletons
statement error
select json_transform('42', '42')
----
<REGEX>:.*Binder Error.*invalid JSON structure.*
query T
select json_transform('42', '"UBIGINT"')
----
42
query T
select json_transform('4.2', '"DOUBLE"')
----
4.2
query T
select json_transform('null', '"NULL"')
----
NULL
query T
select json_transform('true', '"BOOLEAN"')
----
True
query T
select json_transform('"duck"', '"VARCHAR"')
----
duck
query T
select json_transform('"duuuuuuuuuuuuuuuuck"', '"VARCHAR"')
----
duuuuuuuuuuuuuuuuck
# simple structs
query T
select json_transform('{"a": 42}', '{"a":"UBIGINT"}')
----
{'a': 42}
statement error
select json_transform_strict('{"a": 42, "a":43}', '{"a":"UBIGINT"}')
----
<REGEX>:.*Invalid Input Error.*has duplicate key.*
statement error
select json_transform_strict('{}', '{"a":"UBIGINT"}')
----
<REGEX>:.*Invalid Input Error.*does not have key.*
statement error
select json_transform_strict('{}', '["UBIGINT"]')
----
<REGEX>:.*Invalid Input Error.*Expected ARRAY, but got OBJECT.*
query T
select json_transform('[{}, [42]]', '[["UBIGINT"]]')
----
[NULL, [42]]
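# hedged contrast (not part of the original suite): the strict variant errors on the
# mismatching object instead of yielding NULL
statement error
select json_transform_strict('[{}, [42]]', '[["UBIGINT"]]')
----
<REGEX>:.*Invalid Input Error.*Expected ARRAY, but got OBJECT.*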
query T
select json_transform('{"a": null}', '{"a":"UBIGINT"}')
----
{'a': NULL}
query T
select json_transform('{"a": 42}', '{"a":"NULL"}')
----
{'a': NULL}
statement error
select json_transform('{"a": 42}', '{"a":"ARRAY"}')
----
<REGEX>:.*Invalid Input Error.*can not be converted to a DuckDB Type.*
# arrays
statement error
select json_transform('[1,2,3]', '["UBIGINT", "BIGINT"]')
----
<REGEX>:.*Binder Error.*Too many values in array.*
query T
select list_sum(json_transform(range(5000)::JSON, '["UBIGINT"]'))::UBIGINT s
----
12497500
query T
select json_transform('[1,2,3]', '["UBIGINT"]')
----
[1, 2, 3]
query T
select json_transform('[1,2,3]', '["NULL"]')
----
[NULL, NULL, NULL]
query T
select json_transform('[{"a": 42}, {"a": null}, {"a": 7}]', '[{"a": "UBIGINT"}]')
----
[{'a': 42}, {'a': NULL}, {'a': 7}]
# we can have missing keys; these become NULL
query T
select json_transform('[{"a": 42}, {"a": null, "b": 33}, {"b": 7}]', '[{"a": "UBIGINT", "b": "UBIGINT"}]')
----
[{'a': 42, 'b': NULL}, {'a': NULL, 'b': 33}, {'a': NULL, 'b': 7}]
statement ok
create table test (j json);
statement ok
insert into test values
('{"family": "anatidae", "species": ["duck", "goose", "swan", null], "coolness": 1000}'),
('{"family": "canidae", "species": ["labrador", null, "bulldog", "shepherd"], "hair": true, "coolness": 999}'),
(NULL),
('{"family": null, "species": null, "hair": null, "coolness": null}'),
('{"family": "felidae", "species": ["tiger", "lion", null, "british shorthair"], "hair": true, "coolness": 999}')
query T
select json_transform(j, '{"family": "VARCHAR", "coolness": "UBIGINT", "species": ["VARCHAR"]}') from test
----
{'family': anatidae, 'coolness': 1000, 'species': [duck, goose, swan, NULL]}
{'family': canidae, 'coolness': 999, 'species': [labrador, NULL, bulldog, shepherd]}
NULL
{'family': NULL, 'coolness': NULL, 'species': NULL}
{'family': felidae, 'coolness': 999, 'species': [tiger, lion, NULL, british shorthair]}
# conflicting names
statement error
select json_transform('{"a": 4, "a": 2}', '{"a": "UBIGINT", "a": "UBIGINT"}')
----
<REGEX>:.*Invalid Input Error.*Duplicate keys in object.*
# we can actually get parts of the JSON back as a string
query T
select json_transform('{"a": {"duck": 42, "goose": 7}}', '{"a": "JSON"}')
----
{'a': '{"duck":42,"goose":7}'}
# loop over all types for coverage
# all integer types
foreach type <integral>
query T
select json_transform('42', '"${type}"')
----
42
query T
select json_transform('42.42', '"${type}"')
----
42
query T
select json_transform('"42"', '"${type}"')
----
42
query T
select json_transform('true', '"${type}"')
----
1
query T
select json_transform('false', '"${type}"')
----
0
query T
select json_transform('[]', '"${type}"')
----
NULL
query T
select json_transform('{}', '"${type}"')
----
NULL
statement error
select json_transform_strict('[]', '"${type}"')
----
<REGEX>:.*Invalid Input Error.*Failed to cast value to numerical.*
statement error
select json_transform_strict('{}', '"${type}"')
----
<REGEX>:.*Invalid Input Error.*Failed to cast value to numerical.*
query T
select json_transform('null', '"${type}"')
----
NULL
endloop
query T
select json_transform('-42', '"INTEGER"')
----
-42
query T
select json_transform('-42', '"UINTEGER"')
----
NULL
statement error
select json_transform_strict('-42', '"UINTEGER"')
----
<REGEX>:.*Invalid Input Error.*Failed to cast value to numerical.*
# varchar
query T
select json_transform('42', '"VARCHAR"')
----
42
query T
select json_transform('42', '"JSON"')
----
42
query T
select json_transform('42', '"BLOB"')
----
42
query T
select json_transform('-42', '"VARCHAR"')
----
-42
query T
select json_transform('42.42', '"VARCHAR"')
----
42.42
query T
select json_transform('true', '"VARCHAR"')
----
true
query T
select json_transform('[]', '"VARCHAR"')
----
[]
query T
select json_transform('{}', '"VARCHAR"')
----
{}
query T
select json_transform('null', '"VARCHAR"')
----
NULL
# decimal
query T
select json_transform('42', '"DECIMAL"')
----
42.000
query T
select json_transform('42', '"DECIMAL(2)"')
----
42
query T
select json_transform('42', '"DECIMAL(3,1)"')
----
42.0
query T
select json_transform('-42', '"DECIMAL(3,1)"')
----
-42.0
query T
select json_transform('"42"', '"DECIMAL(3,1)"')
----
42.0
query T
select json_transform('42.42', '"DECIMAL(4,2)"')
----
42.42
query T
select json_transform('42.42', '"DECIMAL(10,2)"')
----
42.42
query T
select json_transform('42.42', '"DECIMAL(20,2)"')
----
42.42
query T
select json_transform('42.42', '"DECIMAL(38,2)"')
----
42.42
query T
select json_transform('42.42', '"DECIMAL(38,17)"')
----
42.42000000000000000
query T
select json_transform('true', '"DECIMAL(3,1)"')
----
1.0
query T
select json_transform('false', '"DECIMAL(3,1)"')
----
0.0
query T
select json_transform('null', '"DECIMAL(3,1)"')
----
NULL
query T
select json_transform('42', '"DECIMAL(2,1)"')
----
NULL
query T
select json_transform('[]', '"DECIMAL(2,1)"')
----
NULL
query T
select json_transform('{}', '"DECIMAL(2,1)"')
----
NULL
statement error
select json_transform_strict('42', '"DECIMAL(2,1)"')
----
<REGEX>:.*Invalid Input Error.*Failed to cast value to decimal.*
statement error
select json_transform('42', '"DECIMAL(39)"')
----
<REGEX>:.*Invalid Input Error.*can not be converted to a DuckDB Type.*
statement error
select json_transform('42', '"DECIMAL(2,4)"')
----
<REGEX>:.*Invalid Input Error.*can not be converted to a DuckDB Type.*
statement error
select json_transform('42', '"DECIMAL(5,1,1)"')
----
<REGEX>:.*Invalid Input Error.*can not be converted to a DuckDB Type.*
# uuid
query T
select json_transform('"A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11"', '"UUID"')
----
a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
query T
select json_transform('42', '"UUID"')
----
NULL
statement error
select json_transform_strict('42', '"UUID"')
----
<REGEX>:.*Invalid Input Error.*Unable to cast.*
query T
select json_transform('-42', '"UUID"')
----
NULL
query T
select json_transform('42.42', '"UUID"')
----
NULL
query T
select json_transform('true', '"UUID"')
----
NULL
query T
select json_transform('false', '"UUID"')
----
NULL
query T
select json_transform('null', '"UUID"')
----
NULL
statement error
select json_transform_strict('42', '"UUID"')
----
<REGEX>:.*Invalid Input Error.*Unable to cast.*
# date / time
query T
select json_transform('"1996-03-27"', '"DATE"')
----
1996-03-27
query T
select json_transform('"11:59:59"', '"TIME"')
----
11:59:59
query T
select json_transform('42', '"DATE"')
----
NULL
query T
select json_transform('-42', '"DATE"')
----
NULL
query T
select json_transform('42.42', '"DATE"')
----
NULL
query T
select json_transform('"42"', '"DATE"')
----
NULL
query T
select json_transform('null', '"DATE"')
----
NULL
query T
select json_transform('true', '"DATE"')
----
NULL
query T
select json_transform('false', '"DATE"')
----
NULL
query T
select json_transform('[]', '"DATE"')
----
NULL
query T
select json_transform('{}', '"DATE"')
----
NULL
statement error
select json_transform_strict('false', '"DATE"')
----
<REGEX>:.*Invalid Input Error.*Unable to cast.*
# timestamp
query T
select json_transform('"1996-03-27 11:59:59"', '"TIMESTAMP"')
----
1996-03-27 11:59:59
query T
select json_transform('"1996-03-27 11:59:59"', '"TIMESTAMP_MS"')
----
1996-03-27 11:59:59
query T
select json_transform('"1996-03-27 11:59:59"', '"TIMESTAMP_NS"')
----
1996-03-27 11:59:59
query T
select json_transform('"1996-03-27 11:59:59"', '"TIMESTAMP_S"')
----
1996-03-27 11:59:59
query T
select json_transform('null', '"TIMESTAMP"')
----
NULL
query T
select json_transform('42', '"TIMESTAMP"')
----
NULL
statement error
select json_transform_strict('42', '"TIMESTAMP"')
----
<REGEX>:.*Invalid Input Error.*Unable to cast.*
# enum tests
statement ok
CREATE OR REPLACE TYPE test_enum AS ENUM ('a', 'b', 'c');
query T
select json_transform('{"test": "a"}', '{"test": "test_enum"}')
----
{'test': a}
query T
select json_transform('{"test": ["a","b"]}', '{"test": "test_enum[]"}')
----
{'test': [a, b]}
query T
select json_transform('{"test": ["a","b"]}', '{"test": "test_enum[2]"}')
----
{'test': [a, b]}
statement ok
DROP TYPE test_enum;

View File

@@ -0,0 +1,236 @@
# name: test/sql/json/scalar/test_json_type.test
# description: Test JSON type
# group: [scalar]
require json
statement ok
pragma enable_verification
# unary type function
query T
select json_type('{"str": 42}')
----
OBJECT
query T
select json_type('[1, 2, 3]')
----
ARRAY
# not a JSON object or array, but we can still get the type of the singleton value
query T
select json_type('"other"')
----
VARCHAR
query T
select json_type('42')
----
UBIGINT
query T
select json_type('NaN')
----
DOUBLE
query T
select json_type('null')
----
NULL
query T
select json_type(NULL)
----
NULL
# binary type function
# tests with constant input and constant query
query T
select json_type('{"str": 42}', 'str')
----
UBIGINT
query T
select json_type('{"str": "quack"}', 'str')
----
VARCHAR
query T
select json_type('{"str": "quack"}', 'str2')
----
NULL
query T
select json_type('{"str": "quack"}', NULL)
----
NULL
query T
select json_type(NULL, 'str')
----
NULL
# NaN and Infinity should become DOUBLE
query T
select json_type('{"null": NaN}', 'null')
----
DOUBLE
query T
select json_type('{"null": nan}', 'null')
----
DOUBLE
query T
select json_type('{"null": Infinity}', 'null')
----
DOUBLE
query T
select json_type('{"null": -Infinity}', 'null')
----
DOUBLE
statement ok
create table test(json varchar, query varchar)
statement ok
insert into test values
('{"str": "quack", "int": 4, "double": 0.42, "bool": true, "arr": [], "nested": {"val": 1}}', '/nested/val'),
('{"str": "woof", "int": -4, "double": -0.42, "bool": false, "arr": [0, 1, 2], "nested": {"val": 42}}', '/arr/2'),
('{"str": null, "int": null, "double": null, "bool": null, "arr": null, "nested": null}', 'bool')
# tests with columnref input and constant query
query T
select json_type(json, 'str') from test
----
VARCHAR
VARCHAR
NULL
query T
select json_type(json, 'int') from test
----
UBIGINT
BIGINT
NULL
query T
select json_type(json, 'double') from test
----
DOUBLE
DOUBLE
NULL
query T
select json_type(json, 'bool') from test
----
BOOLEAN
BOOLEAN
NULL
query T
select json_type(json, 'arr') from test
----
ARRAY
ARRAY
NULL
# json path queries
query T
select json_type(json, '/arr/0') from test
----
NULL
UBIGINT
NULL
query T
select json_type(json, '/nested/val') from test
----
UBIGINT
UBIGINT
NULL
# query multiple paths ('NULL' is the type of a JSON null value; a bare NULL means the path was missing)
query T
select json_type(json, ['str', '/nested/val']) from test
----
[VARCHAR, UBIGINT]
[VARCHAR, UBIGINT]
['NULL', NULL]
# test with columnref input and columnref query
query T
select json_type(json, query) from test
----
UBIGINT
UBIGINT
NULL
# some SQLite json_type tests
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}');
----
OBJECT
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$');
----
OBJECT
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a');
----
ARRAY
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[0]');
----
UBIGINT
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[1]');
----
DOUBLE
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[2]');
----
BOOLEAN
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[3]');
----
BOOLEAN
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[4]');
----
NULL
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[5]');
----
VARCHAR
query T
SELECT json_type('{"a":[2,3.5,true,false,null,"x"]}','$.a[6]');
----
NULL
# test LIST(JSON) compatibility
query T
select json_type(json_extract('{"duck":[1,2,3]}', '$..duck'))
----
ARRAY
query T
select json_type(json_extract('{"a":{"duck":[1]},"b":{"duck":[2,3]}}', '$..duck'))
----
ARRAY
query T
select json_type(json_extract('{"duck":[1,2,3]}', '$.duck[*]'))
----
ARRAY

View File

@@ -0,0 +1,621 @@
# name: test/sql/json/scalar/test_json_valid.test
# description: Test JSON valid
# group: [scalar]
require json
statement ok
pragma enable_verification
# there was a bug with infinity handling in yyjson (since fixed), hence these tests
query T
select json_valid('{"bla": inf}')
----
true
query T
select json_valid('{"bla": infinity}')
----
true
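# hedged extra check (not part of the original suite): lowercase nan is accepted by the
# lenient reader as well
query T
select json_valid('{"bla": nan}')
----
true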
# some SQLite JSON valid tests
statement ok
CREATE TABLE j1(x varchar);
statement ok
INSERT INTO j1(x)
VALUES('true'),('false'),('null'),('123'),('-234'),('34.5e+6'),
('""'),('"\""'),('"\\"'),('"abcdefghijlmnopqrstuvwxyz"'),
('[]'),('{}'),('[true,false,null,123,-234,34.5e+6,{},[]]'),
('{"a":true,"b":{"c":false}}');
query T
SELECT * FROM j1 WHERE NOT json_valid(x);
----
query T
SELECT json_valid('{"a":55,"b":72,}');
----
true
query T
SELECT json_valid('{"a":55,"b":72}');
----
true
query T
SELECT json_valid('["a",55,"b",72,]');
----
true
query T
SELECT json_valid('["a",55,"b",72]');
----
true
query T
SELECT json_valid('" \ "');
----
false
query T
SELECT json_valid('" \! "');
----
0
query T
SELECT json_valid('" \" "');
----
1
query T
SELECT json_valid('" \# "');
----
0
query T
SELECT json_valid('" \$ "');
----
0
query T
SELECT json_valid('" \% "');
----
0
query T
SELECT json_valid('" \& "');
----
0
query T
SELECT json_valid('" \'' "');
----
0
query T
SELECT json_valid('" \( "');
----
0
query T
SELECT json_valid('" \) "');
----
0
query T
SELECT json_valid('" \* "');
----
0
query T
SELECT json_valid('" \+ "');
----
0
query T
SELECT json_valid('" \, "');
----
0
query T
SELECT json_valid('" \- "');
----
0
query T
SELECT json_valid('" \. "');
----
0
query T
SELECT json_valid('" \/ "');
----
1
query T
SELECT json_valid('" \0 "');
----
0
query T
SELECT json_valid('" \1 "');
----
0
query T
SELECT json_valid('" \2 "');
----
0
query T
SELECT json_valid('" \3 "');
----
0
query T
SELECT json_valid('" \4 "');
----
0
query T
SELECT json_valid('" \5 "');
----
0
query T
SELECT json_valid('" \6 "');
----
0
query T
SELECT json_valid('" \7 "');
----
0
query T
SELECT json_valid('" \8 "');
----
0
query T
SELECT json_valid('" \9 "');
----
0
query T
SELECT json_valid('" \: "');
----
0
query T
SELECT json_valid('" \; "');
----
0
query T
SELECT json_valid('" \< "');
----
0
query T
SELECT json_valid('" \= "');
----
0
query T
SELECT json_valid('" \> "');
----
0
query T
SELECT json_valid('" \? "');
----
0
query T
SELECT json_valid('" \@ "');
----
0
query T
SELECT json_valid('" \A "');
----
0
query T
SELECT json_valid('" \B "');
----
0
query T
SELECT json_valid('" \C "');
----
0
query T
SELECT json_valid('" \D "');
----
0
query T
SELECT json_valid('" \E "');
----
0
query T
SELECT json_valid('" \F "');
----
0
query T
SELECT json_valid('" \G "');
----
0
query T
SELECT json_valid('" \H "');
----
0
query T
SELECT json_valid('" \I "');
----
0
query T
SELECT json_valid('" \J "');
----
0
query T
SELECT json_valid('" \K "');
----
0
query T
SELECT json_valid('" \L "');
----
0
query T
SELECT json_valid('" \M "');
----
0
query T
SELECT json_valid('" \N "');
----
0
query T
SELECT json_valid('" \O "');
----
0
query T
SELECT json_valid('" \P "');
----
0
query T
SELECT json_valid('" \Q "');
----
0
query T
SELECT json_valid('" \R "');
----
0
query T
SELECT json_valid('" \S "');
----
0
query T
SELECT json_valid('" \T "');
----
0
query T
SELECT json_valid('" \U "');
----
0
query T
SELECT json_valid('" \V "');
----
0
query T
SELECT json_valid('" \W "');
----
0
query T
SELECT json_valid('" \X "');
----
0
query T
SELECT json_valid('" \Y "');
----
0
query T
SELECT json_valid('" \Z "');
----
0
query T
SELECT json_valid('" \[ "');
----
0
query T
SELECT json_valid('" \\ "');
----
1
query T
SELECT json_valid('" \] "');
----
0
query T
SELECT json_valid('" \^ "');
----
0
query T
SELECT json_valid('" \_ "');
----
0
query T
SELECT json_valid('" \` "');
----
0
query T
SELECT json_valid('" \a "');
----
0
query T
SELECT json_valid('" \b "');
----
1
query T
SELECT json_valid('" \c "');
----
0
query T
SELECT json_valid('" \d "');
----
0
query T
SELECT json_valid('" \e "');
----
0
query T
SELECT json_valid('" \f "');
----
1
query T
SELECT json_valid('" \g "');
----
0
query T
SELECT json_valid('" \h "');
----
0
query T
SELECT json_valid('" \i "');
----
0
query T
SELECT json_valid('" \j "');
----
0
query T
SELECT json_valid('" \k "');
----
0
query T
SELECT json_valid('" \l "');
----
0
query T
SELECT json_valid('" \m "');
----
0
query T
SELECT json_valid('" \n "');
----
1
query T
SELECT json_valid('" \o "');
----
0
query T
SELECT json_valid('" \p "');
----
0
query T
SELECT json_valid('" \q "');
----
0
query T
SELECT json_valid('" \r "');
----
1
query T
SELECT json_valid('" \s "');
----
0
query T
SELECT json_valid('" \t "');
----
1
query T
SELECT json_valid('" \u "');
----
0
query T
SELECT json_valid('" \ua "');
----
0
query T
SELECT json_valid('" \uab "');
----
0
query T
SELECT json_valid('" \uabc "');
----
0
query T
SELECT json_valid('" \uabcd "');
----
1
query T
SELECT json_valid('" \uFEDC "');
----
1
query T
SELECT json_valid('" \u1234 "');
----
1
query T
SELECT json_valid('" \v "');
----
0
query T
SELECT json_valid('" \w "');
----
0
query T
SELECT json_valid('" \x "');
----
0
query T
SELECT json_valid('" \y "');
----
0
query T
SELECT json_valid('" \z "');
----
0
query T
SELECT json_valid('" \{ "');
----
0
query T
SELECT json_valid('" \| "');
----
0
query T
SELECT json_valid('" \} "');
----
0
query T
SELECT json_valid('" \~ "');
----
0
query T
SELECT json_valid('{"x":01}')
----
0
query T
SELECT json_valid('{"x":-01}')
----
0
query T
SELECT json_valid('{"x":0}')
----
1
query T
SELECT json_valid('{"x":-0}')
----
1
query T
SELECT json_valid('{"x":0.1}')
----
1
query T
SELECT json_valid('{"x":-0.1}')
----
1
query T
SELECT json_valid('{"x":0.0000}')
----
1
query T
SELECT json_valid('{"x":-0.0000}')
----
1
query T
SELECT json_valid('{"x":01.5}')
----
0
query T
SELECT json_valid('{"x":-01.5}')
----
0
query T
SELECT json_valid('{"x":00}')
----
0
query T
SELECT json_valid('{"x":-00}')
----
0

View File

@@ -0,0 +1,193 @@
# name: test/sql/json/scalar/test_json_value.test
# description: Test JSON value
# group: [scalar]
require json
statement ok
pragma enable_verification
# unlike JSON extract, this returns a SQL NULL rather than a JSON null
query T
select json_value('{"foo": null}', '$.foo')
----
NULL
query T
select json_value('{"foo": null}', '$.foo.bar')
----
NULL
query T
select json_value('null', '$')
----
NULL
query T
select json_value('[null]', '$[0]')
----
NULL
query T
select json_value('{"my_field": {"my_nested_field": ["goose", "duck"]}}', '/my_field/my_nested_field/1')
----
"duck"
query T
select json_value('{"my_field": {"my_nested_field": ["goose", "duckduckduckduck"]}}', '/my_field/my_nested_field/1')
----
"duckduckduckduck"
query T
select json_value('[1, 2, 42]', 2)
----
42
# some sqlite tests
# this one returns NULL because the extracted value is not scalar
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', '$');
----
NULL
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', '$.c');
----
NULL
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2]');
----
NULL
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', '$.c[2].f');
----
7
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', '$.x');
----
NULL
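# hedged contrast (not part of the original suite): json_extract returns the non-scalar
# value that json_value nulls out
query T
SELECT json_extract('{"a":2,"c":[4,5,{"f":7}]}', '$.c');
----
[4,5,{"f":7}]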
# list of paths must be constant
statement error
with tbl as (
select '{"a":2,"c":[4,5],"f":7}' j, ['$.c','$.a'] p
)
SELECT json_value(j, p);
----
Binder Error
# first value is not scalar, so it is set to NULL
query T
SELECT json_value('{"a":2,"c":[4,5],"f":7}', ['$.c','$.a']);
----
[NULL, 2]
query T
SELECT json_value('{"a":2,"c":[4,5,{"f":7}]}', ['$.x', '$.a']);
----
[NULL, 2]
query T
SELECT json_value(NULL, ['$.x', '$.a']);
----
NULL
statement ok
CREATE TABLE t1(j varchar);
statement ok
INSERT INTO t1(j) VALUES('{"a":1,"b":[1,[2,3],4],"c":99}');
query T
SELECT json_value(j, '$.b[#]') FROM t1;
----
NULL
query TT
SELECT json_value(j, '$.a[#-1]'), json_value(j, '$.a[-1]') FROM t1;
----
NULL NULL
query TT
SELECT json_value(j, '$.b[#-000001]') a, a = json_value(j, '$.b[-000001]') FROM t1;
----
4 true
statement error
SELECT json_value(j, '$.b[#-]') FROM t1;
----
statement error
SELECT json_value(j, '$.b[-]') FROM t1;
----
statement error
SELECT json_value(j, '$.b[#9]') FROM t1;
----
statement error
SELECT json_value(j, '$.b[#+2]') FROM t1;
----
statement error
SELECT json_value(j, '$.b[#-1') FROM t1;
----
statement error
SELECT json_value(j, '$.b[#-1x]') FROM t1;
----
statement ok
CREATE TABLE obj(x varchar);
statement ok
INSERT INTO obj VALUES('{"a":1,"b":2}');
query T
SELECT json_value(x, '$.b') FROM obj;
----
2
query T
SELECT json_value(x, '$."b"') FROM obj;
----
2
statement ok
CREATE TABLE t12(x varchar);
statement ok
INSERT INTO t12(x) VALUES(
'{"settings":
{"layer2":
{"hapax.legomenon":
{"forceDisplay":true,
"transliterate":true,
"add.footnote":true,
"summary.report":true},
"dis.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":true},
"tris.legomenon":
{"forceDisplay":true,
"transliterate":false,
"add.footnote":false,
"summary.report":false}
}
}
}');
query T
SELECT json_value(x, '$.settings.layer2."tris.legomenon"."summary.report"') FROM t12;
----
false
query T
SELECT json_value(x, '$.settings.layer2."tris.legomenon"') FROM t12;
----
NULL

View File

@@ -0,0 +1,14 @@
# name: test/sql/json/table/auto_glob_directory.test
# description: Test auto globbing a directory
# group: [table]
require json
statement ok
COPY (SELECT i%2 AS grp, i FROM range(1000) t(i)) TO '__TEST_DIR__/glob_dir_json' (FORMAT json, PER_THREAD_OUTPUT);
query II
SELECT grp, COUNT(*) FROM read_json('__TEST_DIR__/glob_dir_json') GROUP BY ALL ORDER BY ALL
----
0 500
1 500
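# hedged sanity check (not part of the original test): globbing the directory picks up all
# per-thread files
query I
SELECT COUNT(*) FROM read_json('__TEST_DIR__/glob_dir_json')
----
1000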

View File

@@ -0,0 +1,49 @@
# name: test/sql/json/table/json_empty_array.test
# description: Read json files with empty arrays
# group: [table]
require json
statement ok
pragma enable_verification
# empty file
query I
select * from 'data/json/empty.ndjson'
----
query I
select * from 'data/json/whitespace_only.json'
----
# empty array
query I
SELECT * FROM read_json_auto('data/json/empty_array.json')
----
query I
SELECT * FROM read_json_auto('data/json/empty_no_newline.json')
----
# malformed files
statement error
SELECT * FROM read_json_auto('data/json/malformed/empty_array_malformed.json')
----
Missing closing brace
statement error
SELECT * FROM read_json_auto('data/json/malformed/empty_array_trailing.json', format='array')
----
Empty array with trailing data when parsing JSON array
statement error
SELECT * FROM read_json_auto('data/json/malformed/array_comma_malformed.json', format='array')
----
Malformed JSON
query I
SELECT * FROM read_json_auto('data/json/array_of_empty_arrays.json', format='array')
----
[]
[]
[]

View File

@@ -0,0 +1,164 @@
# name: test/sql/json/table/json_multi_file_reader.test
# description: Test MultiFileReader integration in JSON reader
# group: [table]
require json
statement ok
create table test as SELECT i as i, to_json([i%4]) as j FROM range(0,20) as tbl(i)
# FIXME: we can't do partitioned JSON writes yet, because the column we partition by is packed into to_json;
# since we just push an expression and then use the CSV writer, this test uses the CSV writer for now
statement ok
COPY test TO '__TEST_DIR__/json_part' (FORMAT csv, quote '', PARTITION_BY (j), HEADER 0);
# some tests for read_json first
query III
select * exclude (filename), replace(filename, '\', '/') as filename from read_json_auto('data/json/example_*.ndjson', filename=true) order by all
----
1 O Brother, Where Art Thou? data/json/example_n.ndjson
1 O Brother, Where Art Thou? data/json/example_r.ndjson
1 O Brother, Where Art Thou? data/json/example_rn.ndjson
2 Home for the Holidays data/json/example_n.ndjson
2 Home for the Holidays data/json/example_r.ndjson
2 Home for the Holidays data/json/example_rn.ndjson
3 The Firm data/json/example_n.ndjson
3 The Firm data/json/example_r.ndjson
3 The Firm data/json/example_rn.ndjson
4 Broadcast News data/json/example_n.ndjson
4 Broadcast News data/json/example_r.ndjson
4 Broadcast News data/json/example_rn.ndjson
5 Raising Arizona data/json/example_n.ndjson
5 Raising Arizona data/json/example_r.ndjson
5 Raising Arizona data/json/example_rn.ndjson
# virtual column
query III
select *, replace(filename, '\', '/') from read_json_auto('data/json/example_*.ndjson') order by all
----
1 O Brother, Where Art Thou? data/json/example_n.ndjson
1 O Brother, Where Art Thou? data/json/example_r.ndjson
1 O Brother, Where Art Thou? data/json/example_rn.ndjson
2 Home for the Holidays data/json/example_n.ndjson
2 Home for the Holidays data/json/example_r.ndjson
2 Home for the Holidays data/json/example_rn.ndjson
3 The Firm data/json/example_n.ndjson
3 The Firm data/json/example_r.ndjson
3 The Firm data/json/example_rn.ndjson
4 Broadcast News data/json/example_n.ndjson
4 Broadcast News data/json/example_r.ndjson
4 Broadcast News data/json/example_rn.ndjson
5 Raising Arizona data/json/example_n.ndjson
5 Raising Arizona data/json/example_r.ndjson
5 Raising Arizona data/json/example_rn.ndjson
query III
select * from read_json_auto(['data/json/example_n.ndjson', 'data/json/top_level_array.json'], union_by_name=true) order by all
----
1 O Brother, Where Art Thou? NULL
2 Home for the Holidays NULL
3 The Firm NULL
4 Broadcast News NULL
5 Raising Arizona NULL
NULL NULL cancelled
NULL NULL cancelled
# despite not being able to do partitioned writes, we can do partitioned json reads already!
query II
SELECT j, count(*) FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1) group by j order by j;
----
[0] 5
[1] 5
[2] 5
[3] 5
# also test read_json_objects
query II
select * exclude (filename), replace(filename, '\', '/') as filename from read_json_objects_auto('data/json/example_*.ndjson', filename=true) order by all
----
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_n.ndjson
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_r.ndjson
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_rn.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_n.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_r.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_rn.ndjson
{"id":3,"name":"The Firm"} data/json/example_n.ndjson
{"id":3,"name":"The Firm"} data/json/example_r.ndjson
{"id":3,"name":"The Firm"} data/json/example_rn.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_n.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_r.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_rn.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_n.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_r.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_rn.ndjson
query I
select * from read_json_objects_auto(['data/json/example_n.ndjson', 'data/json/top_level_array.json'], union_by_name=true) order by all
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
query II
select j, count(*) from read_json_objects_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1) group by j order by j
----
[0] 5
[1] 5
[2] 5
[3] 5
# also test the filter pushdown
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where j='[2]'
group by j
order by j;
----
[2] 5
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where j>'[2]'
group by j
order by j;
----
[3] 5
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where sqrt(j[2]::int) > 1.5
group by j
order by j;
----
[3] 5
# the JSON multi-file reader is a bit different, because we always sample sample_size rows
# even across multiple files when union_by_name=false
# these two files have different schemas, but we can read them together nonetheless
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'])
# both have 5 rows, so if we set sample_size=1 and maximum_sample_files=1, we cannot read them together anymore
statement error
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, maximum_sample_files=1)
----
Invalid Input Error
# if we increase maximum_sample_files, or set union_by_name=true, then we can read them again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, maximum_sample_files=99)
# if we set union_by_name=true, we sample sample_size rows per file, so we can read them again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, union_by_name=true)
# with sample size 6 we sample 1 line from the second file, and of course we can read it again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=6)

View File

@@ -0,0 +1,23 @@
# name: test/sql/json/table/multi_file_hang.test
# description: Test that we do not hang when reading multiple JSON files while only sampling one
# group: [table]
require json
# needs more threads than the number of files for this to happen
statement ok
set threads=8
# only happened with these parameters
statement error
from read_json('data/json/multi_file_hang/*.json', sample_size=1, maximum_sample_files=1)
----
Invalid Input Error: JSON transform error
# the fuzzer also detected a single-file hang, because we tried not to error here;
# we cannot ignore errors of this kind when the data is not newline-delimited,
# because we wouldn't know how to continue
statement error
SELECT * FROM read_json('data/json/fuzzer_hang.json', ignore_errors=true);
----
Invalid Input Error

View File

@@ -0,0 +1,414 @@
# name: test/sql/json/table/read_json.test
# description: Read json files straight to columnar data
# group: [table]
require json
statement ok
pragma enable_verification
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', auto_detect=false)
----
Binder Error
# can't read ndjson with format='array'
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='array')
----
Invalid Input Error: Expected top-level JSON array
# read_ndjson works
query II
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'})
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# We can also read only one of the columns
query I
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER'})
----
1
2
3
4
5
query I
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={name: 'VARCHAR'})
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# what about a broken JSON file
query II
SELECT * FROM read_ndjson('data/json/unterminated_quotes.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, ignore_errors=true)
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
NULL NULL
4 Broadcast News
5 Raising Arizona
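# hedged contrast (not part of the original suite): without ignore_errors the broken line
# raises an error instead
statement error
SELECT * FROM read_ndjson('data/json/unterminated_quotes.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'})
----
Invalid Input Error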
# some of these values don't have "name"
query II
SELECT * FROM read_ndjson('data/json/different_schemas.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'})
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# test projection pushdown (unstructured json)
query I
SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured')
----
1
2
3
4
5
query I
SELECT name FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured')
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# test projection pushdown (newline-delimited json)
query I
SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='newline_delimited')
----
1
2
3
4
5
query I
SELECT name FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='nd')
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# auto-detect
query II
SELECT * FROM read_json_auto('data/json/example_n.ndjson')
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
query II
SELECT * FROM 'data/json/example_n.ndjson'
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# we can detect at varying levels, level 0 is just JSON
query I
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=0)
----
{"id":1,"name":["O","Brother,","Where","Art","Thou?"]}
{"id":2,"name":["Home","for","the","Holidays"]}
{"id":3,"name":["The","Firm"]}
{"id":4,"name":["Broadcast","News"]}
{"id":5,"name":["Raising","Arizona"]}
# at level 1 we get JSON and JSON
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=1)
----
1 ["O","Brother,","Where","Art","Thou?"]
2 ["Home","for","the","Holidays"]
3 ["The","Firm"]
4 ["Broadcast","News"]
5 ["Raising","Arizona"]
# at level 2 we get BIGINT and JSON[]
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=2)
----
1 ["O", "Brother,", "Where", "Art", "Thou?"]
2 ["Home", "for", "the", "Holidays"]
3 ["The", "Firm"]
4 ["Broadcast", "News"]
5 ["Raising", "Arizona"]
# at level 3 it's fully detected, and we get BIGINT and VARCHAR[]
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=3)
----
1 [O, 'Brother,', Where, Art, Thou?]
2 [Home, for, the, Holidays]
3 [The, Firm]
4 [Broadcast, News]
5 [Raising, Arizona]
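# hedged check (not part of the original suite) of the fully-detected types at level 3
query II
SELECT typeof(id), typeof(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=3) LIMIT 1
----
BIGINT	VARCHAR[]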
# we can detect lists too
query III
SELECT id, typeof(name), unnest(name) FROM 'data/json/with_list.json'
----
1 VARCHAR[] O
1 VARCHAR[] Brother,
1 VARCHAR[] Where
1 VARCHAR[] Art
1 VARCHAR[] Thou?
2 VARCHAR[] Home
2 VARCHAR[] for
2 VARCHAR[] the
2 VARCHAR[] Holidays
3 VARCHAR[] The
3 VARCHAR[] Firm
4 VARCHAR[] Broadcast
4 VARCHAR[] News
5 VARCHAR[] Raising
5 VARCHAR[] Arizona
# with depth 2 we don't bother detecting inside the list - it defaults to JSON
query III
SELECT id, typeof(name), unnest(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=2)
----
1 JSON[] "O"
1 JSON[] "Brother,"
1 JSON[] "Where"
1 JSON[] "Art"
1 JSON[] "Thou?"
2 JSON[] "Home"
2 JSON[] "for"
2 JSON[] "the"
2 JSON[] "Holidays"
3 JSON[] "The"
3 JSON[] "Firm"
4 JSON[] "Broadcast"
4 JSON[] "News"
5 JSON[] "Raising"
5 JSON[] "Arizona"
# with depth 1 we don't bother detecting the types inside the object, so everything defaults to JSON (even the "id" column in this case)
query II
SELECT typeof(id), typeof(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=1)
----
JSON JSON
JSON JSON
JSON JSON
JSON JSON
JSON JSON
# we can detect UUIDs
query II
SELECT id, typeof(id) FROM 'data/json/with_uuid.json'
----
bbd05ae7-76e5-4f1a-a31f-247408251fc9 UUID
d5c52052-5f8e-473f-bc8d-176342643ef5 UUID
3b6a6de3-0732-4591-93ed-8df6091eb00d UUID
ae24e69e-e0bf-4e85-9848-27d35df85b8b UUID
63928b16-1814-436f-8b30-b3c40cc31d51 UUID
# top-level array of values
query I
select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'})
----
cancelled
cancelled
query I
select * from read_json('data/json/top_level_array.json', auto_detect=true)
----
cancelled
cancelled
# if we try to read it as 'unstructured' records
statement error
select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}, format='unstructured', records=true)
----
Invalid Input Error: JSON transform error in file "data/json/top_level_array.json", in record/value 1: Expected OBJECT, but got ARRAY
# if we try to read an ndjson file as if it is an array of values, we get an error
statement error
select * from read_json_auto('data/json/example_n.ndjson', format='array')
----
Invalid Input Error: Expected top-level JSON array
# test that we can read a list longer than STANDARD_VECTOR_SIZE properly
statement ok
copy (select 42 duck from range(10000)) to '__TEST_DIR__/my_file.json' (array true)
query T
select count(*) from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array')
----
10000
query T
select sum(duck) = 42*10000 from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array')
----
true
# read_json_auto also understands ARRAY format
query T
select count(*) from '__TEST_DIR__/my_file.json'
----
10000
query T
select sum(duck) = 42*10000 from '__TEST_DIR__/my_file.json'
----
true
# what if we do an array of non-records?
statement ok
copy (select list(range) from range(10)) to '__TEST_DIR__/my_file.json' (format csv, quote '', HEADER 0)
query T
select * from '__TEST_DIR__/my_file.json'
----
0
1
2
3
4
5
6
7
8
9
# fails because it's not records
statement error
select * from read_json('__TEST_DIR__/my_file.json', format='array', columns={range: 'INTEGER'}, records=true)
----
Invalid Input Error: JSON transform error
# fails because it's not records
statement error
select * from read_json_auto('__TEST_DIR__/my_file.json', format='array', records=true)
----
Binder Error: json_read expected records
query T
select * from read_json('__TEST_DIR__/my_file.json', format='auto', records=false, auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# need to supply columns
statement error
select * from read_json('__TEST_DIR__/my_file.json', format='auto', records='false', auto_detect=false)
----
Binder Error
# read as unstructured values, so we just get the array
query T
select * from read_json('__TEST_DIR__/my_file.json', format='unstructured', records='false', auto_detect=true)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# array of non-records
query T
select * from read_json('__TEST_DIR__/my_file.json', format='array', records='false', auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# also works with auto
query T
select * from read_json('__TEST_DIR__/my_file.json', format='array', records='auto', auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# lower thread count so the next tests don't OOM on many-core machines
statement ok
SET threads=2
# issue 6646, this is not an array, but we try to read it as one
statement error
select json_structure(json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='array', columns={'json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
Invalid Input Error: Expected top-level JSON array
# let's try a variation
statement error
select json_structure(json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='array', records='false', columns={'json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
Invalid Input Error: Expected top-level JSON array
# we can parse it as unstructured values, and give it a different column name
query I
select json_structure(my_json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='unstructured', records='false', columns={'my_json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
{"argv":["VARCHAR"],"dag":{"dag_size":"VARCHAR","tasks":{"load_oscar":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"load_weather":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"compress":{"status":"VARCHAR","type":"VARCHAR","upstream":{"load_oscar":"VARCHAR"},"products":{"nb":"VARCHAR"}}}}}
statement ok
pragma disable_verification
# test that we can read a JSON list that spans more than one buffer size
# the JSON is 55 bytes, and the minimum buffer size is 32MB
# let's do 50k to be safe
statement ok
copy (select 42 this_is_a_very_long_field_name_yes_very_much_so from range(50000)) to '__TEST_DIR__/my_file.json' (array true)
query T
select sum(this_is_a_very_long_field_name_yes_very_much_so) = 42 * 50000 from '__TEST_DIR__/my_file.json'
----
true
require httpfs
query II
select * from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson');
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona

View File

@@ -0,0 +1,354 @@
# name: test/sql/json/table/read_json_auto.test_slow
# description: Read json files - schema detection
# group: [table]
require json
statement ok
pragma enable_verification
# some Arrow tests (python/pyarrow/tests/test_json.py) from their GitHub
# these are very similar to the pandas tests, so let's not copy those
# instead of adding all of these files to data/test, we just create them on the fly here
# whenever we add a '' at the end, it's just to check that we skip the trailing newline that's sometimes there
statement ok
copy (select * from (values ('{"a": 1, "b": 2}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0);
query II
select * from '__TEST_DIR__/my_file.json'
----
1 2
statement ok
copy (select * from (values ('{"a": 1}'), ('{"a": 2}'), ('{"a": 3}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query I
select * from '__TEST_DIR__/my_file.json'
----
1
2
3
query I
select count(*) from '__TEST_DIR__/my_file.json'
----
3
statement ok
copy (select * from (values ('{"a": 1,"b": 2, "c": 3}'), ('{"a": 4,"b": 5, "c": 6}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query III
select * from '__TEST_DIR__/my_file.json'
----
1 2 3
4 5 6
statement ok
copy (select * from (values ('{"a": 1,"b": 2, "c": "3", "d": false}'), ('{"a": 4.0, "b": -5, "c": "foo", "d": true}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query IIII
select * from '__TEST_DIR__/my_file.json'
----
1.0 2 3 false
4.0 -5 foo true
# mixed types that cannot be resolved default to JSON (column 3)
statement ok
copy (select * from (values ('{"a": 1, "b": 2, "c": null, "d": null, "e": null}'), ('{"a": null, "b": -5, "c": "foo", "d": null, "e": true}'), ('{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query IIIII
select * from '__TEST_DIR__/my_file.json'
----
1.0 2 NULL NULL NULL
NULL -5 foo NULL true
4.5 NULL nan NULL false
# mixed types are resolved to DOUBLE here
statement ok
copy (select * from (values ('{"a": 1}'), ('{"a": 1.45}'), ('{"a": -23.456}'), ('{}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query II
select typeof(a), a from '__TEST_DIR__/my_file.json'
----
DOUBLE 1.0
DOUBLE 1.45
DOUBLE -23.456
DOUBLE NULL
statement ok
copy (select * from (values ('{"foo": "bar", "num": 0}'), ('{"foo": "baz", "num": 1}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query II
select * from '__TEST_DIR__/my_file.json'
----
bar 0
baz 1
# we can read values from a top-level list
query I
select * from 'data/json/top_level_array.json'
----
cancelled
cancelled
query I
select count(*) from 'data/json/top_level_array.json'
----
2
# for maximum_depth=0 this is two records of JSON
query I
select * from read_json_auto('data/json/top_level_array.json', maximum_depth=0)
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# for 1 it's 1 column of JSON
query I
select * from read_json_auto('data/json/top_level_array.json', maximum_depth=1)
----
"cancelled"
"cancelled"
# if we read this with records='false', we get the struct instead of the unpacked columns
query I
select typeof(json) from read_json_auto('data/json/top_level_array.json', records='false')
----
STRUCT(conclusion VARCHAR)
STRUCT(conclusion VARCHAR)
# however, if there are multiple top-level arrays, we default to reading them as lists
query I
select * from 'data/json/top_level_two_arrays.json'
----
[{'conclusion': cancelled}, {'conclusion': cancelled}]
[{'conclusion': cancelled}, {'conclusion': cancelled}]
# if we read a top-level array as if it is a record, then we get an error
statement error
select * from read_json_auto('data/json/top_level_array.json', format='unstructured', records='true')
----
Binder Error: json_read expected records
# issue Mark found when analyzing a JSON dump of our CI - projection pushdown wasn't working properly
statement ok
select * from 'data/json/projection_pushdown_example.json' WHERE status <> 'completed'
# different schemas - this one should work regardless of sampling 1 or all lines
query II
select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=1)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
query II
select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=-1)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# if we require fields to appear in all objects by setting field_appearance_threshold=1, we default to MAP
query I
select typeof(COLUMNS(*)) from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) limit 1
----
MAP(VARCHAR, JSON)
query I
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1)
----
{id=1, name='"O Brother, Where Art Thou?"'}
{id=2}
{id=3, name='"The Firm"'}
{id=4}
{id=5, name='"Raising Arizona"'}
# if we set it to 0.5 it should work already since "name" appears in 3/5 objects, which is greater than 0.5
query II
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=0.5)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# can't set it to less than 0 or more than 1
statement error
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=-1)
----
Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1
statement error
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=2)
----
Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1
# inconsistent schemas - if we only sample 1 row, we get an error, because we only see a NULL value for the 2nd column
statement error
select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=1, convert_strings_to_integers=true)
----
Invalid Input Error: JSON transform error in file "data/json/inconsistent_schemas.ndjson", in line 3
# if we increase the sample size to 2, we can read it just fine
query II
select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=2)
----
"1" NULL
2 Home for the Holidays
[3] The Firm
4 Broadcast News
5 Raising Arizona
# we can also find BIGINTs in strings (happens a lot in JSON for some reason ...)
statement ok
copy (select * from (values ('{"id": "26941143801"}'), ('{"id": "26941143807"}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
# but only if we set the parameter to true
query T
select typeof(id) from read_json('__TEST_DIR__/my_file.json', convert_strings_to_integers=true)
----
BIGINT
BIGINT
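# hedged contrast (not part of the original suite): with the default setting the strings
# stay VARCHAR
query T
select typeof(id) from read_json('__TEST_DIR__/my_file.json')
----
VARCHAR
VARCHAR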
# an empty array combined with the example file works
query II
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson']);
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# Simple map inference with default threshold
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl')
----
MAP(VARCHAR, BIGINT)
# Test setting map_inference_threshold high
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=1000)
----
MAP(VARCHAR, BIGINT)
# Map inference can be disabled
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-1, field_appearance_threshold=0)
----
STRUCT("1" JSON, "2" BIGINT, "3" BIGINT, "4" BIGINT, "5" BIGINT, "6" BIGINT, "7" BIGINT, "8" BIGINT, "9" BIGINT, "10" BIGINT, "11" BIGINT, "12" BIGINT, "13" BIGINT, "14" BIGINT, "15" BIGINT, "16" JSON, "17" BIGINT, "18" BIGINT, "19" BIGINT, "20" BIGINT, "21" BIGINT, "22" BIGINT, "23" BIGINT, "24" BIGINT, "25" BIGINT, "26" BIGINT, "27" BIGINT, "28" BIGINT, "29" BIGINT, "30" BIGINT, "31" BIGINT, "32" BIGINT, "33" BIGINT, "34" BIGINT, "35" BIGINT, "36" BIGINT, "37" BIGINT, "38" BIGINT, "39" BIGINT, "40" BIGINT, "41" BIGINT, "42" BIGINT, "43" BIGINT, "44" BIGINT, "45" BIGINT, "46" BIGINT, "47" BIGINT, "48" BIGINT, "49" BIGINT, "50" BIGINT, "51" BIGINT, "52" BIGINT, "53" BIGINT, "54" BIGINT, "55" BIGINT, "56" BIGINT, "57" BIGINT, "58" BIGINT, "59" BIGINT, "60" BIGINT, "61" BIGINT, "62" BIGINT, "63" BIGINT, "64" BIGINT, "65" BIGINT, "66" BIGINT, "67" BIGINT, "68" BIGINT, "69" BIGINT, "70" BIGINT, "71" BIGINT, "72" BIGINT, "73" BIGINT, "74" BIGINT, "75" BIGINT, "76" BIGINT, "77" BIGINT, "78" BIGINT, "79" BIGINT, "80" BIGINT, "81" BIGINT, "82" BIGINT, "83" BIGINT, "84" BIGINT, "85" BIGINT, "86" BIGINT, "87" BIGINT, "88" BIGINT, "89" BIGINT, "90" BIGINT, "91" BIGINT, "92" BIGINT, "93" BIGINT, "94" BIGINT, "95" BIGINT, "96" BIGINT, "97" BIGINT, "98" BIGINT, "99" BIGINT, "100" BIGINT)
# Map inference with max_depth works as expected
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=2)
----
MAP(VARCHAR, JSON)
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=1)
----
JSON
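# Hedged sketch (assumed, not executed): at maximum_depth=1 the column is raw JSON,
# so its layout can still be inspected with the JSON functions, e.g.:
#   select json_structure(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=1) limit 1;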
# Map where all values are null
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_nulls.jsonl')
----
MAP(VARCHAR, JSON)
# Map type can be inferred at the top level
query T
select distinct typeof(json) from read_json_auto('data/json/top_level_map.jsonl')
----
MAP(VARCHAR, BIGINT)
# Map type can be inferred for struct value type
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_structs.jsonl')
----
MAP(VARCHAR, STRUCT(b BIGINT))
# Map 80% similarity check works - the objects in map_50_50.jsonl are only 50% similar, so they stay a STRUCT
query T
select distinct typeof(a) from read_json_auto('data/json/map_50_50.jsonl', map_inference_threshold=10)
----
STRUCT(s1 STRUCT(f1 BIGINT[]), s2 STRUCT(f2 BIGINT[]), s3 STRUCT(f1 BIGINT[]), s4 STRUCT(f2 BIGINT[]), s5 STRUCT(f1 BIGINT[]), s6 STRUCT(f2 BIGINT[]), s7 STRUCT(f1 BIGINT[]), s8 STRUCT(f2 BIGINT[]), s9 STRUCT(f1 BIGINT[]), s10 STRUCT(f2 BIGINT[]))
# Map of maps
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_map.jsonl', map_inference_threshold=10)
----
MAP(VARCHAR, MAP(VARCHAR, BIGINT))
# All NULL types get converted to JSON if we do map inference
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_struct_with_nulls.jsonl', map_inference_threshold=10)
----
MAP(VARCHAR, STRUCT(a JSON[]))
# Candidate types are properly handled for map inference
query I
SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_dates.jsonl', map_inference_threshold=25)
----
MAP(VARCHAR, DATE)
# Mixed candidate types are also handled
query I
SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_mixed_date_timestamps.jsonl', map_inference_threshold=25)
----
MAP(VARCHAR, VARCHAR)
# Incompatible types are handled correctly
query T
select distinct typeof(a) from read_json_auto('data/json/map_incompatible.jsonl', map_inference_threshold=10)
----
STRUCT(s1 STRUCT("1" JSON), s2 STRUCT("1" MAP(VARCHAR, JSON)), s3 STRUCT("1" VARCHAR), s4 STRUCT("1" BIGINT[]), s5 STRUCT("1" BIGINT), s6 STRUCT("1" VARCHAR), s7 STRUCT("1" BIGINT[]), s8 STRUCT("1" BIGINT), s9 STRUCT("1" VARCHAR), s10 STRUCT("1" BIGINT[]))
# Can't set map_inference_threshold to a negative value (except -1)
statement error
select * from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-10)
----
Binder Error: read_json_auto "map_inference_threshold" parameter must be 0 or positive, or -1 to disable map inference for consistent objects.
# if we only sample the first file, we default to a single JSON column
query I
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=1);
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# -1 is unlimited
query II
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-1);
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# can't be -2 or lower
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-2);
----
Binder Error
# can't be 0
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=0);
----
Binder Error
# cannot be NULL either
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=NULL);
----
Binder Error

View File

@@ -0,0 +1,130 @@
# name: test/sql/json/table/read_json_dates.test
# description: Read json files - date detection
# group: [table]
require json
statement ok
pragma enable_verification
# issue #6774
query I
select * from read_json_auto('data/json/simple_timestamp.json', columns={"ts": "TIMESTAMP[]"});
----
['2022-06-01 06:41:58', '2021-08-21 08:26:55.5', '2009-11-15 21:58:54.636']
# create date and timestamp tables
statement ok
create table date_test as select '1996/03/27'::DATE d
statement ok
create table timestamp_test as select '1996-03-27 07:42:33'::TIMESTAMP t
# cannot be empty
statement error
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (dateformat)
----
Binder Error
statement error
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (timestampformat)
----
Binder Error
statement error
copy date_test from 'data/json/simple_timestamp.json' (dateformat)
----
Binder Error
statement error
copy date_test from 'data/json/simple_timestamp.json' (timestampformat)
----
Binder Error
# test all supported date formats
foreach date_format '%m-%d-%Y' '%m-%d-%y' '%d-%m-%Y' '%d-%m-%y' '%Y-%m-%d' '%y-%m-%d'
statement ok
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (dateformat ${date_format})
# auto-detect
query II
select typeof(d), d from '__TEST_DIR__/my_file.json'
----
DATE 1996-03-27
# forced format read_ndjson
query II
select typeof(d), d from read_ndjson('__TEST_DIR__/my_file.json', columns={d: 'DATE'}, dateformat=${date_format})
----
DATE 1996-03-27
# wrong format read_ndjson
statement error
select typeof(d), d from read_ndjson('__TEST_DIR__/my_file.json', columns={d: 'DATE'}, dateformat='%d-%Y-%m')
----
Invalid Input Error
# forced format COPY
statement ok
drop table if exists date_copy_test
statement ok
create table date_copy_test (d date)
statement ok
copy date_copy_test from '__TEST_DIR__/my_file.json' (dateformat ${date_format})
query II
select typeof(d), d from date_copy_test
----
DATE 1996-03-27
endloop
# test all supported timestamp formats (hacky way to do a foreach with parameters that contain spaces: each format is split on commas into ${a} ${b} ${c} and reassembled with spaces below)
foreach a,b,c '%Y-%m-%d,%H:%M:%S.%f,' '%m-%d-%Y,%I:%M:%S,%p' '%m-%d-%y,%I:%M:%S,%p' '%d-%m-%Y,%H:%M:%S,' '%d-%m-%y,%H:%M:%S,' '%Y-%m-%d,%H:%M:%S,' '%y-%m-%d,%H:%M:%S,'
statement ok
copy (select t from timestamp_test) to '__TEST_DIR__/my_file.json' (format json, timestampformat ${a} ${b} ${c})
# auto-detect
query II
select typeof(t), t from '__TEST_DIR__/my_file.json'
----
TIMESTAMP 1996-03-27 07:42:33
# forced format read_ndjson
query II
select typeof(t), t from read_ndjson('__TEST_DIR__/my_file.json', columns={t: 'TIMESTAMP'}, timestamp_format=${a} ${b} ${c})
----
TIMESTAMP 1996-03-27 07:42:33
# wrong format read_ndjson
statement error
select typeof(t), t from read_ndjson('__TEST_DIR__/my_file.json', columns={t: 'TIMESTAMP'}, timestamp_format='%H:%M:%S%y-%m-%d')
----
Invalid Input Error
# forced format COPY
statement ok
drop table if exists timestamp_copy_test
statement ok
create table timestamp_copy_test (t timestamp)
statement ok
copy timestamp_copy_test from '__TEST_DIR__/my_file.json' (format json, timestampformat ${a} ${b} ${c})
query II
select typeof(t), t from timestamp_copy_test
----
TIMESTAMP 1996-03-27 07:42:33
endloop
# test this format too
query II
select typeof(createdAt), createdAt from 'data/json/timestamp_example.json'
----
TIMESTAMP 2023-02-07 19:12:28

View File

@@ -0,0 +1,33 @@
# name: test/sql/json/table/read_json_many_files.test_slow
# description: Read > 1000 json files (issue #6249)
# group: [table]
require json
statement ok
create table input as select range as a from range(1, 4);
loop i 0 2000
statement ok
copy input to '__TEST_DIR__/input${i}.json';
endloop
query T
select count(*) from read_json_auto('__TEST_DIR__/input*.json');
----
6000
# also test gzipped (issue #6588)
loop i 0 2000
statement ok
copy input to '__TEST_DIR__/input${i}.json.gz' (COMPRESSION GZIP);
endloop
query T
select count(*) from read_json_auto('__TEST_DIR__/input*.json.gz');
----
6000

View File

@@ -0,0 +1,231 @@
# name: test/sql/json/table/read_json_objects.test
# description: Read ndjson files
# group: [table]
require json
# we cannot check the error output for the specific byte, because on Windows the \n's are replaced with \r\n,
# so the byte count differs. Therefore, we cut off the error message here
statement error
select * from read_json_objects('data/json/unterminated_quotes.ndjson')
----
Invalid Input Error: Malformed JSON
# now it should work!
query I
SELECT * FROM read_csv('data/json/example_n.ndjson', columns={'json': 'JSON'}, delim=NULL, header=0, quote=NULL, escape=NULL, auto_detect = false)
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# example_n is with regular \n newlines
query I
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# this one does not have the 'records' param
statement error
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson', records='false')
----
Binder Error: Invalid named parameter
query I
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# we can auto-detect that it's newline-delimited
query I
SELECT * FROM read_json_objects('data/json/example_n.ndjson', format='auto')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# example_r is with \r newlines - works with unstructured
query I
SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='unstructured')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# we can detect that it's not newline-delimited
query I
SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='auto')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# \r newlines are NOT valid according to the ndjson spec - this does not work, the whole file is treated as a single line
statement error
SELECT * FROM read_ndjson_objects('data/json/example_r.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/example_r.ndjson"
# example_rn is with \r\n newlines
query I
SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# same but gzipped
query I
SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson.gz')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
query I
SELECT * FROM read_json_objects('data/json/example_rn.ndjson.gz', format='nd')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# multi-file scan
query I
SELECT count(*) from read_json_objects(['data/json/example_n.ndjson', 'data/json/example_r.ndjson', 'data/json/example_rn.ndjson'], format='auto')
----
15
query I
SELECT count(*) from read_ndjson_objects(['data/json/example_n.ndjson', 'data/json/example_rn.ndjson'])
----
10
# globbing
query I
SELECT count(*) from read_json_objects('data/json/example_*.ndjson', format='auto')
----
15
query I
SELECT count(*) from read_ndjson_objects('data/json/example_*n.ndjson')
----
10
# empty file
query I
select * from read_json_objects('data/json/empty.ndjson')
----
query I
select * from read_ndjson_objects('data/json/empty.ndjson')
----
# invalid json stuff
statement error
select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='nd')
----
Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson"
statement error
select * from read_ndjson_objects('data/json/unterminated_quotes.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson"
# we can auto-detect and ignore the error (becomes NULL)
query I
select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='auto', ignore_errors=true)
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
NULL
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# multiple values per line (works for read_json_objects)
query I
select * from read_json_objects('data/json/multiple_objects_per_line.ndjson', format='unstructured')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# does not work for read_ndjson_objects
statement error
select * from read_ndjson_objects('data/json/multiple_objects_per_line.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/multiple_objects_per_line.ndjson"
# what if we try to read a CSV?
statement error
select * from read_json_objects('data/csv/tpcds_14.csv')
----
Invalid Input Error: Malformed JSON
statement error
select * from read_ndjson_objects('data/csv/tpcds_14.csv')
----
Invalid Input Error: Malformed JSON in file "data/csv/tpcds_14.csv"
# how about parquet?
statement error
select * from read_json_objects('data/parquet-testing/blob.parquet')
----
Invalid Input Error: Malformed JSON
statement error
select * from read_ndjson_objects('data/parquet-testing/blob.parquet')
----
Invalid Input Error: Malformed JSON in file "data/parquet-testing/blob.parquet"
# we can also read the objects from a JSON array (not newline-delimited)
query I
select * from read_json_objects('data/json/top_level_array.json')
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# and auto-detect it
query I
select * from read_json_objects('data/json/top_level_array.json', format='auto')
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# the file only has one line, so if we read this as ndjson, we just get the array
query I
select * from read_json_objects('data/json/top_level_array.json', format='nd')
----
[{"conclusion":"cancelled"}, {"conclusion":"cancelled"}]

View File

@@ -0,0 +1,35 @@
# name: test/sql/json/table/read_json_union.test
# description: Read json files with unions straight to columnar data
# group: [table]
require json
statement ok
pragma enable_verification
query I
SELECT data FROM read_ndjson('data/json/union.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Frank
5
false
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/bad_key.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found object containing unknown key, instead of union
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/empty_object.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found empty object, instead of union
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/non_object.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Expected an object representing a union, got uint
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/too_many_keys.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found object containing more than one key, instead of union

View File

@@ -0,0 +1,51 @@
# name: test/sql/json/table/test_json_nested_struct_projection_pushdown.test
# description: Test JSON struct projection pushdown on deeply nested data
# group: [table]
require json
statement ok
pragma enable_verification
statement ok
COPY (SELECT {goose: 42, pigeon: 4.2, nested_duck: {nested_nested_duck: {goose: 42, pigeon: 4.2, nested_nested_nested_duck: {goose: 42, pigeon: 4.2}}, goose: 42, pigeon: 4.2}} AS duck) TO '__TEST_DIR__/nested.json'
query I
SELECT duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.nested_nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.nested_nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.nested_nested_duck.nested_nested_nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.nested_nested_duck.nested_nested_nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42

View File

@@ -0,0 +1,748 @@
# name: test/sql/json/table/test_json_table_in_out.test_slow
# description: Test JSON table in/out functions (json_each/json_tree)
# group: [table]
require json
statement ok
pragma enable_verification
# some larger-than-vector-size tests
query I
select count(*) from json_each(range(3000));
----
3000
# this should be equal to the 3000th triangular number
query I
select count(*) = 3000*(3000+1)//2 from range(1, 3001), json_each(range(range));
----
true
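# (Worked out: the row with value k contributes k rows via json_each(range(k)),
# so the total is 1 + 2 + ... + 3000 = 3000*3001/2 = 4,501,500.)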
##### SQLITE TESTS #####
### JSON101-5 ###
statement ok
CREATE OR REPLACE TABLE j2(id INTEGER PRIMARY KEY, json JSON, src VARCHAR);
statement ok
INSERT INTO j2(id,json,src)
VALUES(1,'{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 25,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
}
],
"children": [],
"spouse": null
}','https://en.wikipedia.org/wiki/JSON');
statement ok
INSERT INTO j2(id,json,src)
VALUES(2, '{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
statement ok
INSERT INTO j2(id,json,src)
VALUES(3,'[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
query I
select count(*) from j2, json_tree(j2.json);
----
154
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE fullkey!=(path || CASE WHEN TRY_CAST(key AS UBIGINT) IS NOT NULL THEN '['||key||']'
ELSE '.'||key END);
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE fullkey!=(path || CASE WHEN TRY_CAST(key AS UBIGINT) IS NOT NULL THEN '['||key||']'
ELSE '.'||key END);
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE jx.json<>j2.json;
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE jx.json<>j2.json;
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE jx.value<>jx.atom AND type NOT IN ('ARRAY','OBJECT');
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE jx.value<>jx.atom AND type NOT IN ('ARRAY','OBJECT');
----
### JSON101-13 ###
statement ok
DROP TABLE IF EXISTS t1;
statement ok
DROP TABLE IF EXISTS t2;
statement ok
CREATE OR REPLACE TABLE t1(id INTEGER, json JSON);
statement ok
INSERT INTO t1(id,json) VALUES(1,'{"items":[3,5]}');
statement ok
CREATE OR REPLACE TABLE t2(id INTEGER, json JSON);
statement ok
INSERT INTO t2(id,json) VALUES(2,'{"value":2}');
statement ok
INSERT INTO t2(id,json) VALUES(3,'{"value":3}');
statement ok
INSERT INTO t2(id,json) VALUES(4,'{"value":4}');
statement ok
INSERT INTO t2(id,json) VALUES(5,'{"value":5}');
statement ok
INSERT INTO t2(id,json) VALUES(6,'{"value":6}');
query I
select count(*) from t2, json_each(t2.json) je;
----
5
query I
select je.rowid from t2, json_each(t2.json) je;
----
0
0
0
0
0
# our result here differs from SQLite because our correlated subquery behavior is different
query IIII rowsort
SELECT * FROM t1 CROSS JOIN t2
WHERE EXISTS(SELECT 1 FROM json_each(t1.json,'$.items') AS Z
WHERE Z.value==t2.id);
----
1 {"items":[3,5]} 3 {"value":3}
1 {"items":[3,5]} 5 {"value":5}
query IIII rowsort
SELECT * FROM t2 CROSS JOIN t1
WHERE EXISTS(SELECT 1 FROM json_each(t1.json,'$.items') AS Z
WHERE Z.value==t2.id);
----
3 {"value":3} 1 {"items":[3,5]}
5 {"value":5} 1 {"items":[3,5]}
### JSON101-14 ###
query I
SELECT fullkey FROM json_each('123');
----
$
query I
SELECT fullkey FROM json_each('123.56');
----
$
query I
SELECT fullkey FROM json_each('"hello"');
----
$
query I
SELECT fullkey FROM json_each('null');
----
$
query I
SELECT fullkey FROM json_tree('123');
----
$
query I
SELECT fullkey FROM json_tree('123.56');
----
$
query I
SELECT fullkey FROM json_tree('"hello"');
----
$
query I
SELECT fullkey FROM json_tree('null');
----
$
### JSON101-15 ###
query IIIIIIII
SELECT * FROM JSON_EACH('{"a":1, "b":2}');
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
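# (For reference - hedged, based on the SQLite json_each schema these functions
# mirror - the eight columns above are: key, value, type, atom, id, parent,
# fullkey, path.)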
query IIIIIIII
SELECT xyz.* FROM JSON_EACH('{"a":1, "b":2}') AS xyz;
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
query IIIIIIII
SELECT * FROM (FROM JSON_EACH('{"a":1, "b":2}'));
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
query IIIIIIII
SELECT xyz.* FROM (FROM JSON_EACH('{"a":1, "b":2}')) AS xyz;
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
### JSON101-17 ###
query I
SELECT count(*) FROM json_each(NULL);
----
0
query I
SELECT count(*) FROM json_tree(NULL);
----
0
### JSON102-1000 ###
statement ok
CREATE OR REPLACE TABLE user(name VARCHAR,phone JSON);
statement ok
INSERT INTO user(name,phone) VALUES
('Alice','["919-555-2345","804-555-3621"]'),
('Bob','["201-555-8872"]'),
('Cindy','["704-555-9983"]'),
('Dave','["336-555-8421","704-555-4321","803-911-4421"]');
query I rowsort
SELECT DISTINCT user.name
FROM user, json_each(user.phone)
WHERE json_each.value LIKE '"704-%'
ORDER BY 1;
----
Cindy
Dave
statement ok
UPDATE user
SET phone=json_extract(phone,'$[0]')
WHERE json_array_length(phone)<2;
query II rowsort
SELECT name, substr(phone,1,5) FROM user ORDER BY name;
----
Alice ["919
Bob "201-
Cindy "704-
Dave ["336
query I rowsort
SELECT name FROM user WHERE phone LIKE '"704-%'
UNION
SELECT user.name
FROM user, json_each(user.phone)
WHERE json_valid(user.phone)
AND json_each.value LIKE '"704-%';
----
Cindy
Dave
### JSON102-1010 ###
statement ok
CREATE OR REPLACE TABLE big(json JSON);
statement ok
INSERT INTO big(json) VALUES('{
"id":123,
"stuff":[1,2,3,4],
"partlist":[
{"uuid":"bb108722-572e-11e5-9320-7f3b63a4ca74"},
{"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"},
{"subassembly":[
{"uuid":"6fa5181e-5721-11e5-a04e-57f3d7b32808"}
]}
]
}');
statement ok
INSERT INTO big(json) VALUES('{
"id":456,
"stuff":["hello","world","xyzzy"],
"partlist":[
{"uuid":false},
{"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"}
]
}');
query III nosort q0
SELECT big.rowid, fullkey, value
FROM big, json_tree(big.json)
WHERE json_tree.type NOT IN ('OBJECT','ARRAY')
ORDER BY +big.rowid, +json_tree.id;
----
0 $.id 123
0 $stuff[0] 1
0 $stuff[1] 2
0 $stuff[2] 3
0 $stuff[3] 4
0 $partlist.uuid "bb108722-572e-11e5-9320-7f3b63a4ca74"
0 $partlist.uuid "c690dc14-572e-11e5-95f9-dfc8861fd535"
0 $partlistsubassembly.uuid "6fa5181e-5721-11e5-a04e-57f3d7b32808"
1 $.id 456
1 $stuff[0] "hello"
1 $stuff[1] "world"
1 $stuff[2] "xyzzy"
1 $partlist.uuid false
1 $partlist.uuid "c690dc14-572e-11e5-95f9-dfc8861fd535"
query III nosort q0
SELECT big.rowid, fullkey, atom
FROM big, json_tree(big.json)
WHERE atom IS NOT NULL
ORDER BY +big.rowid, +json_tree.id
----
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json,'$.partlist')
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json,'$')
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json)
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
### JSON107 ###
query II
SELECT key, value FROM json_tree('{"a":123,"b":456}')
WHERE atom;
----
a 123
b 456
### JSON502 ###
statement ok
CREATE OR REPLACE TABLE t1(x JSON);
statement ok
INSERT INTO t1(x) VALUES('{"a":{"b":{"c":"hello",},},}');
query I
SELECT fullkey FROM t1, json_tree(x) order by json_tree.rowid;
----
$
$.a
$.a.b
$.a.b.c
### JOIN-23 ###
statement ok
CREATE OR REPLACE TABLE a(value TEXT);
statement ok
INSERT INTO a(value) SELECT value FROM json_each('["a", "b", null]');
statement ok
CREATE OR REPLACE TABLE b(value TEXT);
statement ok
INSERT INTO b(value) SELECT value FROM json_each('["a", "c", null]');
query II rowsort q1
SELECT a.value, b.value FROM a RIGHT JOIN b ON a.value = b.value;
----
"a" "a"
null null
NULL "c"
query II rowsort q1
SELECT a.value, b.value FROM b LEFT JOIN a ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "c", null]') AS b
LEFT JOIN
json_each('["a", "b", null]') AS a ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "b", null]') AS a
RIGHT JOIN
json_each('["a", "c", null]') AS b ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "b", null]') AS a
RIGHT JOIN
b ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM a
RIGHT JOIN
json_each('["a", "c", null]') AS b ON a.value = b.value;
----
### JOIN8-6000 ###
statement ok
CREATE OR REPLACE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c TEXT, d REAL);
statement ok
INSERT INTO t1 VALUES(0,'A','aa',2.5);
query IIII
SELECT * FROM t1 AS t2 NATURAL RIGHT JOIN t1 AS t3
WHERE (a,b) IN (SELECT rowid, b FROM t1);
----
0 A aa 2.5
statement ok
DROP TABLE IF EXISTS t1;
statement ok
CREATE OR REPLACE TABLE t1(a INT PRIMARY KEY, b TEXT, c TEXT, d INT);
statement ok
INSERT INTO t1 VALUES(15,'xray','baker',42);
query IIIII
SELECT value, t1.* FROM json_each('7') RIGHT JOIN t1 USING (rowid)
WHERE (a,b) IN (SELECT a, b FROM t1);
----
7 15 xray baker 42
statement ok
DROP TABLE IF EXISTS t1;
statement ok
CREATE OR REPLACE TABLE t1(a INTEGER PRIMARY KEY,b INTEGER);
statement ok
INSERT INTO t1 VALUES(0,NULL),(1,2);
query III
SELECT value, t1.* FROM json_each('null') RIGHT JOIN t1 USING (rowid)
WHERE (a,b) IN (SELECT rowid, b FROM t1);
----
NULL 1 2
statement ok
CREATE OR REPLACE TABLE a(key TEXT);
statement ok
INSERT INTO a(key) VALUES('a'),('b');
query II
SELECT to_json(a.key), b.value
FROM a RIGHT JOIN json_each('["a","c"]') AS b ON to_json(a.key)=b.value;
----
"a" "a"
NULL "c"
### WindowB-11 ###
query I
SELECT value FROM json_each('[1,2,3,4,5]');
----
1
2
3
4
5
query II
SELECT key, value FROM json_each('[1,2,3,4,5]');
----
0 1
1 2
2 3
3 4
4 5
query II
SELECT rowid, value FROM json_each('[1,2,3,4,5]');
----
0 1
1 2
2 3
3 4
4 5
query I
SELECT sum(value::int) OVER (ORDER BY rowid) FROM json_each('[1,2,3,4,5]')
----
1
3
6
10
15
query I
SELECT sum(value::int) OVER (
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) FROM json_each('[1,2,3,4,5]')
----
1
3
6
10
15
query I
SELECT sum(value::int) OVER (ORDER BY rowid DESC) FROM json_each('[1,2,3,4,5]')
----
5
9
12
14
15
query I
SELECT sum(value::int) OVER (ORDER BY value ASC) FROM json_each('[2,1,4,3,5]')
----
1
3
6
10
15
### WhereF-6 ###
statement ok
CREATE OR REPLACE TABLE t6(x JSON);
query I
SELECT * FROM t6 WHERE 1 IN (SELECT value FROM json_each(x));
----
statement ok
DROP TABLE t6;
statement ok
CREATE OR REPLACE TABLE t6(a int,b int,c json);
statement ok
INSERT INTO t6 VALUES
(0,null,'{"a":0,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
(1,null,'{"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
(2,null,'{"a":9,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}');
query III
SELECT * FROM t6
WHERE (EXISTS (SELECT 1 FROM json_each(t6.c) AS x WHERE x.type = 'UBIGINT' AND x.value=1));
----
1 NULL {"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}
# Another test case derived from a posting by Wout Mertens on the
# sqlite-users mailing list on 2017-10-04.
statement ok
DROP TABLE IF EXISTS t;
statement ok
CREATE OR REPLACE TABLE t(json JSON);
query I
SELECT * FROM t
WHERE(EXISTS(SELECT 1 FROM json_each(t.json,'$.foo') j
WHERE j.value = 'meep'));
----
statement ok
INSERT INTO t VALUES('{"xyzzy":null}');
statement ok
INSERT INTO t VALUES('{"foo":"meep","other":12345}');
statement ok
INSERT INTO t VALUES('{"foo":"bingo","alt":5.25}');
query I
SELECT * FROM t
WHERE(EXISTS(SELECT 1 FROM json_each(t.json,'$.foo') j
WHERE j.value = '"meep"'));
----
{"foo":"meep","other":12345}
# internal issue 5080
statement ok
create table json_table as
select '{"my_array":[{"my_key":42},{"my_key":9001}]}' as my_json;
query II
select fullkey, path from json_table, json_tree(json_table.my_json) order by json_tree.rowid;
----
$ $
$.my_array $
$.my_array[0] $.my_array
$.my_array[0].my_key $.my_array[0]
$.my_array[1] $.my_array
$.my_array[1].my_key $.my_array[1]
# internal issues 5772 and 5776
statement ok
create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0;
statement ok
SELECT NULL FROM json_each(6051, NULL)

View File

@@ -0,0 +1,9 @@
# name: test/sql/json/test_json_bool.test
# description: Test if JSON can parse bool
# group: [json]
statement ok
set custom_profiling_settings='{"optimizer_filter_pullup":true}';
statement ok
set custom_profiling_settings='{"optimizer_filter_pullup":"true"}';

View File

@@ -0,0 +1,266 @@
# name: test/sql/json/test_json_copy.test_slow
# description: Test JSON COPY
# group: [json]
require json
require parquet
require no_extension_autoloading "FIXME: Autoloading on zstd compression (parquet) not yet there"
# test automatic detection even with .gz
statement ok
create table integers as select 42 i
statement ok
copy integers to '__TEST_DIR__/integers.json.gz' (FORMAT JSON, COMPRESSION GZIP)
statement ok
delete from integers
query I
copy integers from '__TEST_DIR__/integers.json.gz'
----
1
query T
select i from integers
----
42
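# The explicit equivalent (illustrative only; same options as the COPY TO above):
#   copy integers from '__TEST_DIR__/integers.json.gz' (FORMAT JSON, COMPRESSION GZIP);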
# test writing all types to file
statement ok
create type small_enum as enum ('DUCK_DUCK_ENUM', 'GOOSE');
query I nosort q0
select * exclude (varchar, blob, bit, medium_enum, large_enum, hugeint, uhugeint, bignum)
replace (dec_18_6::DOUBLE as dec_18_6, dec38_10::DOUBLE as dec38_10)
from test_all_types()
----
statement ok
copy (
select * exclude (varchar, blob, bit, medium_enum, large_enum, hugeint, uhugeint, bignum)
replace (dec_18_6::DOUBLE as dec_18_6, dec38_10::DOUBLE as dec38_10)
from test_all_types()
) to '__TEST_DIR__/all_types.ndjson'
statement ok
create table roundtrip as
select * exclude (varchar, blob, bit, medium_enum, large_enum, hugeint, uhugeint, bignum)
replace (dec_18_6::DOUBLE as dec_18_6, dec38_10::DOUBLE as dec38_10)
from test_all_types()
limit 0
statement ok
copy roundtrip from '__TEST_DIR__/all_types.ndjson'
query I nosort q0
select * from roundtrip
----
statement ok
delete from roundtrip
statement ok
copy (
select * exclude (varchar, blob, bit, medium_enum, large_enum, hugeint, uhugeint, bignum)
replace (dec_18_6::DOUBLE as dec_18_6, dec38_10::DOUBLE as dec38_10)
from test_all_types()
) to '__TEST_DIR__/all_types.json' (array true)
statement ok
copy roundtrip from '__TEST_DIR__/all_types.json' (array true)
query I nosort q0
select * from roundtrip
----
# test issue 18816
statement ok
copy (select 42 i)
to '__TEST_DIR__/json_batch'
(format json, per_thread_output true, overwrite true);
statement ok
copy (select 42 i)
to '__TEST_DIR__/json_batch'
(format json, per_thread_output true, append true);
# test issue #6305
statement ok
copy (
select * from values
(uuid(), 10),
(uuid(), 10),
(uuid(), 15),
(uuid(), 5)
v (order_id, revenue)
) to '__TEST_DIR__/query.json' (format json)
query II
select typeof(order_id), revenue from '__TEST_DIR__/query.json'
----
UUID 10
UUID 10
UUID 15
UUID 5
# struct star expression should work too
statement ok
copy (
select v.* from values
({order_id: uuid(), revenue: 10}),
({order_id: uuid(), revenue: 10}),
({order_id: uuid(), revenue: 15}),
({order_id: uuid(), revenue: 5})
t (v)
) to '__TEST_DIR__/query.json' (format json)
query II
select typeof(order_id), revenue from '__TEST_DIR__/query.json'
----
UUID 10
UUID 10
UUID 15
UUID 5
# exclude
statement ok
copy (
select order_id, * exclude (order_id) from values
(uuid(), 10),
(uuid(), 10),
(uuid(), 15),
(uuid(), 5)
v (order_id, revenue)
) to '__TEST_DIR__/query.json' (format json)
query II
select typeof(order_id), revenue from '__TEST_DIR__/query.json'
----
UUID 10
UUID 10
UUID 15
UUID 5
# and finally, replace
statement ok
copy (
select * replace (revenue + 1 as revenue) from values
(uuid(), 10),
(uuid(), 10),
(uuid(), 15),
(uuid(), 5)
v (order_id, revenue)
) to '__TEST_DIR__/query.json' (format json)
query II
select typeof(order_id), revenue from '__TEST_DIR__/query.json'
----
UUID 11
UUID 11
UUID 16
UUID 6
statement ok
copy (select 42 as a, a + 1) to '__TEST_DIR__/out.json' (format json);
query II
select * from '__TEST_DIR__/out.json'
----
42 43
statement ok
create table conclusions (conclusion varchar)
# works because we auto-detect by default
statement ok
copy conclusions from 'data/json/top_level_array.json'
# doesn't work if we disable auto-detection
statement error
copy conclusions from 'data/json/top_level_array.json' (AUTO_DETECT FALSE)
----
Invalid Input Error
statement ok
delete from conclusions;
# and also if we say it's an array
statement ok
copy conclusions from 'data/json/top_level_array.json' (ARRAY TRUE)
query I
select * from conclusions
----
cancelled
cancelled
# same with ARRAY FALSE
statement error
copy conclusions from 'data/json/top_level_array.json' (ARRAY FALSE)
----
Invalid Input Error
# we can also write JSON arrays instead of newline-delimited
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.json' (ARRAY TRUE)
query T
select * from read_json_auto('__TEST_DIR__/my.json', format='array')
----
0
1
2
3
4
5
6
7
8
9
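# Auto-detection should also recognize the array layout (hedged; mirrors the
# format='auto' behavior tested elsewhere in this suite):
#   select count(*) from read_json_auto('__TEST_DIR__/my.json');  -- expected: 10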
# compression stuff (cannot be empty)
statement error
copy (select range as i from range(10)) to '__TEST_DIR__/my.json' (COMPRESSION)
----
Invalid Input Error
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.json.gz' (COMPRESSION GZIP)
statement ok
create table my_range (i bigint)
statement ok
copy my_range from '__TEST_DIR__/my.json.gz' (COMPRESSION GZIP)
# we can auto-detect even though we have compressed
statement ok
select * from '__TEST_DIR__/my.json.gz'
# works with zstd too, but we skip this test for now
# it works in CLI, but not in unittest for some reason (ZSTD is not in VirtualFileSystem::compressed_fs)
require parquet
statement ok
copy (select range as i from range(10)) to '__TEST_DIR__/my.json.zst' (COMPRESSION ZSTD)
statement ok
select * from '__TEST_DIR__/my.json.zst'
query I
select * from my_range
----
0
1
2
3
4
5
6
7
8
9

View File

@@ -0,0 +1,225 @@
# name: test/sql/json/test_json_copy_tpch.test_slow
# description: Test JSON COPY using TPC-H
# group: [json]
require json
require tpch
statement ok
set threads=4
statement ok
start transaction
statement ok
call dbgen(sf=0.1)
# export lineitem as array (ARRAY does not need TRUE to be passed)
statement ok
COPY lineitem TO '__TEST_DIR__/lineitem.json' (ARRAY)
# also export the whole thing
statement ok
EXPORT DATABASE '__TEST_DIR__/export_json_test' (FORMAT JSON)
statement ok
rollback
# test the array first
statement ok
start transaction
statement ok
call dbgen(sf=0)
# sf0.1 lineitem json is ~0.2GB so this tests that we can do streaming reads of json arrays
statement ok
set memory_limit='100mb'
statement ok
COPY lineitem FROM '__TEST_DIR__/lineitem.json' (ARRAY)
# 500mb should be enough for the rest
statement ok
set memory_limit='500mb'
query I
PRAGMA tpch(1)
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q01.csv
# also test gzipped
statement ok
COPY lineitem TO '__TEST_DIR__/lineitem.json.gz'
statement ok
FROM '__TEST_DIR__/lineitem.json.gz'
statement ok
rollback
statement ok
start transaction
# test the import
statement ok
import database '__TEST_DIR__/export_json_test'
loop i 1 9
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q0${i}.csv
endloop
loop i 10 23
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q${i}.csv
endloop
# we can also run straight on JSON by creating views
# this also tests projection pushdown well
statement ok
rollback
statement ok
CREATE VIEW lineitem AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/lineitem.json',
columns={
l_orderkey: 'INTEGER',
l_partkey: 'INTEGER',
l_suppkey: 'INTEGER',
l_linenumber: 'INTEGER',
l_quantity: 'INTEGER',
l_extendedprice: 'DECIMAL(15,2)',
l_discount: 'DECIMAL(15,2)',
l_tax: 'DECIMAL(15,2)',
l_returnflag: 'VARCHAR',
l_linestatus: 'VARCHAR',
l_shipdate: 'DATE',
l_commitdate: 'DATE',
l_receiptdate: 'DATE',
l_shipinstruct: 'VARCHAR',
l_shipmode: 'VARCHAR',
l_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW orders AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/orders.json',
columns={
o_orderkey: 'INTEGER',
o_custkey: 'INTEGER',
o_orderstatus: 'VARCHAR',
o_totalprice: 'DECIMAL(15,2)',
o_orderdate: 'DATE',
o_orderpriority: 'VARCHAR',
o_clerk: 'VARCHAR',
o_shippriority: 'INTEGER',
o_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW partsupp AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/partsupp.json',
columns={
ps_partkey: 'INTEGER',
ps_suppkey: 'INTEGER',
ps_availqty: 'INTEGER',
ps_supplycost: 'DECIMAL(15,2)',
ps_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW part AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/part.json',
columns={
p_partkey: 'INTEGER',
p_name: 'VARCHAR',
p_mfgr: 'VARCHAR',
p_brand: 'VARCHAR',
p_type: 'VARCHAR',
p_size: 'INTEGER',
p_container: 'VARCHAR',
p_retailprice: 'DECIMAL(15,2)',
p_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW customer AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/customer.json',
columns={
c_custkey: 'INTEGER',
c_name: 'VARCHAR',
c_address: 'VARCHAR',
c_nationkey: 'INTEGER',
c_phone: 'VARCHAR',
c_acctbal: 'DECIMAL(15,2)',
c_mktsegment: 'VARCHAR',
c_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW supplier AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/supplier.json',
columns={
s_suppkey: 'INTEGER',
s_name: 'VARCHAR',
s_address: 'VARCHAR',
s_nationkey: 'INTEGER',
s_phone: 'VARCHAR',
s_acctbal: 'DECIMAL(15,2)',
s_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW nation AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/nation.json',
columns={
n_nationkey: 'INTEGER',
n_name: 'VARCHAR',
n_regionkey: 'INTEGER',
n_comment: 'VARCHAR'
}
);
statement ok
CREATE VIEW region AS SELECT * FROM read_ndjson(
'__TEST_DIR__/export_json_test/region.json',
columns={
r_regionkey: 'INTEGER',
r_name: 'VARCHAR',
r_comment: 'VARCHAR'
}
);
loop i 1 9
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q0${i}.csv
endloop
loop i 10 23
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q${i}.csv
endloop

View File

@@ -0,0 +1,60 @@
# name: test/sql/json/test_json_empty_object.test
# description: Test empty objects in JSON (DuckDB cannot have empty STRUCT)
# group: [json]
require json
statement ok
pragma enable_verification
# returns JSON instead of {}
query I
select json_structure('{}')
----
"JSON"
statement error
select json_transform('{}', '{}')
----
Binder Error: Empty object in JSON structure
# create a JSON file with an empty struct
statement ok
copy (select '{"a": {}}') to '__TEST_DIR__/my.json' (FORMAT CSV, quote '', header 0)
# auto-detection should not give back an empty struct
query T
select typeof(a) from '__TEST_DIR__/my.json'
----
MAP(VARCHAR, JSON)
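# Hedged illustration (assumed, not executed): the resulting MAP is simply empty, e.g.:
#   select cardinality(a) from '__TEST_DIR__/my.json';  -- expected: 0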
# can't force it to have an empty struct
statement error
select * from read_json('__TEST_DIR__/my.json', columns={a: 'STRUCT()'})
----
Invalid Input Error: Value "STRUCT()" can not be converted to a DuckDB Type.
# test issue 6443
statement ok
copy (select unnest(['{"c1":"val11","c2":{"k1":"val11","k2":{}}}','{"c1":"val21","c2":{"k1":"val21","k2":{}}}'])) to '__TEST_DIR__/data.ndjson' (FORMAT CSV, quote '', header 0)
statement ok
CREATE OR REPLACE TABLE tbl AS SELECT * FROM read_ndjson_auto('__TEST_DIR__/data.ndjson');
# no empty struct here either
query II
select typeof(c1), typeof(c2) from tbl;
----
VARCHAR STRUCT(k1 VARCHAR, k2 MAP(VARCHAR, JSON))
VARCHAR STRUCT(k1 VARCHAR, k2 MAP(VARCHAR, JSON))
require parquet
statement ok
copy tbl to '__TEST_DIR__/data.parquet';
query II
select * from '__TEST_DIR__/data.parquet';
----
val11 {'k1': val11, 'k2': {}}
val21 {'k1': val21, 'k2': {}}

View File

@@ -0,0 +1,50 @@
# name: test/sql/json/test_json_export.test
# description: Test Json Export
# group: [json]
require json
statement ok
PRAGMA enable_verification
statement ok
create table text(i varchar);
statement ok
insert into text values ('bla');
statement ok
export database '__TEST_DIR__/test_export' (FORMAT JSON);
statement ok
drop table text;
statement ok
import database '__TEST_DIR__/test_export';
query I
select * from text;
----
bla
statement ok
COPY text TO '__TEST_DIR__/text.json' (ARRAY)
statement ok
drop table text;
statement ok
create table text(i varchar);
statement ok
COPY text from '__TEST_DIR__/text.json' (ARRAY)
query I
select * from text;
----
bla
query I
select * from '__TEST_DIR__/text.json';
----
bla

View File

@@ -0,0 +1,52 @@
# name: test/sql/json/test_json_macros.test
# description: Test JSON default macros
# group: [json]
require json
statement ok
pragma enable_verification
statement ok
create table t1 as select range + 10 n, range v from range(10)
query T
select json_group_array(v) from t1
----
[0,1,2,3,4,5,6,7,8,9]
query T
select json_group_object(n, v) from t1
----
{"10":0,"11":1,"12":2,"13":3,"14":4,"15":5,"16":6,"17":7,"18":8,"19":9}
query T
select json_group_object(n, v) from t1 group by n % 2 order by all
----
{"10":0,"12":2,"14":4,"16":6,"18":8}
{"11":1,"13":3,"15":5,"17":7,"19":9}
statement ok
insert into t1 values (0, NULL), (20, NULL), (21, NULL), (1, 10), (2, 11)
query T
select json_group_object(n, v) from t1 group by n % 2 order by all
----
{"10":0,"12":2,"14":4,"16":6,"18":8,"0":null,"20":null,"2":11}
{"11":1,"13":3,"15":5,"17":7,"19":9,"21":null,"1":10}
statement ok
create table t2 (j json)
statement ok
insert into t2 values ('{"a": 42}'), ('{"a": 42.42, "b": "duck"}')
query T
select json_group_structure(j) from t2
----
{"a":"DOUBLE","b":"VARCHAR"}
query T
select json(' { "this" : "is", "a": [ "test" ] }')
----
{"this":"is","a":["test"]}

View File

@@ -0,0 +1,30 @@
# name: test/sql/json/test_json_persistence.test
# description: Test persistence of the JSON type
# group: [json]
require json
# load the DB from disk
load __TEST_DIR__/json_type.db
statement ok
create table test (j json)
statement ok
insert into test values ('{"duck": 42}'), (NULL), ('{"goose": 43}')
query II
select typeof(j), j from test
----
JSON {"duck": 42}
JSON NULL
JSON {"goose": 43}
restart
query II
select typeof(j), j from test
----
JSON {"duck": 42}
JSON NULL
JSON {"goose": 43}

View File

@@ -0,0 +1,41 @@
# name: test/sql/json/test_json_serialize_plan.test
# group: [json]
require json
statement ok
CREATE TABLE tbl1 (i int);
# Example with simple query
query I
SELECT json_serialize_plan('SELECT 1 + 2 FROM tbl1');
----
<REGEX>:.*LOGICAL_PROJECTION.*LOGICAL_GET.*
# Example with skip_null and skip_empty
query I
SELECT json_serialize_plan('SELECT *, 1 + 2 FROM tbl1', skip_null := true, skip_empty := true);
----
<REGEX>:.*LOGICAL_PROJECTION.*LOGICAL_GET.*
# Example with skip_null and skip_empty and optimize
query I
SELECT json_serialize_plan('SELECT *, 1 + 2 FROM tbl1', skip_null := true, skip_empty := true, optimize := true);
----
<REGEX>:.*LOGICAL_PROJECTION.*LOGICAL_GET.*
# Example with syntax error
query I
SELECT json_serialize_plan('SELECT AND LAUNCH ROCKETS WHERE 1 = 1');
----
<REGEX>:.*syntax error at or near.*
# Example with binding error
# The binding error message "did you mean table xyz" is not deterministic, so use a LIKE here.
query I
SELECT json_serialize_plan('SELECT * FROM nonexistent_table') LIKE '%Table with name nonexistent_table does not exist%';
----
true
statement ok
select json_serialize_plan('select blob ''\\x00''');

View File

@@ -0,0 +1,118 @@
# name: test/sql/json/test_json_serialize_sql.test
# group: [json]
require json
# Example with simple query
statement ok
SELECT json_serialize_sql('SELECT 1 + 2 FROM tbl1');
# Example with skip_null and skip_empty
statement ok
SELECT json_serialize_sql('SELECT *, 1 + 2 FROM tbl1', skip_null := true, skip_empty := true);
# Example with subquery
statement ok
SELECT json_serialize_sql('SELECT * FROM (SELECT 1 + 2)', skip_null := true, skip_empty := true);
# Example with all parameters
statement ok
SELECT json_serialize_sql('SELECT * FROM (SELECT 1 + 2)', skip_default := true, skip_empty := true, skip_null := true, format := true);
# Example with syntax error
query I
SELECT json_serialize_sql('SELECT AND LAUNCH ROCKETS WHERE 1 = 1');
----
<REGEX>:.*syntax error at or near.*
# Test Deserialize
query I
SELECT json_deserialize_sql(json_serialize_sql('SELECT 1 + 2 FROM tbl1'));
----
SELECT (1 + 2) FROM tbl1
query I
SELECT json_deserialize_sql(json_serialize_sql('SELECT *, 1 + 2 FROM tbl1'));
----
SELECT *, (1 + 2) FROM tbl1
query I
SELECT json_deserialize_sql(json_serialize_sql('SELECT * FROM (SELECT 1 + 2)'));
----
SELECT * FROM (SELECT (1 + 2))
query I
SELECT json_deserialize_sql(json_serialize_sql('SELECT * FROM t1 JOIN t2 USING("$id")'));
----
SELECT * FROM t1 INNER JOIN t2 USING ("$id")
statement error
SELECT json_deserialize_sql(json_serialize_sql('SELECT AND LAUNCH ROCKETS WHERE 1 = 1'));
----
Parser Error: Error parsing json: parser: syntax error at or near "AND"
# Test Execute JSON Serialized SQL
query I
SELECT * FROM json_execute_serialized_sql(json_serialize_sql('SELECT 1 + 2'));
----
3
statement ok
CREATE TABLE tbl2 (a INT, b INT, c INT);
statement ok
INSERT INTO tbl2 VALUES (1, 2, 3), (4, 5, 6), (7, 8, 9);
query III
SELECT * FROM json_execute_serialized_sql(json_serialize_sql('SELECT * FROM tbl2'));
----
1 2 3
4 5 6
7 8 9
query I
SELECT * FROM json_execute_serialized_sql(json_serialize_sql('SELECT a + b + c FROM tbl2'));
----
6
15
24
query I
PRAGMA json_execute_serialized_sql(
'{"error":false,"statements":[{"node":{"type":"SELECT_NODE","modifiers":[],"cte_map":{"map":[]},"select_list":[{"class":"FUNCTION","type":"FUNCTION","alias":"","function_name":"+","schema":"","children":[{"class":"FUNCTION","type":"FUNCTION","alias":"","function_name":"+","schema":"","children":[{"class":"COLUMN_REF","type":"COLUMN_REF","alias":"","column_names":["a"]},{"class":"COLUMN_REF","type":"COLUMN_REF","alias":"","column_names":["b"]}],"filter":null,"order_bys":{"type":"ORDER_MODIFIER","orders":[]},"distinct":false,"is_operator":true,"export_state":false,"catalog":""},{"class":"COLUMN_REF","type":"COLUMN_REF","alias":"","column_names":["c"]}],"filter":null,"order_bys":{"type":"ORDER_MODIFIER","orders":[]},"distinct":false,"is_operator":true,"export_state":false,"catalog":""}],"from_table":{"type":"BASE_TABLE","alias":"","sample":null,"schema_name":"","table_name":"tbl2","column_name_alias":[],"catalog_name":""},"where_clause":null,"group_expressions":[],"group_sets":[],"aggregate_handling":"STANDARD_HANDLING","having":null,"sample":null,"qualify":null}}]}'
)
----
6
15
24
# TODO: We should add an option for the deserializer to allow missing properties in the JSON if they can be default-constructed.
# Alternatively, make them optional for all the Deserializers.
statement error
SELECT * FROM json_execute_serialized_sql(json_serialize_sql('SELECT * FROM tbl2', skip_null := true, skip_empty := true));
----
Parser Error: Expected but did not find property 'cte_map' in json object
# Test execute json serialized sql with multiple nested type tags
query II
SELECT * FROM json_execute_serialized_sql(json_serialize_sql('WITH a(i) as (SELECT 1) SELECT a1.i as i1, a2.i as i2 FROM a as a1, a as a2'));
----
1 1
# Missing select nodes should throw an error
statement error
SELECT json_deserialize_sql('{ "statements": [ {"expression_class": "BOUND_COMPARISON"}]}');
----
Parser Error: Error parsing json: no select node found in json
statement error
SELECT * FROM json_execute_serialized_sql('{ "statements": [ {"expression_class": "BOUND_COMPARISON"}]}');
----
Parser Error: Error parsing json: no select node found in json
# Test execute json serialized sql with multiple select nodes
query I
SELECT json_deserialize_sql(json_serialize_sql('SELECT 1;SELECT 2'));
----
SELECT 1; SELECT 2

View File

@@ -0,0 +1,176 @@
# name: test/sql/json/test_json_sqlite.test
# description: Test JSON sqlite tests
# group: [json]
require json
statement ok
pragma enable_verification
# some of the more elaborate tests from SQLite JSON1 extension
# https://github.com/sqlite/sqlite/blob/master/test/json101.test
statement ok
CREATE TABLE j1(x varchar);
statement ok
INSERT INTO j1(x)
VALUES('true'),('false'),('null'),('123'),('-234'),('34.5e+6'),
('""'),('"\""'),('"\\"'),('"abcdefghijlmnopqrstuvwxyz"'),
('[]'),('{}'),('[true,false,null,123,-234,34.5e+6,{},[]]'),
('{"a":true,"b":{"c":false}}');
query I
SELECT count(*) FROM j1 WHERE json_type(x) IN ('OBJECT','ARRAY');
----
4
query I
SELECT x FROM j1
WHERE json_extract(x,'$')<>x
AND json_type(x) IN ('OBJECT','ARRAY');
----
[true,false,null,123,-234,34.5e+6,{},[]]
statement ok
CREATE TABLE j2(id INTEGER PRIMARY KEY, json VARCHAR, src VARCHAR);
statement ok
INSERT INTO j2(id,json,src)
VALUES(1,'{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 25,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
}
],
"children": [],
"spouse": null
}','https://en.wikipedia.org/wiki/JSON');
statement ok
INSERT INTO j2(id,json,src)
VALUES(2, '{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
statement ok
INSERT INTO j2(id,json,src)
VALUES(3,'[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
query T
SELECT count(*) FROM j2;
----
3
query TTT
SELECT id, json_valid(json), json_type(json) FROM j2 ORDER BY id;
----
1 true OBJECT
2 true OBJECT
3 true ARRAY

View File

@@ -0,0 +1,164 @@
# name: test/sql/json/test_json_struct_projection_pushdown.test_slow
# description: Test JSON struct projection pushdown with TPC-H
# group: [json]
require json
require tpch
statement ok
call dbgen(sf=1)
statement ok
export database '__TEST_DIR__/tpch_json' (format json)
statement ok
DROP TABLE customer;
statement ok
DROP TABLE lineitem;
statement ok
DROP TABLE nation;
statement ok
DROP TABLE orders;
statement ok
DROP TABLE part;
statement ok
DROP TABLE partsupp;
statement ok
DROP TABLE region;
statement ok
DROP TABLE supplier;
statement ok
CREATE VIEW customer AS
SELECT
json['c_custkey']::INTEGER AS c_custkey,
json['c_name']::VARCHAR AS c_name,
json['c_address']::VARCHAR AS c_address,
json['c_nationkey']::INTEGER AS c_nationkey,
json['c_phone']::VARCHAR AS c_phone,
json['c_acctbal']::DECIMAL(15,2) AS c_acctbal,
json['c_mktsegment']::VARCHAR AS c_mktsegment,
json['c_comment']::VARCHAR AS c_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/customer.json', records=false)
statement ok
CREATE VIEW lineitem AS
SELECT
json['l_orderkey']::INTEGER AS l_orderkey,
json['l_partkey']::INTEGER AS l_partkey,
json['l_suppkey']::INTEGER AS l_suppkey,
json['l_linenumber']::INTEGER AS l_linenumber,
json['l_quantity']::DECIMAL(15,2) AS l_quantity,
json['l_extendedprice']::DECIMAL(15,2) AS l_extendedprice,
json['l_discount']::DECIMAL(15,2) AS l_discount,
json['l_tax']::DECIMAL(15,2) AS l_tax,
json['l_returnflag']::VARCHAR AS l_returnflag,
json['l_linestatus']::VARCHAR AS l_linestatus,
json['l_shipdate']::DATE AS l_shipdate,
json['l_commitdate']::DATE AS l_commitdate,
json['l_receiptdate']::DATE AS l_receiptdate,
json['l_shipinstruct']::VARCHAR AS l_shipinstruct,
json['l_shipmode']::VARCHAR AS l_shipmode,
json['l_comment']::VARCHAR AS l_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/lineitem.json', records=false)

statement ok
CREATE VIEW nation AS
SELECT
json['n_nationkey']::INTEGER AS n_nationkey,
json['n_name']::VARCHAR AS n_name,
json['n_regionkey']::INTEGER AS n_regionkey,
json['n_comment']::VARCHAR AS n_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/nation.json', records=false)

statement ok
CREATE VIEW orders AS
SELECT
json['o_orderkey']::INTEGER AS o_orderkey,
json['o_custkey']::INTEGER AS o_custkey,
json['o_orderstatus']::VARCHAR AS o_orderstatus,
json['o_totalprice']::DECIMAL(15,2) AS o_totalprice,
json['o_orderdate']::DATE AS o_orderdate,
json['o_orderpriority']::VARCHAR AS o_orderpriority,
json['o_clerk']::VARCHAR AS o_clerk,
json['o_shippriority']::INTEGER AS o_shippriority,
json['o_comment']::VARCHAR AS o_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/orders.json', records=false)

statement ok
CREATE VIEW part AS
SELECT
json['p_partkey']::INTEGER AS p_partkey,
json['p_name']::VARCHAR AS p_name,
json['p_mfgr']::VARCHAR AS p_mfgr,
json['p_brand']::VARCHAR AS p_brand,
json['p_type']::VARCHAR AS p_type,
json['p_size']::INTEGER AS p_size,
json['p_container']::VARCHAR AS p_container,
json['p_retailprice']::DECIMAL(15,2) AS p_retailprice,
json['p_comment']::VARCHAR AS p_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/part.json', records=false)

statement ok
CREATE VIEW partsupp AS
SELECT
json['ps_partkey']::INTEGER AS ps_partkey,
json['ps_suppkey']::INTEGER AS ps_suppkey,
json['ps_availqty']::INTEGER AS ps_availqty,
json['ps_supplycost']::DECIMAL(15,2) AS ps_supplycost,
json['ps_comment']::VARCHAR AS ps_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/partsupp.json', records=false)

statement ok
CREATE VIEW region AS
SELECT
json['r_regionkey']::INTEGER AS r_regionkey,
json['r_name']::VARCHAR AS r_name,
json['r_comment']::VARCHAR AS r_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/region.json', records=false)

statement ok
CREATE VIEW supplier AS
SELECT
json['s_suppkey']::INTEGER AS s_suppkey,
json['s_name']::VARCHAR AS s_name,
json['s_address']::VARCHAR AS s_address,
json['s_nationkey']::INTEGER AS s_nationkey,
json['s_phone']::VARCHAR AS s_phone,
json['s_acctbal']::DECIMAL(15,2) AS s_acctbal,
json['s_comment']::VARCHAR AS s_comment,
FROM
read_json_auto('__TEST_DIR__/tpch_json/supplier.json', records=false)
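
# single-digit query numbers need a zero-padded answer file name, hence the two loops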
loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

View File

@@ -0,0 +1,69 @@
# name: test/sql/json/test_json_tpch_sf001.test_slow
# description: Test TPCH with JSON
# group: [json]

require json

require tpch

statement ok
call dbgen(sf=0.01)

query IIIIIIIIIIIIIIII nosort q0
select * from lineitem order by all
----

# create lineitem json table
statement ok
create table lineitem_j as
select json_quote(struct_pack(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,
l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment)) as j
from lineitem

# get the json structure
query T
select json_structure(j) from lineitem_j limit 1
----
{"l_orderkey":"UBIGINT","l_partkey":"UBIGINT","l_suppkey":"UBIGINT","l_linenumber":"UBIGINT","l_quantity":"DOUBLE","l_extendedprice":"DOUBLE","l_discount":"DOUBLE","l_tax":"DOUBLE","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"VARCHAR","l_commitdate":"VARCHAR","l_receiptdate":"VARCHAR","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}

# transform the structure back to what it was and verify it is the same as on the original lineitem table
query IIIIIIIIIIIIIIII nosort q0
with transformed as (
select json_transform(j, '{"l_orderkey":"INTEGER","l_partkey":"INTEGER","l_suppkey":"INTEGER","l_linenumber":"INTEGER","l_quantity":"DECIMAL(15,2)","l_extendedprice":"DECIMAL(15,2)","l_discount":"DECIMAL(15,2)","l_tax":"DECIMAL(15,2)","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"DATE","l_commitdate":"DATE","l_receiptdate":"DATE","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}') as j
from lineitem_j
)
select j.l_orderkey,j.l_partkey,j.l_suppkey,j.l_linenumber,j.l_quantity,j.l_extendedprice,j.l_discount,j.l_tax,
j.l_returnflag,j.l_linestatus,j.l_shipdate,j.l_commitdate,j.l_receiptdate,j.l_shipinstruct,j.l_shipmode,j.l_comment
from transformed
order by all
----

# run TPCH-Q1 straight on JSON
query IIIIIIIIII
with transformed as (
select json_transform(j, '{"l_orderkey":"INTEGER","l_partkey":"INTEGER","l_suppkey":"INTEGER","l_linenumber":"INTEGER","l_quantity":"DECIMAL(15,2)","l_extendedprice":"DECIMAL(15,2)","l_discount":"DECIMAL(15,2)","l_tax":"DECIMAL(15,2)","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"DATE","l_commitdate":"DATE","l_receiptdate":"DATE","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}') as j
from lineitem_j
)
SELECT
j.l_returnflag,
j.l_linestatus,
sum(j.l_quantity) AS sum_qty,
sum(j.l_extendedprice) AS sum_base_price,
sum(j.l_extendedprice * (1 - j.l_discount)) AS sum_disc_price,
sum(j.l_extendedprice * (1 - j.l_discount) * (1 + j.l_tax)) AS sum_charge,
avg(j.l_quantity) AS avg_qty,
avg(j.l_extendedprice) AS avg_price,
avg(j.l_discount) AS avg_disc,
count(*) AS count_order
FROM
transformed
WHERE
j.l_shipdate <= CAST('1998-09-02' AS date)
GROUP BY
j.l_returnflag,
j.l_linestatus
ORDER BY
j.l_returnflag,
j.l_linestatus
----
<FILE>:extension/tpch/dbgen/answers/sf0.01/q01.csv

View File

@@ -0,0 +1,69 @@
# name: test/sql/json/test_json_tpch_sf01.test_slow
# description: Test TPCH with JSON
# group: [json]

require json

require tpch

statement ok
call dbgen(sf=0.1)

query IIIIIIIIIIIIIIII nosort q0
select * from lineitem order by all
----

# create lineitem json table
statement ok
create table lineitem_j as
select json_quote(struct_pack(l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,
l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment)) as j
from lineitem

# get the json structure
query T
select json_structure(j) from lineitem_j limit 1
----
{"l_orderkey":"UBIGINT","l_partkey":"UBIGINT","l_suppkey":"UBIGINT","l_linenumber":"UBIGINT","l_quantity":"DOUBLE","l_extendedprice":"DOUBLE","l_discount":"DOUBLE","l_tax":"DOUBLE","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"VARCHAR","l_commitdate":"VARCHAR","l_receiptdate":"VARCHAR","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}

# transform the structure back to what it was and verify it is the same as on the original lineitem table
query IIIIIIIIIIIIIIII nosort q0
with transformed as (
select json_transform(j, '{"l_orderkey":"INTEGER","l_partkey":"INTEGER","l_suppkey":"INTEGER","l_linenumber":"INTEGER","l_quantity":"DECIMAL(15,2)","l_extendedprice":"DECIMAL(15,2)","l_discount":"DECIMAL(15,2)","l_tax":"DECIMAL(15,2)","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"DATE","l_commitdate":"DATE","l_receiptdate":"DATE","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}') as j
from lineitem_j
)
select j.l_orderkey,j.l_partkey,j.l_suppkey,j.l_linenumber,j.l_quantity,j.l_extendedprice,j.l_discount,j.l_tax,
j.l_returnflag,j.l_linestatus,j.l_shipdate,j.l_commitdate,j.l_receiptdate,j.l_shipinstruct,j.l_shipmode,j.l_comment
from transformed
order by all
----

# run TPCH-Q1 straight on JSON
query IIIIIIIIII
with transformed as (
select json_transform(j, '{"l_orderkey":"INTEGER","l_partkey":"INTEGER","l_suppkey":"INTEGER","l_linenumber":"INTEGER","l_quantity":"DECIMAL(15,2)","l_extendedprice":"DECIMAL(15,2)","l_discount":"DECIMAL(15,2)","l_tax":"DECIMAL(15,2)","l_returnflag":"VARCHAR","l_linestatus":"VARCHAR","l_shipdate":"DATE","l_commitdate":"DATE","l_receiptdate":"DATE","l_shipinstruct":"VARCHAR","l_shipmode":"VARCHAR","l_comment":"VARCHAR"}') as j
from lineitem_j
)
SELECT
j.l_returnflag,
j.l_linestatus,
sum(j.l_quantity) AS sum_qty,
sum(j.l_extendedprice) AS sum_base_price,
sum(j.l_extendedprice * (1 - j.l_discount)) AS sum_disc_price,
sum(j.l_extendedprice * (1 - j.l_discount) * (1 + j.l_tax)) AS sum_charge,
avg(j.l_quantity) AS avg_qty,
avg(j.l_extendedprice) AS avg_price,
avg(j.l_discount) AS avg_disc,
count(*) AS count_order
FROM
transformed
WHERE
j.l_shipdate <= CAST('1998-09-02' AS date)
GROUP BY
j.l_returnflag,
j.l_linestatus
ORDER BY
j.l_returnflag,
j.l_linestatus
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q01.csv

View File

@@ -0,0 +1,65 @@
# name: test/sql/json/tpch_round_trip.test_slow
# description: TPCH JSON round trip
# group: [json]

require tpch

require json

statement ok
start transaction;

statement ok
call dbgen(sf=1)

statement ok
copy lineitem to '__TEST_DIR__/lineitem.json'

statement ok
rollback;
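
# COPY TO writes straight to disk, so the exported file survives the rollback that removes the tables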
statement ok
create view lineitem_struct as select from_json(json, '{
"l_orderkey":"INTEGER",
"l_partkey":"INTEGER",
"l_suppkey":"INTEGER",
"l_linenumber":"INTEGER",
"l_quantity":"INTEGER",
"l_extendedprice":"DECIMAL(15,2)",
"l_discount":"DECIMAL(15,2)",
"l_tax":"DECIMAL(15,2)",
"l_returnflag":"VARCHAR",
"l_linestatus":"VARCHAR",
"l_shipdate":"DATE",
"l_commitdate":"DATE",
"l_receiptdate":"DATE",
"l_shipinstruct":"VARCHAR",
"l_shipmode":"VARCHAR",
"l_comment":"VARCHAR"
}') j
from read_ndjson_objects('__TEST_DIR__/lineitem.json')
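
# rebuild a lineitem view over the struct fields so that the TPC-H pragma below can run unchanged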
statement ok
CREATE VIEW lineitem AS SELECT
j['l_orderkey'] AS l_orderkey,
j['l_partkey'] AS l_partkey,
j['l_suppkey'] AS l_suppkey,
j['l_linenumber'] AS l_linenumber,
j['l_quantity'] AS l_quantity,
j['l_extendedprice'] AS l_extendedprice,
j['l_discount'] AS l_discount,
j['l_tax'] AS l_tax,
j['l_returnflag'] AS l_returnflag,
j['l_linestatus'] AS l_linestatus,
j['l_shipdate'] AS l_shipdate,
j['l_commitdate'] AS l_commitdate,
j['l_receiptdate'] AS l_receiptdate,
j['l_shipinstruct'] AS l_shipinstruct,
j['l_shipmode'] AS l_shipmode,
j['l_comment'] AS l_comment
FROM lineitem_struct
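
# Q1 touches most of the lineitem columns, validating the round trip end to end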
query I
PRAGMA tpch(1)
----
<FILE>:extension/tpch/dbgen/answers/sf1/q01.csv