should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletion


@@ -0,0 +1,11 @@
# name: test/sql/json/issues/internal_issue2732.test
# description: Test internal issue 2732 - read_json('data.jsonl', map_inference_threshold=0) crashes
# group: [issues]

require json

statement ok
PRAGMA enable_verification
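
# a threshold of 0 presumably makes MAP inference apply to every object; this used to crash the reader and should now simply succeed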
statement ok
select * from read_json('data/json/internal_2732.json', map_inference_threshold=0);


@@ -0,0 +1,22 @@
# name: test/sql/json/issues/internal_issue3146.test
# description: Test internal issue 3146 - JSON parsing exception: Arrow datatype Map(Field ... ) not supported by Polars
# group: [issues]
# https://github.com/duckdblabs/duckdb-internal/issues/3146#issuecomment-2396148122

require json

statement ok
COPY (SELECT '{}') TO '__TEST_DIR__/empty_object.json' (FORMAT CSV, HEADER FALSE)

# for empty JSON objects we default to MAP(VARCHAR, JSON) as this is generic enough to fit any object
query I
SELECT typeof(json) FROM '__TEST_DIR__/empty_object.json'
----
MAP(VARCHAR, JSON)

# we can disable map inference with the map_inference_threshold parameter
query I
SELECT typeof(json) FROM read_json('__TEST_DIR__/empty_object.json', map_inference_threshold=-1)
----
JSON


@@ -0,0 +1,10 @@
# name: test/sql/json/issues/internal_issue3197.test
# description: Test internal issue 3197 - AFL++ issue: crashes (segfault) in json reader
# group: [issues]

require json

statement error
from 'data/json/internal_3197.json'
----
Invalid Input Error


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue3813.test
# description: Test internal issue 3813 - AFL++ issue: map_inference_threshold causes internal exception in json reader
# group: [issues]

require json

statement ok
SELECT * FROM read_json('data/json/internal_3813.json', map_inference_threshold=10);


@@ -0,0 +1,19 @@
# name: test/sql/json/issues/internal_issue391.test
# description: Test internal issue 391 - SUMMARIZE for a JSON column will not work since min(JSON) is not well defined
# group: [issues]

require json

statement ok
PRAGMA enable_verification

statement ok
create table test as select {i: range}::JSON j from range(10)

query II
select min(j), max(j) from test
----
{"i":0} {"i":9}
statement ok
summarize test


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue4014.test
# description: Test internal issue 4014 - AFL++ issue: segfault in json reader
# group: [issues]

require json

statement ok
FROM read_json('data/json/internal_4014.json', map_inference_threshold=0);


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/internal_issue4389.test
# description: Test internal issue 4389 - auto_detect is false for COPY + JSON
# group: [issues]

require json

statement ok
pragma enable_verification

statement ok
CREATE TABLE todos (userId UBIGINT, id UBIGINT, title VARCHAR, completed BOOLEAN);

statement ok
insert into todos values (42, 42, 'duck', true)

statement ok
copy todos to '__TEST_DIR__/todos.json' (ARRAY)
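
# reading the array-formatted file back should now work, since COPY ... FROM auto-detects the JSON layout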
statement ok
copy todos from '__TEST_DIR__/todos.json'


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/internal_issue4403.test
# description: Test internal issue 4403 - AFL fuzzer crash (NULL type specification)
# group: [issues]

require json

statement ok
pragma enable_verification
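
# a NULL type specification should be rejected at bind time instead of crashing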
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: NULL::VARCHAR, name: NULL::VARCHAR})
----
Binder Error


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/internal_issue4794.test
# description: Test internal issue 4794 - AFL++ issue: internal exception due to format string json key
# group: [issues]

require json

statement ok
pragma enable_verification
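
# a JSON key containing printf-style format specifiers apparently ended up in an error format string, raising an internal exception; it should now surface as a regular input error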
statement error
FROM read_json('data/json/format_string_key.json');
----
Invalid Input Error


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/internal_issue5288.test
# description: Test internal issue 5288 - zstd compression cannot be configured for JSON fields
# group: [issues]

require json
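
# JSON is stored as a string type under the hood, so string compression methods such as zstd should be accepted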
statement ok
create table foo2 (bar JSON using compression 'zstd');


@@ -0,0 +1,26 @@
# name: test/sql/json/issues/issue10751and11152.test
# description: Test issues 10751 and 11152 - Duplicate keys in JSON object with ignore_errors
# group: [issues]

require json

# issue 10751
statement error
create or replace table json_test as select * from read_json_auto('data/json/10751.json', format = 'newline_delimited');
----
Not implemented Error: Duplicate name

statement ok
create table json_test as select * from read_json_auto('data/json/10751.json', format = 'newline_delimited', ignore_errors=true);

statement ok
select * from json_test;

# issue 11152
statement error
FROM read_json_auto('data/json/11152.json');
----
Invalid Input Error: Malformed JSON

statement ok
FROM read_json_auto('data/json/11152.json', ignore_errors=true);


@@ -0,0 +1,25 @@
# name: test/sql/json/issues/issue10784.test
# description: Test issue 10784 - read_json_auto has some unexpected behavior
# group: [issues]

require json

# the original query from the issue should just return an error because the input is not an array of objects
statement error
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR','k':'VARCHAR'});
----
Invalid Input Error

# if we ignore errors we get NULLs because the array entries aren't objects
query II
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR','k':'VARCHAR'}, ignore_errors=true);
----
NULL NULL
NULL NULL

# if we read it as a single column we just get the array values as varchar
query I
SELECT * FROM read_json_auto('data/json/arr.json', columns={'v':'VARCHAR'});
----
4
hello


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/issue10866.test
# description: Test issue 10866 - uhugeints are truncated when imported from JSON data
# group: [issues]

require json

statement ok
copy (select '{"col": 277447099861456945273576150847928801582}') to '__TEST_DIR__/10866.json' (format csv, quote '', header 0)
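
# hex() shows all 32 hex digits, verifying that the full 128-bit value survived the import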
query II
select col, hex(col) from read_json('__TEST_DIR__/10866.json', columns={col: 'uhugeint'})
----
277447099861456945273576150847928801582 D0BA5E258FFCFEE8C4619BA0E21A192E


@@ -0,0 +1,36 @@
# name: test/sql/json/issues/issue11804.test
# description: Test issue 11804 - json_type(...) with path does not return "NULL"
# group: [issues]

require json

query I
select json_type(JSON 'null') = 'NULL';
----
true

query I
select json_type(JSON '{"a": null}', '/a') = 'NULL';
----
true

query I
select json_type(JSON '{"a": null}', '$.a') = 'NULL';
----
true

# Test issue 13436 - JSON_TYPE function produces wrong result if path is a column expression
query II
SELECT
    json_type (json '{"a":1,"b":null}', p),
    json_type (json '{"a":1,"b":null}', 'b')
FROM (VALUES ('b')) AS t (p);
----
NULL NULL

# let's also test the extract-many functionality
query I
select unnest(json_type(JSON '{"a": null}', ['$.a', '$.a'])) = 'NULL';
----
true
true


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/issue12188.test
# description: Test issue 12188 - Issue with Parsing NDJSON File in DuckDB: Unexpected Quotation Marks
# group: [issues]

require parquet

query II
SELECT typeof(field1), typeof(field2) FROM 'data/parquet-testing/parquet_with_json.parquet' LIMIT 1
----
JSON JSON

require json
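
# maximum_depth=1 stops automatic schema detection at the top level, so the fields stay JSON and round-trip through Parquet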
statement ok
COPY (SELECT * FROM read_ndjson('data/json/12188.ndjson', maximum_depth=1)) TO '__TEST_DIR__/my.parquet';

query II
SELECT typeof(field1), typeof(field2) FROM '__TEST_DIR__/my.parquet' LIMIT 1
----
JSON JSON


@@ -0,0 +1,30 @@
# name: test/sql/json/issues/issue12861.test
# description: Test issue 12861 - Autodetected type of nested JSON field in read_json_auto depends on amount of null values in input
# group: [issues]

require json

statement ok
create table tbl (test struct(one bigint, two varchar));

statement ok
insert into tbl values ({'one': 1, 'two': 2}), (null)

statement ok
copy tbl to '__TEST_DIR__/fewnulls.json'

statement ok
insert into tbl select null from range(9)

statement ok
copy tbl to '__TEST_DIR__/manynulls.json'
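
# both files should be inferred as the same STRUCT type, regardless of how many rows are NULL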
query I
select typeof(test) from '__TEST_DIR__/fewnulls.json' limit 1
----
STRUCT(one BIGINT, two VARCHAR)

query I
select typeof(test) from '__TEST_DIR__/manynulls.json' limit 1
----
STRUCT(one BIGINT, two VARCHAR)


@@ -0,0 +1,23 @@
# name: test/sql/json/issues/issue13212.test
# description: Test issue 13212 - Trying to read an empty compressed JSON file deadlocks
# group: [issues]

require json

statement ok
copy (select range::int64 as id, range::varchar as name from range(0)) to '__TEST_DIR__/t1.json.gz' (format json, compression gzip)

statement ok
copy (select id, name from values (1, 'bob'), (2, 'tom') tbl(id, name)) to '__TEST_DIR__/t2.json.gz' (format json, compression gzip)
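
# the first file is empty; reading it together with a non-empty file used to deadlock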
query II
SELECT * FROM read_ndjson_auto(['__TEST_DIR__/t1.json.gz', '__TEST_DIR__/t2.json.gz']);
----
1 bob
2 tom

query II
SELECT * FROM read_ndjson(['__TEST_DIR__/t1.json.gz', '__TEST_DIR__/t2.json.gz'], columns={id: 'int64', name: 'varchar'});
----
1 bob
2 tom


@@ -0,0 +1,36 @@
# name: test/sql/json/issues/issue13725.test
# description: Test issue 13725 - Using both hive_partitioning and hive_types in read_json_objects intermittently segfaults
# group: [issues]

require json

# path slashes
require notwindows
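
# hive_partitioning derives the month column from the directory name; hive_types casts it to INT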
query III
select *
from read_json_objects('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
{"hello": "there"} data/json/13725/month=07/mytest.json 7

query I
select count(*)
from read_json_objects('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
1

query III
select *
from read_json('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
there data/json/13725/month=07/mytest.json 7

query I
select count(*)
from read_json('data/json/13725/month=*/*.json', hive_partitioning = true, format = auto, hive_types = {'month': int}, filename = true)
where month = 7;
----
1


@@ -0,0 +1,79 @@
# name: test/sql/json/issues/issue13948.test
# description: Test issue 13948 - JSON property names with special characters produce inconsistent results with json -> 'propertyname' and json_extract
# group: [issues]

require json

statement ok
pragma enable_verification

query I
SELECT '{"Status / SubStatus": "test"}' -> 'Status / SubStatus';
----
"test"

query I
WITH path AS (
    SELECT 'Status / SubStatus' p
)
SELECT '{"Status / SubStatus": "test"}' -> p
FROM path
----
"test"

# TODO at some point we should escape supplied JSON paths automatically so that this works
query I
SELECT '{"\"Status / SubStatus\"": "test"}' -> '"Status / SubStatus"';
----
NULL

query I
WITH path AS (
    SELECT NULL p
)
SELECT '{"\"Status / SubStatus\"": "test"}' -> p
FROM path
----
NULL

query I
SELECT '{"Status / SubStatus": "test"}' -> '$."Status / SubStatus"';
----
"test"

query I
WITH path AS (
    SELECT '$."Status / SubStatus"' p
)
SELECT '{"Status / SubStatus": "test"}' -> p
FROM path
----
"test"
query I
SELECT '[1, 2, 3]' -> 0
----
1

query I
WITH path AS (
    SELECT 0 p
)
SELECT '[1, 2, 3]' -> p
FROM path
----
1

query I
SELECT '[1, 2, 3]' -> '0'
----
NULL

query I
WITH path AS (
    SELECT '0' p
)
SELECT '[1, 2, 3]' -> p
FROM path
----
NULL


@@ -0,0 +1,11 @@
# name: test/sql/json/issues/issue14167.test
# description: Test issue 14167 - Dot notation for JSON field extraction is no longer working in v1.1.*
# group: [issues]

require json

# the auto-detected type is a MAP, but we can still extract using the dot syntax because we rewrite it to map_extract
query I
select columns.v4_c6 from read_ndjson_auto('data/json/14167.json');
----
{'statistics': {'nonNullCount': 0}}
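
# note: `columns` is a MAP-typed column here and v4_c6 is a key in that map, not a struct field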


@@ -0,0 +1,10 @@
# name: test/sql/json/issues/issue14245.test
# description: Test issue 14245 - The JSONPointer '/'
# group: [issues]

require json
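
# per RFC 6901, the pointer '/' references the member whose key is the empty string, here 0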
query I
SELECT '{ "foo": ["bar", "baz"], "": 0 }' -> '/'
----
0


@@ -0,0 +1,8 @@
# name: test/sql/json/issues/issue14259.test
# description: Test issue 14259 - DuckDB v1.1.x and above JSON Schema Inference Error - leads to JSON transform error
# group: [issues]

require json

statement ok
from 'data/json/issue14259.json'


@@ -0,0 +1,71 @@
# name: test/sql/json/issues/issue15038.test
# description: Test issue 15038 - TO_JSON results in weird number translation
# group: [issues]

require json

# we support full precision in JSON - yyjson supports RAW values
query I
SELECT to_json(1::HUGEINT << 100)
----
1267650600228229401496703205376

query I
SELECT (1::HUGEINT << 100)::JSON
----
1267650600228229401496703205376

query I
SELECT to_json(1::UHUGEINT << 100)
----
1267650600228229401496703205376

query I
SELECT (1::UHUGEINT << 100)::JSON
----
1267650600228229401496703205376

query I
SELECT to_json((1::UHUGEINT << 100)::DECIMAL(38,0))
----
1267650600228229401496703205376

query I
SELECT (1::UHUGEINT << 100)::DECIMAL(38,0)::JSON
----
1267650600228229401496703205376

query I
SELECT to_json((1::HUGEINT << 100)::BIGNUM)
----
1267650600228229401496703205376

query I
SELECT (1::HUGEINT << 100)::BIGNUM::JSON
----
1267650600228229401496703205376

# original issue (#15038)
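# the BIGINT and the 1.2 literal presumably unify to a DECIMAL, keeping full precision instead of rounding through DOUBLE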
query I rowsort
WITH t1 AS (
    SELECT 9007199254740993 AS id
    UNION ALL
    SELECT 1.2 AS id
)
SELECT to_json(id) AS json_objects
FROM t1 AS t;
----
1.2
9007199254740993.0

query I rowsort
WITH t1 AS (
    SELECT 9007199254740993 AS id
    UNION ALL
    SELECT 1.2 AS id
)
SELECT id::JSON AS json_objects
FROM t1 AS t;
----
1.2
9007199254740993.0


@@ -0,0 +1,16 @@
# name: test/sql/json/issues/issue15601.test
# description: Test issue 15601 - JSON reader fails with duplicate column name when reading multiple JSON files of slightly different casing
# group: [issues]

require json

statement ok
PRAGMA enable_verification

# original query from the issue
statement ok
FROM 'data/json/15601/fragment*.json'

# an even worse example with many colliding keys
statement ok
FROM 'data/json/15601/bunch_of_key_collisions.json'


@@ -0,0 +1,13 @@
# name: test/sql/json/issues/issue16568.test
# description: Test issue 16568 - Error when loading JSON files with UTF-8 Byte Order Mark (BOM)
# group: [issues]

require json

statement ok
pragma enable_verification
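
# the UTF-8 BOM at the start of the file should be skipped rather than cause a parse error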
query I
select count(*) from 'data/json/sample_utf8_bom.json'
----
1


@@ -0,0 +1,17 @@
# name: test/sql/json/issues/issue16570.test
# description: Test issue 16570 - JSON type: string slice operation results in result value with JSON type, expected VARCHAR
# group: [issues]

require json

statement ok
pragma enable_verification
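
# string operations on JSON should yield plain VARCHAR, since an arbitrary slice need not be valid JSON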
query II
with cte as (
    select '{"a":1}'::JSON as j
)
select typeof(j[2:3]), typeof(substring(j, 2, 3))
from cte
----
VARCHAR VARCHAR


@@ -0,0 +1,26 @@
# name: test/sql/json/issues/issue16684.test
# description: Test issue 16684 - When using read_json to read data, it always converts the md5 string to uuid format.
# group: [issues]

require json

statement ok
PRAGMA enable_verification

statement ok
copy (select '00000000000000000000000000000000' md5) to '__TEST_DIR__/issue16684.json'

# should be varchar, not uuid (no hyphens)
query II
select md5, typeof(md5) from '__TEST_DIR__/issue16684.json'
----
00000000000000000000000000000000 VARCHAR

statement ok
copy (select '00000000-0000-0000-0000-000000000000' id) to '__TEST_DIR__/issue16684.json'

# if we add hyphens we get a uuid
query II
select id, typeof(id) from '__TEST_DIR__/issue16684.json'
----
00000000-0000-0000-0000-000000000000 UUID


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/issue16968.test
# description: Test issue 16968 - Non-descriptive error message when a BLOB value is passed to a JSON function
# group: [issues]

require json

statement ok
pragma enable_verification
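
# casting BLOB (and BIT) to JSON goes through the string representation, yielding a JSON string instead of an unhelpful error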
query I
select from_hex('aa')::json;
----
"\\xAA"

query I
select json(from_hex('aa'));
----
"\\xAA"

query I
select '1101'::BIT::JSON
----
"1101"

query I
select json('1101'::BIT)
----
"1101"


@@ -0,0 +1,33 @@
# name: test/sql/json/issues/issue18301.test
# description: Test issue 18301 - DuckDB JSON Schema Inconsistency - v1.3.2
# group: [issues]

require json

statement ok
pragma enable_verification

statement ok
SET threads = 2;

statement ok
CREATE OR REPLACE TABLE cricket_staging AS
SELECT * FROM read_json('data/json/18301/*.json', filename=true)
WHERE 1=0;

statement ok
TRUNCATE cricket_staging;

statement ok
INSERT INTO cricket_staging
SELECT * FROM read_json('data/json/18301/*.json',
    union_by_name=true,
    filename=true
);
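
# union_by_name unifies the schemas across files, so fields that appear in only some files are retained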
query I
SELECT info->>'$.outcome.by' as outcome_by
FROM cricket_staging
WHERE info->>'$.city' = 'Colombo';
----
{"runs":175,"wickets":null,"innings":1}


@@ -0,0 +1,20 @@
# name: test/sql/json/issues/issue19357.test
# description: Test issue 19357 - Expected unified vector format of type VARCHAR, but found type INT32
# group: [issues]

require json

query I
SELECT TO_JSON({'key_1': 'one'}) AS WITHOUT_KEEP_NULL
----
{"key_1":"one"}

query I
SELECT JSON_OBJECT('key_1', 'one', 'key_2', NULL) AS KEEP_NULL_1
----
{"key_1":"one","key_2":null}
statement error
SELECT JSON_OBJECT('key_1', 'one', NULL, 'two') AS KEEP_NULL_2
----
json_object() keys must be VARCHAR


@@ -0,0 +1,23 @@
# name: test/sql/json/issues/issue6722.test
# description: Test issue 6722 - INTERNAL Error: read_json_auto and read_json(auto_detect=true) fail to handle property name case sensitivities
# group: [issues]

require json

statement ok
PRAGMA enable_verification

# this file has 4 columns, named "id", "Id", "iD", and "ID"
query IIII
FROM 'data/json/duplicate_column_names.json'
----
42 43 44 45

# due to case-insensitivity these column names would cause an error, but we add a number to de-duplicate them
query IIIIII
DESCRIBE FROM 'data/json/duplicate_column_names.json'
----
id BIGINT YES NULL NULL NULL
Id_1 BIGINT YES NULL NULL NULL
iD_2 BIGINT YES NULL NULL NULL
ID_3 BIGINT YES NULL NULL NULL


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/issue8695.test
# description: Test issue 8695 - INTERNAL Error: Attempted to dereference unique_ptr that is NULL
# group: [issues]

require json

# these two queries succeeded on their own
statement ok
SELECT MAX(JSON_ARRAY_LENGTH(filter_keystage))::int - 1 FROM read_json_auto('data/json/filter_keystage.ndjson');

statement ok
WITH RECURSIVE nums AS (
    SELECT 0 AS n
    UNION ALL
    SELECT n + 1 FROM nums
    WHERE n < 5
)
SELECT * FROM nums;

# but combining them used to fail with the internal error above
statement ok
WITH RECURSIVE nums AS (
    SELECT 0 AS n
    UNION ALL
    SELECT n + 1 FROM nums
    WHERE n < (SELECT MAX(JSON_ARRAY_LENGTH(filter_keystage))::int - 1 FROM read_json_auto('data/json/filter_keystage.ndjson'))
)
SELECT * FROM nums;


@@ -0,0 +1,84 @@
# name: test/sql/json/issues/large_quoted_string_constant.test
# description: Issue #2986: Large string constant with quotes
# group: [issues]

statement ok
CREATE TABLE j2 (id INT, json VARCHAR, src VARCHAR);

statement ok
INSERT INTO j2(id,json,src)
VALUES(3,'[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
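
# the full literal, escaped quote included, should round-trip as a single 2115-character string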
query I
SELECT len(json) FROM j2;
----
2115


@@ -0,0 +1,28 @@
# name: test/sql/json/issues/read_json_memory_usage.test
# description: Test JSON memory usage (internal issue #1683)
# group: [issues]

require json

statement ok
SET threads=8;

statement ok
SET memory_limit='200MiB';

query I
SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd');
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}

statement ok
SET memory_limit='50MiB';
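
# with only 50MiB available the reader should hit the limit and raise an out-of-memory error rather than crash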
statement error
SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd');
----
Out of Memory Error


@@ -0,0 +1,14 @@
# name: test/sql/json/issues/test_json_temp_8062.test
# description: Test JSON fields in temporary tables for issue 8062
# group: [issues]

require json

statement ok
CREATE TEMP TABLE j1(x json);

statement ok
INSERT INTO j1(x) VALUES ('[1, 2, 3]');

statement ok
SELECT * FROM j1;