should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletion


@@ -0,0 +1,14 @@
# name: test/sql/json/table/auto_glob_directory.test
# description: Test auto globbing a directory
# group: [table]
require json
statement ok
COPY (SELECT i%2 AS grp, i FROM range(1000) t(i)) TO '__TEST_DIR__/glob_dir_json' (FORMAT json, PER_THREAD_OUTPUT);
query II
SELECT grp, COUNT(*) FROM read_json('__TEST_DIR__/glob_dir_json') GROUP BY ALL ORDER BY ALL
----
0 500
1 500
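# the directory auto-glob above should be equivalent to an explicit glob of the files inside it
# (illustrative sketch; assumes the PER_THREAD_OUTPUT files can be matched with '*', output not compared)
statement ok
SELECT grp, COUNT(*) FROM read_json('__TEST_DIR__/glob_dir_json/*') GROUP BY ALL ORDER BY ALL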


@@ -0,0 +1,49 @@
# name: test/sql/json/table/json_empty_array.test
# description: Read json files with empty arrays
# group: [table]
require json
statement ok
pragma enable_verification
# empty file
query I
select * from 'data/json/empty.ndjson'
----
query I
select * from 'data/json/whitespace_only.json'
----
# empty array
query I
SELECT * FROM read_json_auto('data/json/empty_array.json')
----
query I
SELECT * FROM read_json_auto('data/json/empty_no_newline.json')
----
# malformed files
statement error
SELECT * FROM read_json_auto('data/json/malformed/empty_array_malformed.json')
----
Missing closing brace
statement error
SELECT * FROM read_json_auto('data/json/malformed/empty_array_trailing.json', format='array')
----
Empty array with trailing data when parsing JSON array
statement error
SELECT * FROM read_json_auto('data/json/malformed/array_comma_malformed.json', format='array')
----
Malformed JSON
query I
SELECT * FROM read_json_auto('data/json/array_of_empty_arrays.json', format='array')
----
[]
[]
[]


@@ -0,0 +1,164 @@
# name: test/sql/json/table/json_multi_file_reader.test
# description: Test MultiFileReader integration in JSON reader
# group: [table]
require json
statement ok
create table test as SELECT i as i, to_json([i%4]) as j FROM range(0,20) as tbl(i)
# FIXME: we can't do partitioned JSON writes yet, because the column we partition by is packed into a to_json call
# (we just push an expression and then use the CSV writer), so this test uses the CSV writer for now
statement ok
COPY test TO '__TEST_DIR__/json_part' (FORMAT csv, quote '', PARTITION_BY (j), HEADER 0);
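# once partitioned JSON writes are supported, the direct form would presumably look like this
# (hypothetical sketch, not executed here):
#   COPY test TO '__TEST_DIR__/json_part' (FORMAT json, PARTITION_BY (j));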
# some tests for read_json first
query III
select * exclude (filename), replace(filename, '\', '/') as filename from read_json_auto('data/json/example_*.ndjson', filename=true) order by all
----
1 O Brother, Where Art Thou? data/json/example_n.ndjson
1 O Brother, Where Art Thou? data/json/example_r.ndjson
1 O Brother, Where Art Thou? data/json/example_rn.ndjson
2 Home for the Holidays data/json/example_n.ndjson
2 Home for the Holidays data/json/example_r.ndjson
2 Home for the Holidays data/json/example_rn.ndjson
3 The Firm data/json/example_n.ndjson
3 The Firm data/json/example_r.ndjson
3 The Firm data/json/example_rn.ndjson
4 Broadcast News data/json/example_n.ndjson
4 Broadcast News data/json/example_r.ndjson
4 Broadcast News data/json/example_rn.ndjson
5 Raising Arizona data/json/example_n.ndjson
5 Raising Arizona data/json/example_r.ndjson
5 Raising Arizona data/json/example_rn.ndjson
# the filename is also available as a virtual column (without filename=true)
query III
select *, replace(filename, '\', '/') from read_json_auto('data/json/example_*.ndjson') order by all
----
1 O Brother, Where Art Thou? data/json/example_n.ndjson
1 O Brother, Where Art Thou? data/json/example_r.ndjson
1 O Brother, Where Art Thou? data/json/example_rn.ndjson
2 Home for the Holidays data/json/example_n.ndjson
2 Home for the Holidays data/json/example_r.ndjson
2 Home for the Holidays data/json/example_rn.ndjson
3 The Firm data/json/example_n.ndjson
3 The Firm data/json/example_r.ndjson
3 The Firm data/json/example_rn.ndjson
4 Broadcast News data/json/example_n.ndjson
4 Broadcast News data/json/example_r.ndjson
4 Broadcast News data/json/example_rn.ndjson
5 Raising Arizona data/json/example_n.ndjson
5 Raising Arizona data/json/example_r.ndjson
5 Raising Arizona data/json/example_rn.ndjson
query III
select * from read_json_auto(['data/json/example_n.ndjson', 'data/json/top_level_array.json'], union_by_name=true) order by all
----
1 O Brother, Where Art Thou? NULL
2 Home for the Holidays NULL
3 The Firm NULL
4 Broadcast News NULL
5 Raising Arizona NULL
NULL NULL cancelled
NULL NULL cancelled
# despite not being able to do partitioned writes, we can do partitioned json reads already!
query II
SELECT j, count(*) FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1) group by j order by j;
----
[0] 5
[1] 5
[2] 5
[3] 5
# also test read_json_objects
query II
select * exclude (filename), replace(filename, '\', '/') as filename from read_json_objects_auto('data/json/example_*.ndjson', filename=true) order by all
----
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_n.ndjson
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_r.ndjson
{"id":1,"name":"O Brother, Where Art Thou?"} data/json/example_rn.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_n.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_r.ndjson
{"id":2,"name":"Home for the Holidays"} data/json/example_rn.ndjson
{"id":3,"name":"The Firm"} data/json/example_n.ndjson
{"id":3,"name":"The Firm"} data/json/example_r.ndjson
{"id":3,"name":"The Firm"} data/json/example_rn.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_n.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_r.ndjson
{"id":4,"name":"Broadcast News"} data/json/example_rn.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_n.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_r.ndjson
{"id":5,"name":"Raising Arizona"} data/json/example_rn.ndjson
query I
select * from read_json_objects_auto(['data/json/example_n.ndjson', 'data/json/top_level_array.json'], union_by_name=true) order by all
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
query II
select j, count(*) from read_json_objects_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1) group by j order by j
----
[0] 5
[1] 5
[2] 5
[3] 5
# also test filter pushdown on the hive partition column
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where j='[2]'
group by j
order by j;
----
[2] 5
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where j>'[2]'
group by j
order by j;
----
[3] 5
query II
SELECT j, count(*)
FROM read_json_auto('__TEST_DIR__/json_part/j=*/*.csv', HIVE_PARTITIONING=1)
where sqrt(j[2]::int) > 1.5
group by j
order by j;
----
[3] 5
# the JSON multi-file reader is a bit different, because we sample a total of sample_size rows
# even across multiple files when union_by_name=false
# these two files have different schemas, but we can read them together nonetheless
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'])
# both have 5 rows, so if we set sample_size=1, and maximum_sample_files=1, we cannot read them together anymore
statement error
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, maximum_sample_files=1)
----
Invalid Input Error
# if we increase maximum_sample_files, or set union_by_name=true, then we can read them again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, maximum_sample_files=99)
# if we set union_by_name=true, then we sample sample_size rows per file, so we can read them again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=1, union_by_name=true)
# with sample_size=6 we sample the first file's 5 rows plus 1 line from the second file, so of course we can read them again
statement ok
SELECT * FROM read_json_auto(['data/json/with_uuid.json', 'data/json/example_n.ndjson'], sample_size=6)


@@ -0,0 +1,23 @@
# name: test/sql/json/table/multi_file_hang.test
# description: Test that we do not hang when reading multiple JSON files while only sampling one
# group: [table]
require json
# needs more threads than the number of files for this to happen
statement ok
set threads=8
# only happened with these parameters
statement error
from read_json('data/json/multi_file_hang/*.json', sample_size=1, maximum_sample_files=1)
----
Invalid Input Error: JSON transform error
# the fuzzer also detected a single file hang, because we tried not to error here
# we cannot ignore errors of this kind when the data is not newline-delimited
# because we wouldn't know how to continue
statement error
SELECT * FROM read_json('data/json/fuzzer_hang.json', ignore_errors=true);
----
Invalid Input Error


@@ -0,0 +1,414 @@
# name: test/sql/json/table/read_json.test
# description: Read json files straight to columnar data
# group: [table]
require json
statement ok
pragma enable_verification
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', auto_detect=false)
----
Binder Error
# can't read ndjson with format='array'
statement error
SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='array')
----
Invalid Input Error: Expected top-level JSON array
# read_ndjson works
query II
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'})
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# We can also read only one of the columns
query I
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER'})
----
1
2
3
4
5
query I
SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={name: 'VARCHAR'})
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# what about a broken JSON file
query II
SELECT * FROM read_ndjson('data/json/unterminated_quotes.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, ignore_errors=true)
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
NULL NULL
4 Broadcast News
5 Raising Arizona
# some of these values don't have "name"
query II
SELECT * FROM read_ndjson('data/json/different_schemas.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'})
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# test projection pushdown (unstructured json)
query I
SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured')
----
1
2
3
4
5
query I
SELECT name FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured')
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# test projection pushdown (newline-delimited json)
query I
SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='newline_delimited')
----
1
2
3
4
5
query I
SELECT name FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='nd')
----
O Brother, Where Art Thou?
Home for the Holidays
The Firm
Broadcast News
Raising Arizona
# auto-detect
query II
SELECT * FROM read_json_auto('data/json/example_n.ndjson')
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
query II
SELECT * FROM 'data/json/example_n.ndjson'
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# we can detect at varying levels, level 0 is just JSON
query I
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=0)
----
{"id":1,"name":["O","Brother,","Where","Art","Thou?"]}
{"id":2,"name":["Home","for","the","Holidays"]}
{"id":3,"name":["The","Firm"]}
{"id":4,"name":["Broadcast","News"]}
{"id":5,"name":["Raising","Arizona"]}
# at level one we get JSON and JSON
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=1)
----
1 ["O","Brother,","Where","Art","Thou?"]
2 ["Home","for","the","Holidays"]
3 ["The","Firm"]
4 ["Broadcast","News"]
5 ["Raising","Arizona"]
# at level 2 we get BIGINT and JSON[]
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=2)
----
1 ["O", "Brother,", "Where", "Art", "Thou?"]
2 ["Home", "for", "the", "Holidays"]
3 ["The", "Firm"]
4 ["Broadcast", "News"]
5 ["Raising", "Arizona"]
# at level 3 it's fully detected, and we get BIGINT and VARCHAR[]
query II
SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=3)
----
1 [O, 'Brother,', Where, Art, Thou?]
2 [Home, for, the, Holidays]
3 [The, Firm]
4 [Broadcast, News]
5 [Raising, Arizona]
# we can detect lists too
query III
SELECT id, typeof(name), unnest(name) FROM 'data/json/with_list.json'
----
1 VARCHAR[] O
1 VARCHAR[] Brother,
1 VARCHAR[] Where
1 VARCHAR[] Art
1 VARCHAR[] Thou?
2 VARCHAR[] Home
2 VARCHAR[] for
2 VARCHAR[] the
2 VARCHAR[] Holidays
3 VARCHAR[] The
3 VARCHAR[] Firm
4 VARCHAR[] Broadcast
4 VARCHAR[] News
5 VARCHAR[] Raising
5 VARCHAR[] Arizona
# with depth 2 we don't bother detecting inside of the list - defaults to JSON
query III
SELECT id, typeof(name), unnest(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=2)
----
1 JSON[] "O"
1 JSON[] "Brother,"
1 JSON[] "Where"
1 JSON[] "Art"
1 JSON[] "Thou?"
2 JSON[] "Home"
2 JSON[] "for"
2 JSON[] "the"
2 JSON[] "Holidays"
3 JSON[] "The"
3 JSON[] "Firm"
4 JSON[] "Broadcast"
4 JSON[] "News"
5 JSON[] "Raising"
5 JSON[] "Arizona"
# with depth 1 we don't bother detecting inside the record, so every field defaults to JSON (even the "id" column in this case)
query II
SELECT typeof(id), typeof(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=1)
----
JSON JSON
JSON JSON
JSON JSON
JSON JSON
JSON JSON
# we can detect UUIDs
query II
SELECT id, typeof(id) FROM 'data/json/with_uuid.json'
----
bbd05ae7-76e5-4f1a-a31f-247408251fc9 UUID
d5c52052-5f8e-473f-bc8d-176342643ef5 UUID
3b6a6de3-0732-4591-93ed-8df6091eb00d UUID
ae24e69e-e0bf-4e85-9848-27d35df85b8b UUID
63928b16-1814-436f-8b30-b3c40cc31d51 UUID
# top-level array of values
query I
select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'})
----
cancelled
cancelled
query I
select * from read_json('data/json/top_level_array.json', auto_detect=true)
----
cancelled
cancelled
# if we try to read it as 'unstructured' records
statement error
select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}, format='unstructured', records=true)
----
Invalid Input Error: JSON transform error in file "data/json/top_level_array.json", in record/value 1: Expected OBJECT, but got ARRAY
# if we try to read an ndjson file as if it is an array of values, we get an error
statement error
select * from read_json_auto('data/json/example_n.ndjson', format='array')
----
Invalid Input Error: Expected top-level JSON array
# test that we can read a list longer than STANDARD_VECTOR_SIZE properly
statement ok
copy (select 42 duck from range(10000)) to '__TEST_DIR__/my_file.json' (array true)
query T
select count(*) from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array')
----
10000
query T
select sum(duck) = 42*10000 from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array')
----
true
# read_json_auto also understands ARRAY format
query T
select count(*) from '__TEST_DIR__/my_file.json'
----
10000
query T
select sum(duck) = 42*10000 from '__TEST_DIR__/my_file.json'
----
true
# what if we do an array of non-records?
statement ok
copy (select list(range) from range(10)) to '__TEST_DIR__/my_file.json' (format csv, quote '', HEADER 0)
query T
select * from '__TEST_DIR__/my_file.json'
----
0
1
2
3
4
5
6
7
8
9
# fails because it's not records
statement error
select * from read_json('__TEST_DIR__/my_file.json', format='array', columns={range: 'INTEGER'}, records=true)
----
Invalid Input Error: JSON transform error
# fails because it's not records
statement error
select * from read_json_auto('__TEST_DIR__/my_file.json', format='array', records=true)
----
Binder Error: json_read expected records
query T
select * from read_json('__TEST_DIR__/my_file.json', format='auto', records=false, auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# need to supply columns
statement error
select * from read_json('__TEST_DIR__/my_file.json', format='auto', records='false', auto_detect=false)
----
Binder Error
# read as unstructured values, so we just get the array
query T
select * from read_json('__TEST_DIR__/my_file.json', format='unstructured', records='false', auto_detect=true)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# array of non-records
query T
select * from read_json('__TEST_DIR__/my_file.json', format='array', records='false', auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# also works with auto
query T
select * from read_json('__TEST_DIR__/my_file.json', format='array', records='auto', auto_detect=true)
----
0
1
2
3
4
5
6
7
8
9
# lower thread count so the next tests don't OOM on many-core machines
statement ok
SET threads=2
# issue 6646, this is not an array, but we try to read it as one
statement error
select json_structure(json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='array', columns={'json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
Invalid Input Error: Expected top-level JSON array
# let's try a variation
statement error
select json_structure(json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='array', records='false', columns={'json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
Invalid Input Error: Expected top-level JSON array
# we can parse it as unstructured values, and give it a different column name
query I
select json_structure(my_json ->> '$.metadata') as structure,
from read_json('data/json/issue.json', format='unstructured', records='false', columns={'my_json': 'JSON'}, maximum_object_size=104857600)
limit 1;
----
{"argv":["VARCHAR"],"dag":{"dag_size":"VARCHAR","tasks":{"load_oscar":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"load_weather":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"compress":{"status":"VARCHAR","type":"VARCHAR","upstream":{"load_oscar":"VARCHAR"},"products":{"nb":"VARCHAR"}}}}}
statement ok
pragma disable_verification
# test that we can read a JSON list that spans more than one buffer size
# the JSON is 55 bytes, and the minimum buffer size is 32MB
# let's do 50k to be safe
statement ok
copy (select 42 this_is_a_very_long_field_name_yes_very_much_so from range(50000)) to '__TEST_DIR__/my_file.json' (array true)
query T
select sum(this_is_a_very_long_field_name_yes_very_much_so) = 42 * 50000 from '__TEST_DIR__/my_file.json'
----
true
require httpfs
query II
select * from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson');
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona


@@ -0,0 +1,354 @@
# name: test/sql/json/table/read_json_auto.test_slow
# description: Read json files - schema detection
# group: [table]
require json
statement ok
pragma enable_verification
# some Arrow tests (python/pyarrow/tests/test_json.py) from their GitHub repository
# these are very similar to the pandas tests, so let's not copy those
# instead of adding all of these files to data/test, we just create them on the fly here
# whenever we add a '' at the end, it's just to check that we skip the trailing newline that's sometimes there
statement ok
copy (select * from (values ('{"a": 1, "b": 2}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0);
query II
select * from '__TEST_DIR__/my_file.json'
----
1 2
statement ok
copy (select * from (values ('{"a": 1}'), ('{"a": 2}'), ('{"a": 3}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query I
select * from '__TEST_DIR__/my_file.json'
----
1
2
3
query I
select count(*) from '__TEST_DIR__/my_file.json'
----
3
statement ok
copy (select * from (values ('{"a": 1,"b": 2, "c": 3}'), ('{"a": 4,"b": 5, "c": 6}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query III
select * from '__TEST_DIR__/my_file.json'
----
1 2 3
4 5 6
statement ok
copy (select * from (values ('{"a": 1,"b": 2, "c": "3", "d": false}'), ('{"a": 4.0, "b": -5, "c": "foo", "d": true}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query IIII
select * from '__TEST_DIR__/my_file.json'
----
1.0 2 3 false
4.0 -5 foo true
# mixed types that cannot be resolved default to JSON (column 3)
statement ok
copy (select * from (values ('{"a": 1, "b": 2, "c": null, "d": null, "e": null}'), ('{"a": null, "b": -5, "c": "foo", "d": null, "e": true}'), ('{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query IIIII
select * from '__TEST_DIR__/my_file.json'
----
1.0 2 NULL NULL NULL
NULL -5 foo NULL true
4.5 NULL nan NULL false
# mixed types are resolved to DOUBLE here
statement ok
copy (select * from (values ('{"a": 1}'), ('{"a": 1.45}'), ('{"a": -23.456}'), ('{}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query II
select typeof(a), a from '__TEST_DIR__/my_file.json'
----
DOUBLE 1.0
DOUBLE 1.45
DOUBLE -23.456
DOUBLE NULL
statement ok
copy (select * from (values ('{"foo": "bar", "num": 0}'), ('{"foo": "baz", "num": 1}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
query II
select * from '__TEST_DIR__/my_file.json'
----
bar 0
baz 1
# we can read values from a top-level list
query I
select * from 'data/json/top_level_array.json'
----
cancelled
cancelled
query I
select count(*) from 'data/json/top_level_array.json'
----
2
# for maximum_depth=0 this is two records of JSON
query I
select * from read_json_auto('data/json/top_level_array.json', maximum_depth=0)
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# for 1 it's 1 column of JSON
query I
select * from read_json_auto('data/json/top_level_array.json', maximum_depth=1)
----
"cancelled"
"cancelled"
# if we read this with records='false', we get the struct instead of the unpacked columns
query I
select typeof(json) from read_json_auto('data/json/top_level_array.json', records='false')
----
STRUCT(conclusion VARCHAR)
STRUCT(conclusion VARCHAR)
# however, if there are multiple top-level arrays, we default to reading them as lists
query I
select * from 'data/json/top_level_two_arrays.json'
----
[{'conclusion': cancelled}, {'conclusion': cancelled}]
[{'conclusion': cancelled}, {'conclusion': cancelled}]
# if we read a top-level array as if it is a record, then we get an error
statement error
select * from read_json_auto('data/json/top_level_array.json', format='unstructured', records='true')
----
Binder Error: json_read expected records
# issue Mark found when analyzing a JSON dump of our CI - projection pushdown wasn't working properly
statement ok
select * from 'data/json/projection_pushdown_example.json' WHERE status <> 'completed'
# different schemas - this one should work regardless of sampling 1 or all lines
query II
select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=1)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
query II
select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=-1)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# if we require fields to appear in all objects by setting field_appearance_threshold=1, we default to MAP
query I
select typeof(COLUMNS(*)) from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) limit 1
----
MAP(VARCHAR, JSON)
query I
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1)
----
{id=1, name='"O Brother, Where Art Thou?"'}
{id=2}
{id=3, name='"The Firm"'}
{id=4}
{id=5, name='"Raising Arizona"'}
# if we set it to 0.5 it should work already since "name" appears in 3/5 objects, which is greater than 0.5
query II
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=0.5)
----
1 O Brother, Where Art Thou?
2 NULL
3 The Firm
4 NULL
5 Raising Arizona
# can't set it to less than 0 or more than 1
statement error
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=-1)
----
Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1
statement error
select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=2)
----
Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1
# inconsistent schemas - if we only sample 1 row, we get an error, because we only see a NULL value for the 2nd column
statement error
select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=1, convert_strings_to_integers=true)
----
Invalid Input Error: JSON transform error in file "data/json/inconsistent_schemas.ndjson", in line 3
# if we increase the sample size to 2, we can read it just fine
query II
select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=2)
----
"1" NULL
2 Home for the Holidays
[3] The Firm
4 Broadcast News
5 Raising Arizona
# we can also detect BIGINTs in strings (happens a lot in JSON for some reason ...)
statement ok
copy (select * from (values ('{"id": "26941143801"}'), ('{"id": "26941143807"}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0)
# but only if we set the parameter to true
query T
select typeof(id) from read_json('__TEST_DIR__/my_file.json', convert_strings_to_integers=true)
----
BIGINT
BIGINT
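# as a counterpart sketch: without convert_strings_to_integers the ids presumably stay VARCHAR
# (illustrative; only checking that the statement runs, not its output)
statement ok
select typeof(id) from read_json('__TEST_DIR__/my_file.json')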
# an empty array file combined with the example file works
query II
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson']);
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# Simple map inference with default threshold
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl')
----
MAP(VARCHAR, BIGINT)
# Test setting map_inference_threshold high
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=1000)
----
MAP(VARCHAR, BIGINT)
# Map inference can be disabled
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-1, field_appearance_threshold=0)
----
STRUCT("1" JSON, "2" BIGINT, "3" BIGINT, "4" BIGINT, "5" BIGINT, "6" BIGINT, "7" BIGINT, "8" BIGINT, "9" BIGINT, "10" BIGINT, "11" BIGINT, "12" BIGINT, "13" BIGINT, "14" BIGINT, "15" BIGINT, "16" JSON, "17" BIGINT, "18" BIGINT, "19" BIGINT, "20" BIGINT, "21" BIGINT, "22" BIGINT, "23" BIGINT, "24" BIGINT, "25" BIGINT, "26" BIGINT, "27" BIGINT, "28" BIGINT, "29" BIGINT, "30" BIGINT, "31" BIGINT, "32" BIGINT, "33" BIGINT, "34" BIGINT, "35" BIGINT, "36" BIGINT, "37" BIGINT, "38" BIGINT, "39" BIGINT, "40" BIGINT, "41" BIGINT, "42" BIGINT, "43" BIGINT, "44" BIGINT, "45" BIGINT, "46" BIGINT, "47" BIGINT, "48" BIGINT, "49" BIGINT, "50" BIGINT, "51" BIGINT, "52" BIGINT, "53" BIGINT, "54" BIGINT, "55" BIGINT, "56" BIGINT, "57" BIGINT, "58" BIGINT, "59" BIGINT, "60" BIGINT, "61" BIGINT, "62" BIGINT, "63" BIGINT, "64" BIGINT, "65" BIGINT, "66" BIGINT, "67" BIGINT, "68" BIGINT, "69" BIGINT, "70" BIGINT, "71" BIGINT, "72" BIGINT, "73" BIGINT, "74" BIGINT, "75" BIGINT, "76" BIGINT, "77" BIGINT, "78" BIGINT, "79" BIGINT, "80" BIGINT, "81" BIGINT, "82" BIGINT, "83" BIGINT, "84" BIGINT, "85" BIGINT, "86" BIGINT, "87" BIGINT, "88" BIGINT, "89" BIGINT, "90" BIGINT, "91" BIGINT, "92" BIGINT, "93" BIGINT, "94" BIGINT, "95" BIGINT, "96" BIGINT, "97" BIGINT, "98" BIGINT, "99" BIGINT, "100" BIGINT)
# Map inference with max_depth works as expected
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=2)
----
MAP(VARCHAR, JSON)
query T
select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=1)
----
JSON
# Map where all values are null
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_nulls.jsonl')
----
MAP(VARCHAR, JSON)
# Map type can be inferred at the top level
query T
select distinct typeof(json) from read_json_auto('data/json/top_level_map.jsonl')
----
MAP(VARCHAR, BIGINT)
# Map type can be inferred for struct value type
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_structs.jsonl')
----
MAP(VARCHAR, STRUCT(b BIGINT))
# Map 80% similarity check works
query T
select distinct typeof(a) from read_json_auto('data/json/map_50_50.jsonl', map_inference_threshold=10)
----
STRUCT(s1 STRUCT(f1 BIGINT[]), s2 STRUCT(f2 BIGINT[]), s3 STRUCT(f1 BIGINT[]), s4 STRUCT(f2 BIGINT[]), s5 STRUCT(f1 BIGINT[]), s6 STRUCT(f2 BIGINT[]), s7 STRUCT(f1 BIGINT[]), s8 STRUCT(f2 BIGINT[]), s9 STRUCT(f1 BIGINT[]), s10 STRUCT(f2 BIGINT[]))
# Map of maps
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_map.jsonl', map_inference_threshold=10)
----
MAP(VARCHAR, MAP(VARCHAR, BIGINT))
# All NULL types get converted to JSON if we do map inference
query T
select distinct typeof(a) from read_json_auto('data/json/map_of_struct_with_nulls.jsonl', map_inference_threshold=10)
----
MAP(VARCHAR, STRUCT(a JSON[]))
# Candidate types are properly handled for map inference
query I
SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_dates.jsonl', map_inference_threshold=25)
----
MAP(VARCHAR, DATE)
# Mixed candidate types are also handled
query I
SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_mixed_date_timestamps.jsonl', map_inference_threshold=25)
----
MAP(VARCHAR, VARCHAR)
# Incompatible types are handled correctly
query T
select distinct typeof(a) from read_json_auto('data/json/map_incompatible.jsonl', map_inference_threshold=10)
----
STRUCT(s1 STRUCT("1" JSON), s2 STRUCT("1" MAP(VARCHAR, JSON)), s3 STRUCT("1" VARCHAR), s4 STRUCT("1" BIGINT[]), s5 STRUCT("1" BIGINT), s6 STRUCT("1" VARCHAR), s7 STRUCT("1" BIGINT[]), s8 STRUCT("1" BIGINT), s9 STRUCT("1" VARCHAR), s10 STRUCT("1" BIGINT[]))
# Can't set map_inference_threshold to a negative value (except -1)
statement error
select * from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-10)
----
Binder Error: read_json_auto "map_inference_threshold" parameter must be 0 or positive, or -1 to disable map inference for consistent objects.
# if we only sample the first file, we default to a single JSON column
query I
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=1);
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# -1 is unlimited
query II
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-1);
----
1 O Brother, Where Art Thou?
2 Home for the Holidays
3 The Firm
4 Broadcast News
5 Raising Arizona
# can't be -2 or lower
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-2);
----
Binder Error
# can't be 0
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=0);
----
Binder Error
# cannot be NULL either
statement error
select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=NULL);
----
Binder Error


@@ -0,0 +1,130 @@
# name: test/sql/json/table/read_json_dates.test
# description: Read json files - date detection
# group: [table]
require json
statement ok
pragma enable_verification
# issue #6774
query I
select * from read_json_auto('data/json/simple_timestamp.json', columns={"ts": "TIMESTAMP[]"});
----
['2022-06-01 06:41:58', '2021-08-21 08:26:55.5', '2009-11-15 21:58:54.636']
# create date and timestamp tables
statement ok
create table date_test as select '1996/03/27'::DATE d
statement ok
create table timestamp_test as select '1996-03-27 07:42:33'::TIMESTAMP t
# cannot be empty
statement error
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (dateformat)
----
Binder Error
statement error
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (timestampformat)
----
Binder Error
statement error
copy date_test from 'data/json/simple_timestamp.json' (dateformat)
----
Binder Error
statement error
copy date_test from 'data/json/simple_timestamp.json' (timestampformat)
----
Binder Error
# test all supported date formats
foreach date_format '%m-%d-%Y' '%m-%d-%y' '%d-%m-%Y' '%d-%m-%y' '%Y-%m-%d' '%y-%m-%d'
statement ok
copy (select d from date_test) to '__TEST_DIR__/my_file.json' (dateformat ${date_format})
# auto-detect
query II
select typeof(d), d from '__TEST_DIR__/my_file.json'
----
DATE 1996-03-27
# forced format read_ndjson
query II
select typeof(d), d from read_ndjson('__TEST_DIR__/my_file.json', columns={d: 'DATE'}, dateformat=${date_format})
----
DATE 1996-03-27
# wrong format read_ndjson
statement error
select typeof(d), d from read_ndjson('__TEST_DIR__/my_file.json', columns={d: 'DATE'}, dateformat='%d-%Y-%m')
----
Invalid Input Error
# forced format COPY
statement ok
drop table if exists date_copy_test
statement ok
create table date_copy_test (d date)
statement ok
copy date_copy_test from '__TEST_DIR__/my_file.json' (dateformat ${date_format})
query II
select typeof(d), d from date_copy_test
----
DATE 1996-03-27
endloop
# test all supported timestamp formats (hacky way to do foreach parameters that need spaces in them)
foreach a,b,c '%Y-%m-%d,%H:%M:%S.%f,' '%m-%d-%Y,%I:%M:%S,%p' '%m-%d-%y,%I:%M:%S,%p' '%d-%m-%Y,%H:%M:%S,' '%d-%m-%y,%H:%M:%S,' '%Y-%m-%d,%H:%M:%S,' '%y-%m-%d,%H:%M:%S,'
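# for example, the second entry splits into a='%m-%d-%Y, b=%I:%M:%S, c=%p' (the quotes end up in a and c),
# so "timestampformat ${a} ${b} ${c}" below expands to: timestampformat '%m-%d-%Y %I:%M:%S %p'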
statement ok
copy (select t from timestamp_test) to '__TEST_DIR__/my_file.json' (format json, timestampformat ${a} ${b} ${c})
# auto-detect
query II
select typeof(t), t from '__TEST_DIR__/my_file.json'
----
TIMESTAMP 1996-03-27 07:42:33
# forced format read_ndjson
query II
select typeof(t), t from read_ndjson('__TEST_DIR__/my_file.json', columns={t: 'TIMESTAMP'}, timestamp_format=${a} ${b} ${c})
----
TIMESTAMP 1996-03-27 07:42:33
# wrong format read_ndjson
statement error
select typeof(t), t from read_ndjson('__TEST_DIR__/my_file.json', columns={t: 'TIMESTAMP'}, timestamp_format='%H:%M:%S%y-%m-%d')
----
Invalid Input Error
# forced format COPY
statement ok
drop table if exists timestamp_copy_test
statement ok
create table timestamp_copy_test (t timestamp)
statement ok
copy timestamp_copy_test from '__TEST_DIR__/my_file.json' (format json, timestampformat ${a} ${b} ${c})
query II
select typeof(t), t from timestamp_copy_test
----
TIMESTAMP 1996-03-27 07:42:33
endloop
# test this format too
query II
select typeof(createdAt), createdAt from 'data/json/timestamp_example.json'
----
TIMESTAMP 2023-02-07 19:12:28


@@ -0,0 +1,33 @@
# name: test/sql/json/table/read_json_many_files.test_slow
# description: Read > 1000 json files (issue #6249)
# group: [table]
require json
statement ok
create table input as select range as a from range(1, 4);
loop i 0 2000
statement ok
copy input to '__TEST_DIR__/input${i}.json';
endloop
query T
select count(*) from read_json_auto('__TEST_DIR__/input*.json');
----
6000
# also test gzipped (issue #6588)
loop i 0 2000
statement ok
copy input to '__TEST_DIR__/input${i}.json.gz' (COMPRESSION GZIP);
endloop
query T
select count(*) from read_json_auto('__TEST_DIR__/input*.json.gz');
----
6000


@@ -0,0 +1,231 @@
# name: test/sql/json/table/read_json_objects.test
# description: Read ndjson files
# group: [table]
require json
# we cannot check the error output for the specific byte, because on Windows \n is replaced with \r\n,
# so the byte count is different; therefore, we cut off the error message here
statement error
select * from read_json_objects('data/json/unterminated_quotes.ndjson')
----
Invalid Input Error: Malformed JSON
# now it should work!
query I
SELECT * FROM read_csv('data/json/example_n.ndjson', columns={'json': 'JSON'}, delim=NULL, header=0, quote=NULL, escape=NULL, auto_detect = false)
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# example_n is with regular \n newlines
query I
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# this one does not have the 'records' param
statement error
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson', records='false')
----
Binder Error: Invalid named parameter
query I
SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# we can auto-detect that it's newline-delimited
query I
SELECT * FROM read_json_objects('data/json/example_n.ndjson', format='auto')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# example_r is with \r newlines - works with unstructured
query I
SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='unstructured')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# we can detect that it's not newline-delimited
query I
SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='auto')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# \r newlines are NOT valid according to the ndjson spec - this does not work, the whole file is treated as a single line
statement error
SELECT * FROM read_ndjson_objects('data/json/example_r.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/example_r.ndjson"
# example_rn is with \r\n newlines
query I
SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
query I
SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# same but gzipped
query I
SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson.gz')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
query I
SELECT * FROM read_json_objects('data/json/example_rn.ndjson.gz', format='nd')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# multi-file scan
query I
SELECT count(*) from read_json_objects(['data/json/example_n.ndjson', 'data/json/example_r.ndjson', 'data/json/example_rn.ndjson'], format='auto')
----
15
query I
SELECT count(*) from read_ndjson_objects(['data/json/example_n.ndjson', 'data/json/example_rn.ndjson'])
----
10
# globbing
query I
SELECT count(*) from read_json_objects('data/json/example_*.ndjson', format='auto')
----
15
query I
SELECT count(*) from read_ndjson_objects('data/json/example_*n.ndjson')
----
10
# empty file
query I
select * from read_json_objects('data/json/empty.ndjson')
----
query I
select * from read_ndjson_objects('data/json/empty.ndjson')
----
# invalid json stuff
statement error
select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='nd')
----
Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson"
statement error
select * from read_ndjson_objects('data/json/unterminated_quotes.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson"
# we can auto-detect and ignore the error (becomes NULL)
query I
select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='auto', ignore_errors=true)
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
NULL
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# multiple values per line (works for read_json_objects)
query I
select * from read_json_objects('data/json/multiple_objects_per_line.ndjson', format='unstructured')
----
{"id":1,"name":"O Brother, Where Art Thou?"}
{"id":2,"name":"Home for the Holidays"}
{"id":3,"name":"The Firm"}
{"id":4,"name":"Broadcast News"}
{"id":5,"name":"Raising Arizona"}
# does not work for read_ndjson_objects
statement error
select * from read_ndjson_objects('data/json/multiple_objects_per_line.ndjson')
----
Invalid Input Error: Malformed JSON in file "data/json/multiple_objects_per_line.ndjson"
# what if we try to read a CSV?
statement error
select * from read_json_objects('data/csv/tpcds_14.csv')
----
Invalid Input Error: Malformed JSON
statement error
select * from read_ndjson_objects('data/csv/tpcds_14.csv')
----
Invalid Input Error: Malformed JSON in file "data/csv/tpcds_14.csv"
# how about parquet?
statement error
select * from read_json_objects('data/parquet-testing/blob.parquet')
----
Invalid Input Error: Malformed JSON
statement error
select * from read_ndjson_objects('data/parquet-testing/blob.parquet')
----
Invalid Input Error: Malformed JSON in file "data/parquet-testing/blob.parquet"
# we can also read the objects from a JSON array (not newline-delimited)
query I
select * from read_json_objects('data/json/top_level_array.json')
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# and auto-detect it
query I
select * from read_json_objects('data/json/top_level_array.json', format='auto')
----
{"conclusion":"cancelled"}
{"conclusion":"cancelled"}
# the file only has one line, so if we read this as ndjson, we just get the array
query I
select * from read_json_objects('data/json/top_level_array.json', format='nd')
----
[{"conclusion":"cancelled"}, {"conclusion":"cancelled"}]

View File

@@ -0,0 +1,35 @@
# name: test/sql/json/table/read_json_union.test
# description: Read json files with unions straight to columnar data
# group: [table]
require json
statement ok
pragma enable_verification
query I
SELECT data FROM read_ndjson('data/json/union.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Frank
5
false
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/bad_key.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found object containing unknown key, instead of union
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/empty_object.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found empty object, instead of union
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/non_object.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Expected an object representing a union, got uint
statement error
SELECT * FROM read_ndjson('data/json/malformed/union/too_many_keys.ndjson', columns={data: 'UNION(name VARCHAR, age INT, veteran BOOL)'})
----
Found object containing more than one key, instead of union


@@ -0,0 +1,51 @@
# name: test/sql/json/table/test_json_nested_struct_projection_pushdown.test
# description: Test JSON struct projection pushdown on deeply nested data
# group: [table]
require json
statement ok
pragma enable_verification
statement ok
COPY (SELECT {goose: 42, pigeon: 4.2, nested_duck: {nested_nested_duck: {goose: 42, pigeon: 4.2, nested_nested_nested_duck: {goose: 42, pigeon: 4.2}}, goose: 42, pigeon: 4.2}} AS duck) TO '__TEST_DIR__/nested.json'
query I
SELECT duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.nested_nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.nested_nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42
query I
SELECT duck.nested_duck.nested_nested_duck.nested_nested_nested_duck.goose FROM '__TEST_DIR__/nested.json'
----
42
query I
SELECT json.duck.nested_duck.nested_nested_duck.nested_nested_nested_duck.goose FROM read_json('__TEST_DIR__/nested.json', records=false)
----
42


@@ -0,0 +1,748 @@
# name: test/sql/json/table/test_json_table_in_out.test_slow
# description: Test JSON table in/out functions (json_each/json_tree)
# group: [table]
require json
statement ok
pragma enable_verification
# some larger-than-vector-size tests
query I
select count(*) from json_each(range(3000));
----
3000
# this should be equal to the 3000th triangular number
query I
select count(*) = 3000*(3000+1)//2 from range(1, 3001), json_each(range(range));
----
true
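# (for each r in 1..3000, json_each(range(r)) yields r rows, so the total is 1 + 2 + ... + 3000 = 3000*3001/2 = 4501500)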
##### SQLITE TESTS #####
### JSON101-5 ###
statement ok
CREATE OR REPLACE TABLE j2(id INTEGER PRIMARY KEY, json JSON, src VARCHAR);
statement ok
INSERT INTO j2(id,json,src)
VALUES(1,'{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 25,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
}
],
"children": [],
"spouse": null
}','https://en.wikipedia.org/wiki/JSON');
statement ok
INSERT INTO j2(id,json,src)
VALUES(2, '{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
statement ok
INSERT INTO j2(id,json,src)
VALUES(3,'[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil''s Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0003",
"type": "donut",
"name": "Old Fashioned",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]','https://adobe.github.io/Spry/samples/data_region/JSONDataSetSample.html');
query I
select count(*) from j2, json_tree(j2.json);
----
154
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE fullkey!=(path || CASE WHEN TRY_CAST(key AS UBIGINT) IS NOT NULL THEN '['||key||']'
ELSE '.'||key END);
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE fullkey!=(path || CASE WHEN TRY_CAST(key AS UBIGINT) IS NOT NULL THEN '['||key||']'
ELSE '.'||key END);
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE jx.json<>j2.json;
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE jx.json<>j2.json;
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_each(j2.json) AS jx
WHERE jx.value<>jx.atom AND type NOT IN ('ARRAY','OBJECT');
----
query IIIII
SELECT j2.rowid, jx.rowid, fullkey, path, key
FROM j2, json_tree(j2.json) AS jx
WHERE jx.value<>jx.atom AND type NOT IN ('ARRAY','OBJECT');
----
### JSON101-13 ###
statement ok
DROP TABLE IF EXISTS t1;
statement ok
DROP TABLE IF EXISTS t2;
statement ok
CREATE OR REPLACE TABLE t1(id INTEGER, json JSON);
statement ok
INSERT INTO t1(id,json) VALUES(1,'{"items":[3,5]}');
statement ok
CREATE OR REPLACE TABLE t2(id INTEGER, json JSON);
statement ok
INSERT INTO t2(id,json) VALUES(2,'{"value":2}');
statement ok
INSERT INTO t2(id,json) VALUES(3,'{"value":3}');
statement ok
INSERT INTO t2(id,json) VALUES(4,'{"value":4}');
statement ok
INSERT INTO t2(id,json) VALUES(5,'{"value":5}');
statement ok
INSERT INTO t2(id,json) VALUES(6,'{"value":6}');
query I
select count(*) from t2, json_each(t2.json) je;
----
5
query I
select je.rowid from t2, json_each(t2.json) je;
----
0
0
0
0
0
# our result here differs from SQLite because our correlated subquery behavior is different
query IIII rowsort
SELECT * FROM t1 CROSS JOIN t2
WHERE EXISTS(SELECT 1 FROM json_each(t1.json,'$.items') AS Z
WHERE Z.value==t2.id);
----
1 {"items":[3,5]} 3 {"value":3}
1 {"items":[3,5]} 5 {"value":5}
query IIII rowsort
SELECT * FROM t2 CROSS JOIN t1
WHERE EXISTS(SELECT 1 FROM json_each(t1.json,'$.items') AS Z
WHERE Z.value==t2.id);
----
3 {"value":3} 1 {"items":[3,5]}
5 {"value":5} 1 {"items":[3,5]}
### JSON101-14 ###
query I
SELECT fullkey FROM json_each('123');
----
$
query I
SELECT fullkey FROM json_each('123.56');
----
$
query I
SELECT fullkey FROM json_each('"hello"');
----
$
query I
SELECT fullkey FROM json_each('null');
----
$
query I
SELECT fullkey FROM json_tree('123');
----
$
query I
SELECT fullkey FROM json_tree('123.56');
----
$
query I
SELECT fullkey FROM json_tree('"hello"');
----
$
query I
SELECT fullkey FROM json_tree('null');
----
$
### JSON101-15 ###
query IIIIIIII
SELECT * FROM JSON_EACH('{"a":1, "b":2}');
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
query IIIIIIII
SELECT xyz.* FROM JSON_EACH('{"a":1, "b":2}') AS xyz;
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
query IIIIIIII
SELECT * FROM (FROM JSON_EACH('{"a":1, "b":2}'));
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
query IIIIIIII
SELECT xyz.* FROM (FROM JSON_EACH('{"a":1, "b":2}')) AS xyz;
----
a 1 UBIGINT 1 2 NULL $.a $
b 2 UBIGINT 2 4 NULL $.b $
### JSON101-17 ###
query I
SELECT count(*) FROM json_each(NULL);
----
0
query I
SELECT count(*) FROM json_tree(NULL);
----
0
### JSON102-1000 ###
statement ok
CREATE OR REPLACE TABLE user(name VARCHAR,phone JSON);
statement ok
INSERT INTO user(name,phone) VALUES
('Alice','["919-555-2345","804-555-3621"]'),
('Bob','["201-555-8872"]'),
('Cindy','["704-555-9983"]'),
('Dave','["336-555-8421","704-555-4321","803-911-4421"]');
query I rowsort
SELECT DISTINCT user.name
FROM user, json_each(user.phone)
WHERE json_each.value LIKE '"704-%'
ORDER BY 1;
----
Cindy
Dave
statement ok
UPDATE user
SET phone=json_extract(phone,'$[0]')
WHERE json_array_length(phone)<2;
query II rowsort
SELECT name, substr(phone,1,5) FROM user ORDER BY name;
----
Alice ["919
Bob "201-
Cindy "704-
Dave ["336
query I rowsort
SELECT name FROM user WHERE phone LIKE '"704-%'
UNION
SELECT user.name
FROM user, json_each(user.phone)
WHERE json_valid(user.phone)
AND json_each.value LIKE '"704-%';
----
Cindy
Dave
### JSON102-1010 ###
statement ok
CREATE OR REPLACE TABLE big(json JSON);
statement ok
INSERT INTO big(json) VALUES('{
"id":123,
"stuff":[1,2,3,4],
"partlist":[
{"uuid":"bb108722-572e-11e5-9320-7f3b63a4ca74"},
{"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"},
{"subassembly":[
{"uuid":"6fa5181e-5721-11e5-a04e-57f3d7b32808"}
]}
]
}');
statement ok
INSERT INTO big(json) VALUES('{
"id":456,
"stuff":["hello","world","xyzzy"],
"partlist":[
{"uuid":false},
{"uuid":"c690dc14-572e-11e5-95f9-dfc8861fd535"}
]
}');
query III nosort q0
SELECT big.rowid, fullkey, value
FROM big, json_tree(big.json)
WHERE json_tree.type NOT IN ('OBJECT','ARRAY')
ORDER BY +big.rowid, +json_tree.id;
----
0 $.id 123
0 $stuff[0] 1
0 $stuff[1] 2
0 $stuff[2] 3
0 $stuff[3] 4
0 $partlist.uuid "bb108722-572e-11e5-9320-7f3b63a4ca74"
0 $partlist.uuid "c690dc14-572e-11e5-95f9-dfc8861fd535"
0 $partlistsubassembly.uuid "6fa5181e-5721-11e5-a04e-57f3d7b32808"
1 $.id 456
1 $stuff[0] "hello"
1 $stuff[1] "world"
1 $stuff[2] "xyzzy"
1 $partlist.uuid false
1 $partlist.uuid "c690dc14-572e-11e5-95f9-dfc8861fd535"
query III nosort q0
SELECT big.rowid, fullkey, atom
FROM big, json_tree(big.json)
WHERE atom IS NOT NULL
ORDER BY +big.rowid, +json_tree.id
----
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json,'$.partlist')
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json,'$')
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
query I
SELECT DISTINCT json_extract(big.json,'$.id')
FROM big, json_tree(big.json)
WHERE json_tree.key='uuid'
AND json_tree.value='"6fa5181e-5721-11e5-a04e-57f3d7b32808"';
----
123
### JSON107 ###
query II
SELECT key, value FROM json_tree('{"a":123,"b":456}')
WHERE atom;
----
a 123
b 456
### JSON502 ###
statement ok
CREATE OR REPLACE TABLE t1(x JSON);
statement ok
INSERT INTO t1(x) VALUES('{"a":{"b":{"c":"hello",},},}');
query I
SELECT fullkey FROM t1, json_tree(x) order by json_tree.rowid;
----
$
$.a
$.a.b
$.a.b.c
### JOIN-23 ###
statement ok
CREATE OR REPLACE TABLE a(value TEXT);
statement ok
INSERT INTO a(value) SELECT value FROM json_each('["a", "b", null]');
statement ok
CREATE OR REPLACE TABLE b(value TEXT);
statement ok
INSERT INTO b(value) SELECT value FROM json_each('["a", "c", null]');
query II rowsort q1
SELECT a.value, b.value FROM a RIGHT JOIN b ON a.value = b.value;
----
"a" "a"
null null
NULL "c"
query II rowsort q1
SELECT a.value, b.value FROM b LEFT JOIN a ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "c", null]') AS b
LEFT JOIN
json_each('["a", "b", null]') AS a ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "b", null]') AS a
RIGHT JOIN
json_each('["a", "c", null]') AS b ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM json_each('["a", "b", null]') AS a
RIGHT JOIN
b ON a.value = b.value;
----
query II rowsort q1
SELECT a.value, b.value
FROM a
RIGHT JOIN
json_each('["a", "c", null]') AS b ON a.value = b.value;
----
### JOIN8-6000 ###
statement ok
CREATE OR REPLACE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c TEXT, d REAL);
statement ok
INSERT INTO t1 VALUES(0,'A','aa',2.5);
query IIII
SELECT * FROM t1 AS t2 NATURAL RIGHT JOIN t1 AS t3
WHERE (a,b) IN (SELECT rowid, b FROM t1);
----
0 A aa 2.5
statement ok
DROP TABLE IF EXISTS t1;
statement ok
CREATE OR REPLACE TABLE t1(a INT PRIMARY KEY, b TEXT, c TEXT, d INT);
statement ok
INSERT INTO t1 VALUES(15,'xray','baker',42);
query IIIII
SELECT value, t1.* FROM json_each('7') RIGHT JOIN t1 USING (rowid)
WHERE (a,b) IN (SELECT a, b FROM t1);
----
7 15 xray baker 42
statement ok
DROP TABLE IF EXISTS t1;
statement ok
CREATE OR REPLACE TABLE t1(a INTEGER PRIMARY KEY,b INTEGER);
statement ok
INSERT INTO t1 VALUES(0,NULL),(1,2);
query III
SELECT value, t1.* FROM json_each('null') RIGHT JOIN t1 USING (rowid)
WHERE (a,b) IN (SELECT rowid, b FROM t1);
----
NULL 1 2
statement ok
CREATE OR REPLACE TABLE a(key TEXT);
statement ok
INSERT INTO a(key) VALUES('a'),('b');
query II
SELECT to_json(a.key), b.value
FROM a RIGHT JOIN json_each('["a","c"]') AS b ON to_json(a.key)=b.value;
----
"a" "a"
NULL "c"
### WindowB-11 ###
query I
SELECT value FROM json_each('[1,2,3,4,5]');
----
1
2
3
4
5
query II
SELECT key, value FROM json_each('[1,2,3,4,5]');
----
0 1
1 2
2 3
3 4
4 5
query II
SELECT rowid, value FROM json_each('[1,2,3,4,5]');
----
0 1
1 2
2 3
3 4
4 5
query I
SELECT sum(value::int) OVER (ORDER BY rowid) FROM json_each('[1,2,3,4,5]')
----
1
3
6
10
15
query I
SELECT sum(value::int) OVER (
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) FROM json_each('[1,2,3,4,5]')
----
1
3
6
10
15
query I
SELECT sum(value::int) OVER (ORDER BY rowid DESC) FROM json_each('[1,2,3,4,5]')
----
5
9
12
14
15
query I
SELECT sum(value::int) OVER (ORDER BY value ASC) FROM json_each('[2,1,4,3,5]')
----
1
3
6
10
15
### WhereF-6 ###
statement ok
CREATE OR REPLACE TABLE t6(x JSON);
query I
SELECT * FROM t6 WHERE 1 IN (SELECT value FROM json_each(x));
----
statement ok
DROP TABLE t6;
statement ok
CREATE OR REPLACE TABLE t6(a int,b int,c json);
statement ok
INSERT INTO t6 VALUES
(0,null,'{"a":0,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
(1,null,'{"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}'),
(2,null,'{"a":9,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}');
query III
SELECT * FROM t6
WHERE (EXISTS (SELECT 1 FROM json_each(t6.c) AS x WHERE x.type = 'UBIGINT' AND x.value=1));
----
1 NULL {"a":1,"b":[3,4,5],"c":{"x":4.5,"y":7.8}}
# Another test case derived from a posting by Wout Mertens on the
# sqlite-users mailing list on 2017-10-04.
statement ok
DROP TABLE IF EXISTS t;
statement ok
CREATE OR REPLACE TABLE t(json JSON);
query I
SELECT * FROM t
WHERE(EXISTS(SELECT 1 FROM json_each(t.json,'$.foo') j
WHERE j.value = 'meep'));
----
statement ok
INSERT INTO t VALUES('{"xyzzy":null}');
statement ok
INSERT INTO t VALUES('{"foo":"meep","other":12345}');
statement ok
INSERT INTO t VALUES('{"foo":"bingo","alt":5.25}');
query I
SELECT * FROM t
WHERE(EXISTS(SELECT 1 FROM json_each(t.json,'$.foo') j
WHERE j.value = '"meep"'));
----
{"foo":"meep","other":12345}
# internal issue 5080
statement ok
create table json_table as
select '{"my_array":[{"my_key":42},{"my_key":9001}]}' as my_json;
query II
select fullkey, path from json_table, json_tree(json_table.my_json) order by json_tree.rowid;
----
$ $
$.my_array $
$.my_array[0] $.my_array
$.my_array[0].my_key $.my_array[0]
$.my_array[1] $.my_array
$.my_array[1].my_key $.my_array[1]
# internal issues 5772 and 5776
statement ok
create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0;
statement ok
SELECT NULL FROM json_each(6051, NULL)