should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,45 @@
using DataFrames
using Tables
using DuckDB
using Test
using Dates
using FixedPointDecimals
using UUIDs
# Test files executed by this runner, in the order listed. Each entry is passed
# to `include` exactly as written.
test_files = [
    "test_appender.jl",
    "test_basic_queries.jl",
    "test_big_nested.jl",
    "test_config.jl",
    "test_connection.jl",
    "test_tbl_scan.jl",
    "test_prepare.jl",
    "test_transaction.jl",
    "test_sqlite.jl",
    "test_replacement_scan.jl",
    "test_table_function.jl",
    "test_old_interface.jl",
    "test_all_types.jl",
    "test_union_type.jl",
    "test_decimals.jl",
    "test_threading.jl",
    "test_tpch.jl",
    "test_tpch_multithread.jl",
    "test_stream_data_chunk.jl",
    "test_scalar_udf.jl"
]

# When a non-empty first command-line argument is given, keep only the test file
# whose name matches it exactly. A name that matches nothing leaves the list
# empty, so no tests are run.
if !isempty(ARGS) && !isempty(ARGS[1])
    filtered_test_files = filter(==(ARGS[1]), test_files)
    test_files = filtered_test_files
end

# Print each file name before including it so output can be attributed to a file.
for fname in test_files
    println(fname)
    include(fname)
end

View File

@@ -0,0 +1,190 @@
# test_all_types.jl
@testset "Test All Types" begin
    db = DBInterface.connect(DuckDB.DB)
    con = DBInterface.connect(db)
    # The fixed-size array columns and `bignum` are excluded from the comparison.
    # `time` and `time_tz` are re-projected so that a 24:00:00 value is replaced
    # by 23:59:59.999999 before it reaches Julia.
    df = DataFrame(
        DBInterface.execute(
            con,
            """SELECT * EXCLUDE(time, time_tz, fixed_int_array, fixed_varchar_array, fixed_nested_int_array,
            fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list,
            list_of_fixed_int_array, bignum)
            , CASE WHEN time = '24:00:00'::TIME THEN '23:59:59.999999'::TIME ELSE time END AS time
            , CASE WHEN time_tz = '24:00:00-15:59:59'::TIMETZ THEN '23:59:59.999999-15:59:59'::TIMETZ ELSE time_tz END AS time_tz
            FROM test_all_types()
            """
        )
    )
    #println(names(df))
    # we can also use 'propertynames()' to get the column names as symbols, that might make for a better testing approach
    # If we add a dictionary that maps from the symbol to the expected result

    # Every column is compared as [first row, second row, missing].
    @test isequal(df.bool, [false, true, missing])

    # signed and unsigned integers
    @test isequal(df.tinyint, [-128, 127, missing])
    @test isequal(df.smallint, [-32768, 32767, missing])
    @test isequal(df.int, [-2147483648, 2147483647, missing])
    @test isequal(df.bigint, [-9223372036854775808, 9223372036854775807, missing])
    @test isequal(
        df.hugeint,
        [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727, missing]
    )
    @test isequal(df.uhugeint, [0, 340282366920938463463374607431768211455, missing])
    @test isequal(df.utinyint, [0, 255, missing])
    @test isequal(df.usmallint, [0, 65535, missing])
    @test isequal(df.uint, [0, 4294967295, missing])
    @test isequal(df.ubigint, [0, 18446744073709551615, missing])

    # floating point and decimals
    @test isequal(df.float, [-3.4028235f38, 3.4028235f38, missing])
    @test isequal(df.double, [-1.7976931348623157e308, 1.7976931348623157e308, missing])
    @test isequal(df.dec_4_1, [-999.9, 999.9, missing])
    @test isequal(df.dec_9_4, [-99999.9999, 99999.9999, missing])
    @test isequal(df.dec_18_6, [-999999999999.999999, 999999999999.999999, missing])
    # This check used to be repeated three times verbatim; once is sufficient.
    @test isequal(
        df.dec38_10,
        [-9999999999999999999999999999.9999999999, 9999999999999999999999999999.9999999999, missing]
    )

    # enums
    @test isequal(df.small_enum, ["DUCK_DUCK_ENUM", "GOOSE", missing])
    @test isequal(df.medium_enum, ["enum_0", "enum_299", missing])
    @test isequal(df.large_enum, ["enum_0", "enum_69999", missing])

    # dates, times and timestamps
    @test isequal(df.date, [Dates.Date(-5877641, 6, 25), Dates.Date(5881580, 7, 10), missing])
    @test isequal(df.time, [Dates.Time(0, 0, 0), Dates.Time(23, 59, 59, 999, 999), missing])
    @test isequal(df.time_tz, [Dates.Time(0, 0, 0), Dates.Time(23, 59, 59, 999, 999), missing])
    @test isequal(
        df.timestamp,
        [Dates.DateTime(-290308, 12, 22, 0, 0, 0), Dates.DateTime(294247, 1, 10, 4, 0, 54, 775), missing]
    )
    @test isequal(
        df.timestamp_tz,
        [Dates.DateTime(-290308, 12, 22, 0, 0, 0), Dates.DateTime(294247, 1, 10, 4, 0, 54, 775), missing]
    )
    @test isequal(
        df.timestamp_s,
        [Dates.DateTime(-290308, 12, 22, 0, 0, 0), Dates.DateTime(294247, 1, 10, 4, 0, 54, 0), missing]
    )
    @test isequal(
        df.timestamp_ms,
        [Dates.DateTime(-290308, 12, 22, 0, 0, 0), Dates.DateTime(294247, 1, 10, 4, 0, 54, 775), missing]
    )
    @test isequal(
        df.timestamp_ns,
        [Dates.DateTime(1677, 9, 22, 0, 0, 0, 0), Dates.DateTime(2262, 4, 11, 23, 47, 16, 854), missing]
    )
    @test isequal(
        df.interval,
        [
            Dates.CompoundPeriod(Dates.Month(0), Dates.Day(0), Dates.Microsecond(0)),
            Dates.CompoundPeriod(Dates.Month(999), Dates.Day(999), Dates.Microsecond(999999999)),
            missing
        ]
    )

    # strings, blobs and UUIDs
    @test isequal(df.varchar, ["🦆🦆🦆🦆🦆🦆", "goo\0se", missing])
    @test isequal(
        df.blob,
        [
            UInt8[
                0x74, 0x68, 0x69, 0x73, 0x69, 0x73, 0x61, 0x6c,
                0x6f, 0x6e, 0x67, 0x62, 0x6c, 0x6f, 0x62, 0x00,
                0x77, 0x69, 0x74, 0x68, 0x6e, 0x75, 0x6c, 0x6c,
                0x62, 0x79, 0x74, 0x65, 0x73
            ],
            UInt8[0x00, 0x00, 0x00, 0x61],
            missing
        ]
    )
    @test isequal(df.uuid, [UUID(0), UUID(UInt128(340282366920938463463374607431768211455)), missing])

    # lists
    @test isequal(df.int_array, [[], [42, 999, missing, missing, -42], missing])
    @test isequal(df.double_array, [[], [42, NaN, Inf, -Inf, missing, -42], missing])
    @test isequal(
        df.date_array,
        [
            [],
            [
                Dates.Date(1970, 1, 1),
                Dates.Date(5881580, 7, 11),
                Dates.Date(-5877641, 6, 24),
                missing,
                Dates.Date(2022, 5, 12)
            ],
            missing
        ]
    )
    @test isequal(
        df.timestamp_array,
        [
            [],
            [
                Dates.DateTime(1970, 1, 1),
                Dates.DateTime(294247, 1, 10, 4, 0, 54, 775),
                Dates.DateTime(-290308, 12, 21, 19, 59, 5, 225),
                missing,
                Dates.DateTime(2022, 5, 12, 16, 23, 45)
            ],
            missing
        ]
    )
    @test isequal(
        df.timestamptz_array,
        [
            [],
            [
                Dates.DateTime(1970, 1, 1),
                Dates.DateTime(294247, 1, 10, 4, 0, 54, 775),
                Dates.DateTime(-290308, 12, 21, 19, 59, 5, 225),
                missing,
                Dates.DateTime(2022, 5, 12, 23, 23, 45)
            ],
            missing
        ]
    )
    @test isequal(df.varchar_array, [[], ["🦆🦆🦆🦆🦆🦆", "goose", missing, ""], missing])
    @test isequal(
        df.nested_int_array,
        [[], [[], [42, 999, missing, missing, -42], missing, [], [42, 999, missing, missing, -42]], missing]
    )

    # structs and maps
    @test isequal(df.struct, [(a = missing, b = missing), (a = 42, b = "🦆🦆🦆🦆🦆🦆"), missing])
    @test isequal(
        df.struct_of_arrays,
        [
            (a = missing, b = missing),
            (a = [42, 999, missing, missing, -42], b = ["🦆🦆🦆🦆🦆🦆", "goose", missing, ""]),
            missing
        ]
    )
    @test isequal(df.array_of_structs, [[], [(a = missing, b = missing), (a = 42, b = "🦆🦆🦆🦆🦆🦆"), missing], missing])
    @test isequal(df.map, [Dict(), Dict("key1" => "🦆🦆🦆🦆🦆🦆", "key2" => "goose"), missing])

    # Release the handles explicitly, as the other test files do.
    DBInterface.close!(con)
    DBInterface.close!(db)
end

View File

@@ -0,0 +1,158 @@
@testset "Appender Error" begin
    database = DBInterface.connect(DuckDB.DB)
    connection = DBInterface.connect(database)

    # No table has been created, so constructing an appender is expected to
    # throw, whether it is given the database or a connection.
    @test_throws DuckDB.QueryException DuckDB.Appender(database, "nonexistanttable")
    @test_throws DuckDB.QueryException DuckDB.Appender(connection, "t")
end
# Runs twice: once appending to a table in the default schema, once to a table
# inside an explicitly created schema.
@testset "Appender Usage - Schema $(schema_provided ? "Provided" : "Not Provided")" for schema_provided in (false, true)
    db = DBInterface.connect(DuckDB.DB)

    table_name = "integers"
    if schema_provided
        schema_name = "test"
        full_table_name = "$(schema_name).$(table_name)"
        DBInterface.execute(db, "CREATE SCHEMA $(schema_name)")
    else
        schema_name = nothing
        full_table_name = table_name
    end

    DBInterface.execute(db, "CREATE TABLE $(full_table_name)(i INTEGER)")

    # closing an appender twice must not raise
    appender = DuckDB.Appender(db, table_name, schema_name)
    DuckDB.close(appender)
    DuckDB.close(appender)

    # close!
    appender = DuckDB.Appender(db, table_name, schema_name)
    DBInterface.close!(appender)

    # append ten single-column rows, then flush and close
    appender = DuckDB.Appender(db, table_name, schema_name)
    for i in 0:9
        DuckDB.append(appender, i)
        DuckDB.end_row(appender)
    end
    DuckDB.flush(appender)
    DuckDB.close(appender)

    results = DBInterface.execute(db, "SELECT * FROM $(full_table_name)")
    df = DataFrame(results)
    @test names(df) == ["i"]
    @test size(df, 1) == 10
    @test df.i == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # close the database (this step previously closed the already-closed
    # appender again and left the database open)
    DBInterface.close!(db)
end
@testset "Appender API" begin
    # Open the database
    db = DBInterface.connect(DuckDB.DB)
    uuid = Base.UUID("a36a5689-48ec-4104-b147-9fed600d8250")

    # One entry per column of the `dtypes` table:
    # - `col_name`: DuckDB column name
    # - `duck_type`: DuckDB column type
    # - `append_value`: value handed to DuckDB.append
    # - `ref_value`: (optional) value expected when reading the table back;
    #   `append_value` is used when it is absent
    columns = [
        (; col_name = :bool, duck_type = "BOOLEAN", append_value = true, ref_value = true),
        (; col_name = :tint, duck_type = "TINYINT", append_value = -1, ref_value = Int8(-1)),
        (; col_name = :sint, duck_type = "SMALLINT", append_value = -2, ref_value = Int16(-2)),
        (; col_name = :int, duck_type = "INTEGER", append_value = -3, ref_value = Int32(-3)),
        (; col_name = :bint, duck_type = "BIGINT", append_value = -4, ref_value = Int64(-4)),
        (; col_name = :hint, duck_type = "HUGEINT", append_value = Int128(-5), ref_value = Int128(-5)),
        (; col_name = :utint, duck_type = "UTINYINT", append_value = 1, ref_value = UInt8(1)),
        (; col_name = :usint, duck_type = "USMALLINT", append_value = 2, ref_value = UInt16(2)),
        (; col_name = :uint, duck_type = "UINTEGER", append_value = 3, ref_value = UInt32(3)),
        (; col_name = :ubint, duck_type = "UBIGINT", append_value = 4, ref_value = UInt64(4)),
        (; col_name = :uhint, duck_type = "UHUGEINT", append_value = UInt128(5), ref_value = UInt128(5)),
        (; col_name = :dec16, duck_type = "DECIMAL(4,2)", append_value = FixedDecimal{Int16, 2}(1.01)),
        (; col_name = :dec32, duck_type = "DECIMAL(9,2)", append_value = FixedDecimal{Int32, 2}(1.02)),
        (; col_name = :dec64, duck_type = "DECIMAL(18,2)", append_value = FixedDecimal{Int64, 2}(1.03)),
        (; col_name = :dec128, duck_type = "DECIMAL(38,2)", append_value = FixedDecimal{Int128, 2}(1.04)),
        (; col_name = :float, duck_type = "FLOAT", append_value = 1.0, ref_value = Float32(1.0)),
        (; col_name = :double, duck_type = "DOUBLE", append_value = 2.0, ref_value = Float64(2.0)),
        (; col_name = :date, duck_type = "DATE", append_value = Dates.Date("1970-04-11")),
        (; col_name = :time, duck_type = "TIME", append_value = Dates.Time(0, 0, 0, 0, 200)),
        (; col_name = :timestamp, duck_type = "TIMESTAMP", append_value = Dates.DateTime("1970-01-02T01:23:45.678")),
        (; col_name = :missingval, duck_type = "INTEGER", append_value = missing),
        (; col_name = :nothingval, duck_type = "INTEGER", append_value = nothing, ref_value = missing),
        (; col_name = :largeval, duck_type = "INTEGER", append_value = Int32(2^16)),
        (; col_name = :uuid, duck_type = "UUID", append_value = uuid),
        (; col_name = :varchar, duck_type = "VARCHAR", append_value = "Foo"),
        # lists
        (; col_name = :list_bool, duck_type = "BOOLEAN[]", append_value = Vector{Bool}([true, false, true])),
        (; col_name = :list_int8, duck_type = "TINYINT[]", append_value = Vector{Int8}([1, -2, 3])),
        (; col_name = :list_int16, duck_type = "SMALLINT[]", append_value = Vector{Int16}([1, -2, 3])),
        (; col_name = :list_int32, duck_type = "INTEGER[]", append_value = Vector{Int32}([1, -2, 3])),
        (; col_name = :list_int64, duck_type = "BIGINT[]", append_value = Vector{Int64}([1, -2, 3])),
        (;
            col_name = :list_int128,
            duck_type = "HUGEINT[]",
            append_value = Vector{Int128}([Int128(1), Int128(-2), Int128(3)])
        ),
        # NOTE(review): the UTINYINT[] case is disabled; the reason is not recorded here.
        # (; col_name = :list_uint8, duck_type = "UTINYINT[]", append_value = Vector{UInt8}([1, 2, 3])),
        (; col_name = :list_uint16, duck_type = "USMALLINT[]", append_value = Vector{UInt16}([1, 2, 3])),
        (; col_name = :list_uint32, duck_type = "UINTEGER[]", append_value = Vector{UInt32}([1, 2, 3])),
        (; col_name = :list_uint64, duck_type = "UBIGINT[]", append_value = Vector{UInt64}([1, 2, 3])),
        (;
            col_name = :list_uint128,
            duck_type = "UHUGEINT[]",
            append_value = Vector{UInt128}([UInt128(1), UInt128(2), UInt128(3)])
        ),
        (; col_name = :list_float, duck_type = "FLOAT[]", append_value = Vector{Float32}([1.0, 2.0, 3.0])),
        (; col_name = :list_double, duck_type = "DOUBLE[]", append_value = Vector{Float64}([1.0, 2.0, 3.0])),
        (; col_name = :list_string, duck_type = "VARCHAR[]", append_value = Vector{String}(["a", "bb", "ccc"])),
        (;
            col_name = :list_date,
            duck_type = "DATE[]",
            append_value = Vector{Dates.Date}([
                Dates.Date("1970-01-01"),
                Dates.Date("1970-01-02"),
                Dates.Date("1970-01-03")
            ])
        ),
        (;
            col_name = :list_time,
            duck_type = "TIME[]",
            append_value = Vector{Dates.Time}([Dates.Time(1), Dates.Time(1, 2), Dates.Time(1, 2, 3)])
        ),
        (;
            col_name = :list_timestamp,
            duck_type = "TIMESTAMP[]",
            append_value = Vector{Dates.DateTime}([
                Dates.DateTime("1970-01-01T00:00:00"),
                Dates.DateTime("1970-01-02T00:00:00"),
                Dates.DateTime("1970-01-03T00:00:00")
            ])
        )
    ]

    # Build the CREATE TABLE statement from the column descriptions above.
    column_defs = join(("$(spec.col_name) $(spec.duck_type)" for spec in columns), ",\n")
    sql = "CREATE TABLE dtypes(\n$(column_defs)\n)"
    DuckDB.execute(db, sql)

    # Append exactly one row: one value per column, in declaration order.
    appender = DuckDB.Appender(db, "dtypes")
    for spec in columns
        DuckDB.append(appender, spec.append_value)
    end
    # End the row of the appender
    DuckDB.end_row(appender)
    # Flush the data, then close the appender
    DuckDB.flush(appender)
    DuckDB.close(appender)

    stored = DataFrame(DBInterface.execute(db, "select * from dtypes;"))
    for spec in columns
        expected = haskey(spec, :ref_value) ? spec.ref_value : spec.append_value
        @test isequal(stored[!, spec.col_name], [expected])
    end

    # close the database
    DBInterface.close!(db)
end

View File

@@ -0,0 +1,181 @@
# test_basic_queries.jl
using Tables: partitions
@testset "Test DBInterface.execute" begin
    con = DBInterface.connect(DuckDB.DB)
    query = "SELECT 42 a"

    cursor = DBInterface.execute(con, query)

    # row iteration: the value is reachable by column name and by position
    for record in Tables.rows(cursor)
        @test record.a == 42
        @test record[1] == 42
    end

    # materialize the same result as a DataFrame
    frame = DataFrame(cursor)
    @test names(frame) == ["a"]
    @test size(frame, 1) == 1
    @test frame.a == [42]

    # do-block form: the result is handed to the block and the block's value returned
    frame = DBInterface.execute(con, query) do results
        return DataFrame(results)
    end
    @test names(frame) == ["a"]
    @test size(frame, 1) == 1
    @test frame.a == [42]

    DBInterface.close!(con)
end
@testset "Test numeric data types" begin
    con = DBInterface.connect(DuckDB.DB)
    # Two rows: the second is NULL in every column except `g`.
    frame = DataFrame(
        DBInterface.execute(
            con,
            """
            SELECT 42::TINYINT a, 42::INT16 b, 42::INT32 c, 42::INT64 d, 42::UINT8 e, 42::UINT16 f, 42::UINT32 g, 42::UINT64 h
            UNION ALL
            SELECT NULL, NULL, NULL, NULL, NULL, NULL, 43, NULL
            """
        )
    )
    @test size(frame, 1) == 2

    # columns that contain a NULL
    nullable = [42, missing]
    @test isequal(frame.a, nullable)
    @test isequal(frame.b, nullable)
    @test isequal(frame.c, nullable)
    @test isequal(frame.d, nullable)
    @test isequal(frame.e, nullable)
    @test isequal(frame.f, nullable)
    # `g` has no NULL; the type assertion requires a plain Vector{Int}
    @test isequal(frame.g::Vector{Int}, [42, 43])
    @test isequal(frame.h, nullable)

    DBInterface.close!(con)
end
@testset "Test strings" begin
    con = DBInterface.connect(DuckDB.DB)
    frame = DataFrame(
        DBInterface.execute(
            con,
            """
            SELECT 'hello world' s
            UNION ALL
            SELECT NULL
            UNION ALL
            SELECT 'this is a long string'
            UNION ALL
            SELECT 'obligatory mühleisen'
            UNION ALL
            SELECT '🦆🍞🦆'
            """
        )
    )
    expected = ["hello world", missing, "this is a long string", "obligatory mühleisen", "🦆🍞🦆"]
    @test size(frame, 1) == 5
    @test isequal(frame.s, expected)

    # Bind String and SubString parameters, ASCII and non-ASCII; the test
    # expects length() to report 3 for each of them.
    params = ["foo", "🦆DB", SubString("foobar", 1, 3), SubString("🦆ling", 1, 6)]
    for s in params
        reply = DBInterface.execute(con, "SELECT length(?) as len", [s])
        @test only(reply).len == 3
    end

    DBInterface.close!(con)
end
@testset "DBInterface.execute - parser error" begin
    con = DBInterface.connect(DuckDB.DB)
    # "SELEC" is not valid SQL, so execution must raise a QueryException
    malformed_sql = "SELEC"
    @test_throws DuckDB.QueryException DBInterface.execute(con, malformed_sql)
    DBInterface.close!(con)
end
@testset "DBInterface.execute - binder error" begin
    con = DBInterface.connect(DuckDB.DB)
    # well-formed SQL that refers to a table which was never created
    unknown_table_sql = "SELECT * FROM this_table_does_not_exist"
    @test_throws DuckDB.QueryException DBInterface.execute(con, unknown_table_sql)
    DBInterface.close!(con)
end
@testset "DBInterface.execute - runtime error" begin
    con = DBInterface.connect(DuckDB.DB)

    # print the current `threads` setting to the test output
    threads_setting = DataFrame(DBInterface.execute(con, "select current_setting('threads')"))
    print(threads_setting)

    # run-time error: 'hello' cannot be cast to int
    failing_cast = "SELECT i::int FROM (SELECT '42' UNION ALL SELECT 'hello') tbl(i)"
    @test_throws DuckDB.QueryException DBInterface.execute(con, failing_cast)

    DBInterface.close!(con)
end
# test a PIVOT query that generates multiple prepared statements and will fail with execute
@testset "Test DBInterface.query" begin
    db = DuckDB.DB()
    con = DuckDB.connect(db)

    DuckDB.execute(con, "CREATE TABLE Cities (Country VARCHAR, Name VARCHAR, Year INT, Population INT);")
    DuckDB.execute(con, "INSERT INTO Cities VALUES ('NL', 'Amsterdam', 2000, 1005)")
    DuckDB.execute(con, "INSERT INTO Cities VALUES ('NL', 'Amsterdam', 2010, 1065)")

    results = DuckDB.query(con, "PIVOT Cities ON Year USING first(Population);")

    # iterator
    for row in Tables.rows(results)
        @test row[:Name] == "Amsterdam"
        @test row[4] == 1065
    end

    # convert to DataFrame
    df = DataFrame(results)
    @test names(df) == ["Country", "Name", "2000", "2010"]
    @test size(df, 1) == 1
    @test df[1, :Country] == "NL"
    @test df[1, :Name] == "Amsterdam"
    @test df[1, "2000"] == 1005
    @test df[1, 4] == 1065

    # query() also accepts the database handle and a multi-statement string;
    # the test expects the first statement's value in the first cell
    @test DataFrame(DuckDB.query(db, "select 'a'; select 2;"))[1, 1] == "a"

    DBInterface.close!(con)
    # `db` was opened separately from `con`, so it needs its own close
    DBInterface.close!(db)
end
@testset "Test chunked response" begin
    con = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(con, "CREATE TABLE chunked_table AS SELECT * FROM range(2049)")
    scan_sql = "SELECT * FROM chunked_table;"

    # default result type: 2049 rows are expected to arrive as two partitions,
    # and collecting the same partition iterator a second time must throw
    cursor = DBInterface.execute(con, scan_sql)
    parts_iter = partitions(cursor)
    @test length(collect(parts_iter)) == 2
    @test_throws DuckDB.NotImplementedException collect(parts_iter)

    # same expectations when a streaming result is requested
    cursor = DBInterface.execute(con, scan_sql, DuckDB.StreamResult)
    parts_iter = partitions(cursor)
    @test length(collect(parts_iter)) == 2
    @test_throws DuckDB.NotImplementedException collect(parts_iter)

    # a single row across eleven columns is expected to be one partition
    DuckDB.execute(
        con,
        """
        CREATE TABLE large (x1 INT, x2 INT, x3 INT, x4 INT, x5 INT, x6 INT, x7 INT, x8 INT, x9 INT, x10 INT, x11 INT);
        """
    )
    DuckDB.execute(con, "INSERT INTO large VALUES (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);")
    cursor = DBInterface.execute(con, "SELECT * FROM large ;")
    parts_iter = partitions(cursor)
    @test length(collect(parts_iter)) == 1

    DBInterface.close!(con)
end

View File

@@ -0,0 +1,74 @@
@testset "Test big list" begin
    conn = DBInterface.connect(DuckDB.DB)
    # store one list of 2049 integers and read it back
    DBInterface.execute(conn, "CREATE TABLE list_table (int_list INT[]);")
    DBInterface.execute(conn, "INSERT INTO list_table VALUES (range(2049));")
    cursor = DBInterface.execute(conn, "SELECT * FROM list_table;")
    frame = DataFrame(cursor)
    @test length(frame[1, :int_list]) == 2049
    DBInterface.close!(conn)
end
@testset "Test big bitstring" begin
    conn = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(conn, "CREATE TABLE bit_table (bits BIT);")
    # 131073 = 64 * 2048 + 1
    DBInterface.execute(conn, "INSERT INTO bit_table VALUES (bitstring('1010', 131073));")
    cursor = DBInterface.execute(conn, "SELECT * FROM bit_table;")
    frame = DataFrame(cursor)
    # The BIT type is currently mapped to Julia in an odd way, so the check is
    # skipped. Can reenable following https://github.com/duckdb/duckdb/issues/7065
    @test length(frame[1, :bits]) == 131073 skip = true
    DBInterface.close!(conn)
end
@testset "Test big string" begin
    conn = DBInterface.connect(DuckDB.DB)
    # 1024 repetitions of one emoji plus one more: 1025 characters in total
    DBInterface.execute(conn, "CREATE TABLE str_table (str VARCHAR);")
    DBInterface.execute(conn, "INSERT INTO str_table VALUES (repeat('🦆', 1024) || '🪿');")
    cursor = DBInterface.execute(conn, "SELECT * FROM str_table;")
    frame = DataFrame(cursor)
    @test length(frame[1, :str]) == 1025
    DBInterface.close!(conn)
end
@testset "Test big map" begin
    conn = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(conn, "CREATE TABLE map_table (map MAP(VARCHAR, INT));")
    # build a single map value with 2049 entries
    insert_sql =
        "INSERT INTO map_table VALUES (map_from_entries([{'k': 'billy' || num, 'v': num} for num in range(2049)]));"
    DBInterface.execute(conn, insert_sql)
    cursor = DBInterface.execute(conn, "SELECT * FROM map_table;")
    frame = DataFrame(cursor)
    @test length(frame[1, :map]) == 2049
    DBInterface.close!(conn)
end
@testset "Test big struct" begin
    conn = DBInterface.connect(DuckDB.DB)
    # one struct whose two list fields hold 1024 and 1025 integers
    DBInterface.execute(conn, "CREATE TABLE struct_table (stct STRUCT(a INT[], b INT[]));")
    DBInterface.execute(conn, "INSERT INTO struct_table VALUES ({'a': range(1024), 'b': range(1025)});")
    frame = DataFrame(DBInterface.execute(conn, "SELECT * FROM struct_table;"))
    record = frame[1, :stct]
    @test length(record.a) == 1024
    @test length(record.b) == 1025
    DBInterface.close!(conn)
end
@testset "Test big union" begin
    conn = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(conn, "CREATE TABLE union_table (uni UNION(a INT[], b INT));")
    # first row uses the list member (2049 elements), second row the value 42
    DBInterface.execute(conn, "INSERT INTO union_table (uni) VALUES (union_value(a := range(2049))), (42);")
    cursor = DBInterface.execute(conn, "SELECT * FROM union_table;")
    frame = DataFrame(cursor)
    @test length(frame[1, :uni]) == 2049
    DBInterface.close!(conn)
end

View File

@@ -0,0 +1,12 @@
@testset "C API Type Checks" begin
    # Check struct sizes.
    # The timestamp struct must be exactly as large as the date struct plus the
    # time struct. NOTE(review): per the original comment, a mismatch shows up
    # when the structs are declared `mutable` and therefore stored as pointers.
    date_and_time_size = sizeof(DuckDB.duckdb_date_struct) + sizeof(DuckDB.duckdb_time_struct)
    @test sizeof(DuckDB.duckdb_timestamp_struct) == date_and_time_size

    # Both string structs must have the same size. NOTE(review): per the
    # original comment, they correspond to a single union type in C.
    @test sizeof(DuckDB.duckdb_string_t) == sizeof(DuckDB.duckdb_string_t_ptr)
end

View File

@@ -0,0 +1,96 @@
# test_config.jl
@testset "Test configuration parameters" begin
    # Every ordering check below runs this query and inspects where the NULL row lands.
    null_order_sql = "SELECT 42 a UNION ALL SELECT NULL ORDER BY a"

    # without any configuration the NULL row is expected after 42
    con = DBInterface.connect(DuckDB.DB, ":memory:")
    tbl = rowtable(DBInterface.execute(con, null_order_sql))
    @test isequal(tbl, [(a = 42,), (a = missing,)])
    DBInterface.close!(con)

    # with default_null_order = nulls_first the NULL row is expected before 42
    config = DuckDB.Config()
    DuckDB.set_config(config, "default_null_order", "nulls_first")
    con = DBInterface.connect(DuckDB.DB, ":memory:", config)
    df = DataFrame(DBInterface.execute(con, null_order_sql))
    @test names(df) == ["a"]
    @test size(df, 1) == 2
    @test isequal(df.a, [missing, 42])
    DBInterface.close!(con)

    # an unknown option makes connecting fail; closing a config twice must be harmless
    DuckDB.set_config(config, "unrecognized option", "aaa")
    @test_throws DuckDB.ConnectionException con = DBInterface.connect(DuckDB.DB, ":memory:", config)
    DBInterface.close!(config)
    DBInterface.close!(config)

    # test different ways to create a config object, all should be equivalent
    conf1 = DuckDB.Config()
    DuckDB.set_config(conf1, "default_null_order", "nulls_first")
    conf2 = DuckDB.Config()
    conf2["default_null_order"] = "nulls_first"
    conf3 = DuckDB.Config(default_null_order = "nulls_first")
    conf4 = DuckDB.Config(["default_null_order" => "nulls_first"])
    @testset for config in [conf1, conf2, conf3, conf4]
        con = DBInterface.connect(DuckDB.DB, ":memory:", config)
        # the NULL row is expected first for every construction style
        tbl = rowtable(DBInterface.execute(con, null_order_sql))
        @test isequal(tbl, [(a = missing,), (a = 42,)])
        DBInterface.close!(con)

        DuckDB.set_config(config, "unrecognized option", "aaa")
        @test_throws DuckDB.ConnectionException con = DBInterface.connect(DuckDB.DB, ":memory:", config)
        DBInterface.close!(config)
        DBInterface.close!(config)
    end

    # config options can be specified directly in the call, as pairs ...
    con = DBInterface.connect(DuckDB.DB, ":memory:"; config = ["default_null_order" => "nulls_first"])
    tbl = rowtable(DBInterface.execute(con, null_order_sql))
    @test isequal(tbl, [(a = missing,), (a = 42,)])
    close(con)

    # ... or as a named tuple
    con = DBInterface.connect(DuckDB.DB, ":memory:"; config = (; default_null_order = "nulls_first"))
    tbl = rowtable(DBInterface.execute(con, null_order_sql))
    @test isequal(tbl, [(a = missing,), (a = 42,)])
    close(con)

    # special handling of the readonly option: create a table in an on-disk
    # database, reopen it read-only and expect a second CREATE TABLE to fail
    file = tempname()
    con = DBInterface.connect(DuckDB.DB, file)
    DBInterface.execute(con, "CREATE TABLE t1(a INTEGER)")
    close(con)
    con = DBInterface.connect(DuckDB.DB, file; readonly = true)
    @test_throws DuckDB.QueryException DBInterface.execute(con, "CREATE TABLE t2(a INTEGER)")
    close(con)
end
@testset "Test Set TimeZone" begin
    con = DBInterface.connect(DuckDB.DB, ":memory:")
    read_tz_sql = "SELECT CURRENT_SETTING('TimeZone') AS tz"

    # set the time zone, then read the setting back
    DBInterface.execute(con, "SET TimeZone='UTC'")
    setting = DataFrame(DBInterface.execute(con, read_tz_sql))
    @test isequal(setting[1, "tz"], "UTC")

    # repeat with a named region
    DBInterface.execute(con, "SET TimeZone='America/Los_Angeles'")
    setting = DataFrame(DBInterface.execute(con, read_tz_sql))
    @test isequal(setting[1, "tz"], "America/Los_Angeles")

    DBInterface.close!(con)
end

View File

@@ -0,0 +1,55 @@
# test_connection.jl
@testset "Test opening and closing an in-memory database" begin
    handle = DBInterface.connect(DuckDB.DB, ":memory:")
    # close three times in a row: repeated closing must not cause any problems
    for _ in 1:3
        DBInterface.close!(handle)
    end
    # reaching this line without an exception is the actual check
    @test true

    # isopen must track the state of the handle across close()
    handle = DBInterface.connect(DuckDB.DB, ":memory:")
    @test isopen(handle)
    close(handle)
    @test !isopen(handle)
end
@testset "Test opening a bogus directory" begin
    # connecting to this path is expected to raise a ConnectionException
    bogus_path = "/path/to/bogus/directory"
    @test_throws DuckDB.ConnectionException DBInterface.connect(DuckDB.DB, bogus_path)
end
@testset "Test opening and closing an on-disk database" begin
    # This checks for an issue where the DB and the connection are
    # closed but the actual db is not (and subsequently cannot be opened
    # in a different process). To check this, we create a DB, write some
    # data to it, close the connection and check if the WAL file exists.
    #
    # Ideally, the WAL file should not exist, but Garbage Collection of Julia
    # may not have run yet, so open database handles may still exist, preventing
    # the database from being closed properly.
    db_path = joinpath(mktempdir(), "duckdata.db")
    db_path_wal = db_path * ".wal"

    # Create a table with one row in the database file, then close the
    # connection and the database again.
    function write_data(dbfile::String)
        db = DuckDB.DB(dbfile)
        conn = DBInterface.connect(db)
        for statement in (
            "CREATE OR REPLACE TABLE test (a INTEGER, b INTEGER);",
            "INSERT INTO test VALUES (1, 2);"
        )
            DBInterface.execute(conn, statement)
        end
        DBInterface.close!(conn)
        DuckDB.close_database(db)
        return true
    end

    write_data(db_path)
    @test !isfile(db_path_wal) # WAL file should not exist
    @test isfile(db_path) # the database file itself must exist

    # If a library path is configured, open the file with the duckdb binary
    # looked up one directory above that library.
    if haskey(ENV, "JULIA_DUCKDB_LIBRARY")
        duckdb_binary = joinpath(dirname(ENV["JULIA_DUCKDB_LIBRARY"]), "..", "duckdb")
        result = run(`$duckdb_binary $db_path -c "SELECT * FROM test LIMIT 1"`)
        @test success(result)
    end
end

View File

@@ -0,0 +1,89 @@
# test_decimals.jl
@testset "Test decimal support" begin
    con = DBInterface.connect(DuckDB.DB)
    # one literal per DECIMAL width used here: 4, 9, 18 and 38 digits
    decimal_sql =
        "SELECT 42.3::DECIMAL(4,1) a, 4923.3::DECIMAL(9,1) b, 421.423::DECIMAL(18,3) c, 129481294.3392::DECIMAL(38,4) d"
    frame = DataFrame(DBInterface.execute(con, decimal_sql))

    @test names(frame) == ["a", "b", "c", "d"]
    @test size(frame, 1) == 1
    @test frame.a == [42.3]
    @test frame.b == [4923.3]
    @test frame.c == [421.423]
    @test frame.d == [129481294.3392]

    DBInterface.close!(con)
end
# test returning decimals in a table function

# Bind callback: declares four result columns "a".."d" with storage types
# Int16/Int32/Int64/Int128 and 0/1/2/3 decimal places. Returns `missing`.
function my_bind_function(info::DuckDB.BindInfo)
    column_types = (
        "a" => FixedDecimal{Int16, 0},
        "b" => FixedDecimal{Int32, 1},
        "c" => FixedDecimal{Int64, 2},
        "d" => FixedDecimal{Int128, 3}
    )
    for (column_name, column_type) in column_types
        DuckDB.add_result_column(info, column_name, column_type)
    end
    return missing
end
# Scan state for the table function: `pos` counts the rows emitted so far and
# starts at zero.
mutable struct MyInitStruct
    pos::Int64

    MyInitStruct() = new(0)
end
# Init callback: returns a fresh scan state with `pos == 0`; `info` is not used.
my_init_function(info::DuckDB.InitInfo) = MyInitStruct()
# Main callback: writes the raw values 42, 420, 4200 into all four output
# columns until three rows have been emitted in total, then reports the number
# of rows written in this call as the chunk size.
function my_main_function(info::DuckDB.FunctionInfo, output::DuckDB.DataChunk)
    state = DuckDB.get_init_info(info, MyInitStruct)
    col_a = DuckDB.get_array(output, 1, Int16)
    col_b = DuckDB.get_array(output, 2, Int32)
    col_c = DuckDB.get_array(output, 3, Int64)
    col_d = DuckDB.get_array(output, 4, Int128)

    written = 0
    scale = 1
    # stop when the chunk is full or when three rows exist overall
    while written < DuckDB.VECTOR_SIZE && state.pos < 3
        slot = written + 1
        value = 42 * scale
        col_a[slot] = value
        col_b[slot] = value
        col_c[slot] = value
        col_d[slot] = value
        written += 1
        state.pos += 1
        scale *= 10
    end

    DuckDB.set_size(output, written)
    return
end
@testset "Test returning decimals from a table functions" begin
    con = DBInterface.connect(DuckDB.DB)

    # the table function takes no arguments
    arguments = DataType[]
    DuckDB.create_table_function(con, "my_function", arguments, my_bind_function, my_init_function, my_main_function)
    # force a collection after registering and again after executing
    GC.gc()

    # 3 elements
    cursor = DBInterface.execute(con, "SELECT * FROM my_function()")
    GC.gc()

    frame = DataFrame(cursor)
    @test names(frame) == ["a", "b", "c", "d"]
    @test size(frame, 1) == 3
    # the same raw values appear with 0, 1, 2 and 3 decimal places
    @test frame.a == [42, 420, 4200]
    @test frame.b == [4.2, 42, 420]
    @test frame.c == [0.42, 4.2, 42]
    @test frame.d == [0.042, 0.42, 4.2]
end

View File

@@ -0,0 +1,190 @@
# test_old_interface.jl
@testset "DB Connection" begin
    # open/connect through the old interface and check the connection type
    database = DuckDB.open(":memory:")
    connection = DuckDB.connect(database)
    @test isa(connection, DuckDB.Connection)

    DuckDB.disconnect(connection)
    DuckDB.close(database)
end
@testset "Test append DataFrame" begin
    # Open the database
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)

    # Create the table the data is appended to
    create_sql =
        "CREATE TABLE dtypes(bool BOOLEAN, tint TINYINT, sint SMALLINT, int INTEGER, bint BIGINT, utint UTINYINT, usint USMALLINT, uint UINTEGER, ubint UBIGINT, float FLOAT, double DOUBLE, date DATE, time TIME, vchar VARCHAR, nullval INTEGER)"
    DuckDB.execute(con, create_sql)

    # Two rows, one column per table column, in the same order as the table
    input_df = DataFrame(
        bool = [true, false],
        tint = Int8.(1:2),
        sint = Int16.(1:2),
        int = Int32.(1:2),
        bint = Int64.(1:2),
        utint = UInt8.(1:2),
        usint = UInt16.(1:2),
        uint = UInt32.(1:2),
        ubint = UInt64.(1:2),
        float = Float32.(1:2),
        double = Float64.(1:2),
        date = [Dates.Date("1970-04-11"), Dates.Date("1970-04-12")],
        time = [Dates.Time(0, 0, 0, 100, 0), Dates.Time(0, 0, 0, 200, 0)],
        vchar = ["Foo", "Bar"],
        nullval = [missing, Int32(2)]
    )

    # append the DataFrame to the table
    DuckDB.appendDataFrame(input_df, con, "dtypes")

    # read the table back
    output_df = DataFrame(DuckDB.toDataFrame(con, "select * from dtypes;"))

    # compare input and output column by column, by position
    for col_pos in 1:ncol(input_df)
        @test isequal(input_df[!, col_pos], output_df[:, col_pos])
    end

    # Disconnect and close the database
    DuckDB.disconnect(con)
    DuckDB.close(db)
end
@testset "Test README" begin
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)

    DuckDB.execute(con, "CREATE TABLE integers(date DATE, jcol INTEGER)")
    DuckDB.execute(con, "INSERT INTO integers VALUES ('2021-09-27', 4), ('2021-09-28', 6), ('2021-09-29', 8)")

    # convert an already executed result
    res = DuckDB.execute(con, "SELECT * FROM integers")
    df = DataFrame(DuckDB.toDataFrame(res))
    @test isa(df, DataFrame)

    # convert directly from a connection and a query string
    df = DataFrame(DuckDB.toDataFrame(con, "SELECT * FROM integers"))
    println(typeof(df))
    @test isa(df, DataFrame)

    # appending the rows that were just read must not raise
    DuckDB.appendDataFrame(df, con, "integers")

    DuckDB.disconnect(con)
    DuckDB.close(db)
end
#
@testset "HUGE Int test" begin
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)

    DuckDB.execute(con, "CREATE TABLE huge(id INTEGER,data HUGEINT);")
    DuckDB.execute(con, "INSERT INTO huge VALUES (1,NULL), (2, 1761718171), (3, 171661889178);")

    # This testset previously contained no assertions at all, so it could only
    # fail by throwing. Check what actually comes back; sorting by `id` keeps
    # the comparison independent of the row order of the un-ordered SELECT.
    df = sort(DataFrame(DuckDB.toDataFrame(con, "SELECT * FROM huge")), :id)
    @test isa(df, DataFrame)
    @test size(df, 1) == 3
    @test isequal(df.id, [1, 2, 3])
    @test isequal(df.data, [missing, 1761718171, 171661889178])

    DuckDB.disconnect(con)
    DuckDB.close(db)
end
@testset "Interval type" begin
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)

    DuckDB.execute(con, "CREATE TABLE interval(interval INTERVAL);")
    # intervals expressed in hours, months, microseconds and years
    DuckDB.execute(
        con,
        """
        INSERT INTO interval VALUES
        (INTERVAL 5 HOUR),
        (INTERVAL 12 MONTH),
        (INTERVAL 12 MICROSECOND),
        (INTERVAL 1 YEAR);
        """
    )

    # only checks that the interval column can be converted to a DataFrame
    frame = DataFrame(DuckDB.toDataFrame(con, "SELECT * FROM interval;"))
    @test isa(frame, DataFrame)

    DuckDB.disconnect(con)
    DuckDB.close(db)
end
@testset "Timestamp" begin
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)

    # space-separated literals on insert; filter with a 'Z'-suffixed ISO literal
    DuckDB.execute(con, "CREATE TABLE timestamp(timestamp TIMESTAMP , data INTEGER);")
    DuckDB.execute(
        con,
        "INSERT INTO timestamp VALUES ('2021-09-27 11:30:00.000', 4), ('2021-09-28 12:30:00.000', 6), ('2021-09-29 13:30:00.000', 8);"
    )
    matched = DataFrame(DuckDB.execute(con, "SELECT * FROM timestamp WHERE timestamp='2021-09-27T11:30:00Z';"))
    @test isequal(matched[1, "timestamp"], DateTime(2021, 9, 27, 11, 30, 0))

    # 'T'-separated literals on insert; filter with a bound Julia DateTime parameter
    DuckDB.execute(con, "CREATE TABLE timestamp1(timestamp TIMESTAMP , data INTEGER);")
    DuckDB.execute(
        con,
        "INSERT INTO timestamp1 VALUES ('2021-09-27T10:30:00.000', 4), ('2021-09-28T11:30:00.000', 6), ('2021-09-29T12:30:00.000', 8);"
    )
    first_stamp = DateTime(2021, 9, 27, 10, 30, 0)
    matched = DataFrame(DuckDB.execute(con, "SELECT * FROM timestamp1 WHERE timestamp=?;", [DateTime(2021, 9, 27, 10, 30, 0)]))
    @test isequal(matched[1, "timestamp"], first_stamp)

    # same row, filtered with a zone-less string literal
    matched = DataFrame(DuckDB.execute(con, "SELECT * FROM timestamp1 WHERE timestamp='2021-09-27T10:30:00.000';"))
    @test isequal(matched[1, "timestamp"], first_stamp)

    DuckDB.disconnect(con)
    DuckDB.close(db)
end
# With the session time zone set to Asia/Shanghai, TIMESTAMPTZ values are expected
# back eight hours earlier than the literal that was written (11:30 -> 03:30), while
# plain TIMESTAMP values are returned unchanged.
@testset "TimestampTZ" begin
    database = DuckDB.open(":memory:")
    connection = DuckDB.connect(database)
    DuckDB.execute(connection, "SET TimeZone='Asia/Shanghai'") # UTC+8
    expected_instant = DateTime(2021, 9, 27, 3, 30, 0)

    literals =
        DuckDB.execute(connection, "SELECT TIMESTAMPTZ '2021-09-27 11:30:00' tz, TIMESTAMP '2021-09-27 11:30:00' ts;") |>
        DataFrame
    @test isequal(literals[1, "tz"], DateTime(2021, 9, 27, 3, 30, 0))
    @test isequal(literals[1, "ts"], DateTime(2021, 9, 27, 11, 30, 0))

    DuckDB.execute(connection, "CREATE TABLE timestamptz(timestamp TIMESTAMPTZ , data INTEGER);")
    DuckDB.execute(
        connection,
        "INSERT INTO timestamptz VALUES ('2021-09-27 11:30:00.000', 4), ('2021-09-28 12:30:00.000', 6), ('2021-09-29 13:30:00.000', 8);"
    )

    # Filter written in session-local time.
    hit = DuckDB.execute(connection, "SELECT * FROM timestamptz WHERE timestamp='2021-09-27 11:30:00'") |> DataFrame
    @test isequal(hit[1, "data"], 4)
    @test isequal(hit[1, "timestamp"], expected_instant)

    # Filter written with an explicit 'Z' suffix.
    hit = DuckDB.execute(connection, "SELECT * FROM timestamptz WHERE timestamp='2021-09-27T03:30:00Z'") |> DataFrame
    @test isequal(hit[1, "data"], 4)
    @test isequal(hit[1, "timestamp"], expected_instant)

    # Filter written with an explicit +09 offset.
    hit = DuckDB.execute(connection, "SELECT * FROM timestamptz WHERE timestamp='2021-09-27T12:30:00+09'") |> DataFrame
    @test isequal(hit[1, "data"], 4)
    @test isequal(hit[1, "timestamp"], expected_instant)

    DuckDB.disconnect(connection)
    DuckDB.close(database)
end
# A table mixing VARCHAR, DECIMAL(10,2) and INTEGER columns can be read back into a
# DataFrame through the legacy API.
@testset "Items table" begin
    db = DuckDB.open(":memory:")
    con = DuckDB.connect(db)
    DuckDB.execute(con, "CREATE TABLE items(item VARCHAR, value DECIMAL(10,2), count INTEGER);")
    DuckDB.execute(con, "INSERT INTO items VALUES ('jeans', 20.0, 1), ('hammer', 42.2, 2);")
    res = DataFrame(DuckDB.toDataFrame(con, "SELECT * FROM items;"))
    @test isa(res, DataFrame)
    DuckDB.disconnect(con)
    # Release the database handle as well, as the neighbouring legacy-API testsets do.
    DuckDB.close(db)
end
# DATE columns come back as `Date` values when a DBInterface result is passed
# through `DuckDB.toDataFrame`.
@testset "Integers and dates table" begin
    database = DuckDB.DB()
    DBInterface.execute(database, "CREATE TABLE integers(date DATE, data INTEGER);")
    DBInterface.execute(
        database,
        "INSERT INTO integers VALUES ('2021-09-27', 4), ('2021-09-28', 6), ('2021-09-29', 8);"
    )
    selection = DBInterface.execute(database, "SELECT * FROM integers;")
    frame = DataFrame(DuckDB.toDataFrame(selection))
    @test frame.date == [Date(2021, 9, 27), Date(2021, 9, 28), Date(2021, 9, 29)]
    @test isa(frame, DataFrame)
    DBInterface.close!(database)
end

View File

@@ -0,0 +1,154 @@
# test_prepare.jl
# Positional `?` parameters: single executions (including missing/nothing),
# `executemany` with a column table, binding a vector to `IN ?`, and double-close.
@testset "Test DBInterface.prepare" begin
    connection = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(connection, "CREATE TABLE test_table(i INTEGER, j DOUBLE)")
    insert_stmt = DBInterface.prepare(connection, "INSERT INTO test_table VALUES(?, ?)")

    # Three single-row inserts; both `missing` and `nothing` are expected to read back as missing.
    for row in ([1, 3.5], [missing, nothing], [2, 0.5])
        DBInterface.execute(insert_stmt, row)
    end
    contents = DBInterface.execute(connection, "SELECT * FROM test_table") |> DataFrame
    @test isequal(contents.i, [1, missing, 2])
    @test isequal(contents.j, [3.5, missing, 0.5])

    # execute many: one insert per row of the column table
    DBInterface.executemany(insert_stmt, (col1 = [1, 2, 3, 4, 5], col2 = [1, 2, 4, 8, -0.5]))
    contents = DBInterface.execute(connection, "SELECT * FROM test_table") |> DataFrame
    @test isequal(contents.i, [1, missing, 2, 1, 2, 3, 4, 5])
    @test isequal(contents.j, [3.5, missing, 0.5, 1, 2, 4, 8, -0.5])

    # can bind vectors to parameters
    in_stmt = DBInterface.prepare(connection, "FROM test_table WHERE i IN ?;")
    contents = DBInterface.execute(in_stmt, ([1, 2],)) |> DataFrame
    @test all(in([1, 2]), contents.i)

    # verify that double-closing does not cause any problems
    for _ in 1:2
        DBInterface.close!(in_stmt)
    end
    for _ in 1:2
        DBInterface.close!(connection)
    end
end
# Round-trips one Julia value per SQL type through `SELECT ?::<TYPE> a` and checks
# that the value read back equals the value bound.
@testset "Test DBInterface.prepare with various types" begin
    con = DBInterface.connect(DuckDB.DB)
    # `type_names[k]` is the SQL cast applied to `type_values[k]`.
    type_names = [
        "BOOLEAN",
        "TINYINT",
        "SMALLINT",
        "INTEGER",
        "BIGINT",
        "UTINYINT",
        "USMALLINT",
        "UINTEGER",
        "UBIGINT",
        "FLOAT",
        "DOUBLE",
        "DATE",
        "TIME",
        "TIMESTAMP",
        "VARCHAR",
        "INTEGER", # paired with `missing`: a NULL bound to a typed parameter
        "BLOB"
    ]
    type_values = [
        Bool(true),
        Int8(3),
        Int16(4),
        Int32(8),
        Int64(20),
        UInt8(42),
        UInt16(300),
        UInt32(420421),
        UInt64(43294832),
        Float32(0.5),
        Float64(0.25),
        Date(1992, 9, 20),
        Time(23, 10, 33),
        DateTime(1992, 9, 20, 23, 10, 33),
        String("hello world"),
        missing,
        rand(UInt8, 100)
    ]
    # `zip` would silently truncate if the two lists drifted apart, so guard the pairing.
    @test length(type_names) == length(type_values)
    for (type_name, value) in zip(type_names, type_values)
        stmt = DBInterface.prepare(con, string("SELECT ?::", type_name, " a"))
        result = DataFrame(DBInterface.execute(stmt, [value]))
        @test isequal(result.a, [value])
        # The DataFrame already holds the data, so the statement can be released now.
        DBInterface.close!(stmt)
    end
    DBInterface.close!(con)
end
# Preparing a statement that uses `:name` placeholders is expected to raise a
# QueryException.
@testset "DBInterface.prepare: named parameters not supported yet" begin
    connection = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(connection, "CREATE TABLE test_table(i INTEGER, j DOUBLE)")
    colon_sql = "INSERT INTO test_table VALUES(:col1, :col2)"
    @test_throws DuckDB.QueryException DBInterface.prepare(connection, colon_sql)
    DBInterface.close!(connection)
end
# `$name` placeholders can be filled from a Dict or from keyword arguments, and
# `$1`/`$2` placeholders are filled by position regardless of their textual order.
@testset "prepare: Named parameters" begin
    connection = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(connection, "CREATE TABLE test_table(i INTEGER, j DOUBLE)")

    # Check named syntax with Kwargs and Dict
    named_insert = DBInterface.prepare(connection, raw"INSERT INTO test_table VALUES($col1, $col2)")
    DBInterface.execute(named_insert, Dict(["col1" => 1, "col2" => 3.5]))
    DBInterface.execute(named_insert; col1 = 2, col2 = 4.5)
    stored = DataFrame(DBInterface.execute(connection, "SELECT * FROM test_table"))
    @test isequal(stored.i, [1, 2])
    @test isequal(stored.j, [3.5, 4.5])

    # Check positional syntax: $2 feeds column i and $1 feeds column j
    DBInterface.execute(connection, "TRUNCATE TABLE test_table")
    swapped_insert = DBInterface.prepare(connection, raw"INSERT INTO test_table VALUES($2, $1)")
    for args in ((3.5, 1), (4.5, 2))
        DBInterface.execute(swapped_insert, args)
    end
    stored = DataFrame(DBInterface.execute(connection, "SELECT * FROM test_table"))
    @test isequal(stored.i, [1, 2])
    @test isequal(stored.j, [3.5, 4.5])

    DBInterface.close!(connection)
end
# `executemany` with `$name` placeholders inserts one row per element of the
# column table; `:name` placeholders are rejected at prepare time.
@testset "DBInterface.prepare: execute many" begin
    connection = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(connection, "CREATE TABLE test_table(i INTEGER, j DOUBLE)")
    @test_throws DuckDB.QueryException DBInterface.prepare(connection, "INSERT INTO test_table VALUES(:col1, :col2)")

    bulk_insert = DBInterface.prepare(connection, raw"INSERT INTO test_table VALUES($col1, $col2)")
    ints = [1, 2, 3, 4, 5]
    floats = [1, 2, 4, 8, -0.5]
    DBInterface.executemany(bulk_insert, (col1 = ints, col2 = floats))

    stored = DataFrame(DBInterface.execute(connection, "SELECT * FROM test_table"))
    @test isequal(stored.i, ints)
    @test isequal(stored.j, floats)
    DBInterface.close!(connection)
end
# A single untyped `?` placeholder can be re-executed with values of different
# Julia types (integer, string, DateTime) and returns each value unchanged.
@testset "DBInterface.prepare: ambiguous parameters" begin
    con = DBInterface.connect(DuckDB.DB)
    stmt = DBInterface.prepare(con, "SELECT ? AS a")
    result = DataFrame(DBInterface.execute(stmt, [42]))
    @test isequal(result.a, [42])
    result = DataFrame(DBInterface.execute(stmt, ["hello world"]))
    @test isequal(result.a, ["hello world"])
    result = DataFrame(DBInterface.execute(stmt, [DateTime(1992, 9, 20, 23, 10, 33)]))
    @test isequal(result.a, [DateTime(1992, 9, 20, 23, 10, 33)])
    # Release the statement and connection, as the other prepare testsets do.
    DBInterface.close!(stmt)
    DBInterface.close!(con)
end

View File

@@ -0,0 +1,72 @@
# test_replacement_scan.jl
# Replacement-scan callback: when the requested table name parses as an Int64,
# redirect the scan to the `range` function with that number as its parameter.
# Any other table name is left alone by returning without touching `info`.
function RangeReplacementScan(info)
    upper = tryparse(Int64, DuckDB.get_table_name(info))
    upper === nothing && return
    DuckDB.set_function_name(info, "range")
    DuckDB.add_function_parameter(info, DuckDB.create_value(upper))
    return
end
# With RangeReplacementScan installed, a numeric table name behaves like range(N);
# a non-numeric unknown table still raises a QueryException.
@testset "Test replacement scans" begin
    connection = DBInterface.connect(DuckDB.DB)
    DuckDB.add_replacement_scan!(connection, RangeReplacementScan, nothing)

    scanned = DBInterface.execute(connection, "SELECT * FROM \"2\" tbl(a)") |> DataFrame
    @test scanned.a == [0, 1]

    # names the callback declines are still reported as missing tables
    @test_throws DuckDB.QueryException DBInterface.execute(connection, "SELECT * FROM nonexistant")
    DBInterface.close!(connection)
end
# Replacement-scan callback for table names of the form "<text>*<count>": when the
# name splits into exactly two parts on '*' and the second parses as an Int64,
# redirect the scan to `repeat(<text>, <count>)`. Otherwise leave `info` untouched.
function RepeatReplacementScan(info)
    parts = split(DuckDB.get_table_name(info), "*")
    length(parts) == 2 || return
    times = tryparse(Int64, parts[2])
    times === nothing && return
    DuckDB.set_function_name(info, "repeat")
    DuckDB.add_function_parameter(info, DuckDB.create_value(parts[1]))
    DuckDB.add_function_parameter(info, DuckDB.create_value(times))
    return
end
# With RepeatReplacementScan installed, a table name such as "hello*2" yields the
# string repeated that many times; other unknown tables still raise a QueryException.
@testset "Test string replacement scans" begin
    connection = DBInterface.connect(DuckDB.DB)
    # add a replacement scan that turns "<text>*<count>" table names into repeat(text, count)
    DuckDB.add_replacement_scan!(connection, RepeatReplacementScan, nothing)

    scanned = DBInterface.execute(connection, "SELECT * FROM \"hello*2\" tbl(a)") |> DataFrame
    @test scanned.a == ["hello", "hello"]

    # names the callback declines are still reported as missing tables
    @test_throws DuckDB.QueryException DBInterface.execute(connection, "SELECT * FROM nonexistant")
    DBInterface.close!(connection)
end
# Replacement-scan callback that unconditionally throws (a plain String), used to
# check how a failing callback surfaces to the caller.
ErrorReplacementScan(info) = throw("replacement scan eek")
# A replacement scan whose callback throws is expected to surface as a
# QueryException from the query that triggered it.
@testset "Test error replacement scans" begin
    connection = DBInterface.connect(DuckDB.DB)
    DuckDB.add_replacement_scan!(connection, ErrorReplacementScan, nothing)
    failing_sql = "SELECT * FROM nonexistant"
    @test_throws DuckDB.QueryException DBInterface.execute(connection, failing_sql)
    DBInterface.close!(connection)
end

View File

@@ -0,0 +1,436 @@
# test_scalar_udf.jl
# Scalar UDF callback: writes twice the value of the first input column into the
# output vector, treating both as Int64 arrays of the chunk's size.
function my_double_function(
    info::DuckDB.duckdb_function_info,
    input::DuckDB.duckdb_data_chunk,
    output::DuckDB.duckdb_vector
)
    # Wrap the raw chunk handle and find out how many rows it carries
    chunk = DuckDB.DataChunk(input, false)
    nrows = DuckDB.get_size(chunk)
    # First (and only expected) input column, viewed as Int64
    source = DuckDB.get_array(DuckDB.get_vector(chunk, 1), Int64, nrows)
    # Output column, viewed as Int64
    target = DuckDB.get_array(DuckDB.Vec(output), Int64, nrows)
    for row in 1:nrows
        target[row] = source[row] * 2
    end
end
# Scalar UDF callback: copies even, non-NULL inputs to the output and marks every
# other row (odd value or NULL input) as NULL in the output validity mask.
function my_null_function(
    info::DuckDB.duckdb_function_info,
    input::DuckDB.duckdb_data_chunk,
    output::DuckDB.duckdb_vector
)
    chunk = DuckDB.DataChunk(input, false)
    nrows = DuckDB.get_size(chunk)
    # Input column: values plus validity mask
    in_vec = DuckDB.get_vector(chunk, 1)
    in_values = DuckDB.get_array(in_vec, Int64, nrows)
    in_validity = DuckDB.get_validity(in_vec)
    # Output column: values plus validity mask
    out_vec = DuckDB.Vec(output)
    out_values = DuckDB.get_array(out_vec, Int64, nrows)
    out_validity = DuckDB.get_validity(out_vec)
    for row in 1:nrows
        # The value is only read when the input row is valid (short-circuit &&).
        if DuckDB.isvalid(in_validity, row) && in_values[row] % 2 == 0
            # Output validity is not touched here; the original author notes it is true by default.
            out_values[row] = in_values[row]
        else
            DuckDB.setinvalid(out_validity, row)
        end
    end
end
# Scalar UDF callback that fails on every call by raising an ErrorException.
function my_error_function(
    info::DuckDB.duckdb_function_info,
    input::DuckDB.duckdb_data_chunk,
    output::DuckDB.duckdb_vector
)
    return error("Runtime error in scalar function")
end
# Scalar UDF callback: for each row, counts the occurrences of the character 'a' in
# the first input column and stores the count in the output vector.
# The input column is converted with the logical type recorded in the
# `DuckDB.ScalarFunction` object attached to the function as extra info.
function my_string_function_count_a(
    info::DuckDB.duckdb_function_info,
    input::DuckDB.duckdb_data_chunk,
    output::DuckDB.duckdb_vector
)
    input_chunk = DuckDB.DataChunk(input, false)
    output_vec = DuckDB.Vec(output)
    n = DuckDB.get_size(input_chunk)
    chunks = [input_chunk]
    # Recover the Julia-side function object that carries the parameter types.
    extra_info_ptr = DuckDB.duckdb_scalar_function_get_extra_info(info)
    extra_info::DuckDB.ScalarFunction = unsafe_pointer_to_objref(extra_info_ptr)
    conversion_data = DuckDB.ColumnConversionData(chunks, 1, extra_info.logical_parameters[1], nothing)
    a_data_converted = DuckDB.convert_column(conversion_data)
    # Reuse the wrapper built above instead of constructing a second `Vec`.
    output_data = DuckDB.get_array(output_vec, Int, n)
    for row in 1:n
        output_data[row] = count(==('a'), a_data_converted[row])
    end
    return nothing
end
# Scalar UDF callback: for each row, writes reverse(first column) followed by the
# second column into the output vector as a string. Both input columns are
# converted using the logical types stored in the attached `DuckDB.ScalarFunction`.
function my_string_function_reverse_concat(
    info::DuckDB.duckdb_function_info,
    input::DuckDB.duckdb_data_chunk,
    output::DuckDB.duckdb_vector
)
    chunk = DuckDB.DataChunk(input, false)
    out_vec = DuckDB.Vec(output)
    nrows = Int64(DuckDB.get_size(chunk))
    chunk_list = [chunk]
    # Recover the Julia-side function object that carries the parameter types.
    udf::DuckDB.ScalarFunction = unsafe_pointer_to_objref(DuckDB.duckdb_scalar_function_get_extra_info(info))
    first_spec = DuckDB.ColumnConversionData(chunk_list, 1, udf.logical_parameters[1], nothing)
    second_spec = DuckDB.ColumnConversionData(chunk_list, 2, udf.logical_parameters[2], nothing)
    first_col = DuckDB.convert_column(first_spec)
    second_col = DuckDB.convert_column(second_spec)
    for row in 1:nrows
        DuckDB.assign_string_element(out_vec, row, string(reverse(first_col[row]), second_col[row]))
    end
    return nothing
end
# End-to-end test of the low-level scalar-function API: three callbacks are wrapped
# with @cfunction, registered on one connection, and invoked from SQL over a
# ten-row table (i = 0..9).
@testset "Test custom scalar functions" begin
    database = DuckDB.DB()
    connection = DuckDB.connect(database)
    DuckDB.query(connection, "CREATE TABLE test_table AS SELECT i FROM range(10) t(i)")
    # One BIGINT logical type is shared by every parameter and return type below.
    bigint_type = DuckDB.duckdb_create_logical_type(DuckDB.DUCKDB_TYPE_BIGINT)

    # --- Test 1: double_value(i) returns 2*i ---
    double_fn = DuckDB.duckdb_create_scalar_function()
    DuckDB.duckdb_scalar_function_set_name(double_fn, "double_value")
    DuckDB.duckdb_scalar_function_add_parameter(double_fn, bigint_type)
    DuckDB.duckdb_scalar_function_set_return_type(double_fn, bigint_type)
    double_ptr = @cfunction(
        my_double_function,
        Cvoid,
        (DuckDB.duckdb_function_info, DuckDB.duckdb_data_chunk, DuckDB.duckdb_vector)
    )
    DuckDB.duckdb_scalar_function_set_function(double_fn, double_ptr)
    double_status = DuckDB.duckdb_register_scalar_function(connection.handle, double_fn)
    @test double_status == DuckDB.DuckDBSuccess
    doubled_df = DataFrame(DuckDB.query(connection, "SELECT i, double_value(i) as doubled FROM test_table"))
    @test names(doubled_df) == ["i", "doubled"]
    @test size(doubled_df, 1) == 10
    @test doubled_df.doubled == doubled_df.i .* 2

    # --- Test 2: null_if_odd(i) returns i for even i and NULL otherwise ---
    null_fn = DuckDB.duckdb_create_scalar_function()
    DuckDB.duckdb_scalar_function_set_name(null_fn, "null_if_odd")
    DuckDB.duckdb_scalar_function_add_parameter(null_fn, bigint_type)
    DuckDB.duckdb_scalar_function_set_return_type(null_fn, bigint_type)
    null_ptr = @cfunction(
        my_null_function,
        Cvoid,
        (DuckDB.duckdb_function_info, DuckDB.duckdb_data_chunk, DuckDB.duckdb_vector)
    )
    DuckDB.duckdb_scalar_function_set_function(null_fn, null_ptr)
    null_status = DuckDB.duckdb_register_scalar_function(connection.handle, null_fn)
    @test null_status == DuckDB.DuckDBSuccess
    nulled_df = DataFrame(DuckDB.query(connection, "SELECT i, null_if_odd(i) as value_or_null FROM test_table"))
    @test names(nulled_df) == ["i", "value_or_null"]
    @test size(nulled_df, 1) == 10
    # Expected column for i = 0..9: the value itself when even, missing when odd.
    expected_column = Union{Missing, Int64}[iseven(i) ? i : missing for i in 0:9]
    @test all(nulled_df.value_or_null .=== expected_column)

    # --- Test 3: error_function(i) always throws inside the callback ---
    error_fn = DuckDB.duckdb_create_scalar_function()
    DuckDB.duckdb_scalar_function_set_name(error_fn, "error_function")
    DuckDB.duckdb_scalar_function_add_parameter(error_fn, bigint_type)
    DuckDB.duckdb_scalar_function_set_return_type(error_fn, bigint_type)
    error_ptr = @cfunction(
        my_error_function,
        Cvoid,
        (DuckDB.duckdb_function_info, DuckDB.duckdb_data_chunk, DuckDB.duckdb_vector)
    )
    DuckDB.duckdb_scalar_function_set_function(error_fn, error_ptr)
    error_status = DuckDB.duckdb_register_scalar_function(connection.handle, error_fn)
    @test error_status == DuckDB.DuckDBSuccess
    # The callback's ErrorException is expected to reach the caller of `query`.
    @test_throws ErrorException DuckDB.query(connection, "SELECT error_function(i) FROM test_table")

    # Clean up: logical type first, then the connection and the database
    DuckDB.duckdb_destroy_logical_type(bigint_type)
    DuckDB.disconnect(connection)
    DuckDB.close(database)
end
# Dummy two-argument function used as the body of the `mysum` UDFs below.
function mysum(a, b)
    return a + b
end
# Returns the reversed input converted to a string.
function my_reverse(s)
    return string(reverse(s))
end
# Covers the building blocks of `@create_scalar_function` (signature parsing and
# generation of per-parameter conversion expressions) and then registers and calls
# a macro-generated UDF from SQL.
@testset "UDF_Macro" begin
    # Signature parsing: name, (argument, type) pairs and return type
    signature = :(mysum(a::Int, b::String)::Int)
    parsed_name, parsed_params, parsed_return = DuckDB._udf_parse_function_expr(signature)
    @test parsed_name == :mysum
    @test parsed_params == [(:a, :Int), (:b, :String)]
    @test parsed_return == :Int

    # Conversion expressions: one assignment per parameter, numbered from 1
    generated_names, generated_exprs =
        DuckDB._udf_generate_conversion_expressions(parsed_params, :log_types, :convert, :param, :chunk)
    @test generated_names == [:param_1, :param_2]
    @test generated_exprs[1] == :(param_1 = convert(Int, log_types[1], chunk, 1))
    @test generated_exprs[2] == :(param_2 = convert(String, log_types[2], chunk, 2))

    # Generate the UDF, register it, and check that a second registration is rejected
    database = DuckDB.DB()
    connection = DuckDB.connect(database)
    sum_udf = DuckDB.@create_scalar_function mysum(a::Int, b::Int)::Int
    DuckDB.register_scalar_function(connection, sum_udf)
    @test_throws ArgumentError DuckDB.register_scalar_function(connection, sum_udf)

    DuckDB.execute(connection, "CREATE TABLE test1 (a INT, b INT);")
    DuckDB.execute(connection, "INSERT INTO test1 VALUES ('1', '2'), ('3','4'), ('5', '6')")
    summed = DataFrame(DuckDB.execute(connection, "SELECT mysum(a, b) as result FROM test1"))
    @test summed.result == [3, 7, 11]
end
# Registers macro-generated UDFs over String, Date, Bool, Int and Float64 arguments
# and checks each SQL result against the same Julia function broadcast over the
# registered DataFrame columns.
@testset "UDF Macro Various Types" begin
    import Dates
    database = DuckDB.DB()
    connection = DuckDB.connect(database)

    # Julia implementations backing the UDFs
    my_reverse_inner = (s) -> ("Inner:" * string(reverse(s)))
    fun_is_weekend = (d) -> Dates.dayofweek(d) in (6, 7)
    date_2020 = (x) -> Dates.Date(2020, 1, 1) + Dates.Day(x) # Dummy function
    my_and(a, b) = a && b
    my_int_add(a, b) = a + b
    my_mixed_add(a::Int, b::Float64) = a + b

    # Input tables: random numbers/booleans, a few strings, every day of 2020
    df_numbers =
        DataFrame(a = rand(1:100, 30), b = rand(1:100, 30), c = rand(30), d = rand(Bool, 30), e = rand(Bool, 30))
    df_strings = DataFrame(a = ["hello", "world", "julia", "duckdb", "🦆DB"])
    days = Date(2020, 1, 1):Day(1):Date(2020, 12, 31)
    df_dates = DataFrame(t = days, k = 1:length(days), is_weekend = fun_is_weekend.(days))
    DuckDB.register_table(connection, df_strings, "test_strings")
    DuckDB.register_table(connection, df_dates, "test_dates")
    DuckDB.register_table(connection, df_numbers, "test_numbers")

    # Create and register the UDFs (creation/registration order kept as-is)
    fun_string = DuckDB.@create_scalar_function my_reverse(s::String)::String (s) -> my_reverse_inner(s)
    DuckDB.register_scalar_function(connection, fun_string)
    fun_date = DuckDB.@create_scalar_function is_weekend(d::Date)::Bool fun_is_weekend
    fun_date2 = DuckDB.@create_scalar_function date_2020(x::Int)::Date date_2020
    DuckDB.register_scalar_function(connection, fun_date)
    DuckDB.register_scalar_function(connection, fun_date2)
    fun_and = DuckDB.@create_scalar_function my_and(a::Bool, b::Bool)::Bool my_and
    fun_int_add = DuckDB.@create_scalar_function my_int_add(a::Int, b::Int)::Int my_int_add
    fun_mixed_add = DuckDB.@create_scalar_function my_mixed_add(a::Int, b::Float64)::Float64 my_mixed_add
    DuckDB.register_scalar_function(connection, fun_and)
    DuckDB.register_scalar_function(connection, fun_int_add)
    DuckDB.register_scalar_function(connection, fun_mixed_add)

    # String argument and result
    reversed = DataFrame(DuckDB.execute(connection, "SELECT my_reverse(a) as result FROM test_strings"))
    @test reversed.result == my_reverse_inner.(df_strings.a)
    # Date argument, Bool result
    weekend_flags = DataFrame(DuckDB.execute(connection, "SELECT is_weekend(t) as result FROM test_dates"))
    @test weekend_flags.result == fun_is_weekend.(df_dates.t)
    # Int argument, Date result
    offsets = DataFrame(DuckDB.execute(connection, "SELECT date_2020(k) as result FROM test_dates"))
    @test offsets.result == date_2020.(df_dates.k)
    # Bool arguments
    conjunctions = DataFrame(DuckDB.execute(connection, "SELECT my_and(d, e) as result FROM test_numbers"))
    @test conjunctions.result == my_and.(df_numbers.d, df_numbers.e)
    # Int arguments
    int_sums = DataFrame(DuckDB.execute(connection, "SELECT my_int_add(a, b) as result FROM test_numbers"))
    @test int_sums.result == my_int_add.(df_numbers.a, df_numbers.b)
    # Mixed Int/Float64 arguments
    mixed_sums = DataFrame(DuckDB.execute(connection, "SELECT my_mixed_add(a, c) as result FROM test_numbers"))
    @test mixed_sums.result == my_mixed_add.(df_numbers.a, df_numbers.c)
end
# A macro-generated UDF whose Julia body throws for some rows (even inputs here)
# must make the enclosing query fail with an exception.
@testset "UDF Macro Exception" begin
    f_error = function (a)
        iseven(a) && throw(ArgumentError("Even number"))
        return a + 1
    end
    database = DuckDB.DB()
    connection = DuckDB.connect(database)
    fun_error = DuckDB.@create_scalar_function f_error(a::Int)::Int f_error
    DuckDB.register_scalar_function(connection, fun_error)
    DuckDB.register_table(connection, DataFrame(a = 1:10), "test1")
    @test_throws Exception DataFrame(DuckDB.execute(connection, "SELECT f_error(a) as result FROM test1"))
end
# Rows where either UDF argument is missing are expected to produce a missing
# result; only the fully populated row yields a sum.
@testset "UDF Macro Missing Values" begin
    f_add = (a, b) -> a + b
    database = DuckDB.DB()
    connection = DuckDB.connect(database)
    add_udf = DuckDB.@create_scalar_function f_add(a::Int, b::Int)::Int f_add
    DuckDB.register_scalar_function(connection, add_udf)
    sparse_input = DataFrame(a = [1, missing, 3], b = [missing, 2, 3])
    DuckDB.register_table(connection, sparse_input, "test1")
    added = DataFrame(DuckDB.execute(connection, "SELECT f_add(a, b) as result FROM test1"))
    @test isequal(added.result, [missing, missing, 6])
end
# Times macro-generated UDFs against the equivalent Julia broadcast and the
# equivalent built-in DuckDB expression, for Int and Float64 columns, and checks
# that the UDF results match the Julia results. Timings are only logged.
@testset "UDF Macro Benchmark" begin
    # Check if the generated UDF is comparable to pure Julia or DuckDB expressions
    #
    # Currently UDFs takes about as much time as Julia/DuckDB expressions
    #  - The evaluation of the wrapper takes around 20% of the execution time
    #  - slow calls are setindex! and getindex
    #  - table_scan_func is the slowest call
    db = DuckDB.DB()
    con = DuckDB.connect(db)
    fun_int = DuckDB.@create_scalar_function mysum(a::Int, b::Int)::Int
    fun_float = DuckDB.@create_scalar_function mysum_f(a::Float64, b::Float64)::Float64 mysum
    DuckDB.register_scalar_function(con, fun_int) # Register UDF
    DuckDB.register_scalar_function(con, fun_float) # Register UDF
    N = 10_000_000
    df = DataFrame(a = 1:N, b = 1:N, c = rand(N), d = rand(N))
    DuckDB.register_table(con, df, "test1")
    # Warm-up: compile the Julia methods and run each UDF query once so that the
    # timings below do not include first-call compilation.
    precompile(mysum, (Int, Int))
    precompile(mysum, (Float64, Float64))
    DuckDB.execute(con, "SELECT mysum(a, b) as result FROM test1")
    DuckDB.execute(con, "SELECT mysum_f(c, d) as result FROM test1")
    # INTEGER Benchmark
    t1 = @elapsed result_exp = df.a .+ df.b
    t2 = @elapsed result = DuckDB.execute(con, "SELECT mysum(a, b) as result FROM test1")
    t3 = @elapsed result2 = DuckDB.execute(con, "SELECT a + b as result FROM test1")
    @test DataFrame(result).result == result_exp
    # Prints:
    # Benchmark Int: Julia Expression: 0.092947083, UDF: 0.078665125, DDB: 0.065306042
    @info "Benchmark Int: Julia Expression: $t1, UDF: $t2, DDB: $t3"
    # FLOAT Benchmark
    t1 = @elapsed result_exp = df.c .+ df.d
    t2 = @elapsed result = DuckDB.execute(con, "SELECT mysum_f(c, d) as result FROM test1")
    t3 = @elapsed result2 = DuckDB.execute(con, "SELECT c + d as result FROM test1")
    # Floating-point comparison: `≈` with an absolute tolerance. Without the operator
    # this line is not a valid `@test` invocation.
    @test DataFrame(result).result ≈ result_exp atol = 1e-6
    # Prints:
    # Benchmark Float: Julia Expression: 0.090409625, UDF: 0.080781, DDB: 0.054156167
    @info "Benchmark Float: Julia Expression: $t1, UDF: $t2, DDB: $t3"
end

View File

@@ -0,0 +1,327 @@
# test_sqlite.jl
# tests adopted from SQLite.jl
using Tables
# Runs `f(con)` against a fresh in-memory database pre-loaded with the sample
# tables, each created from `../data/<table>.parquet` relative to this file.
# The connection is always closed afterwards, including when loading a table or
# `f` itself throws. Returns whatever `f` returns. Extra positional `args` are
# accepted but not used.
function setup_clean_test_db(f::Function, args...)
    tables = [
        "album",
        "artist",
        "customer",
        "employee",
        "genre",
        "invoice",
        "invoiceline",
        "mediatype",
        "playlist",
        "playlisttrack",
        "track"
    ]
    datadir = joinpath(@__DIR__, "../data")
    con = DBInterface.connect(DuckDB.DB)
    try
        # Loading happens inside the `try` so that a missing or unreadable parquet
        # file does not leak the connection.
        for table in tables
            DBInterface.execute(con, "CREATE TABLE $table AS SELECT * FROM '$datadir/$table.parquet'")
        end
        return f(con)
    finally
        close(con)
    end
end
# `DBInterface.connect(DuckDB.DB)` hands back a `DuckDB.DB` object.
@testset "DB Connection" begin
    handle = DBInterface.connect(DuckDB.DB)
    @test handle isa DuckDB.DB
    DBInterface.close!(handle)
end
# A value cast to INT64 in SQL must arrive in Julia as an `Int64`.
@testset "Issue #207: 32 bit integers" begin
    setup_clean_test_db() do db
        columns = columntable(DBInterface.execute(db, "SELECT 42::INT64 a FROM Track LIMIT 1"))
        @test columns.a[1] isa Int64
    end
end
# Both a syntax error and a reference to a non-existent table are reported as
# `DuckDB.QueryException`.
@testset "Regular DuckDB Tests" begin
    setup_clean_test_db() do db
        # not valid SQL at all
        @test_throws DuckDB.QueryException DBInterface.execute(db, "just some syntax error")
        # syntax correct, table missing
        missing_table_sql = "SELECT name FROM sqlite_nomaster WHERE type='table';"
        @test_throws DuckDB.QueryException DBInterface.execute(db, missing_table_sql)
    end
end
# Closing a query result twice must not throw. There are no assertions; an
# exception from either call fails the testset.
@testset "close!(query)" begin
    setup_clean_test_db() do db
        listing = DBInterface.execute(db, "SELECT name FROM sqlite_master WHERE type='table';")
        DBInterface.close!(listing)
        # second close: test it doesn't throw on double-close
        DBInterface.close!(listing)
    end
end
# `sqlite_master` lists the eleven tables created by `setup_clean_test_db` in a
# single `name` column.
@testset "Query tables" begin
    setup_clean_test_db() do db
        catalog = columntable(DBInterface.execute(db, "SELECT name FROM sqlite_master WHERE type='table';"))
        @test length(catalog) == 1
        @test keys(catalog) == (:name,)
        @test length(catalog.name) == 11
    end
end
# The function-first form `DBInterface.execute(f, ...)` gives the same result as
# piping the query result into `f`, for both SQL strings and prepared statements,
# and an error raised inside `f` propagates to the caller.
@testset "DBInterface.execute([f])" begin
    setup_clean_test_db() do db
        # pipe approach: the reference result
        reference = columntable(DBInterface.execute(db, "SELECT * FROM Employee;"))
        @test length(reference) == 15
        @test length(reference[1]) == 8

        # callable approach, directly on SQL text
        @test isequal(DBInterface.execute(columntable, db, "SELECT * FROM Employee"), reference)

        # both approaches on a prepared statement
        employees_stmt = DBInterface.prepare(db, "SELECT * FROM Employee")
        @test isequal(columntable(DBInterface.execute(employees_stmt)), reference)
        @test isequal(DBInterface.execute(columntable, employees_stmt), reference)

        @testset "throwing from f()" begin
            f(::DuckDB.QueryResult) = error("I'm throwing!")
            @test_throws ErrorException DBInterface.execute(f, employees_stmt)
            @test_throws ErrorException DBInterface.execute(f, db, "SELECT * FROM Employee")
        end
        DBInterface.close!(employees_stmt)
    end
end
# `isempty` applied to a query result is false when rows match and true when the
# filter matches nothing.
@testset "isempty(::Query)" begin
    setup_clean_test_db() do db
        has_no_rows = DBInterface.execute(isempty, db, "SELECT * FROM Employee")
        @test !has_no_rows
        has_no_joanne = DBInterface.execute(isempty, db, "SELECT * FROM Employee WHERE FirstName='Joanne'")
        @test has_no_joanne
    end
end
# A query that matches no rows must still report the same column names, and
# compatible column types, as the unfiltered query over the same table.
@testset "empty query has correct schema and return type" begin
    setup_clean_test_db() do db
        empty_scheme = DBInterface.execute(Tables.schema, db, "SELECT * FROM Employee WHERE FirstName='Joanne'")
        # Compare against the unfiltered query; running the empty query a second time
        # would make the two checks below trivially true.
        all_scheme = DBInterface.execute(Tables.schema, db, "SELECT * FROM Employee")
        @test empty_scheme.names == all_scheme.names
        @test all(ea -> ea[1] <: ea[2], zip(empty_scheme.types, all_scheme.types))
        empty_tbl = DBInterface.execute(columntable, db, "SELECT * FROM Employee WHERE FirstName='Joanne'")
        all_tbl = DBInterface.execute(columntable, db, "SELECT * FROM Employee")
        @test propertynames(empty_tbl) == propertynames(all_tbl)
    end
end
# Transaction handling: rollback/commit without an open transaction throw, a
# rolled-back update leaves the data unchanged, and a committed update is visible.
@testset "Create table, run commit/rollback tests" begin
    setup_clean_test_db() do db
        peek_sql = "select * from temp limit 10"

        # Working copy of album with an extra integer column set to 2014
        DBInterface.execute(db, "create table temp as select * from album")
        DBInterface.execute(db, "alter table temp add column colyear int")
        DBInterface.execute(db, "update temp set colyear = 2014")
        snapshot = columntable(DBInterface.execute(db, peek_sql))
        @test length(snapshot) == 4 && length(snapshot[1]) == 10
        @test all(==(2014), snapshot[4])

        # No transaction is open yet
        @test_throws DuckDB.QueryException DuckDB.rollback(db)
        @test_throws DuckDB.QueryException DuckDB.commit(db)

        # Update inside a transaction, then roll back: still 2014
        DuckDB.transaction(db)
        DBInterface.execute(db, "update temp set colyear = 2015")
        DuckDB.rollback(db)
        snapshot = columntable(DBInterface.execute(db, peek_sql))
        @test all(==(2014), snapshot[4])

        # Update inside a transaction, then commit: now 2015
        DuckDB.transaction(db)
        DBInterface.execute(db, "update temp set colyear = 2015")
        DuckDB.commit(db)
        snapshot = columntable(DBInterface.execute(db, peek_sql))
        @test all(==(2015), snapshot[4])
    end
end
# A Julia `Date` bound to a prepared UPDATE is stored in a DATE column and read
# back as a `Date`.
@testset "Dates" begin
    setup_clean_test_db() do db
        DBInterface.execute(db, "create table temp as select * from album")
        DBInterface.execute(db, "alter table temp add column dates date")
        new_year = Date(2014, 1, 1)
        set_dates = DBInterface.prepare(db, "update temp set dates = ?")
        DBInterface.execute(set_dates, (new_year,))
        snapshot = columntable(DBInterface.execute(db, "select * from temp limit 10"))
        @test length(snapshot) == 4 && length(snapshot[1]) == 10
        @test isa(snapshot[4][1], Date)
        @test all(Bool[value == Date(2014, 1, 1) for value in snapshot[4]])
        DBInterface.execute(db, "drop table temp")
    end
end
# Parameter binding through `DBInterface.execute(db, sql, params)`: vectors,
# numbered `?N` placeholders, and named tuples / keyword arguments supplied to
# plain `?` placeholders.
@testset "Prepared Statements" begin
    setup_clean_test_db() do db
        # --- parameters passed as a Vector ---
        DBInterface.execute(db, "CREATE TABLE temp AS SELECT * FROM Album")
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp LIMIT ?", [3]))
        @test length(tbl) == 3 && length(tbl[1]) == 3
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp WHERE Title ILIKE ?", ["%time%"]))
        @test tbl[1] == [76, 111, 187]
        # numbered placeholders: ?3 takes the third bound value, ?2 the second
        DBInterface.execute(db, "INSERT INTO temp VALUES (?1, ?3, ?2)", [0, 0, "Test Album"])
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp WHERE AlbumId = 0"))
        @test tbl[1][1] == 0
        @test tbl[2][1] == "Test Album"
        @test tbl[3][1] == 0
        DuckDB.drop!(db, "temp")

        # --- parameters passed as NamedTuple / keyword arguments ---
        DBInterface.execute(db, "CREATE TABLE temp AS SELECT * FROM Album")
        # FIXME Does it make sense to use named parameters here?
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp LIMIT ?", (a = 3,)))
        @test length(tbl) == 3 && length(tbl[1]) == 3
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp LIMIT ?", a = 3))
        @test length(tbl) == 3 && length(tbl[1]) == 3
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp WHERE Title ILIKE ?", (word = "%time%",)))
        @test tbl[1] == [76, 111, 187]
        # FIXME: these are supposed to be named parameter tests, but we don't support that yet
        DBInterface.execute(db, "INSERT INTO temp VALUES (?, ?, ?)", (lid = 0, title = "Test Album", rid = 1))
        DBInterface.execute(db, "INSERT INTO temp VALUES (?, ?, ?)", lid = 400, title = "Test2 Album", rid = 3)
        tbl = columntable(DBInterface.execute(db, "SELECT * FROM temp WHERE AlbumId IN (0, 400)"))
        @test tbl[1] == [0, 400]
        @test tbl[2] == ["Test Album", "Test2 Album"]
        @test tbl[3] == [1, 3]
        DuckDB.drop!(db, "temp")
    end
end
# Inserts one row through bound parameters and checks the Julia types of the
# values read back, both per value (via `Tuple(row)`) and in the table schema.
@testset "DuckDB to Julia type conversion" begin
    binddb = DBInterface.connect(DuckDB.DB)
    # The continuation lines of this SQL literal are kept unindented so that the
    # string itself is unchanged.
    DBInterface.execute(
        binddb,
        "CREATE TABLE temp (n INTEGER, i1 INT, i2 integer,
f1 REAL, f2 FLOAT, f3 DOUBLE,
s1 TEXT, s2 CHAR(10), s3 VARCHAR(15), s4 NVARCHAR(5),
d1 DATETIME, ts1 TIMESTAMP)"
    )
    bound_row = [
        missing,
        Int64(6),
        Int64(4),
        6.4,
        6.3,
        Int64(7),
        "some long text",
        "short text",
        "another text",
        "short",
        "2021-02-21",
        "2021-02-12 12:01:32"
    ]
    DBInterface.execute(binddb, "INSERT INTO temp VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", bound_row)

    fetched_rows = DBInterface.execute(rowtable, binddb, "SELECT * FROM temp")
    @test length(fetched_rows) == 1
    only_row = first(fetched_rows)
    @test typeof.(Tuple(only_row)) ==
          (Missing, Int32, Int32, Float32, Float32, Float64, String, String, String, String, DateTime, DateTime)

    # Issue #4809: Concrete `String` types.
    # Check the schema directly rather than the values, so nothing narrows the
    # types `execute` reported before they are compared.
    reported = Tables.schema(fetched_rows)
    @test nonmissingtype.(reported.types) ==
          (Int32, Int32, Int32, Float32, Float32, Float64, String, String, String, String, DateTime, DateTime)
end
# Opening a database file inside a directory that does not exist must raise a
# ConnectionException.
@testset "Issue #158: Missing DB File" begin
    unreachable_path = "nonexistentdir/not_there.db"
    @test_throws DuckDB.ConnectionException DuckDB.DB(unreachable_path)
end
# Bound parameter values must stay alive for as long as the query result is being
# iterated, and supplying more values than placeholders must be rejected.
@testset "Issue #180, Query" begin
    bound_text = "Hello!"
    pending = DBInterface.execute(DuckDB.DB(), "SELECT ?1 UNION ALL SELECT ?1", [bound_text])
    # Rebind the local so this testset no longer references the original string.
    bound_text = "x"
    for record in pending
        @test record[1] == "Hello!"
        GC.gc() # this must NOT garbage collect the "Hello!" bound value
    end

    db = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(db, "CREATE TABLE T (a TEXT, PRIMARY KEY (a))")
    insert_one = DBInterface.prepare(db, "INSERT INTO T VALUES(?)")
    DBInterface.execute(insert_one, ["a"])
    # two values for a single placeholder
    @test_throws DuckDB.QueryException DBInterface.execute(insert_one, [1, "a"])
end
# `show` on a default-constructed database prints `DuckDB.DB(":memory:")`.
@testset "show(DB)" begin
    database = DuckDB.DB()
    buffer = IOBuffer()
    show(buffer, database)
    printed = String(take!(buffer))
    @test printed == "DuckDB.DB(\":memory:\")"
    DBInterface.close!(database)
end
# `DuckDB.execute` accepts prepared statements (with tuple or vector parameters)
# and SQL text plus parameters; the query result satisfies the Tables.jl row
# interface and has unknown iterator size.
@testset "DuckDB.execute()" begin
    db = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(db, "CREATE TABLE T (x INT UNIQUE)")

    # prepared statement, parameters as a Tuple and then as a Vector
    inserter = DBInterface.prepare(db, "INSERT INTO T VALUES(?)")
    DuckDB.execute(inserter, (1,))
    cols = columntable(DBInterface.execute(db, "SELECT * FROM T"))
    @test cols[1] == [1]
    DuckDB.execute(inserter, [2])
    cols = columntable(DBInterface.execute(db, "SELECT * FROM T"))
    @test cols[1] == [1, 2]

    # a second statement prepared from the same SQL text
    inserter = DBInterface.prepare(db, "INSERT INTO T VALUES(?)")
    DuckDB.execute(inserter, [3])
    cols = DBInterface.execute(columntable, db, "SELECT * FROM T")
    @test cols[1] == [1, 2, 3]
    DuckDB.execute(inserter, [4])
    cols = DBInterface.execute(columntable, db, "SELECT * FROM T")
    @test cols[1] == [1, 2, 3, 4]

    # SQL text plus parameters, without an explicit prepare
    DuckDB.execute(db, "INSERT INTO T VALUES(?)", [5])
    cols = DBInterface.execute(columntable, db, "SELECT * FROM T")
    @test cols[1] == [1, 2, 3, 4, 5]

    # SQL passed as the result of `strip` rather than a String literal
    cols = columntable(DBInterface.execute(db, strip("   SELECT * FROM T  ")))
    @test cols[1] == [1, 2, 3, 4, 5]

    # Tables.jl interface of the raw result
    raw = DBInterface.execute(db, "SELECT * FROM T")
    @test Tables.istable(raw)
    @test Tables.rowaccess(raw)
    @test Tables.rows(raw) === raw
    @test Base.IteratorSize(typeof(raw)) == Base.SizeUnknown()
    # Fetch the first row; it is not inspected further.
    head_row = first(raw)
end
# `DBInterface.lastrowid` is expected to throw `NotImplementedException` for DuckDB.
# Identifier escaping is checked by the separate "Escaping" testset, so the stray
# `esc_id` assertion that used to be duplicated here has been removed.
@testset "last_insert_rowid unsupported" begin
    db = DBInterface.connect(DuckDB.DB)
    @test_throws DuckDB.NotImplementedException DBInterface.lastrowid(db)
    # release the connection explicitly instead of leaving it open
    DBInterface.close!(db)
end
# `esc_id` applied to a vector is expected to double-quote each identifier and
# join the results with commas.
@testset "Escaping" begin
    identifiers = ["1", "2", "3"]
    @test DuckDB.esc_id(identifiers) == "\"1\",\"2\",\"3\""
end
# Issue #253: duplicate column aliases must come back de-duplicated with numeric
# suffixes, including when a suffixed name collides with an existing alias.
@testset "Issue #253: Ensure query column names are unique by default" begin
    db = DuckDB.DB()
    sql = "select 1 as x2, 2 as x2, 3 as x2, 4 as x2_2"
    columns = columntable(DBInterface.execute(db, sql))
    expected = (x2 = [1], x2_1 = [2], x2_2 = [3], x2_2_1 = [4])
    @test columns == expected
end
# `DuckDB.drop!` must quote the table name itself: the name used here contains a
# space, a dot and a percent sign.
@testset "drop!() table name escaping" begin
    db = DuckDB.DB()
    select_sql = "SELECT * FROM \"escape 10.0%\""
    DBInterface.execute(db, "CREATE TABLE \"escape 10.0%\"(i INTEGER)")
    # the table can be queried right after creation
    DBInterface.execute(db, select_sql)
    # drop it by its raw, unquoted name
    DuckDB.drop!(db, "escape 10.0%")
    # querying it afterwards has to fail
    @test_throws DuckDB.QueryException DBInterface.execute(db, select_sql)
end

View File

@@ -0,0 +1,96 @@
# test_stream_data_chunk.jl
# Runs the same 10,000-row query once as a materialized result and once as a
# streaming result, consumes it chunk by chunk, and checks the total row count and
# the sum of all values.
@testset "Test streaming result sets" begin
    for result_type in (DuckDB.MaterializedResult, DuckDB.StreamResult)
        con = DBInterface.connect(DuckDB.DB)
        res = DBInterface.execute(con, "SELECT * FROM range(10000) t(i)", result_type)
        @test res.names == [:i]
        @test res.types == [Union{Missing, Int64}]
        # Accumulators are named so that they do not shadow `Base.sum` / `Base.count`.
        value_total::Int64 = 0
        row_total::Int64 = 0
        while true
            # fetch the next chunk; `missing` means every chunk has been consumed
            chunk = DuckDB.nextDataChunk(res)
            chunk === missing && break
            # only the first `chunk_rows` entries of the column array are read
            chunk_rows = DuckDB.get_size(chunk)
            data = DuckDB.get_array(chunk, 1, Int64)
            for i in 1:chunk_rows
                value_total += data[i]
            end
            row_total += chunk_rows
            DuckDB.destroy_data_chunk(chunk)
        end
        @test value_total == 49995000
        @test row_total == 10000
        # Close the result and the connection explicitly, mirroring the
        # "Test giant streaming result" testset, so each iteration cleans up after itself.
        DBInterface.close!(res)
        DBInterface.close!(con)
    end
    GC.gc(true)
end
# Streams a 10^12-row range and reads only the first three chunks; the comment in
# the original test notes this would take forever if the result were not streamed.
@testset "Test giant streaming result" begin
    con = DBInterface.connect(DuckDB.DB)
    res = DBInterface.execute(con, "SELECT * FROM range(1000000000000) t(i)", DuckDB.StreamResult)
    @test res.names == [:i]
    @test res.types == [Union{Missing, Int64}]
    # pull exactly three chunks, destroying each one as soon as it was checked
    fetched = 0
    while fetched < 3
        chunk = DuckDB.nextDataChunk(res)
        @test chunk !== missing
        DuckDB.destroy_data_chunk(chunk)
        fetched += 1
    end
    # close the result before the connection it came from
    DBInterface.close!(res)
    DBInterface.close!(con)
    GC.gc(true)
end
# Streams three parquet resources with nested/map/list columns, converts every
# data column of every chunk to Julia values, and destroys each chunk afterwards.
@testset "Test streaming data chunk destruction" begin
    parquet_files = ["types_map.parquet", "types_list.parquet", "types_nested.parquet"]
    for file_name in parquet_files
        # fresh in-memory database for every file
        db = DBInterface.connect(DuckDB.DB)
        stmt = DuckDB.Stmt(db, "SELECT * FROM read_parquet(?, file_row_number=1)", DuckDB.StreamResult)
        parquet_path = joinpath(@__DIR__, "resources", file_name)
        stream = DBInterface.execute(stmt, [parquet_path])
        column_count = length(stream.types)
        while true
            chunk = DuckDB.nextDataChunk(stream)
            if chunk === missing
                # no more chunks
                break
            end
            row_count = DuckDB.get_size(chunk)
            # the last column holds the file row numbers added by `file_row_number=1`
            row_numbers = DuckDB.get_array(chunk, column_count, Int64)
            for col in 1:(column_count - 1)
                name::Symbol = stream.names[col]
                # convert the DuckDB-internal column representation into Julia values
                logical_type = DuckDB.LogicalType(DuckDB.duckdb_column_logical_type(stream.handle, col))
                conversion = DuckDB.ColumnConversionData([chunk], col, logical_type, nothing)
                converted = DuckDB.convert_column(conversion)
                for r in 1:row_count
                    one_based_row = row_numbers[r] + 1 # julia indices start at 1
                    cell = converted[r]
                    @test cell !== missing
                end
            end
            DuckDB.destroy_data_chunk(chunk)
        end
        close(db)
    end
    GC.gc(true)
end

View File

@@ -0,0 +1,223 @@
# test_table_function.jl
# Bind-time data for the test table functions: `count` is the row limit that the
# main callbacks compare their position against.
struct MyBindStruct
    count::Int64

    # Only an `Int64` argument is accepted; no converting constructor is defined.
    MyBindStruct(count::Int64) = new(count)
end
# Bind callback shared by the test table functions: declares a single `Int64`
# result column named "forty_two" and stores the parameter at index 0 as the
# row limit in a `MyBindStruct`.
function my_bind_function(info::DuckDB.BindInfo)
    DuckDB.add_result_column(info, "forty_two", Int64)
    row_limit = DuckDB.getvalue(DuckDB.get_parameter(info, 0), Int64)
    return MyBindStruct(row_limit)
end
# Mutable scan state for the test table functions: `pos` counts the rows emitted
# so far and starts at zero.
mutable struct MyInitStruct
    pos::Int64

    MyInitStruct() = new(0)
end
# Init callback: every scan starts from a fresh `MyInitStruct` (position 0).
# The `info` argument is not inspected.
my_init_function(info::DuckDB.InitInfo) = MyInitStruct()
# Main callback that fills the first output column with 42 (even positions) or 84
# (odd positions) and prints every value it writes. At most `DuckDB.VECTOR_SIZE`
# rows are produced per call, and never more than the bound row limit in total.
function my_main_function_print(info::DuckDB.FunctionInfo, output::DuckDB.DataChunk)
    bind_data = DuckDB.get_bind_info(info, MyBindStruct)
    state = DuckDB.get_init_info(info, MyInitStruct)
    out = DuckDB.get_array(output, 1, Int64)
    produced = 0
    while produced < DuckDB.VECTOR_SIZE && state.pos < bind_data.count
        slot = produced + 1
        out[slot] = iseven(state.pos) ? 42 : 84
        # We print within the table function to test behavior with synchronous API calls in Julia table functions
        println(out[slot])
        produced += 1
        state.pos += 1
    end
    # tell DuckDB how many rows of the chunk are valid
    DuckDB.set_size(output, produced)
    return
end
# Main callback that fills the first output column with 42 (even positions) or 84
# (odd positions). At most `DuckDB.VECTOR_SIZE` rows are produced per call, and
# never more than the bound row limit in total.
function my_main_function(info::DuckDB.FunctionInfo, output::DuckDB.DataChunk)
    bind_data = DuckDB.get_bind_info(info, MyBindStruct)
    state = DuckDB.get_init_info(info, MyInitStruct)
    out = DuckDB.get_array(output, 1, Int64)
    produced = 0
    while produced < DuckDB.VECTOR_SIZE && state.pos < bind_data.count
        out[produced + 1] = iseven(state.pos) ? 42 : 84
        produced += 1
        state.pos += 1
    end
    # tell DuckDB how many rows of the chunk are valid
    DuckDB.set_size(output, produced)
    return
end
# Main callback that writes 42 at even positions and marks odd positions as
# invalid (NULL) through the column's validity mask. At most `DuckDB.VECTOR_SIZE`
# rows are produced per call, and never more than the bound row limit in total.
function my_main_function_nulls(info::DuckDB.FunctionInfo, output::DuckDB.DataChunk)
    bind_data = DuckDB.get_bind_info(info, MyBindStruct)
    state = DuckDB.get_init_info(info, MyInitStruct)
    out = DuckDB.get_array(output, 1, Int64)
    mask = DuckDB.get_validity(output, 1)
    produced = 0
    while produced < DuckDB.VECTOR_SIZE && state.pos < bind_data.count
        slot = produced + 1
        if iseven(state.pos)
            out[slot] = 42
        else
            DuckDB.setinvalid(mask, slot)
        end
        produced += 1
        state.pos += 1
    end
    # tell DuckDB how many rows of the chunk are valid
    DuckDB.set_size(output, produced)
    return
end
# Registers the printing variant of the table function and checks that it still
# returns correct results, for a tiny scan and for one spanning several chunks.
@testset "Test custom table functions that produce IO" begin
    con = DBInterface.connect(DuckDB.DB)
    DuckDB.create_table_function(
        con, "forty_two_print", [Int64], my_bind_function, my_init_function, my_main_function_print
    )
    GC.gc()

    # three rows: 42, 84, 42
    res = DBInterface.execute(con, "SELECT * FROM forty_two_print(3)")
    GC.gc()
    frame = DataFrame(res)
    @test names(frame) == ["forty_two"]
    @test size(frame, 1) == 3
    @test frame.forty_two == [42, 84, 42]

    # more rows than fit into a single vector
    res = DBInterface.execute(con, "SELECT COUNT(*) cnt FROM forty_two_print(10000)")
    GC.gc()
    frame = DataFrame(res)
    @test frame.cnt == [10000]
end
# Registers the plain and the NULL-producing table functions and checks their
# output for a tiny scan and for scans spanning several chunks.
@testset "Test custom table functions" begin
    con = DBInterface.connect(DuckDB.DB)
    DuckDB.create_table_function(con, "forty_two", [Int64], my_bind_function, my_init_function, my_main_function)
    GC.gc()

    # three rows: 42, 84, 42
    res = DBInterface.execute(con, "SELECT * FROM forty_two(3)")
    GC.gc()
    frame = DataFrame(res)
    @test names(frame) == ["forty_two"]
    @test size(frame, 1) == 3
    @test frame.forty_two == [42, 84, 42]

    # more rows than fit into a single vector
    res = DBInterface.execute(con, "SELECT COUNT(*) cnt FROM forty_two(10000)")
    GC.gc()
    frame = DataFrame(res)
    @test frame.cnt == [10000]

    # a table function may also return NULLs: every odd position is invalid,
    # so only half of the rows are counted by COUNT(forty_two)
    DuckDB.create_table_function(
        con, "forty_two_nulls", [Int64], my_bind_function, my_init_function, my_main_function_nulls
    )
    res = DBInterface.execute(con, "SELECT COUNT(*) total_cnt, COUNT(forty_two) cnt FROM forty_two_nulls(10000)")
    frame = DataFrame(res)
    @test frame.total_cnt == [10000]
    @test frame.cnt == [5000]
end
# Bind callback that always fails by throwing the string "bind error".
my_bind_error_function(info::DuckDB.BindInfo) = throw("bind error")
# Init callback that always fails by throwing the string "init error".
my_init_error_function(info::DuckDB.InitInfo) = throw("init error")
# Main callback that always fails by throwing the string "runtime error".
my_main_error_function(info::DuckDB.FunctionInfo, output::DuckDB.DataChunk) = throw("runtime error")
# A Julia exception thrown from the bind, init or main callback of a table
# function must surface as a `QueryException` when the function is queried.
@testset "Test table function errors" begin
    con = DBInterface.connect(DuckDB.DB)
    # one table function per failing phase; the other two callbacks are the working ones
    registrations = (
        ("bind_error_function", my_bind_error_function, my_init_function, my_main_function),
        ("init_error_function", my_bind_function, my_init_error_function, my_main_function),
        ("main_error_function", my_bind_function, my_init_function, my_main_error_function),
    )
    for (fn_name, bind_fn, init_fn, main_fn) in registrations
        DuckDB.create_table_function(con, fn_name, [Int64], bind_fn, init_fn, main_fn)
    end
    failing_queries = (
        "SELECT * FROM bind_error_function(3)",
        "SELECT * FROM init_error_function(3)",
        "SELECT * FROM main_error_function(3)",
    )
    for sql in failing_queries
        @test_throws DuckDB.QueryException DBInterface.execute(con, sql)
    end
end

View File

@@ -0,0 +1,328 @@
# test_tbl_scan.jl
# Registers a DataFrame as the table "my_df" and reads it back unchanged, forcing
# garbage collection around the query.
@testset "Test standard DataFrame scan" begin
    con = DBInterface.connect(DuckDB.DB)
    frame = DataFrame(a = [1, 2, 3], b = [42, 84, 42])
    DuckDB.register_table(con, frame, "my_df")
    GC.gc()
    res = DBInterface.execute(con, "SELECT * FROM my_df")
    GC.gc()
    # from here on `frame` is the query output, not the registered input
    frame = DataFrame(res)
    @test names(frame) == ["a", "b"]
    @test size(frame, 1) == 3
    @test frame.a == [1, 2, 3]
    @test frame.b == [42, 84, 42]
    DBInterface.close!(con)
end
# Same as the DataFrame scan, but for generic Tables.jl inputs: once as a column
# table (NamedTuple of vectors) and once as a row table.
@testset "Test standard table scan" begin
    source = (a = [1, 2, 3], b = [42, 84, 42])
    for tbl in (source, Tables.rowtable(source))
        con = DBInterface.connect(DuckDB.DB)
        DuckDB.register_table(con, tbl, "my_df")
        GC.gc()
        res = DBInterface.execute(con, "SELECT * FROM my_df")
        GC.gc()
        # from here on `tbl` is the query output, not the registered input
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:a, :b)
        @test Tables.rowcount(tbl) == 3
        @test tbl.a == [1, 2, 3]
        @test tbl.b == [42, 84, 42]
        DBInterface.close!(con)
    end
end
# `missing` entries of a registered DataFrame must come back as `missing`.
@testset "Test DataFrame scan with NULL values" begin
    con = DBInterface.connect(DuckDB.DB)
    frame = DataFrame(a = [1, missing, 3], b = [missing, 84, missing])
    DuckDB.register_table(con, frame, "my_df")
    res = DBInterface.execute(con, "SELECT * FROM my_df")
    frame = DataFrame(res)
    @test names(frame) == ["a", "b"]
    @test size(frame, 1) == 3
    # `isequal` is required because `==` on `missing` yields `missing`
    @test isequal(frame.a, [1, missing, 3])
    @test isequal(frame.b, [missing, 84, missing])
    DBInterface.close!(con)
end
# `missing` entries of a registered Tables.jl table (column and row oriented) must
# come back as `missing`.
@testset "Test table scan with NULL values" begin
    source = (a = [1, missing, 3], b = [missing, 84, missing])
    for tbl in (source, Tables.rowtable(source))
        con = DBInterface.connect(DuckDB.DB)
        DuckDB.register_table(con, tbl, "my_df")
        res = DBInterface.execute(con, "SELECT * FROM my_df")
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:a, :b)
        @test Tables.rowcount(tbl) == 3
        # `isequal` is required because `==` on `missing` yields `missing`
        @test isequal(tbl.a, [1, missing, 3])
        @test isequal(tbl.b, [missing, 84, missing])
        DBInterface.close!(con)
    end
end
# Round-trips a DataFrame with nullable columns through DuckDB for every signed,
# unsigned and floating-point element type listed below.
@testset "Test DataFrame scan with numerics" begin
    con = DBInterface.connect(DuckDB.DB)
    numeric_types = [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]
    for T in numeric_types
        expected = DataFrame(a = [1, missing, 3], b = [missing, 84, missing])
        # narrow both columns to Union{T, Missing}
        for col in (:a, :b)
            expected[!, col] = convert.(Union{T, Missing}, expected[!, col])
        end
        # the same table name is re-registered on every iteration
        DuckDB.register_table(con, expected, "my_df")
        res = DBInterface.execute(con, "SELECT * FROM my_df")
        actual = DataFrame(res)
        @test isequal(actual, expected)
    end
    DBInterface.close!(con)
end
# Round-trips a nullable two-column table through DuckDB for every numeric element
# type listed below, registering it both as a column table and as a row table.
@testset "Test table scan with numerics" begin
    for tblf in (Tables.columntable, Tables.rowtable)
        con = DBInterface.connect(DuckDB.DB)
        numeric_types = [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]
        for T in numeric_types
            raw = (a = [1, missing, 3], b = [missing, 84, missing])
            # narrow every column to Union{T, Missing}
            expected = map(col -> convert.(Union{T, Missing}, col), raw)
            DuckDB.register_table(con, tblf(expected), "my_df")
            res = DBInterface.execute(con, "SELECT * FROM my_df")
            actual = columntable(res)
            @test isequal(actual, expected)
        end
        DBInterface.close!(con)
    end
end
# Round-trips nullable boolean, date, time and timestamp columns of a DataFrame.
@testset "Test DataFrame scan with various types" begin
    con = DBInterface.connect(DuckDB.DB)
    select_all = "SELECT * FROM my_df"

    # boolean
    expected = DataFrame(a = [true, false, missing])
    DuckDB.register_table(con, expected, "my_df")
    actual = DataFrame(DBInterface.execute(con, select_all))
    @test isequal(actual, expected)

    # date/time/timestamp
    expected = DataFrame(
        date = [Date(1992, 9, 20), missing, Date(1950, 2, 3)],
        time = [Time(23, 3, 1), Time(11, 49, 33), missing],
        timestamp = [DateTime(1992, 9, 20, 23, 3, 1), DateTime(1950, 2, 3, 11, 49, 3), missing]
    )
    DuckDB.register_table(con, expected, "my_df")
    actual = DataFrame(DBInterface.execute(con, select_all))
    @test isequal(actual, expected)

    DBInterface.close!(con)
end
# Round-trips nullable boolean, date, time and timestamp columns of a Tables.jl
# table, registered both column-oriented and row-oriented.
@testset "Test table scan with various types" begin
    for tblf in (Tables.columntable, Tables.rowtable)
        con = DBInterface.connect(DuckDB.DB)
        select_all = "SELECT * FROM my_df"

        # boolean
        expected = (a = [true, false, missing],)
        DuckDB.register_table(con, tblf(expected), "my_df")
        actual = columntable(DBInterface.execute(con, select_all))
        @test isequal(actual, expected)

        # date/time/timestamp
        expected = (
            date = [Date(1992, 9, 20), missing, Date(1950, 2, 3)],
            time = [Time(23, 3, 1), Time(11, 49, 33), missing],
            timestamp = [DateTime(1992, 9, 20, 23, 3, 1), DateTime(1950, 2, 3, 11, 49, 3), missing]
        )
        DuckDB.register_table(con, tblf(expected), "my_df")
        actual = columntable(DBInterface.execute(con, select_all))
        @test isequal(actual, expected)

        DBInterface.close!(con)
    end
end
# Round-trips a nullable string column of a DataFrame.
@testset "Test DataFrame scan with strings" begin
    con = DBInterface.connect(DuckDB.DB)
    # strings: a short one, a long one, a missing value and non-ASCII text
    expected = DataFrame(str = ["hello", "this is a very long string", missing, "obligatory mühleisen"])
    DuckDB.register_table(con, expected, "my_df")
    actual = DataFrame(DBInterface.execute(con, "SELECT * FROM my_df"))
    @test isequal(actual, expected)
    DBInterface.close!(con)
end
# Round-trips a nullable string column of a Tables.jl table, registered both
# column-oriented and row-oriented.
@testset "Test table scan with strings" begin
    for tblf in (Tables.columntable, Tables.rowtable)
        con = DBInterface.connect(DuckDB.DB)
        # strings: a short one, a long one, a missing value and non-ASCII text
        expected = (str = ["hello", "this is a very long string", missing, "obligatory mühleisen"],)
        DuckDB.register_table(con, tblf(expected), "my_df")
        actual = columntable(DBInterface.execute(con, "SELECT * FROM my_df"))
        @test isequal(actual, expected)
        DBInterface.close!(con)
    end
end
# Queries subsets, reorderings and repetitions of the columns of a registered
# DataFrame, plus a pure COUNT(*), with garbage collection forced between steps.
@testset "Test DataFrame scan projection pushdown" begin
    con = DBInterface.connect(DuckDB.DB)
    frame = DataFrame(a = [1, 2, 3], b = [42, 84, 42], c = [3, 7, 18])
    DuckDB.register_table(con, frame, "my_df")
    GC.gc()

    # single column
    res = DBInterface.execute(con, "SELECT b FROM my_df")
    GC.gc()
    # `frame` is rebound to the query output; the registered input is no longer
    # referenced by this local for the remaining queries
    frame = DataFrame(res)
    @test names(frame) == ["b"]
    @test size(frame, 1) == 3
    @test frame.b == [42, 84, 42]

    # two columns in a different order than in the source
    res = DBInterface.execute(con, "SELECT c, b FROM my_df")
    GC.gc()
    frame = DataFrame(res)
    @test names(frame) == ["c", "b"]
    @test size(frame, 1) == 3
    @test frame.b == [42, 84, 42]
    @test frame.c == [3, 7, 18]

    # the same column twice; the duplicate gets a `_1` suffix
    res = DBInterface.execute(con, "SELECT c, a, a FROM my_df")
    GC.gc()
    frame = DataFrame(res)
    @test names(frame) == ["c", "a", "a_1"]
    @test size(frame, 1) == 3
    @test frame.c == [3, 7, 18]
    @test frame.a == [1, 2, 3]
    @test frame.a_1 == [1, 2, 3]

    # no column referenced at all
    res = DBInterface.execute(con, "SELECT COUNT(*) cnt FROM my_df")
    GC.gc()
    frame = DataFrame(res)
    @test names(frame) == ["cnt"]
    @test size(frame, 1) == 1
    @test frame.cnt == [3]

    GC.gc()
    DBInterface.close!(con)
end
# Queries subsets, reorderings and repetitions of the columns of a registered
# Tables.jl table (column and row oriented), plus a pure COUNT(*), with garbage
# collection forced between steps.
@testset "Test table scan projection pushdown" begin
    for tblf in (Tables.columntable, Tables.rowtable)
        con = DBInterface.connect(DuckDB.DB)
        tbl = (a = [1, 2, 3], b = [42, 84, 42], c = [3, 7, 18])
        DuckDB.register_table(con, tblf(tbl), "my_df")
        GC.gc()

        # single column
        res = DBInterface.execute(con, "SELECT b FROM my_df")
        GC.gc()
        # `tbl` is rebound to the query output; the source table is no longer
        # referenced by this local for the remaining queries
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:b,)
        @test Tables.rowcount(tbl) == 3
        @test tbl.b == [42, 84, 42]

        # two columns in a different order than in the source
        res = DBInterface.execute(con, "SELECT c, b FROM my_df")
        GC.gc()
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:c, :b)
        @test Tables.rowcount(tbl) == 3
        @test tbl.b == [42, 84, 42]
        @test tbl.c == [3, 7, 18]

        # the same column twice; the duplicate gets a `_1` suffix
        res = DBInterface.execute(con, "SELECT c, a, a FROM my_df")
        GC.gc()
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:c, :a, :a_1)
        @test Tables.rowcount(tbl) == 3
        @test tbl.c == [3, 7, 18]
        @test tbl.a == [1, 2, 3]
        @test tbl.a_1 == [1, 2, 3]

        # no column referenced at all
        res = DBInterface.execute(con, "SELECT COUNT(*) cnt FROM my_df")
        GC.gc()
        tbl = columntable(res)
        @test Tables.columnnames(tbl) == (:cnt,)
        @test Tables.rowcount(tbl) == 1
        @test tbl.cnt == [3]

        GC.gc()
        DBInterface.close!(con)
    end
end
# Materializes ten million rows of `i % 5` as a DataFrame, registers it, and sums
# it back through DuckDB (2,000,000 rows each of 0..4 gives 20,000,000).
@testset "Test large DataFrame scan" begin
    con = DBInterface.connect(DuckDB.DB)
    generated = DBInterface.execute(con, "SELECT i%5 AS i FROM range(10000000) tbl(i)")
    my_df = DataFrame(generated)
    DuckDB.register_table(con, my_df, "my_df")
    GC.gc()
    res = DBInterface.execute(con, "SELECT SUM(i) AS sum FROM my_df")
    GC.gc()
    summary = DataFrame(res)
    @test names(summary) == ["sum"]
    @test size(summary, 1) == 1
    @test summary.sum == [20000000]
    DBInterface.close!(con)
end
# Materializes ten million rows of `i % 5` as a column table and as a row table,
# registers each, and sums it back through DuckDB (expected total 20,000,000).
@testset "Test large table scan" begin
    for tblf in (Tables.columntable, Tables.rowtable)
        con = DBInterface.connect(DuckDB.DB)
        generated = DBInterface.execute(con, "SELECT i%5 AS i FROM range(10000000) tbl(i)")
        my_df = tblf(generated)
        DuckDB.register_table(con, my_df, "my_df")
        GC.gc()
        res = DBInterface.execute(con, "SELECT SUM(i) AS sum FROM my_df")
        GC.gc()
        summary = columntable(res)
        @test Tables.columnnames(summary) == (:sum,)
        @test Tables.rowcount(summary) == 1
        @test summary.sum == [20000000]
        DBInterface.close!(con)
    end
end

View File

@@ -0,0 +1,12 @@
# test_threading.jl
# Builds a 100-million-row table and sums it; the expected value is the closed
# form n*(n-1)/2 for n = 100,000,000.
@testset "Test threading" begin
    con = DBInterface.connect(DuckDB.DB)
    DBInterface.execute(con, "CREATE TABLE integers AS SELECT * FROM range(100000000) t(i)")
    totals = DataFrame(DBInterface.execute(con, "SELECT SUM(i) sum FROM integers"))
    @test totals.sum == [4999999950000000]
    DBInterface.close!(con)
end

View File

@@ -0,0 +1,59 @@
# test_tpch.jl
# DuckDB needs to have been built with TPCH (BUILD_TPCH=1) to run this test!
# Generates TPC-H data at scale factor 0.1 inside DuckDB, copies every table into
# a Julia DataFrame, registers those DataFrames in a second in-memory database, and
# checks that all 22 TPC-H queries return the same result on both databases.
# The whole test is skipped when `dbgen` cannot be called.
@testset "Test TPC-H" begin
    sf = "0.1"
    table_names = ("customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier")

    # load TPC-H into DuckDB
    native_con = DBInterface.connect(DuckDB.DB)
    try
        DBInterface.execute(native_con, "CALL dbgen(sf=$sf)")
    catch err
        # Report why the test is skipped instead of discarding the exception, and
        # release the connection that was already opened.
        @info "TPC-H extension not available; skipping" exception = err
        DBInterface.close!(native_con)
        return
    end

    # convert all tables to Julia DataFrames; the vector keeps them referenced
    frames = [name => DataFrame(DBInterface.execute(native_con, "SELECT * FROM $name")) for name in table_names]

    # now open a new in-memory database, and register the dataframes there
    df_con = DBInterface.connect(DuckDB.DB)
    for (name, frame) in frames
        DuckDB.register_table(df_con, frame, name)
    end
    GC.gc()

    # for each query, compare the result computed on the registered DataFrames
    # with the result computed on the original native tables
    for i in 1:22
        res = DataFrame(DBInterface.execute(df_con, "PRAGMA tpch($i)"))
        res2 = DataFrame(DBInterface.execute(native_con, "PRAGMA tpch($i)"))
        @test isequal(res, res2)
    end

    DBInterface.close!(df_con)
    DBInterface.close!(native_con)
end

View File

@@ -0,0 +1,54 @@
# test_tpch_multithread.jl
# DuckDB needs to have been built with TPCH (BUILD_TPCH=1) to run this test!
# Stress routine: generates TPC-H data at scale factor 0.10, copies every table
# into a Julia DataFrame, registers those DataFrames in a second in-memory
# database, and runs all 22 TPC-H queries against them ten times over.
# No tasks are spawned here; the id of the calling thread is only used to tag the
# progress output. Returns early (after logging) when `dbgen` cannot be called,
# otherwise returns the result of closing the native connection.
function test_tpch_multithread()
    sf = "0.10"
    table_names = ("customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier")

    # load TPC-H into DuckDB
    native_con = DBInterface.connect(DuckDB.DB)
    try
        DBInterface.execute(native_con, "CALL dbgen(sf=$sf)")
    catch err
        # Report why the run is skipped instead of discarding the exception, and
        # release the connection that was already opened.
        @info "TPC-H extension not available; skipping" exception = err
        DBInterface.close!(native_con)
        return
    end

    # convert all tables to Julia DataFrames; the vector keeps them referenced
    frames = [name => DataFrame(DBInterface.execute(native_con, "SELECT * FROM $name")) for name in table_names]

    id = Threads.threadid()

    # now open a new in-memory database, and register the dataframes there
    df_con = DBInterface.connect(DuckDB.DB)
    for (name, frame) in frames
        DuckDB.register_table(df_con, frame, name)
    end
    GC.gc()

    # Execute all the queries; each result is materialized and then discarded
    for _ in 1:10
        for i in 1:22
            print("T:$id | Q:$i\n")
            DataFrame(DBInterface.execute(df_con, "PRAGMA tpch($i)"))
        end
    end

    DBInterface.close!(df_con)
    return DBInterface.close!(native_con)
end
# Runs the TPC-H stress routine under a named testset. The testset contains no
# `@test` assertions of its own.
@testset "Test TPC-H Stresstest" begin
test_tpch_multithread()
end

View File

@@ -0,0 +1,23 @@
# test_transaction.jl
# `DBInterface.transaction` must roll back when its block throws and commit when
# the block finishes normally.
@testset "Test DBInterface.transaction" begin
    con = DBInterface.connect(DuckDB.DB, ":memory:")
    create_sql = "CREATE TABLE integers(i INTEGER)"
    select_sql = "SELECT * FROM integers"

    # the second statement is invalid SQL, so the block throws and the
    # CREATE TABLE before it has to be rolled back
    @test_throws DuckDB.QueryException DBInterface.transaction(con) do
        DBInterface.execute(con, create_sql)
        return DBInterface.execute(con, "SELEC")
    end
    # the table must not exist after the rollback
    @test_throws DuckDB.QueryException DBInterface.execute(con, select_sql)

    # a block that succeeds is committed
    DBInterface.transaction(con) do
        return DBInterface.execute(con, create_sql)
    end
    # querying the table now works; an exception here would fail the testset
    DBInterface.execute(con, select_sql)

    DBInterface.close!(con)
end

View File

@@ -0,0 +1,43 @@
# test_union_type.jl
# Values of a DuckDB UNION(BOOL, VARCHAR) column must come back as the Julia value
# of whichever member is set, and a NULL union as `missing`.
@testset "Test Union Type" begin
    db = DBInterface.connect(DuckDB.DB)
    con = DBInterface.connect(db)

    # a table with a single union-typed column
    DBInterface.execute(
        con,
        """
        create table tbl (
        u UNION (a BOOL, b VARCHAR)
        );
        """
    )
    # one row per union member
    DBInterface.execute(
        con,
        """
        insert into tbl VALUES('str'), (true);
        """
    )
    rows = DBInterface.execute(
        con,
        """
        select u from tbl;
        """
    )
    frame = DataFrame(rows)
    @test isequal(frame.u, ["str", true])

    # add a NULL and read everything again
    DBInterface.execute(
        con,
        """
        insert into tbl VALUES(NULL);
        """
    )
    rows = DBInterface.execute(
        con,
        """
        select u from tbl;
        """
    )
    frame = DataFrame(rows)
    @test isequal(frame.u, ["str", true, missing])
end