should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions
--- a/external/duckdb/examples/embedded-c++-windows/cppintegration.cpp
+++ b/external/duckdb/examples/embedded-c++-windows/cppintegration.cpp
@@ -0,0 +1,34 @@
+/**
+ * =====================================
+ * Simple DuckDB C++ integration test
+ * =====================================
+ */
+
+#include "duckdb.hpp"
+
+#include <iostream>
+
+using namespace duckdb;
+
+int main(int argc, char *argv[]) {
+	DuckDB db(nullptr); // no database file path is passed
+	Connection con(db);
+	auto result = con.Query("SELECT 42");
+
+	// Basic create table and insert; every CREATE TABLE carries a column list so it parses
+	con.Query("CREATE TABLE people(id INTEGER, name VARCHAR)");
+	con.Query("CREATE TABLE test(i INTEGER)");
+	con.Query("INSERT INTO people VALUES (0,'Mark'), (1, 'Hannes'), (2, 'Duck')");
+
+	// Update data: rename the row with id 2 through a prepared statement ($1 = name, $2 = id)
+	auto prepared = con.Prepare("UPDATE people SET name = $1 WHERE id = $2");
+	auto prep = prepared->Execute("DuckDb", 2);
+
+	// Delete data: remove that same row again
+	auto resultDelete = con.Query("DELETE FROM people WHERE id = 2");
+
+	// Read data: the query results in this function are stored but never inspected
+	auto resultSelect = con.Query("SELECT * FROM people");
+
+	return 0;
+}
--- a/external/duckdb/examples/embedded-c++/CMakeLists.txt
+++ b/external/duckdb/examples/embedded-c++/CMakeLists.txt
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 2.8.12...3.29)
+project(example-c++)
+
+set(CMAKE_CXX_STANDARD 11) # request C++11 for the targets defined below
+
+include_directories(../../src/include) # header search path, given relative to this directory
+link_directories(../../build/release/src) # library search path, given relative to this directory
+
+add_executable(example main.cpp)
+target_link_libraries(example duckdb) # link the example executable against the duckdb library
--- a/external/duckdb/examples/embedded-c++/main.cpp
+++ b/external/duckdb/examples/embedded-c++/main.cpp
@@ -0,0 +1,14 @@
+#include "duckdb.hpp"
+
+using namespace duckdb;
+
+int main() {
+	// open a database without a file path (nullptr) and connect to it
+	DuckDB database(nullptr);
+	Connection connection(database);
+
+	connection.Query("CREATE TABLE integers(i INTEGER)");
+	connection.Query("INSERT INTO integers VALUES (3)");
+	// read the table back and print the result
+	connection.Query("SELECT * FROM integers")->Print();
+}
--- a/external/duckdb/examples/embedded-c/CMakeLists.txt
+++ b/external/duckdb/examples/embedded-c/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 2.8.12...3.29)
+project(example-c)
+
+include_directories(../../src/include) # header search path, given relative to this directory
+link_directories(../../build/release/src) # library search path, given relative to this directory
+
+add_executable(example main.c)
+target_link_libraries(example duckdb) # link the example executable against the duckdb library
--- a/external/duckdb/examples/embedded-c/main.c
+++ b/external/duckdb/examples/embedded-c/main.c
@@ -0,0 +1,50 @@
+#include "duckdb.h"
+#include <stdio.h>
+
+int main() {
+	duckdb_database db = NULL;
+	duckdb_connection con = NULL;
+	duckdb_result result = {0}; // zeroed so the cleanup path never destroys an indeterminate struct
+	// each step below reports its failure on stderr and jumps to the shared cleanup label
+	if (duckdb_open(NULL, &db) == DuckDBError) {
+		fprintf(stderr, "Failed to open database\n");
+		goto cleanup;
+	}
+	if (duckdb_connect(db, &con) == DuckDBError) {
+		fprintf(stderr, "Failed to open connection\n");
+		goto cleanup;
+	}
+	if (duckdb_query(con, "CREATE TABLE integers(i INTEGER, j INTEGER);", NULL) == DuckDBError) {
+		fprintf(stderr, "Failed to query database\n");
+		goto cleanup;
+	}
+	if (duckdb_query(con, "INSERT INTO integers VALUES (3, 4), (5, 6), (7, NULL);", NULL) == DuckDBError) {
+		fprintf(stderr, "Failed to query database\n");
+		goto cleanup;
+	}
+	if (duckdb_query(con, "SELECT * FROM integers", &result) == DuckDBError) {
+		fprintf(stderr, "Failed to query database\n");
+		goto cleanup;
+	}
+	// print the column names of the result, separated by spaces
+	idx_t row_count = duckdb_row_count(&result);
+	idx_t column_count = duckdb_column_count(&result);
+	for (idx_t i = 0; i < column_count; i++) {
+		printf("%s ", duckdb_column_name(&result, i));
+	}
+	printf("\n");
+	// print the data of the result, one line per row, every value converted to a string
+	for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
+		for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+			char *val = duckdb_value_varchar(&result, col_idx, row_idx);
+			printf("%s ", val ? val : "NULL"); // val may be a NULL pointer, e.g. for the (7, NULL) row
+			duckdb_free(val);
+		}
+		printf("\n");
+	}
+	// db, con and result start out NULL/zeroed, so cleanup can run after a failure at any step
+cleanup:
+	duckdb_destroy_result(&result);
+	duckdb_disconnect(&con);
+	duckdb_close(&db);
+}
--- a/external/duckdb/examples/python/duckdb-python.py
+++ b/external/duckdb/examples/python/duckdb-python.py
@@ -0,0 +1,185 @@
+import duckdb
+
+# basic SQL API
+
+# connect to an in-memory temporary database
+conn = duckdb.connect()
+
+# if you want, you can create a cursor() as described in PEP 249, but it is fully redundant
+cursor = conn.cursor()
+
+# run arbitrary SQL commands
+conn.execute("CREATE TABLE test_table (i INTEGER, j STRING)")
+
+# add some data
+conn.execute("INSERT INTO test_table VALUES (1, 'one')")
+
+# we can use placeholders for parameters
+conn.execute("INSERT INTO test_table VALUES (?, ?)", [2, 'two'])
+
+# we can provide multiple sets of parameters to executemany()
+conn.executemany("INSERT INTO test_table VALUES (?, ?)", [[3, 'three'], [4, 'four']])
+
+# fetch as pandas data frame
+print(conn.execute("SELECT * FROM test_table").fetchdf())
+
+# fetch as list of masked numpy arrays, cleaner when handling NULLs
+print(conn.execute("SELECT * FROM test_table").fetchnumpy())
+
+
+# we can query pandas data frames as if they were SQL views
+# create a sample pandas data frame
+import pandas as pd
+
+test_df = pd.DataFrame.from_dict({"i": [1, 2, 3, 4], "j": ["one", "two", "three", "four"]})
+
+# make this data frame available as a view in duckdb
+conn.register("test_df", test_df)
+print(conn.execute("SELECT j FROM test_df WHERE i > 1").fetchdf())
+
+
+# relation API, programmatic querying. relations are lazily evaluated chains of relational operators
+
+# create a "relation" from a pandas data frame with an existing connection
+rel = conn.from_df(test_df)
+print(rel)
+
+# alternative shorthand, use a built-in default connection to create a relation from a pandas data frame
+rel = duckdb.df(test_df)
+print(rel)
+
+# create a relation from a CSV file
+
+# first create a CSV file from our pandas example
+import tempfile, os
+
+temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
+test_df.to_csv(temp_file_name, index=False)
+
+# now create a relation from it
+rel = duckdb.from_csv_auto(temp_file_name)
+print(rel)
+
+# create a relation from an existing table
+rel = conn.table("test_table")
+print(rel)
+
+# a relation has an alias (like a table name)
+print(rel.alias)
+
+# we can change the alias, useful for (self)joins for example
+rel2 = rel.set_alias('new_alias')
+print(rel2.alias)
+
+# we can inspect the type of a relation
+print(rel.type)
+
+# or the column names that are in it
+print(rel.columns)
+
+# or the types of those columns
+print(rel.types)
+
+# now we can apply some operators to the relation
+# filter the relation
+print(rel.filter('i > 1'))
+
+# project the relation, get some columns
+print(rel.project('i, j'))
+
+# or transform them
+print(rel.project('i + 1'))
+
+# order the relation
+print(rel.order('j'))
+
+# limit the rows returned
+print(rel.limit(2))
+
+# skip the first row and limit the number of results
+print(rel.limit(2, offset=1))
+
+# of course these things can be chained
+print(rel.filter('i > 1').project('i + 1, j').order('j').limit(2))
+
+# aggregate the relation
+print(rel.aggregate("sum(i)"))
+
+# non-aggregated columns create implicit grouping
+print(rel.aggregate("j, sum(i)"))
+
+# we can also explicitly group the relation before aggregating
+print(rel.aggregate("sum(i)", "j"))
+
+# distinct values
+print(rel.distinct())
+
+
+# multi-relation operators are also supported, e.g. union
+print(rel.union(rel))
+
+# join rel with itself on i
+rel2 = conn.from_df(test_df)
+print(rel.join(rel2, 'i'))
+
+# for explicit join conditions the relations can be named using alias()
+print(rel.set_alias('a').join(rel.set_alias('b'), 'a.i=b.i'))
+
+
+# there are also shorthand methods to directly create a relation and apply an operator from pandas data frame objects
+print(duckdb.filter(test_df, 'i > 1'))
+print(duckdb.project(test_df, 'i + 1'))
+print(duckdb.order(test_df, 'j'))
+print(duckdb.limit(test_df, 2))
+
+print(duckdb.aggregate(test_df, "sum(i)"))
+print(duckdb.distinct(test_df))
+
+# when chaining only the first call needs to include the data frame parameter
+print(duckdb.filter(test_df, 'i > 1').project('i + 1, j').order('j').limit(2))
+
+# turn the relation into something else again
+
+
+# compute the query result from the relation
+res = rel.execute()
+print(res)
+# res is a query result, you can call fetchdf() or fetchnumpy() or fetchone() on it
+print(res.fetchone())
+print(res.fetchall())
+
+# convert a relation back to a pandas data frame
+print(rel.to_df())
+
+# df() is shorthand for to_df() on relations
+print(rel.df())
+
+# create a table in duckdb from the relation
+print(rel.create("test_table2"))
+
+# insert the relation's data into an existing table
+conn.execute("CREATE TABLE test_table3 (i INTEGER, j STRING)")
+print(rel.insert_into("test_table3"))
+
+# insert individual rows into test_table3
+print(conn.values([5, 'five']).insert_into("test_table3"))
+rel_3 = conn.table("test_table3")
+rel_3.insert([6, 'six'])
+
+# create a SQL-accessible view of the relation
+print(rel.create_view('test_view'))
+
+
+# we can also directly run SQL queries on relation objects without explicitly creating a view
+# the first parameter gives the rel object a view name so we can refer to it in queries
+res = rel.query('my_name_for_rel', 'SELECT * FROM my_name_for_rel')
+print(res)
+# res is a query result, we can fetch with the methods described above, e.g.
+print(res.fetchone())
+print(res.fetchdf())
+# or just use df(), a shorthand for fetchdf() on query results
+print(res.df())
+
+# this also works directly on data frames
+res = duckdb.query_df(test_df, 'my_name_for_test_df', 'SELECT * FROM my_name_for_test_df')
+print(res.df())
--- a/external/duckdb/examples/standalone-window/CMakeLists.txt
+++ b/external/duckdb/examples/standalone-window/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 2.8.12...3.29)
+project(example-window)
+
+set(CMAKE_CXX_STANDARD 11) # request C++11 for the targets defined below
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") # append the address-sanitizer flag to the existing C++ flags
+
+include_directories(../../src/include) # header search path, given relative to this directory
+link_directories(../../build/debug/src) # library search path; this example points at the build/debug tree
+
+add_executable(example main.cpp)
+target_link_libraries(example duckdb) # link the example executable against the duckdb library