should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,131 @@
# CMake >= 4.0 refuses projects that request compatibility with versions older
# than 3.5, and this file uses add_compile_definitions() (introduced in 3.12),
# so 3.12 is the real floor. The upper bound opts in to tested policy defaults.
cmake_minimum_required(VERSION 3.12...3.29)
include(ExternalProject)

# Core config: names of the static and loadable extension targets
set(TARGET_NAME delta)
set(EXTENSION_NAME ${TARGET_NAME}_extension)
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)

project(${TARGET_NAME})

include_directories(src/include)

# C++ sources compiled into both the static and the loadable extension
set(EXTENSION_SOURCES
    src/delta_extension.cpp
    src/delta_functions.cpp
    src/delta_utils.cpp
    src/functions/delta_scan.cpp)
# Custom config TODO: figure out if we really need this?
# System libraries that are linked next to the delta-kernel-rs static library.
if(APPLE)
  # The framework is named "CoreFoundation"; the previous spelling
  # ("Corefoundation") only resolves on case-insensitive file systems.
  set(PLATFORM_LIBS
      m
      c
      System
      resolv
      "-framework CoreFoundation -framework SystemConfiguration -framework Security"
  )
elseif(UNIX)
  set(PLATFORM_LIBS m c resolv)
elseif(WIN32)
  set(PLATFORM_LIBS ws2_32 userenv advapi32)
else()
  # PLATFORM_LIBS stays undefined here, so no extra libraries are linked
  message(STATUS "UNKNOWN OS")
endif()
# Setup delta-kernel-rs dependency
set(KERNEL_NAME delta_kernel)

# Set default ExternalProject root directory
set_directory_properties(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/rust)

# Propagate arch to rust build for CI: translate OS_NAME / OS_ARCH (and
# OSX_BUILD_ARCH on macOS) into a rust target triple. The triple stays empty
# for any other OS_NAME.
set(RUST_PLATFORM_TARGET "")
if("${OS_NAME}" STREQUAL "linux")
  if("${OS_ARCH}" STREQUAL "arm64")
    set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu")
  else()
    set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu")
  endif()
elseif("${OS_NAME}" STREQUAL "osx")
  # TODO: clean up upstream; we are not correctly setting OS_ARCH for cross
  # compile
  #
  # OSX_BUILD_ARCH wins when it is arm64 or x86_64; OS_ARCH is only consulted
  # when OSX_BUILD_ARCH holds neither value.
  if("${OSX_BUILD_ARCH}" STREQUAL "arm64"
     OR ((NOT "${OSX_BUILD_ARCH}" STREQUAL "x86_64")
         AND ("${OS_ARCH}" STREQUAL "arm64")))
    set(RUST_PLATFORM_TARGET "aarch64-apple-darwin")
  else()
    set(RUST_PLATFORM_TARGET "x86_64-apple-darwin")
  endif()
endif()
# Add delta-kernel-rs as a CMake target (named ${KERNEL_NAME}). The repository
# is fetched at a pinned commit and built in its source directory with cargo;
# the configure, update and install steps are disabled (empty commands).
ExternalProject_Add(
${KERNEL_NAME}
GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs"
GIT_TAG 08f0764a00e89f42136fd478823d28278adc7ee8
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
# Build debug build
BUILD_COMMAND cargo build --package delta_kernel_ffi --workspace
--all-features --target=${RUST_PLATFORM_TARGET}
# Build release build
COMMAND cargo build --package delta_kernel_ffi --workspace --all-features
--release --target=${RUST_PLATFORM_TARGET}
# Build DATs (the `acceptance` crate of the kernel checkout)
COMMAND
cargo build
--manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml
# Files produced by the commands above: the debug and release static
# libraries and the C / C++ FFI headers. The extension targets below link
# and include exactly these paths.
BUILD_BYPRODUCTS
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
BUILD_BYPRODUCTS
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
BUILD_BYPRODUCTS
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h"
BUILD_BYPRODUCTS
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp"
INSTALL_COMMAND ""
LOG_BUILD ON)
# Create the static and the loadable extension targets from the same sources
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})

# Make the FFI headers produced by the delta-kernel-rs build visible
# (this directory was previously added twice; once is enough)
include_directories(
  ${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers)

# Hides annoying linker warnings
set(CMAKE_OSX_DEPLOYMENT_TARGET
    13.3
    CACHE STRING "Minimum OS X deployment version" FORCE)

# Add the default client
add_compile_definitions(DEFINE_DEFAULT_ENGINE)

# Link delta-kernel-rs to static lib: the debug archive for debug
# configurations, the release archive otherwise
target_link_libraries(
  ${EXTENSION_NAME}
  debug
  "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
  optimized
  "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
  ${PLATFORM_LIBS})
# Ensure the rust build has run before the extension is built
add_dependencies(${EXTENSION_NAME} ${KERNEL_NAME})

# Link delta-kernel-rs to dynamic lib
target_link_libraries(
  ${LOADABLE_EXTENSION_NAME}
  debug
  "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/libdelta_kernel_ffi.a"
  optimized
  "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/libdelta_kernel_ffi.a"
  ${PLATFORM_LIBS})
add_dependencies(${LOADABLE_EXTENSION_NAME} ${KERNEL_NAME})

install(
  TARGETS ${EXTENSION_NAME}
  EXPORT "${DUCKDB_EXPORT_SET}"
  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")

View File

@@ -0,0 +1,71 @@
# DuckDB Delta Extension
This is the experimental DuckDB extension for [Delta](https://delta.io/). It is built using the (also experimental)
[Delta Kernel](https://github.com/delta-incubator/delta-kernel-rs). The extension (currently) offers **read** support for delta
tables, both local and remote.
# Supported platforms
The supported platforms are:
- `linux_amd64`
- `osx_amd64` and `osx_arm64`
Support for the [other](https://duckdb.org/docs/stable/extensions/extension_distribution#platforms) DuckDB platforms is a
work in progress.
# How to use
**NOTE: this extension requires DuckDB v0.10.3 or higher**
This extension is distributed as a binary extension. To use it, simply use one of its functions from DuckDB and the extension will be autoloaded:
```SQL
FROM delta_scan('s3://some/delta/table');
```
Note that using DuckDB [Secrets](https://duckdb.org/docs/stable/configuration/secrets_manager) for S3 authentication is supported:
```SQL
CREATE SECRET (TYPE S3, provider credential_chain);
FROM delta_scan('s3://some/delta/table/with/auth');
```
To scan a local table, use the full path prefixed with `file://`:
```SQL
FROM delta_scan('file:///some/path/on/local/machine');
```
# Features
While still experimental, many (scanning) features/optimizations are already supported in this extension as it reuses most of DuckDB's
regular parquet scanning logic:
- multithreaded scans and parquet metadata reading
- data skipping/filter pushdown
- skipping row-groups in file (based on parquet metadata)
- skipping complete files (based on delta partition info)
- projection pushdown
- scanning tables with deletion vectors
- all primitive types
- structs
- S3 support with secrets
More features coming soon!
# Building
See the [Extension Template](https://github.com/duckdb/extension-template) for generic build instructions
# Running tests
There are various tests available for the delta extension:
1. Delta Acceptance Test (DAT) based tests in `/test/sql/dat`
2. delta-kernel-rs based tests in `/test/sql/delta_kernel_rs`
3. Generated data based tests in `/test/sql/generated` (generated using [delta-rs](https://delta-io.github.io/delta-rs/), [PySpark](https://spark.apache.org/docs/latest/api/python/index.html), and DuckDB)
To run the first 2 sets of tests:
```shell
make test_debug
```
or in release mode
```shell
make test
```
To also run the tests on generated data:
```shell
make generate-data
GENERATED_DATA_AVAILABLE=1 make test
```

View File

@@ -0,0 +1,14 @@
# Included by DuckDB's build system: declares which extensions get loaded.

# The delta extension from this repo, together with its tests
duckdb_extension_load(delta SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} LOAD_TESTS)

# httpfs is built so the extension can be tested with s3/http
duckdb_extension_load(httpfs)

# tpch and tpcds are built for testing/benchmarking
duckdb_extension_load(tpch)
duckdb_extension_load(tpcds)

View File

@@ -0,0 +1,32 @@
#include "delta_extension.hpp"
#include "delta_functions.hpp"
#include "duckdb.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/main/extension/extension_loader.hpp"
namespace duckdb {
// Registers every table function exposed by the delta extension with `loader`.
static void LoadInternal(ExtensionLoader &loader) {
	// GetTableFunctions takes the DatabaseInstance; previously an undeclared `instance` was referenced here.
	// The loader is the only handle in scope, so the instance is obtained from it.
	auto &instance = loader.GetDatabaseInstance();

	// Load functions
	for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) {
		loader.RegisterFunction(function);
	}
}
// Extension load hook: forwards to the registration routine shared with the C entry point.
void DeltaExtension::Load(ExtensionLoader &loader) {
	LoadInternal(loader);
}
// Returns the name of this extension.
std::string DeltaExtension::Name() {
	std::string extension_name = "delta";
	return extension_name;
}
} // namespace duckdb
// C entry point for the extension; performs the same registration as DeltaExtension::Load.
extern "C" {

DUCKDB_CPP_EXTENSION_ENTRY(delta, loader) {
	duckdb::LoadInternal(loader);
}

}

View File

@@ -0,0 +1,17 @@
#include "delta_functions.hpp"
#include "duckdb.hpp"
#include "duckdb/main/extension_util.hpp"
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
namespace duckdb {
// Collects all table function sets provided by the delta extension (currently only the delta scan function).
vector<TableFunctionSet> DeltaFunctions::GetTableFunctions(DatabaseInstance &instance) {
	vector<TableFunctionSet> table_functions;
	table_functions.emplace_back(GetDeltaScanFunction(instance));
	return table_functions;
}
}; // namespace duckdb

View File

@@ -0,0 +1,322 @@
#include "delta_utils.hpp"
#include "duckdb.hpp"
#include "duckdb/main/extension_util.hpp"
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
namespace duckdb {
// Walks the schema of `snapshot` through the kernel's EngineSchemaVisitor callbacks and returns the top-level
// field list (name + DuckDB LogicalType per column).
//
// A temporary SchemaVisitor collects the field lists while the kernel calls back; the id returned by
// visit_schema identifies the top-level list, which is then taken out of that visitor.
unique_ptr<SchemaVisitor::FieldList> SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot *snapshot) {
	SchemaVisitor state;
	ffi::EngineSchemaVisitor visitor;

	visitor.data = &state;
	visitor.make_field_list = (uintptr_t(*)(void *, uintptr_t)) & MakeFieldList;

	// Nested and parameterized types: the callbacks take a typed SchemaVisitor*, the kernel expects void*
	visitor.visit_struct = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uintptr_t)) & VisitStruct;
	visitor.visit_array = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) & VisitArray;
	visitor.visit_map = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) & VisitMap;
	visitor.visit_decimal = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uint8_t, uint8_t)) & VisitDecimal;

	// Simple types
	visitor.visit_string = VisitSimpleType<LogicalType::VARCHAR>();
	visitor.visit_long = VisitSimpleType<LogicalType::BIGINT>();
	visitor.visit_integer = VisitSimpleType<LogicalType::INTEGER>();
	visitor.visit_short = VisitSimpleType<LogicalType::SMALLINT>();
	visitor.visit_byte = VisitSimpleType<LogicalType::TINYINT>();
	visitor.visit_float = VisitSimpleType<LogicalType::FLOAT>();
	visitor.visit_double = VisitSimpleType<LogicalType::DOUBLE>();
	visitor.visit_boolean = VisitSimpleType<LogicalType::BOOLEAN>();
	// Delta `binary` is a raw byte sequence, not text: map it to BLOB (was VARCHAR)
	visitor.visit_binary = VisitSimpleType<LogicalType::BLOB>();
	visitor.visit_date = VisitSimpleType<LogicalType::DATE>();
	// Delta `timestamp` is an instant (UTC-adjusted) while `timestamp_ntz` carries no time zone; the two
	// mappings were swapped before
	visitor.visit_timestamp = VisitSimpleType<LogicalType::TIMESTAMP_TZ>();
	visitor.visit_timestamp_ntz = VisitSimpleType<LogicalType::TIMESTAMP>();

	uintptr_t result = visit_schema(snapshot, &visitor);
	return state.TakeFieldList(result);
}
// Kernel callback: appends a DECIMAL(precision, scale) field called `name` to the list `sibling_list_id`.
void SchemaVisitor::VisitDecimal(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
                                 uint8_t precision, uint8_t scale) {
	auto decimal_type = LogicalType::DECIMAL(precision, scale);
	state->AppendToList(sibling_list_id, name, std::move(decimal_type));
}
// Kernel callback: creates a new in-flight field list on `state` and returns its id.
uintptr_t SchemaVisitor::MakeFieldList(SchemaVisitor *state, uintptr_t capacity_hint) {
	const uintptr_t list_id = state->MakeFieldListImpl(capacity_hint);
	return list_id;
}
// Kernel callback: consumes the child list `child_list_id` and appends a STRUCT built from it, named `name`, to
// the list `sibling_list_id`.
void SchemaVisitor::VisitStruct(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
                                uintptr_t child_list_id) {
	auto struct_fields = state->TakeFieldList(child_list_id);
	auto struct_type = LogicalType::STRUCT(std::move(*struct_fields));
	state->AppendToList(sibling_list_id, name, std::move(struct_type));
}
// Kernel callback: consumes the child list `child_list_id` (asserted to hold exactly one entry, the element
// type) and appends a LIST of that type, named `name`, to the list `sibling_list_id`.
// `contains_null` is not used.
void SchemaVisitor::VisitArray(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
                               bool contains_null, uintptr_t child_list_id) {
	auto element_list = state->TakeFieldList(child_list_id);
	D_ASSERT(element_list->size() == 1);
	auto list_type = LogicalType::LIST(element_list->front().second);
	state->AppendToList(sibling_list_id, name, std::move(list_type));
}
// Kernel callback: consumes the child list `child_list_id` (asserted to hold exactly two entries) and appends
// a MAP over the STRUCT of those entries, named `name`, to the list `sibling_list_id`.
// `contains_null` is not used.
void SchemaVisitor::VisitMap(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
                             bool contains_null, uintptr_t child_list_id) {
	auto entry_fields = state->TakeFieldList(child_list_id);
	D_ASSERT(entry_fields->size() == 2);
	auto entry_struct = LogicalType::STRUCT(std::move(*entry_fields));
	state->AppendToList(sibling_list_id, name, LogicalType::MAP(entry_struct));
}
uintptr_t SchemaVisitor::MakeFieldListImpl(uintptr_t capacity_hint) {
uintptr_t id = next_id++;
auto list = make_uniq<FieldList>();
if (capacity_hint > 0) {
list->reserve(capacity_hint);
}
inflight_lists.emplace(id, std::move(list));
return id;
}
// Appends the field (`name`, `child`) to the in-flight field list identified by `id`.
// Throws InternalException when no in-flight list with that id exists.
void SchemaVisitor::AppendToList(uintptr_t id, ffi::KernelStringSlice name, LogicalType &&child) {
	auto it = inflight_lists.find(id);
	if (it == inflight_lists.end()) {
		// Report which id was missing instead of an uninformative placeholder message
		throw InternalException("SchemaVisitor::AppendToList: no in-flight field list with id " +
		                        std::to_string(id));
	}
	// The kernel slice is not NUL-terminated: copy exactly name.len bytes
	it->second->emplace_back(string(name.ptr, name.len), std::move(child));
}
// Removes the in-flight field list identified by `id` and hands ownership of it to the caller.
// Throws InternalException when no in-flight list with that id exists (e.g. it was already taken).
unique_ptr<SchemaVisitor::FieldList> SchemaVisitor::TakeFieldList(uintptr_t id) {
	auto it = inflight_lists.find(id);
	if (it == inflight_lists.end()) {
		// Report which id was missing instead of an uninformative placeholder message
		throw InternalException("SchemaVisitor::TakeFieldList: no in-flight field list with id " +
		                        std::to_string(id));
	}
	auto rval = std::move(it->second);
	inflight_lists.erase(it);
	return rval;
}
// Error allocator handed to the kernel: heap-allocates a DuckDBEngineError carrying the error type and a copy
// of the message. The object is released again by DuckDBEngineError::Throw.
ffi::EngineError *DuckDBEngineError::AllocateError(ffi::KernelError etype, ffi::KernelStringSlice msg) {
	auto *engine_error = new DuckDBEngineError;
	engine_error->etype = etype;
	engine_error->error_message.assign(msg.ptr, msg.len);
	return engine_error;
}
// Maps a kernel error enum value to its name; values outside the table yield an "EnumOutOfRange" string.
string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) {
	// Indexed by the integer value of ffi::KernelError
	static const char *const KERNEL_ERROR_NAMES[] = {
	    "UnknownError",
	    "FFIError",
	    "ArrowError",
	    "EngineDataTypeError",
	    "ExtractError",
	    "GenericError",
	    "IOErrorError",
	    "ParquetError",
	    "ObjectStoreError",
	    "ObjectStorePathError",
	    "Reqwest",
	    "FileNotFoundError",
	    "MissingColumnError",
	    "UnexpectedColumnTypeError",
	    "MissingDataError",
	    "MissingVersionError",
	    "DeletionVectorError",
	    "InvalidUrlError",
	    "MalformedJsonError",
	    "MissingMetadataError",
	    "MissingProtocolError",
	    "MissingMetadataAndProtocolError",
	    "ParseError",
	    "JoinFailureError",
	    "Utf8Error",
	    "ParseIntError",
	    "InvalidColumnMappingMode",
	    "InvalidTableLocation",
	    "InvalidDecimalError",
	};
	constexpr size_t NAME_COUNT = sizeof(KERNEL_ERROR_NAMES) / sizeof(KERNEL_ERROR_NAMES[0]);

	// The last table entry has to line up with the last enum value checked here
	static_assert(NAME_COUNT - 1 == (int)ffi::KernelError::InvalidDecimalError,
	              "KernelErrorEnumStrings mismatched with kernel");

	const int error_code = (int)err;
	if (error_code < NAME_COUNT) {
		return KERNEL_ERROR_NAMES[error_code];
	}
	return StringUtil::Format("EnumOutOfRange (enum val out of range: %d)", error_code);
}
// Consumes this error object and raises it as an IOException; `from_where` names the failing call site.
void DuckDBEngineError::Throw(string from_where) {
	// The members are needed after `delete this`, so take copies first
	auto kernel_error_type = etype;
	auto kernel_message = error_message;

	// This error was created by the kernel through AllocateError; deleting it here consumes it
	delete this;

	throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error: %u (%s) with message (%s)", from_where.c_str(),
	                  kernel_error_type, KernelErrorEnumToString(kernel_error_type), kernel_message);
}
// Builds a kernel string slice from `str`'s data pointer and size; no copy is made.
ffi::KernelStringSlice KernelUtils::ToDeltaString(const string &str) {
	ffi::KernelStringSlice slice = {str.data(), str.size()};
	return slice;
}
// Copies the `slice.len` bytes starting at `slice.ptr` into a new string.
string KernelUtils::FromDeltaString(const struct ffi::KernelStringSlice slice) {
	return string(slice.ptr, slice.len);
}
// Copies the `slice.len` booleans starting at `slice.ptr` into a vector<bool>.
vector<bool> KernelUtils::FromDeltaBoolSlice(const struct ffi::KernelBoolSlice slice) {
	auto first = slice.ptr;
	auto last = first + slice.len;

	vector<bool> flags;
	flags.assign(first, last);
	return flags;
}
// Sets up the EnginePredicate base to call back into VisitPredicate with `this`, and indexes the given table
// filters by column name (`filters` is keyed by column index; `column_names` translates index to name).
PredicateVisitor::PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters)
    : EnginePredicate {.predicate = this,
                       .visitor = (uintptr_t(*)(void *, ffi::KernelExpressionVisitorState *)) & VisitPredicate} {
	if (!filters) {
		return;
	}
	for (auto &entry : filters->filters) {
		const auto &column_name = column_names[entry.first];
		// Non-owning pointer: the filter itself stays owned by the filter set
		column_filters[column_name] = entry.second.get();
	}
}
// Free-function trampoline used as get_next for EngineIteratorFromCallable: invokes the callable and returns
// whatever it returns.
template <typename Callable>
static auto GetNextFromCallable(Callable *callable) -> decltype(std::declval<Callable>()()) {
	return (*callable)();
}
// Exposes a callable object (e.g. a C++11 lambda) as an ffi::EngineIterator. The iterator only stores the
// address of `callable`, so the callable has to outlive the iterator.
template <typename Callable>
ffi::EngineIterator EngineIteratorFromCallable(Callable &callable) {
	auto *trampoline = &GetNextFromCallable<Callable>;
	ffi::EngineIterator iterator = {.data = &callable, .get_next = (const void *(*)(void *))trampoline};
	return iterator;
}
// Decides whether a filter may be pushed down to the kernel: constant comparisons are accepted, AND
// conjunctions are accepted when every child is, and everything else is rejected.
// TODO: remove once kernel handles this properly?
static bool CanHandleFilter(TableFilter *filter) {
	const auto filter_type = filter->filter_type;
	if (filter_type == TableFilterType::CONSTANT_COMPARISON) {
		return true;
	}
	if (filter_type != TableFilterType::CONJUNCTION_AND) {
		return false;
	}

	auto &conjunction = static_cast<const ConjunctionAndFilter &>(*filter);
	for (const auto &child : conjunction.child_filters) {
		if (!CanHandleFilter(child.get())) {
			return false;
		}
	}
	return true;
}
// Returns the subset of `predicates` (column name -> filter) that CanHandleFilter accepts.
static unordered_map<string, TableFilter *> PrunePredicates(unordered_map<string, TableFilter *> predicates) {
	unordered_map<string, TableFilter *> supported;
	for (const auto &entry : predicates) {
		if (!CanHandleFilter(entry.second)) {
			continue;
		}
		supported.emplace(entry.first, entry.second);
	}
	return supported;
}
// Kernel callback: turns the supported column filters of `predicate` into kernel expressions and combines them
// with visit_expression_and. Returns ~0 when anything throws on the way.
uintptr_t PredicateVisitor::VisitPredicate(PredicateVisitor *predicate, ffi::KernelExpressionVisitorState *state) {
	auto supported_filters = PrunePredicates(predicate->column_filters);

	auto current = supported_filters.begin();
	auto last = supported_filters.end();
	// Yields one visited filter per call; 0 signals that there are no more filters
	auto next_expression = [predicate, state, &current, &last]() -> uintptr_t {
		if (current == last) {
			return 0;
		}
		auto &entry = *current;
		++current;
		return predicate->VisitFilter(entry.first, *entry.second, state);
	};
	auto engine_iterator = EngineIteratorFromCallable(next_expression);

	// TODO: this should be fixed upstream?
	try {
		return visit_expression_and(state, &engine_iterator);
	} catch (...) {
		return ~0;
	}
}
// Translates a `<column> <cmp> <constant>` filter into a kernel expression and returns the expression id.
//
// The left side is the column reference; the right side is a literal built from the filter constant (BIGINT and
// VARCHAR are translated, for other types `right` keeps the ~0 placeholder). Comparison operators other than
// <, <=, >, >= and = return ~0.
uintptr_t PredicateVisitor::VisitConstantFilter(const string &col_name, const ConstantFilter &filter,
                                                ffi::KernelExpressionVisitorState *state) {
	auto maybe_left =
	    ffi::visit_expression_column(state, KernelUtils::ToDeltaString(col_name), DuckDBEngineError::AllocateError);
	uintptr_t left = KernelUtils::UnpackResult(maybe_left, "VisitConstantFilter failed to visit_expression_column");

	uintptr_t right = ~0;
	auto &value = filter.constant;
	switch (value.type().id()) {
	case LogicalType::BIGINT:
		right = visit_expression_literal_long(state, BigIntValue::Get(value));
		break;
	case LogicalType::VARCHAR: {
		// WARNING: C++ lifetime extension rules don't protect calls of the form foo(std::string(...).c_str())
		// so the string is kept in a named local while the kernel reads the slice.
		auto str = StringValue::Get(value);
		// The literal has to be built from the constant's value; the column name was passed here before, which
		// turned `col = 'x'` into a comparison against the column's own name.
		auto maybe_right = ffi::visit_expression_literal_string(state, KernelUtils::ToDeltaString(str),
		                                                        DuckDBEngineError::AllocateError);
		right = KernelUtils::UnpackResult(maybe_right, "VisitConstantFilter failed to visit_expression_literal_string");
		break;
	}
	default:
		break; // unsupported type
	}

	// TODO support other comparison types?
	switch (filter.comparison_type) {
	case ExpressionType::COMPARE_LESSTHAN:
		return visit_expression_lt(state, left, right);
	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
		return visit_expression_le(state, left, right);
	case ExpressionType::COMPARE_GREATERTHAN:
		return visit_expression_gt(state, left, right);
	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
		return visit_expression_ge(state, left, right);
	case ExpressionType::COMPARE_EQUAL:
		return visit_expression_eq(state, left, right);
	default:
		std::cout << " Unsupported operation: " << (int)filter.comparison_type << std::endl;
		return ~0; // Unsupported operation
	}
}
uintptr_t PredicateVisitor::VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter,
ffi::KernelExpressionVisitorState *state) {
auto it = filter.child_filters.begin();
auto end = filter.child_filters.end();
auto get_next = [this, col_name, state, &it, &end]() -> uintptr_t {
if (it == end) {
return 0;
}
auto &child_filter = *it++;
return VisitFilter(col_name, *child_filter, state);
};
auto eit = EngineIteratorFromCallable(get_next);
return visit_expression_and(state, &eit);
}
// Dispatches a table filter on column `col_name` to the matching Visit* routine.
// Throws NotImplementedException for filter types other than constant comparisons and AND conjunctions.
uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilter &filter,
                                        ffi::KernelExpressionVisitorState *state) {
	const auto filter_type = filter.filter_type;
	if (filter_type == TableFilterType::CONSTANT_COMPARISON) {
		return VisitConstantFilter(col_name, static_cast<const ConstantFilter &>(filter), state);
	}
	if (filter_type == TableFilterType::CONJUNCTION_AND) {
		return VisitAndFilter(col_name, static_cast<const ConjunctionAndFilter &>(filter), state);
	}
	throw NotImplementedException("Attempted to push down unimplemented filter type: '%s'",
	                              EnumUtil::ToString(filter.filter_type));
}
}; // namespace duckdb

View File

@@ -0,0 +1,626 @@
#include "duckdb/function/table_function.hpp"
#include "delta_functions.hpp"
#include "functions/delta_scan.hpp"
#include "duckdb/optimizer/filter_combiner.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
#include "duckdb/parser/parsed_expression.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/planner/binder.hpp"
#include "duckdb/main/secret/secret_manager.hpp"
#include <string>
#include <numeric>
namespace duckdb {
// Allocator callback: copies the slice into a heap-allocated string. The caller owns the returned pointer and
// has to delete it.
static void *allocate_string(const struct ffi::KernelStringSlice slice) {
	auto *copy = new string(slice.ptr, slice.len);
	return copy;
}
// Per-file scan callback: records one data file on the DeltaSnapshot passed as `engine_context`, together with
// its metadata (snapshot version, file number, selection vector and partition values).
static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size,
                           const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) {
	auto *snapshot = (DeltaSnapshot *)engine_context;

	// Full file path = table path (without trailing slashes) + "/" + path reported by the kernel
	auto full_path = snapshot->GetPath();
	StringUtil::RTrim(full_path, "/");
	full_path += "/" + KernelUtils::FromDeltaString(path);

	// First we append the file to our resolved files, with a fresh metadata entry alongside it
	snapshot->resolved_files.push_back(DeltaSnapshot::ToDuckDBPath(full_path));
	snapshot->metadata.emplace_back(make_uniq<DeltaFileMetaData>());
	D_ASSERT(snapshot->resolved_files.size() == snapshot->metadata.size());

	// Initialize the file metadata
	auto &file_metadata = *snapshot->metadata.back();
	file_metadata.delta_snapshot_version = snapshot->version;
	file_metadata.file_number = snapshot->resolved_files.size() - 1;

	// Fetch the deletion vector; it is only stored when the kernel returned one (non-null ptr)
	auto selection_vector_res =
	    ffi::selection_vector_from_dv(dv_info, snapshot->extern_engine.get(), snapshot->global_state.get());
	auto selection_vector =
	    KernelUtils::UnpackResult(selection_vector_res, "selection_vector_from_dv for path " + snapshot->GetPath());
	if (selection_vector.ptr) {
		file_metadata.selection_vector = selection_vector;
	}

	// Lookup all columns for potential hits in the constant map
	case_insensitive_map_t<string> partition_constants;
	for (const auto &column_name : snapshot->names) {
		auto lookup_key = KernelUtils::ToDeltaString(column_name);
		// allocate_string hands back a heap string that we own; nullptr means the column has no partition value
		auto *partition_value = (string *)ffi::get_from_map(partition_values, lookup_key, allocate_string);
		if (!partition_value) {
			continue;
		}
		partition_constants[column_name] = *partition_value;
		delete partition_value;
	}
	file_metadata.partition_map = std::move(partition_constants);
}
// Scan-data callback: forwards the engine data and selection vector to the kernel, which in turn invokes
// visit_callback with `engine_context`.
static void visit_data(void *engine_context, ffi::EngineData *engine_data,
                       const struct ffi::KernelBoolSlice selection_vec) {
	ffi::visit_scan_data(engine_data, selection_vec, engine_context, &visit_callback);
}
// Creates the kernel engine builder for `path`.
//
// For non-s3 paths the default builder is returned unchanged. For s3 paths a matching DuckDB secret (if any) is
// looked up and its credentials are forwarded to the builder as options.
// Throws IOException when an s3 url has no '/' after the bucket name.
static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &path) {
	const bool is_s3_path = StringUtil::StartsWith(path, "s3://");

	// "s3://" is 5 characters long: there has to be a '/' terminating the bucket name after it.
	// This is checked before the builder is created, as before.
	if (is_s3_path && path.find('/', 5) == string::npos) {
		throw IOException("Invalid s3 url passed to delta scan: %s", path);
	}

	auto interface_builder_res =
	    ffi::get_engine_builder(KernelUtils::ToDeltaString(path), DuckDBEngineError::AllocateError);
	ffi::EngineBuilder *builder =
	    KernelUtils::UnpackResult(interface_builder_res, "get_engine_interface_builder for path " + path);

	// For "regular" paths we early out with the default builder config
	if (!is_s3_path) {
		return builder;
	}

	// For S3 paths we fetch a potential secret
	auto &secret_manager = SecretManager::Get(context);
	auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context);
	auto secret_match = secret_manager.LookupSecret(transaction, path, "s3");

	// No secret: nothing left to do here!
	if (!secret_match.HasMatch()) {
		return builder;
	}
	const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_match.secret_entry->secret);

	auto key_id = kv_secret.TryGetValue("key_id").ToString();
	auto secret = kv_secret.TryGetValue("secret").ToString();
	auto session_token = kv_secret.TryGetValue("session_token").ToString();
	auto region = kv_secret.TryGetValue("region").ToString();

	// Without any credentials, requests are sent unsigned
	if (key_id.empty() && secret.empty()) {
		ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"),
		                        KernelUtils::ToDeltaString("true"));
	}

	if (!key_id.empty()) {
		ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"),
		                        KernelUtils::ToDeltaString(key_id));
	}
	if (!secret.empty()) {
		ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"),
		                        KernelUtils::ToDeltaString(secret));
	}
	if (!session_token.empty()) {
		ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"),
		                        KernelUtils::ToDeltaString(session_token));
	}
	// Like the other options, only forward the region when the secret provides a value
	if (!region.empty()) {
		ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"),
		                        KernelUtils::ToDeltaString(region));
	}
	return builder;
}
// Builds a file list over the single table path `path` (normalized through ToDeltaPath) and keeps a reference
// to the client context. Nothing else happens in the constructor.
DeltaSnapshot::DeltaSnapshot(ClientContext &context_p, const string &path)
    : MultiFileList({ToDeltaPath(path)}, FileGlobOptions::ALLOW_EMPTY), context(context_p) {
}
// Returns the first (and, for delta scans, only) path of this file list.
string DeltaSnapshot::GetPath() {
	string table_path = GetPaths()[0];
	return table_path;
}
// Strips a leading "file://" from `raw_path`; any other path is returned unchanged.
string DeltaSnapshot::ToDuckDBPath(const string &raw_path) {
	const string file_scheme = "file://";
	if (!StringUtil::StartsWith(raw_path, file_scheme)) {
		return raw_path;
	}
	return raw_path.substr(file_scheme.size());
}
// Normalizes a user-supplied table path for the kernel:
//  - a path starting with "./" is resolved against the working directory and prefixed with "file://"
//  - a trailing '/' is appended when missing
// An empty path is returned unchanged (previously the last-character check indexed out of bounds for it).
string DeltaSnapshot::ToDeltaPath(const string &raw_path) {
	string path;
	if (StringUtil::StartsWith(raw_path, "./")) {
		LocalFileSystem fs;
		path = fs.JoinPath(fs.GetWorkingDirectory(), raw_path.substr(2));
		path = "file://" + path;
	} else {
		path = raw_path;
	}

	// Paths always end in a slash (kernel likes it that way for now)
	if (!path.empty() && path.back() != '/') {
		path += '/';
	}

	return path;
}
// Appends the snapshot's schema (column names and types) to `names` / `return_types`, initializing the
// snapshot first when that has not happened yet.
void DeltaSnapshot::Bind(vector<LogicalType> &return_types, vector<string> &names) {
	if (!initialized) {
		InitializeFiles();
	}

	auto schema_fields = SchemaVisitor::VisitSnapshotSchema(snapshot.get());
	for (const auto &name_and_type : *schema_fields) {
		names.push_back(name_and_type.first);
		return_types.push_back(name_and_type.second);
	}

	// Store the bound names for resolving the complex filter pushdown later
	this->names = names;
}
// Returns the path of the i-th data file, or "" when the snapshot has fewer than i + 1 files.
//
// Files are resolved lazily: the kernel scan iterator is advanced (which appends to `resolved_files` through
// the visit_data callback) until index `i` is available or the kernel reports that no scan data is left.
string DeltaSnapshot::GetFile(idx_t i) {
	if (!initialized) {
		InitializeFiles();
	}
	// We already have this file
	if (i < resolved_files.size()) {
		return resolved_files[i];
	}

	if (files_exhausted) {
		return "";
	}

	while (i >= resolved_files.size()) {
		auto have_scan_data_res = ffi::kernel_scan_data_next(scan_data_iterator.get(), this, visit_data);

		auto have_scan_data = TryUnpackKernelResult(have_scan_data_res);

		// kernel has indicated that we have no more data to scan
		if (!have_scan_data) {
			files_exhausted = true;
			return "";
		}
	}

	// The loop above only exits normally once i < resolved_files.size(), so the former
	// "failed to resolve a new file" check at this point could never trigger and was removed.
	return resolved_files[i];
}
void DeltaSnapshot::InitializeFiles() {
auto path_slice = KernelUtils::ToDeltaString(paths[0]);
// Register engine
auto interface_builder = CreateBuilder(context, paths[0]);
extern_engine = TryUnpackKernelResult(ffi::builder_build(interface_builder));
// Initialize Snapshot
snapshot = TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get()));
// Create Scan
PredicateVisitor visitor(names, &table_filters);
scan = TryUnpackKernelResult(ffi::scan(snapshot.get(), extern_engine.get(), &visitor));
// Create GlobalState
global_state = ffi::get_global_scan_state(scan.get());
// Set version
this->version = ffi::version(snapshot.get());
// Create scan data iterator
scan_data_iterator = TryUnpackKernelResult(ffi::kernel_scan_data_init(extern_engine.get(), scan.get()));
initialized = true;
}
// Returns a new, uninitialized DeltaSnapshot over the same table path that carries the table filters generated
// from `filters`, plus the column names bound on this snapshot.
unique_ptr<MultiFileList> DeltaSnapshot::ComplexFilterPushdown(ClientContext &context, const MultiFileOptions &options,
                                                               LogicalGet &get,
                                                               vector<unique_ptr<Expression>> &filters) {
	// Feed copies of the expressions to the combiner; the originals stay untouched
	FilterCombiner filter_combiner(context);
	for (const auto &expression : filters) {
		filter_combiner.AddFilter(expression->Copy());
	}
	auto scan_filters = filter_combiner.GenerateTableScanFilters(get.column_ids);

	// TODO: can/should we figure out if this filtered anything?
	auto filtered_snapshot = make_uniq<DeltaSnapshot>(context, paths[0]);
	filtered_snapshot->table_filters = std::move(scan_filters);
	filtered_snapshot->names = names;

	return std::move(filtered_snapshot);
}
// Resolves every remaining file of the snapshot and returns the complete list of file paths.
vector<string> DeltaSnapshot::GetAllFiles() {
	// TODO: this can probably be improved
	// GetFile returns "" once there is no file at the requested index
	for (idx_t next_file = resolved_files.size(); !GetFile(next_file).empty(); next_file++) {
	}
	return resolved_files;
}
// Reports whether the snapshot holds no files, exactly one file, or more than one file.
FileExpandResult DeltaSnapshot::GetExpandResult() {
	// GetFile(1) will ensure at least the first 2 files are expanded if they are available
	GetFile(1);

	const auto resolved_count = resolved_files.size();
	if (resolved_count == 0) {
		return FileExpandResult::NO_FILES;
	}
	if (resolved_count == 1) {
		return FileExpandResult::SINGLE_FILE;
	}
	return FileExpandResult::MULTIPLE_FILES;
}
// Resolves every remaining file of the snapshot and returns how many files there are in total.
idx_t DeltaSnapshot::GetTotalFileCount() {
	// TODO: this can probably be improved
	// GetFile returns "" once there is no file at the requested index
	for (idx_t next_file = resolved_files.size(); !GetFile(next_file).empty(); next_file++) {
	}
	return resolved_files.size();
}
// Factory: returns a fresh DeltaMultiFileReader behind the MultiFileReader interface.
unique_ptr<MultiFileReader> DeltaMultiFileReader::CreateInstance() {
	return make_uniq<DeltaMultiFileReader>();
}
// Binds the delta table schema through the DeltaSnapshot in `files`. When the "file_row_number" custom option
// is present, a BIGINT "file_row_number" column is appended after the schema columns. Always returns true.
bool DeltaMultiFileReader::Bind(MultiFileOptions &options, MultiFileList &files, vector<LogicalType> &return_types,
                                vector<string> &names, MultiFileReaderBindData &bind_data) {
	auto &delta_snapshot = dynamic_cast<DeltaSnapshot &>(files);
	delta_snapshot.Bind(return_types, names);

	// We need to parse this option
	const bool file_row_number_enabled = options.custom_options.count("file_row_number") > 0;

	// The index is the position right after the schema columns in both cases.
	// TODO: when the option is disabled this is a bogus ID? Change for flag indicating it should be enabled?
	bind_data.file_row_number_idx = names.size();
	if (file_row_number_enabled) {
		return_types.emplace_back(LogicalType::BIGINT);
		names.emplace_back("file_row_number");
	}

	return true;
}
// Binds the multi-file options for a delta scan: hive partitioning and union-by-name are switched off before
// the base implementation runs, and a UBIGINT "delta_file_number" column is appended when the custom option of
// that name is set to true.
void DeltaMultiFileReader::BindOptions(MultiFileOptions &options, MultiFileList &files,
                                       vector<LogicalType> &return_types, vector<string> &names,
                                       MultiFileReaderBindData &bind_data) {
	// Disable all other multifilereader options
	options.auto_detect_hive_partitioning = false;
	options.hive_partitioning = false;
	options.union_by_name = false;

	MultiFileReader::BindOptions(options, files, return_types, names, bind_data);

	auto file_number_entry = options.custom_options.find("delta_file_number");
	const bool file_number_enabled =
	    file_number_entry != options.custom_options.end() && file_number_entry->second.GetValue<bool>();
	if (file_number_enabled) {
		names.push_back("delta_file_number");
		return_types.push_back(LogicalType::UBIGINT);
	}
}
// Finalizes the bind for one file: after the base implementation has run, this adds the constant columns the
// delta reader is responsible for:
//  - a placeholder constant for the "delta_file_number" column when that custom option is enabled
//  - one constant per projected column that has a partition value recorded for this file
void DeltaMultiFileReader::FinalizeBind(const MultiFileOptions &file_options, const MultiFileReaderBindData &options,
                                        const string &filename, const vector<string> &local_names,
                                        const vector<LogicalType> &global_types, const vector<string> &global_names,
                                        const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
                                        ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) {
	MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names,
	                              global_column_ids, reader_data, context, global_state);

	// Handle custom delta option set in MultiFileOptions::custom_options
	auto file_number_opt = file_options.custom_options.find("delta_file_number");
	if (file_number_opt != file_options.custom_options.end()) {
		if (file_number_opt->second.GetValue<bool>()) {
			D_ASSERT(global_state);
			auto &delta_global_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
			D_ASSERT(delta_global_state.delta_file_number_idx != DConstants::INVALID_INDEX);

			// We add the constant column for the delta_file_number option
			// NOTE: we add a placeholder here, to demonstrate how we can also populate extra columns in the
			// FinalizeChunk
			reader_data.constant_map.emplace_back(delta_global_state.delta_file_number_idx, Value::UBIGINT(0));
		}
	}

	// Get the metadata for this file
	D_ASSERT(global_state->file_list);
	const auto &snapshot = dynamic_cast<const DeltaSnapshot &>(*global_state->file_list);
	auto &file_metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()];

	if (!file_metadata->partition_map.empty()) {
		for (idx_t i = 0; i < global_column_ids.size(); i++) {
			// `i` is the position in the projection, `col_id` the index into the global schema
			column_t col_id = global_column_ids[i];
			// Ids that do not refer to a schema column have no name/type to look up: skip them
			if (col_id >= global_names.size() || col_id >= global_types.size()) {
				continue;
			}
			auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]);
			if (col_partition_entry != file_metadata->partition_map.end()) {
				// Todo: use https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
				// The type has to be looked up with the same schema index as the name (col_id, not the
				// projection position i); otherwise a projected subset casts to another column's type.
				auto maybe_value = Value(col_partition_entry->second).DefaultCastAs(global_types[col_id]);
				reader_data.constant_map.emplace_back(i, maybe_value);
			}
		}
	}
}
// Builds the file list backing a delta_scan: a DeltaSnapshot constructed over the one table path passed in.
// Throws a BinderException when the number of paths is anything other than one.
unique_ptr<MultiFileList> DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector<string> &paths,
                                                               FileGlobOptions options) {
	const bool single_path = paths.size() == 1;
	if (!single_path) {
		throw BinderException("'delta_scan' only supports single path as input");
	}
	const auto &table_path = paths[0];
	return make_uniq<DeltaSnapshot>(context, table_path);
}
// Converts the raw delta KernelBoolSlice `dv` into a DuckDB SelectionVector for the current chunk.
// `row_id_column` holds, per chunk row, the BIGINT row id used to index into `dv`. A chunk row is selected when its
// row id lies at or beyond dv.len, or when the dv entry at that row id is true. The number of selected rows is
// written to `select_count`.
// TODO: this probably is slower than needed (we can do with less branches in the for loop for most cases)
static SelectionVector DuckSVFromDeltaSV(const ffi::KernelBoolSlice &dv, Vector row_id_column, idx_t count,
                                         idx_t &select_count) {
	D_ASSERT(row_id_column.GetType() == LogicalType::BIGINT);

	UnifiedVectorFormat row_id_format;
	row_id_column.ToUnifiedFormat(count, row_id_format);
	auto row_ids = UnifiedVectorFormat::GetData<int64_t>(row_id_format);

	SelectionVector result {count};
	idx_t kept = 0;
	for (idx_t chunk_row = 0; chunk_row < count; chunk_row++) {
		auto row_id = row_ids[row_id_format.sel->get_index(chunk_row)];

		// TODO: why are deletion vectors not spanning whole data?
		if (row_id >= dv.len || dv.ptr[row_id]) {
			result.data()[kept] = chunk_row;
			kept++;
		}
	}

	select_count = kept;
	return result;
}
// Records the result-chunk index of a column required by the delta reader in the matching member of this global
// state: "file_row_number" -> file_row_number_idx, "delta_file_number" -> delta_file_number_idx.
// Throws an IOException naming the column when it is not one of those two.
void DeltaMultiFileReaderGlobalState::SetColumnIdx(const string &column, idx_t idx) {
	if (column == "file_row_number") {
		file_row_number_idx = idx;
		return;
	}
	if (column == "delta_file_number") {
		delta_file_number_idx = idx;
		return;
	}
	// The column name must be passed along: the message contains a '%s' placeholder
	throw IOException("Unknown column '%s' found as required by the DeltaMultiFileReader", column);
}
// Creates the DeltaMultiFileReaderGlobalState for a scan.
// The delta reader always requires the file_row_number column, and additionally the delta_file_number column when
// that option is enabled. Each required column is resolved to an index in the result chunk: its position in the
// projection when the query already selects it, otherwise a fresh "extra column" slot appended after all projected
// columns.
unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalState(
    duckdb::ClientContext &context, const duckdb::MultiFileOptions &file_options,
    const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list,
    const vector<duckdb::LogicalType> &global_types, const vector<std::string> &global_names,
    const vector<duckdb::column_t> &global_column_ids) {
	const idx_t projected_count = global_column_ids.size();

	// Index the projection by column name -> position in the projection (the row id column carries no name)
	case_insensitive_map_t<idx_t> projection_by_name;
	for (idx_t proj_idx = 0; proj_idx < projected_count; proj_idx++) {
		const auto column_id = global_column_ids[proj_idx];
		if (!IsRowIdColumnId(column_id)) {
			projection_by_name.insert({global_names[column_id], proj_idx});
		}
	}

	// The columns the delta reader itself depends on
	// TODO: only add file_row_number column if there are deletes
	case_insensitive_map_t<LogicalType> columns_to_map = {
	    {"file_row_number", LogicalType::BIGINT},
	};
	auto file_number_option = file_options.custom_options.find("delta_file_number");
	if (file_number_option != file_options.custom_options.end() && file_number_option->second.GetValue<bool>()) {
		columns_to_map.insert({"delta_file_number", LogicalType::UBIGINT});
	}

	// Resolve every required column to a chunk index
	vector<LogicalType> extra_columns;
	vector<pair<string, idx_t>> mapped_columns;
	for (const auto &required : columns_to_map) {
		const auto &column_name = required.first;
		auto projected = projection_by_name.find(column_name);

		idx_t chunk_idx;
		if (projected == projection_by_name.end()) {
			// Not selected by the query: append it as an extra column behind the projected ones, and make sure the
			// result DataChunk gets a vector of the right type for it
			chunk_idx = projected_count + extra_columns.size();
			extra_columns.push_back(required.second);
		} else {
			// Already part of the projection: reuse its position
			chunk_idx = projected->second;
		}
		mapped_columns.push_back({column_name, chunk_idx});
	}

	// Store the resolved indexes in the global state so the reader can find the columns when processing chunks
	auto global_state = make_uniq<DeltaMultiFileReaderGlobalState>(extra_columns, &file_list);
	for (const auto &mapping : mapped_columns) {
		global_state->SetColumnIdx(mapping.first, mapping.second);
	}
	return std::move(global_state);
}
void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names,
const vector<column_t> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
// First call the base implementation to do most mapping
MultiFileReader::CreateNameMapping(file_name, local_types, local_names, global_types, global_names,
global_column_ids, reader_data, initial_file, global_state);
// Then we handle delta specific mapping
D_ASSERT(global_state);
auto &delta_global_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
// Check if the file_row_number column is an "extra_column" which is not part of the projection
if (delta_global_state.file_row_number_idx >= global_column_ids.size()) {
D_ASSERT(delta_global_state.file_row_number_idx != DConstants::INVALID_INDEX);
// Build the name map
case_insensitive_map_t<idx_t> name_map;
for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) {
name_map[local_names[col_idx]] = col_idx;
}
// Lookup the required column in the local map
auto entry = name_map.find("file_row_number");
if (entry == name_map.end()) {
throw IOException("Failed to find the file_row_number column");
}
// Register the column to be scanned from this file
reader_data.column_ids.push_back(entry->second);
reader_data.column_mapping.push_back(delta_global_state.file_row_number_idx);
}
// This may have changed: update it
reader_data.empty_columns = reader_data.column_ids.empty();
}
// Post-processes a chunk read from one file of the delta table:
//  1. runs the base-class FinalizeChunk
//  2. when the file's metadata carries a selection vector, slices the chunk down to the selected rows, using the
//     file_row_number column to look rows up in that selection vector
//  3. when the delta_file_number column is mapped, fills it by binding and executing a demo expression
void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data,
                                         const MultiFileReaderData &reader_data, DataChunk &chunk,
                                         optional_ptr<MultiFileReaderGlobalState> global_state) {
	MultiFileReader::FinalizeChunk(context, bind_data, reader_data, chunk, global_state);

	D_ASSERT(global_state);
	auto &delta_state = global_state->Cast<DeltaMultiFileReaderGlobalState>();
	D_ASSERT(delta_state.file_list);

	// Look up the metadata of the file this chunk was read from
	const auto &snapshot = dynamic_cast<const DeltaSnapshot &>(*global_state->file_list);
	auto &file_metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()];

	if (file_metadata->selection_vector.ptr && chunk.size() != 0) {
		D_ASSERT(delta_state.file_row_number_idx != DConstants::INVALID_INDEX);
		auto &row_number_vector = chunk.data[delta_state.file_row_number_idx];

		// Turn the raw delta selection vector into a DuckDB selection vector and apply it
		idx_t surviving_rows;
		auto selection =
		    DuckSVFromDeltaSV(file_metadata->selection_vector, row_number_vector, chunk.size(), surviving_rows);
		chunk.Slice(selection, surviving_rows);
	}

	// Note: the remainder is a demo of using DuckDB's Binder to create expression-based generated columns
	if (delta_state.delta_file_number_idx == DConstants::INVALID_INDEX) {
		return;
	}

	//! Build the dummy expression: the constants 0 and 7 passed to the "+" operator
	vector<unique_ptr<ParsedExpression>> operands;
	operands.push_back(make_uniq<ConstantExpression>(Value::UBIGINT(0)));
	operands.push_back(make_uniq<ConstantExpression>(Value::UBIGINT(7)));
	unique_ptr<ParsedExpression> sum_expr =
	    make_uniq<FunctionExpression>("+", std::move(operands), nullptr, nullptr, false, true);

	//! Bind the dummy expression
	auto binder = Binder::CreateBinder(context);
	ExpressionBinder expr_binder(*binder, context);
	auto bound_sum = expr_binder.Bind(sum_expr, nullptr);

	//! Execute the bound expression directly into the delta_file_number column of the output chunk
	ExpressionExecutor executor(context);
	executor.AddExpression(*bound_sum);
	executor.ExecuteExpression(chunk.data[delta_state.delta_file_number_idx]);
}
// Parses a delta_scan option. The options "delta_file_number" and "file_row_number" (matched on the lowercased key)
// are captured into options.custom_options under the lowercased name; every other option is forwarded to
// MultiFileReader::ParseOption.
bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, MultiFileOptions &options,
                                       ClientContext &context) {
	const auto option_name = StringUtil::Lower(key);

	const bool captured_by_delta = option_name == "delta_file_number" || option_name == "file_row_number";
	if (!captured_by_delta) {
		return MultiFileReader::ParseOption(key, val, options, context);
	}

	options.custom_options[option_name] = val;
	return true;
}
//
// DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaSnapshot & delta_snapshot):
// current_snapshot(delta_snapshot){
//
//}
// Builds the delta_scan table function set.
// The set is a copy of the parquet_scan functions fetched from the catalog, in which every overload gets the
// DeltaMultiFileReader injected as its multi file reader, turning it into a Delta-based multi file read.
TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) {
	auto &parquet_scan = ExtensionUtil::GetTableFunction(instance, "parquet_scan");

	auto delta_scan_set = parquet_scan.functions;
	delta_scan_set.name = "delta_scan";

	for (auto &overload : delta_scan_set.functions) {
		overload.name = "delta_scan";

		// The DeltaMultiFileReader drives the reads
		overload.get_multi_file_reader = DeltaMultiFileReader::CreateInstance;

		// These callbacks are unset: they are either broken or very inefficient for delta tables
		// TODO: implement/fix these
		overload.serialize = nullptr;
		overload.deserialize = nullptr;
		overload.statistics = nullptr;
		overload.table_scan_progress = nullptr;
		overload.cardinality = nullptr;
		overload.get_bind_info = nullptr;

		// The schema parameter is just confusing here
		overload.named_parameters.erase("schema");

		// Demonstration of a generated column based on information from DeltaSnapshot
		overload.named_parameters["delta_file_number"] = LogicalType::BOOLEAN;
	}

	return delta_scan_set;
}
} // namespace duckdb

View File

@@ -0,0 +1,13 @@
#pragma once
#include "duckdb.hpp"
namespace duckdb {
// The delta extension class: derives from DuckDB's Extension and overrides its Load and Name hooks.
// The implementations live outside this header.
class DeltaExtension : public Extension {
public:
// Override of Extension::Load; receives the ExtensionLoader for this extension.
// NOTE(review): presumably registers the delta table functions - confirm in the implementation file.
void Load(ExtensionLoader &loader) override;
// Override of Extension::Name; returns the name of this extension as a string.
std::string Name() override;
};
} // namespace duckdb

View File

@@ -0,0 +1,22 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// delta_functions.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
namespace duckdb {
// Static factory for the table functions provided by the delta extension.
class DeltaFunctions {
public:
// Returns the table function sets of the extension for the given database instance.
// NOTE(review): presumably this contains the delta_scan set built by GetDeltaScanFunction - confirm in the
// implementation file.
static vector<TableFunctionSet> GetTableFunctions(DatabaseInstance &instance);
private:
// Builds the "delta_scan" table function set: a copy of the parquet_scan functions with the
// DeltaMultiFileReader injected as multi file reader.
static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance);
};
} // namespace duckdb

View File

@@ -0,0 +1,155 @@
#pragma once
#include "delta_kernel_ffi.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/common/enum_util.hpp"
#include <iostream>
// TODO: clean up this file as we go
namespace duckdb {
// SchemaVisitor is used to parse the schema of a Delta table from the Kernel.
// The static Visit* / MakeFieldList functions all take the visitor state as their first argument; field lists
// that are being built are tracked by id in inflight_lists.
// NOTE(review): the static functions are presumably handed to the kernel as callbacks - confirm in the
// implementation file.
class SchemaVisitor {
public:
// A list of (name, type) pairs
using FieldList = child_list_t<LogicalType>;
// Visits the schema of the given kernel snapshot and returns it as a FieldList
static unique_ptr<FieldList> VisitSnapshotSchema(ffi::SharedSnapshot *snapshot);
private:
// Field lists that are under construction, keyed by list id
unordered_map<uintptr_t, unique_ptr<FieldList>> inflight_lists;
// Counter for list ids; starts at 1.
// NOTE(review): presumably the id handed to the next created list - confirm in MakeFieldListImpl.
uintptr_t next_id = 1;
// Signature that simple-type visitors are exposed as: the visitor state is passed as an untyped pointer
typedef void(SimpleTypeVisitorFunction)(void *, uintptr_t, ffi::KernelStringSlice);
// Returns the visitor for the simple type TypeId, cast to the untyped SimpleTypeVisitorFunction signature
template <LogicalTypeId TypeId>
static SimpleTypeVisitorFunction *VisitSimpleType() {
return (SimpleTypeVisitorFunction *)&VisitSimpleTypeImpl<TypeId>;
}
// Appends a field called `name` of simple type TypeId to the field list with id sibling_list_id
template <LogicalTypeId TypeId>
static void VisitSimpleTypeImpl(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name) {
state->AppendToList(sibling_list_id, name, TypeId);
}
// Visitors for the parameterized and nested types; each receives the id of the sibling list the field belongs to,
// the nested ones additionally the id of the list holding their children (implementations not in this header)
static void VisitDecimal(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
uint8_t precision, uint8_t scale);
static uintptr_t MakeFieldList(SchemaVisitor *state, uintptr_t capacity_hint);
static void VisitStruct(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
uintptr_t child_list_id);
static void VisitArray(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
bool contains_null, uintptr_t child_list_id);
static void VisitMap(SchemaVisitor *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name,
bool contains_null, uintptr_t child_list_id);
// Non-static helpers operating on inflight_lists (implementations not in this header)
uintptr_t MakeFieldListImpl(uintptr_t capacity_hint);
void AppendToList(uintptr_t id, ffi::KernelStringSlice name, LogicalType &&child);
unique_ptr<FieldList> TakeFieldList(uintptr_t id);
};
// Allocator for errors that the kernel might throw.
// Extends ffi::EngineError with the error message, so an error reported by the kernel can be rethrown as a DuckDB
// exception (KernelUtils::UnpackResult casts the kernel's error pointer to this type and calls Throw on it).
struct DuckDBEngineError : ffi::EngineError {
// Allocate a DuckDBEngineError, function ptr passed to kernel for error allocation
static ffi::EngineError *AllocateError(ffi::KernelError etype, ffi::KernelStringSlice msg);
// Convert a kernel error enum to a string
static string KernelErrorEnumToString(ffi::KernelError err);
// Throw the error as an IOException; never returns. `from_info` describes where the error was hit
[[noreturn]] void Throw(string from_info);
// The error message from Kernel
string error_message;
};
// RAII wrapper that returns ownership of a kernel pointer to kernel when it goes out of
// scope. Similar to std::unique_ptr. but does not define operator->() and does not require the
// kernel type to be complete.
//
// The wrapper stores the pointer together with the function that releases it; the destructor calls that function
// when both are set. Instances are movable but not copyable.
template <typename KernelType>
struct UniqueKernelPointer {
	// Creates an empty wrapper that owns nothing
	UniqueKernelPointer() : ptr(nullptr), free(nullptr) {
	}

	// Takes ownership of a pointer with associated deleter.
	UniqueKernelPointer(KernelType *ptr, void (*free)(KernelType *)) : ptr(ptr), free(free) {
	}

	// movable but not copyable
	// The deleter has to move along with the pointer: without it the new owner would never release the pointer
	UniqueKernelPointer(UniqueKernelPointer &&other) noexcept : ptr(other.ptr), free(other.free) {
		other.ptr = nullptr;
		other.free = nullptr;
	}

	// Move assignment swaps the contents: whatever this wrapper owned before is released when `other` is destroyed
	UniqueKernelPointer &operator=(UniqueKernelPointer &&other) noexcept {
		std::swap(ptr, other.ptr);
		std::swap(free, other.free);
		return *this;
	}

	UniqueKernelPointer(const UniqueKernelPointer &) = delete;
	UniqueKernelPointer &operator=(const UniqueKernelPointer &) = delete;

	// Hands the pointer back through the deleter, if this wrapper owns one
	~UniqueKernelPointer() {
		if (ptr && free) {
			free(ptr);
		}
	}

	// Returns the raw pointer without giving up ownership
	KernelType *get() const {
		return ptr;
	}

private:
	KernelType *ptr;
	void (*free)(KernelType *) = nullptr;
};
// Syntactic sugar around the different kernel types: a UniqueKernelPointer whose deleter is fixed at compile time
// through the DeleteFunction template argument, so it can be constructed from the raw pointer alone.
template <typename KernelType, void (*DeleteFunction)(KernelType *)>
struct TemplatedUniqueKernelPointer : public UniqueKernelPointer<KernelType> {
	// Empty wrapper, owns nothing
	TemplatedUniqueKernelPointer() {
	}

	// Takes ownership of ptr; it is released through DeleteFunction
	TemplatedUniqueKernelPointer(KernelType *ptr) : UniqueKernelPointer<KernelType>(ptr, DeleteFunction) {
	}
};
// Owning wrappers for the kernel handle types, each paired with the kernel function that releases it
using KernelSnapshot = TemplatedUniqueKernelPointer<ffi::SharedSnapshot, ffi::drop_snapshot>;
using KernelExternEngine = TemplatedUniqueKernelPointer<ffi::SharedExternEngine, ffi::drop_engine>;
using KernelScan = TemplatedUniqueKernelPointer<ffi::SharedScan, ffi::drop_scan>;
using KernelGlobalScanState = TemplatedUniqueKernelPointer<ffi::SharedGlobalScanState, ffi::drop_global_scan_state>;
using KernelScanDataIterator = TemplatedUniqueKernelPointer<ffi::SharedScanDataIterator, ffi::kernel_scan_data_free>;
struct KernelUtils {
static ffi::KernelStringSlice ToDeltaString(const string &str);
static string FromDeltaString(const struct ffi::KernelStringSlice slice);
static vector<bool> FromDeltaBoolSlice(const struct ffi::KernelBoolSlice slice);
// TODO: all kernel results need to be unpacked, not doing so will result in an error. This should be cleaned up
template <class T>
static T UnpackResult(ffi::ExternResult<T> result, const string &from_where) {
if (result.tag == ffi::ExternResult<T>::Tag::Err) {
if (result.err._0) {
auto error_cast = static_cast<DuckDBEngineError *>(result.err._0);
error_cast->Throw(from_where);
} else {
throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error, but error was nullptr",
from_where.c_str());
}
} else if (result.tag == ffi::ExternResult<T>::Tag::Ok) {
return result.ok._0;
}
throw IOException("Invalid error ExternResult tag found!");
}
};
// Engine predicate built from DuckDB table filters: derives from ffi::EnginePredicate and keeps the filters per
// column name.
// NOTE(review): presumably the Visit* functions translate the stored filters into kernel expressions through the
// KernelExpressionVisitorState - confirm in the implementation file.
class PredicateVisitor : public ffi::EnginePredicate {
public:
// Constructs the predicate from the column names and the (optional) set of table filters
PredicateVisitor(const vector<string> &column_names, optional_ptr<TableFilterSet> filters);
private:
// Column name -> filter on that column (non-owning pointers)
unordered_map<string, TableFilter *> column_filters;
// Static entry point taking the predicate itself plus the kernel's visitor state
static uintptr_t VisitPredicate(PredicateVisitor *predicate, ffi::KernelExpressionVisitorState *state);
// Visitors for the individual filter kinds of the column `col_name`; each returns a uintptr_t
uintptr_t VisitConstantFilter(const string &col_name, const ConstantFilter &filter,
ffi::KernelExpressionVisitorState *state);
uintptr_t VisitAndFilter(const string &col_name, const ConjunctionAndFilter &filter,
ffi::KernelExpressionVisitorState *state);
uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState *state);
};
} // namespace duckdb

View File

@@ -0,0 +1,145 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// functions/delta_scan.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "delta_utils.hpp"
#include "duckdb/common/multi_file/multi_file_reader.hpp"
namespace duckdb {
struct DeltaFileMetaData {
DeltaFileMetaData() {};
// No copying pls
DeltaFileMetaData(const DeltaFileMetaData &) = delete;
DeltaFileMetaData &operator=(const DeltaFileMetaData &) = delete;
~DeltaFileMetaData() {
if (selection_vector.ptr) {
ffi::drop_bool_slice(selection_vector);
}
}
idx_t delta_snapshot_version = DConstants::INVALID_INDEX;
idx_t file_number = DConstants::INVALID_INDEX;
ffi::KernelBoolSlice selection_vector = {nullptr, 0};
case_insensitive_map_t<string> partition_map;
};
//! The DeltaSnapshot implements the MultiFileList API to allow injecting it into the regular DuckDB parquet scan
//! It holds the delta kernel handles for the table, plus the files and per-file metadata resolved so far.
struct DeltaSnapshot : public MultiFileList {
//! Creates the snapshot for the delta table at `path`
DeltaSnapshot(ClientContext &context, const string &path);
string GetPath();
//! Path conversion helpers (implementations not in this header)
static string ToDuckDBPath(const string &raw_path);
static string ToDeltaPath(const string &raw_path);
//! MultiFileList API
public:
//! NOTE: unlike the functions below this one is not marked override
void Bind(vector<LogicalType> &return_types, vector<string> &names);
unique_ptr<MultiFileList> ComplexFilterPushdown(ClientContext &context, const MultiFileOptions &options,
LogicalGet &get, vector<unique_ptr<Expression>> &filters) override;
vector<string> GetAllFiles() override;
FileExpandResult GetExpandResult() override;
idx_t GetTotalFileCount() override;
protected:
//! Get the i-th expanded file
string GetFile(idx_t i) override;
protected:
// TODO: How to guarantee we only call this after the filter pushdown?
void InitializeFiles();
//! Unpacks a kernel result through KernelUtils::UnpackResult, using an error context that names paths[0]
template <class T>
T TryUnpackKernelResult(ffi::ExternResult<T> result) {
return KernelUtils::UnpackResult<T>(
result, StringUtil::Format("While trying to read from delta table: '%s'", paths[0]));
}
// TODO: change back to protected
public:
//! NOTE(review): presumably the version of the delta table this snapshot reads - confirm
idx_t version;
//! Delta Kernel Structures
KernelSnapshot snapshot;
KernelExternEngine extern_engine;
KernelScan scan;
KernelGlobalScanState global_state;
KernelScanDataIterator scan_data_iterator;
//! Names
vector<string> names;
//! Metadata map for files
//! (indexed with the file_list_idx of the file by DeltaMultiFileReader)
vector<unique_ptr<DeltaFileMetaData>> metadata;
//! Current file list resolution state
bool initialized = false;
bool files_exhausted = false;
vector<string> resolved_files;
TableFilterSet table_filters;
ClientContext &context;
};
//! Global state of the DeltaMultiFileReader: extends the MultiFileReaderGlobalState with the result-chunk indexes
//! of the columns the delta reader needs. Both indexes are INVALID_INDEX until set through SetColumnIdx.
struct DeltaMultiFileReaderGlobalState : public MultiFileReaderGlobalState {
//! Forwards the extra column types and the file list to the base class
DeltaMultiFileReaderGlobalState(vector<LogicalType> extra_columns_p, optional_ptr<const MultiFileList> file_list_p)
: MultiFileReaderGlobalState(extra_columns_p, file_list_p) {
}
//! The idx of the file number column in the result chunk
idx_t delta_file_number_idx = DConstants::INVALID_INDEX;
//! The idx of the file_row_number column in the result chunk
idx_t file_row_number_idx = DConstants::INVALID_INDEX;
//! Stores idx as the index of the named column ("file_row_number" or "delta_file_number"); throws an
//! IOException for any other column name
void SetColumnIdx(const string &column, idx_t idx);
};
//! MultiFileReader specialization that drives the reads of delta_scan: it is injected into the copied parquet_scan
//! functions through get_multi_file_reader.
struct DeltaMultiFileReader : public MultiFileReader {
//! Factory function, installed as the get_multi_file_reader callback of the delta_scan functions
static unique_ptr<MultiFileReader> CreateInstance();
//! Return a DeltaSnapshot
//! Only a single path is supported; any other number of paths raises a BinderException
unique_ptr<MultiFileList> CreateFileList(ClientContext &context, const vector<string> &paths,
FileGlobOptions options) override;
//! Override the regular parquet bind using the MultiFileReader Bind. The bind from these are what DuckDB's file
//! readers will try read
bool Bind(MultiFileOptions &options, MultiFileList &files, vector<LogicalType> &return_types, vector<string> &names,
MultiFileReaderBindData &bind_data) override;
//! Override the Options bind
//! Adds the delta_file_number column (UBIGINT) to the result when that option is enabled
void BindOptions(MultiFileOptions &options, MultiFileList &files, vector<LogicalType> &return_types,
vector<string> &names, MultiFileReaderBindData &bind_data) override;
//! Runs the base name mapping, then registers the file's file_row_number column to be scanned when it is needed
//! as an extra column outside of the projection
void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) override;
//! Creates the DeltaMultiFileReaderGlobalState, resolving the columns required by the delta reader to indexes in
//! the result chunk
unique_ptr<MultiFileReaderGlobalState>
InitializeGlobalState(ClientContext &context, const MultiFileOptions &file_options,
const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids) override;
//! Runs the base FinalizeBind, then registers the per-file constants: the delta_file_number placeholder and the
//! partition values of the file
void FinalizeBind(const MultiFileOptions &file_options, const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) override;
//! Override the FinalizeChunk method
//! Applies the file's selection vector to the chunk and fills the delta_file_number column when it is mapped
void FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data,
const MultiFileReaderData &reader_data, DataChunk &chunk,
optional_ptr<MultiFileReaderGlobalState> global_state) override;
//! Override the ParseOption call to parse delta_scan specific options
bool ParseOption(const string &key, const Value &val, MultiFileOptions &options, ClientContext &context) override;
};
} // namespace duckdb

View File

@@ -0,0 +1,5 @@
{
"dependencies": [
"openssl"
]
}