//===----------------------------------------------------------------------===// // DuckDB // // json_executors.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/execution/expression_executor.hpp" #include "json_functions.hpp" namespace duckdb { template using json_function_t = std::function; struct JSONExecutors { public: //! Single-argument JSON read function, i.e. json_type('[1, 2, 3]') template static void UnaryExecute(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &lstate = JSONFunctionLocalState::ResetAndGet(state); auto alc = lstate.json_allocator->GetYYAlc(); auto &inputs = args.data[0]; UnaryExecutor::ExecuteWithNulls( inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) { auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); return fun(doc->root, alc, result, mask, idx); }); JSONAllocator::AddBuffer(result, alc); } //! Two-argument JSON read function (with path query), i.e. json_type('[1, 2, 3]', '$[0]') template static void BinaryExecute(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &func_expr = state.expr.Cast(); const auto &info = func_expr.bind_info->Cast(); auto &lstate = JSONFunctionLocalState::ResetAndGet(state); auto alc = lstate.json_allocator->GetYYAlc(); auto &inputs = args.data[0]; if (info.constant) { // Constant path const char *ptr = info.ptr; const idx_t &len = info.len; if (info.path_type == JSONCommon::JSONPathType::REGULAR) { UnaryExecutor::ExecuteWithNulls( inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) { auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); auto val = JSONCommon::GetUnsafe(doc->root, ptr, len); if (SET_NULL_IF_NOT_FOUND && !val) { mask.SetInvalid(idx); return T {}; } else { return fun(val, alc, result, mask, idx); } }); } else { D_ASSERT(info.path_type == JSONCommon::JSONPathType::WILDCARD); vector vals; UnaryExecutor::Execute(inputs, result, args.size(), [&](string_t input) { vals.clear(); auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); JSONCommon::GetWildcardPath(doc->root, ptr, len, vals); auto current_size = ListVector::GetListSize(result); auto new_size = current_size + vals.size(); if (ListVector::GetListCapacity(result) < new_size) { ListVector::Reserve(result, new_size); } auto &child_entry = ListVector::GetEntry(result); auto child_vals = FlatVector::GetData(child_entry); auto &child_validity = FlatVector::Validity(child_entry); for (idx_t i = 0; i < vals.size(); i++) { auto &val = vals[i]; D_ASSERT(val != nullptr); // Wildcard extract shouldn't give back nullptrs child_vals[current_size + i] = fun(val, alc, result, child_validity, current_size + i); } ListVector::SetListSize(result, new_size); return list_entry_t {current_size, vals.size()}; }); } } else { // Columnref path D_ASSERT(info.path_type == JSONCommon::JSONPathType::REGULAR); unique_ptr casted_paths; if (args.data[1].GetType().id() == LogicalTypeId::VARCHAR) { casted_paths = make_uniq(args.data[1]); } else { casted_paths = make_uniq(LogicalTypeId::VARCHAR); VectorOperations::DefaultCast(args.data[1], *casted_paths, args.size(), true); } BinaryExecutor::ExecuteWithNulls( inputs, *casted_paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) { auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, alc); auto val = JSONCommon::Get(doc->root, path, args.data[1].GetType().IsIntegral()); if (SET_NULL_IF_NOT_FOUND && !val) { mask.SetInvalid(idx); return T {}; } else { return fun(val, alc, result, mask, idx); } }); } if (args.AllConstant()) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } JSONAllocator::AddBuffer(result, alc); } //! JSON read function with list of path queries, i.e. json_type('[1, 2, 3]', ['$[0]', '$[1]']) template static void ExecuteMany(DataChunk &args, ExpressionState &state, Vector &result, const json_function_t fun) { auto &func_expr = state.expr.Cast(); const auto &info = func_expr.bind_info->Cast(); auto &lstate = JSONFunctionLocalState::ResetAndGet(state); auto alc = lstate.json_allocator->GetYYAlc(); D_ASSERT(info.ptrs.size() == info.lens.size()); const auto count = args.size(); const idx_t num_paths = info.ptrs.size(); const idx_t list_size = count * num_paths; UnifiedVectorFormat input_data; auto &input_vector = args.data[0]; input_vector.ToUnifiedFormat(count, input_data); auto inputs = UnifiedVectorFormat::GetData(input_data); ListVector::Reserve(result, list_size); auto list_entries = FlatVector::GetData(result); auto &list_validity = FlatVector::Validity(result); auto &child = ListVector::GetEntry(result); auto child_data = FlatVector::GetData(child); auto &child_validity = FlatVector::Validity(child); idx_t offset = 0; yyjson_val *val; for (idx_t i = 0; i < count; i++) { auto idx = input_data.sel->get_index(i); if (!input_data.validity.RowIsValid(idx)) { list_validity.SetInvalid(i); continue; } auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG, alc); for (idx_t path_i = 0; path_i < num_paths; path_i++) { auto child_idx = offset + path_i; val = JSONCommon::GetUnsafe(doc->root, info.ptrs[path_i], info.lens[path_i]); if (SET_NULL_IF_NOT_FOUND && !val) { child_validity.SetInvalid(child_idx); } else { child_data[child_idx] = fun(val, alc, child, child_validity, child_idx); } } list_entries[i].offset = offset; list_entries[i].length = num_paths; offset += num_paths; } ListVector::SetListSize(result, offset); if (args.AllConstant()) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } JSONAllocator::AddBuffer(result, alc); } }; } // namespace duckdb