#include "duckdb/function/copy_function.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/expression/positional_reference_expression.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/common/helper.hpp" #include "json_functions.hpp" #include "json_scan.hpp" #include "json_transform.hpp" #include "json_multi_file_info.hpp" namespace duckdb { static void ThrowJSONCopyParameterException(const string &loption) { throw BinderException("COPY (FORMAT JSON) parameter %s expects a single argument.", loption); } static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) { static const unordered_set SUPPORTED_BASE_OPTIONS { "compression", "encoding", "use_tmp_file", "overwrite_or_ignore", "overwrite", "append", "filename_pattern", "file_extension", "per_thread_output", "file_size_bytes", // "partition_by", unsupported "return_files", "preserve_order", "return_stats", "write_partition_columns", "write_empty_file", "hive_file_pattern"}; auto stmt_copy = stmt.Copy(); auto © = stmt_copy->Cast(); auto &copied_info = *copy.info; // Parse the options, creating options for the CSV writer while doing so string date_format; string timestamp_format; // We insert the JSON file extension here so it works properly with PER_THREAD_OUTPUT/FILE_SIZE_BYTES etc. case_insensitive_map_t> csv_copy_options {{"file_extension", {"json"}}}; for (const auto &kv : copied_info.options) { const auto &loption = StringUtil::Lower(kv.first); if (loption == "dateformat" || loption == "date_format") { if (kv.second.size() != 1) { ThrowJSONCopyParameterException(loption); } date_format = StringValue::Get(kv.second.back()); } else if (loption == "timestampformat" || loption == "timestamp_format") { if (kv.second.size() != 1) { ThrowJSONCopyParameterException(loption); } timestamp_format = StringValue::Get(kv.second.back()); } else if (loption == "array") { if (kv.second.size() > 1) { ThrowJSONCopyParameterException(loption); } if (kv.second.empty() || BooleanValue::Get(kv.second.back().DefaultCastAs(LogicalTypeId::BOOLEAN))) { csv_copy_options["prefix"] = {"[\n\t"}; csv_copy_options["suffix"] = {"\n]\n"}; csv_copy_options["new_line"] = {",\n\t"}; } } else if (SUPPORTED_BASE_OPTIONS.find(loption) != SUPPORTED_BASE_OPTIONS.end()) { // We support these base options csv_copy_options.insert(kv); } else { throw BinderException("Unknown option for COPY ... TO ... (FORMAT JSON): \"%s\".", loption); } } // Bind the select statement of the original to resolve the types auto dummy_binder = Binder::CreateBinder(binder.context, &binder); auto bound_original = dummy_binder->Bind(*stmt.info->select_statement); // Create new SelectNode with the original SelectNode as a subquery in the FROM clause auto select_stmt = make_uniq(); select_stmt->node = std::move(copied_info.select_statement); auto subquery_ref = make_uniq(std::move(select_stmt)); copied_info.select_statement = make_uniq_base(); auto &select_node = copied_info.select_statement->Cast(); select_node.from_table = std::move(subquery_ref); // Create new select list vector> select_list; select_list.reserve(bound_original.types.size()); // strftime if the user specified a format (loop also gives columns a name, needed for struct_pack) // TODO: deal with date/timestamp within nested types vector> strftime_children; for (idx_t col_idx = 0; col_idx < bound_original.types.size(); col_idx++) { auto column = make_uniq_base(col_idx + 1); strftime_children = vector>(); const auto &type = bound_original.types[col_idx]; const auto &name = bound_original.names[col_idx]; if (!date_format.empty() && type == LogicalTypeId::DATE) { strftime_children.emplace_back(std::move(column)); strftime_children.emplace_back(make_uniq(date_format)); column = make_uniq("strftime", std::move(strftime_children)); } else if (!timestamp_format.empty() && type == LogicalTypeId::TIMESTAMP) { strftime_children.emplace_back(std::move(column)); strftime_children.emplace_back(make_uniq(timestamp_format)); column = make_uniq("strftime", std::move(strftime_children)); } column->SetAlias(name); select_list.emplace_back(std::move(column)); } // Now create the struct_pack/to_json to create a JSON object per row vector> struct_pack_child; struct_pack_child.emplace_back(make_uniq("struct_pack", std::move(select_list))); select_node.select_list.emplace_back(make_uniq("to_json", std::move(struct_pack_child))); // Now we can just use the CSV writer copied_info.format = "csv"; copied_info.options = std::move(csv_copy_options); copied_info.options["quote"] = {""}; copied_info.options["escape"] = {""}; copied_info.options["delimiter"] = {"\n"}; copied_info.options["header"] = {{0}}; return binder.Bind(*stmt_copy); } CopyFunction JSONFunctions::GetJSONCopyFunction() { CopyFunction function("json"); function.extension = "json"; function.plan = CopyToJSONPlan; function.copy_from_bind = MultiFileFunction::MultiFileBindCopy; function.copy_from_function = JSONFunctions::GetReadJSONTableFunction(make_shared_ptr( JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::RECORDS, false)); return function; } } // namespace duckdb