//===----------------------------------------------------------------------===// // DuckDB // // json_scan.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "json_reader.hpp" #include "duckdb/common/multi_file/multi_file_reader.hpp" #include "duckdb/common/mutex.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/common/types/type_map.hpp" #include "duckdb/function/scalar/strftime_format.hpp" #include "duckdb/function/table_function.hpp" #include "json_enums.hpp" #include "json_transform.hpp" #include "json_reader_options.hpp" namespace duckdb { struct JSONScanData : public TableFunctionData { public: JSONScanData(); void InitializeFormats(); void InitializeFormats(bool auto_detect); public: //! JSON reader options JSONReaderOptions options; //! The set of keys to extract (case sensitive) vector key_names; //! The date format map unique_ptr date_format_map; //! Options when transforming the JSON to columnar data JSONTransformOptions transform_options; optional_idx max_threads; optional_idx estimated_cardinality_per_file; }; struct JSONScanInfo : public TableFunctionInfo { public: explicit JSONScanInfo(JSONScanType type_p = JSONScanType::INVALID, JSONFormat format_p = JSONFormat::AUTO_DETECT, JSONRecordType record_type_p = JSONRecordType::AUTO_DETECT, bool auto_detect_p = false) : type(type_p), format(format_p), record_type(record_type_p), auto_detect(auto_detect_p) { } JSONScanType type; JSONFormat format; JSONRecordType record_type; bool auto_detect; }; struct JSONScanGlobalState { public: JSONScanGlobalState(ClientContext &context, const MultiFileBindData &bind_data); public: //! Bound data const MultiFileBindData &bind_data; const JSONScanData &json_data; //! Options when transforming the JSON to columnar data JSONTransformOptions transform_options; //! Column names that we're actually reading (after projection pushdown) vector names; vector column_ids; vector column_indices; //! Buffer manager allocator Allocator &allocator; //! The current buffer capacity idx_t buffer_capacity; //! Current number of threads active idx_t system_threads; //! Whether we enable parallel scans (only if less files than threads) bool enable_parallel_scans; bool file_is_assigned = false; bool initialized = false; }; struct JSONScanLocalState { public: JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate); public: idx_t Read(); void AddTransformError(idx_t object_index, const string &error_message); JSONReaderScanState &GetScanState() { return scan_state; } const JSONReaderScanState &GetScanState() const { return scan_state; } bool TryInitializeScan(JSONScanGlobalState &gstate, JSONReader &reader); public: //! Options when transforming the JSON to columnar data JSONTransformOptions transform_options; private: void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining); private: //! Scan state JSONReaderScanState scan_state; }; struct JSONGlobalTableFunctionState : public GlobalTableFunctionState { public: JSONGlobalTableFunctionState(ClientContext &context, const MultiFileBindData &bind_data); public: JSONScanGlobalState state; }; struct JSONLocalTableFunctionState : public LocalTableFunctionState { public: JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate); public: JSONScanLocalState state; }; struct JSONScan { public: static void AutoDetect(ClientContext &context, MultiFileBindData &bind_data, vector &return_types, vector &names); static void Serialize(Serializer &serializer, const optional_ptr bind_data, const TableFunction &function); static unique_ptr Deserialize(Deserializer &deserializer, TableFunction &function); static void TableFunctionDefaults(TableFunction &table_function); }; } // namespace duckdb