Files
email-tracker/external/duckdb/extension/json/include/json_scan.hpp
2025-10-24 19:21:19 -05:00

149 lines
4.0 KiB
C++

//===----------------------------------------------------------------------===//
// DuckDB
//
// json_scan.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "json_reader.hpp"
#include "duckdb/common/multi_file/multi_file_reader.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/pair.hpp"
#include "duckdb/common/types/type_map.hpp"
#include "duckdb/function/scalar/strftime_format.hpp"
#include "duckdb/function/table_function.hpp"
#include "json_enums.hpp"
#include "json_transform.hpp"
#include "json_reader_options.hpp"
namespace duckdb {
struct JSONScanData : public TableFunctionData {
public:
JSONScanData();
void InitializeFormats();
void InitializeFormats(bool auto_detect);
public:
//! JSON reader options
JSONReaderOptions options;
//! The set of keys to extract (case sensitive)
vector<string> key_names;
//! The date format map
unique_ptr<DateFormatMap> date_format_map;
//! Options when transforming the JSON to columnar data
JSONTransformOptions transform_options;
optional_idx max_threads;
optional_idx estimated_cardinality_per_file;
};
struct JSONScanInfo : public TableFunctionInfo {
public:
explicit JSONScanInfo(JSONScanType type_p = JSONScanType::INVALID, JSONFormat format_p = JSONFormat::AUTO_DETECT,
JSONRecordType record_type_p = JSONRecordType::AUTO_DETECT, bool auto_detect_p = false)
: type(type_p), format(format_p), record_type(record_type_p), auto_detect(auto_detect_p) {
}
JSONScanType type;
JSONFormat format;
JSONRecordType record_type;
bool auto_detect;
};
struct JSONScanGlobalState {
public:
JSONScanGlobalState(ClientContext &context, const MultiFileBindData &bind_data);
public:
//! Bound data
const MultiFileBindData &bind_data;
const JSONScanData &json_data;
//! Options when transforming the JSON to columnar data
JSONTransformOptions transform_options;
//! Column names that we're actually reading (after projection pushdown)
vector<string> names;
vector<column_t> column_ids;
vector<ColumnIndex> column_indices;
//! Buffer manager allocator
Allocator &allocator;
//! The current buffer capacity
idx_t buffer_capacity;
//! Current number of threads active
idx_t system_threads;
//! Whether we enable parallel scans (only if less files than threads)
bool enable_parallel_scans;
bool file_is_assigned = false;
bool initialized = false;
};
struct JSONScanLocalState {
public:
JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate);
public:
idx_t Read();
void AddTransformError(idx_t object_index, const string &error_message);
JSONReaderScanState &GetScanState() {
return scan_state;
}
const JSONReaderScanState &GetScanState() const {
return scan_state;
}
bool TryInitializeScan(JSONScanGlobalState &gstate, JSONReader &reader);
public:
//! Options when transforming the JSON to columnar data
JSONTransformOptions transform_options;
private:
void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining);
private:
//! Scan state
JSONReaderScanState scan_state;
};
struct JSONGlobalTableFunctionState : public GlobalTableFunctionState {
public:
JSONGlobalTableFunctionState(ClientContext &context, const MultiFileBindData &bind_data);
public:
JSONScanGlobalState state;
};
struct JSONLocalTableFunctionState : public LocalTableFunctionState {
public:
JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate);
public:
JSONScanLocalState state;
};
struct JSONScan {
public:
static void AutoDetect(ClientContext &context, MultiFileBindData &bind_data, vector<LogicalType> &return_types,
vector<string> &names);
static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data,
const TableFunction &function);
static unique_ptr<FunctionData> Deserialize(Deserializer &deserializer, TableFunction &function);
static void TableFunctionDefaults(TableFunction &table_function);
};
} // namespace duckdb