149 lines
4.0 KiB
C++
149 lines
4.0 KiB
C++
//===----------------------------------------------------------------------===//
|
|
// DuckDB
|
|
//
|
|
// json_scan.hpp
|
|
//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#pragma once
|
|
|
|
#include "json_reader.hpp"
|
|
#include "duckdb/common/multi_file/multi_file_reader.hpp"
|
|
#include "duckdb/common/mutex.hpp"
|
|
#include "duckdb/common/pair.hpp"
|
|
#include "duckdb/common/types/type_map.hpp"
|
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
|
#include "duckdb/function/table_function.hpp"
|
|
#include "json_enums.hpp"
|
|
#include "json_transform.hpp"
|
|
#include "json_reader_options.hpp"
|
|
|
|
namespace duckdb {
|
|
|
|
struct JSONScanData : public TableFunctionData {
|
|
public:
|
|
JSONScanData();
|
|
|
|
void InitializeFormats();
|
|
void InitializeFormats(bool auto_detect);
|
|
|
|
public:
|
|
//! JSON reader options
|
|
JSONReaderOptions options;
|
|
|
|
//! The set of keys to extract (case sensitive)
|
|
vector<string> key_names;
|
|
|
|
//! The date format map
|
|
unique_ptr<DateFormatMap> date_format_map;
|
|
//! Options when transforming the JSON to columnar data
|
|
JSONTransformOptions transform_options;
|
|
|
|
optional_idx max_threads;
|
|
optional_idx estimated_cardinality_per_file;
|
|
};
|
|
|
|
struct JSONScanInfo : public TableFunctionInfo {
|
|
public:
|
|
explicit JSONScanInfo(JSONScanType type_p = JSONScanType::INVALID, JSONFormat format_p = JSONFormat::AUTO_DETECT,
|
|
JSONRecordType record_type_p = JSONRecordType::AUTO_DETECT, bool auto_detect_p = false)
|
|
: type(type_p), format(format_p), record_type(record_type_p), auto_detect(auto_detect_p) {
|
|
}
|
|
|
|
JSONScanType type;
|
|
JSONFormat format;
|
|
JSONRecordType record_type;
|
|
bool auto_detect;
|
|
};
|
|
|
|
struct JSONScanGlobalState {
|
|
public:
|
|
JSONScanGlobalState(ClientContext &context, const MultiFileBindData &bind_data);
|
|
|
|
public:
|
|
//! Bound data
|
|
const MultiFileBindData &bind_data;
|
|
const JSONScanData &json_data;
|
|
//! Options when transforming the JSON to columnar data
|
|
JSONTransformOptions transform_options;
|
|
|
|
//! Column names that we're actually reading (after projection pushdown)
|
|
vector<string> names;
|
|
vector<column_t> column_ids;
|
|
vector<ColumnIndex> column_indices;
|
|
|
|
//! Buffer manager allocator
|
|
Allocator &allocator;
|
|
//! The current buffer capacity
|
|
idx_t buffer_capacity;
|
|
|
|
//! Current number of threads active
|
|
idx_t system_threads;
|
|
//! Whether we enable parallel scans (only if less files than threads)
|
|
bool enable_parallel_scans;
|
|
|
|
bool file_is_assigned = false;
|
|
bool initialized = false;
|
|
};
|
|
|
|
struct JSONScanLocalState {
|
|
public:
|
|
JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate);
|
|
|
|
public:
|
|
idx_t Read();
|
|
void AddTransformError(idx_t object_index, const string &error_message);
|
|
|
|
JSONReaderScanState &GetScanState() {
|
|
return scan_state;
|
|
}
|
|
|
|
const JSONReaderScanState &GetScanState() const {
|
|
return scan_state;
|
|
}
|
|
|
|
bool TryInitializeScan(JSONScanGlobalState &gstate, JSONReader &reader);
|
|
|
|
public:
|
|
//! Options when transforming the JSON to columnar data
|
|
JSONTransformOptions transform_options;
|
|
|
|
private:
|
|
void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining);
|
|
|
|
private:
|
|
//! Scan state
|
|
JSONReaderScanState scan_state;
|
|
};
|
|
|
|
struct JSONGlobalTableFunctionState : public GlobalTableFunctionState {
|
|
public:
|
|
JSONGlobalTableFunctionState(ClientContext &context, const MultiFileBindData &bind_data);
|
|
|
|
public:
|
|
JSONScanGlobalState state;
|
|
};
|
|
|
|
struct JSONLocalTableFunctionState : public LocalTableFunctionState {
|
|
public:
|
|
JSONLocalTableFunctionState(ClientContext &context, JSONScanGlobalState &gstate);
|
|
|
|
public:
|
|
JSONScanLocalState state;
|
|
};
|
|
|
|
struct JSONScan {
|
|
public:
|
|
static void AutoDetect(ClientContext &context, MultiFileBindData &bind_data, vector<LogicalType> &return_types,
|
|
vector<string> &names);
|
|
|
|
static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data,
|
|
const TableFunction &function);
|
|
static unique_ptr<FunctionData> Deserialize(Deserializer &deserializer, TableFunction &function);
|
|
|
|
static void TableFunctionDefaults(TableFunction &table_function);
|
|
};
|
|
|
|
} // namespace duckdb
|