//===----------------------------------------------------------------------===// // DuckDB // // json_reader_options.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "json_common.hpp" #include "json_enums.hpp" #include "duckdb/common/types/type_map.hpp" #include "duckdb/function/scalar/strftime_format.hpp" namespace duckdb { struct DateFormatMap { friend class MutableDateFormatMap; public: explicit DateFormatMap(type_id_map_t> candidate_formats_p) : candidate_formats(std::move(candidate_formats_p)) { } bool HasFormats(LogicalTypeId type) const { return HasFormats(candidate_formats, type); } const StrpTimeFormat &GetFormat(LogicalTypeId type) const { D_ASSERT(candidate_formats.find(type) != candidate_formats.end()); return candidate_formats.find(type)->second.back(); } public: static void AddFormat(type_id_map_t> &candidate_formats, LogicalTypeId type, const string &format_string) { auto &formats = candidate_formats[type]; formats.emplace_back(); formats.back().format_specifier = format_string; StrpTimeFormat::ParseFormatSpecifier(formats.back().format_specifier, formats.back()); } static bool HasFormats(const type_id_map_t> &candidate_formats, LogicalTypeId type) { return candidate_formats.find(type) != candidate_formats.end(); } private: type_id_map_t> candidate_formats; }; class MutableDateFormatMap { public: explicit MutableDateFormatMap(DateFormatMap &date_format_map) : date_format_map(date_format_map) { } bool HasFormats(LogicalTypeId type) { lock_guard lock(format_lock); return date_format_map.HasFormats(type); } idx_t NumberOfFormats(LogicalTypeId type) { lock_guard lock(format_lock); return date_format_map.candidate_formats.at(type).size(); } bool GetFormatAtIndex(LogicalTypeId type, idx_t index, StrpTimeFormat &format) { lock_guard lock(format_lock); auto &formats = date_format_map.candidate_formats.at(type); if (index >= formats.size()) { return false; } format = formats[index]; return true; } void ShrinkFormatsToSize(LogicalTypeId type, idx_t size) { lock_guard lock(format_lock); auto &formats = date_format_map.candidate_formats[type]; while (formats.size() > size) { formats.pop_back(); } } private: mutex format_lock; DateFormatMap &date_format_map; }; struct JSONReaderOptions { //! Scan type JSONScanType type = JSONScanType::READ_JSON; //! The format of the JSON JSONFormat format = JSONFormat::AUTO_DETECT; //! Whether record types in the JSON JSONRecordType record_type = JSONRecordType::AUTO_DETECT; //! Whether file is compressed or not, and if so which compression type FileCompressionType compression = FileCompressionType::AUTO_DETECT; //! Whether or not we should ignore malformed JSON (default to NULL) bool ignore_errors = false; //! Maximum JSON object size (defaults to 16MB minimum) idx_t maximum_object_size = 16777216; //! Whether we auto-detect a schema bool auto_detect = false; //! Sample size for detecting schema idx_t sample_size = idx_t(STANDARD_VECTOR_SIZE) * 10; //! Max depth we go to detect nested JSON schema (defaults to unlimited) idx_t max_depth = NumericLimits::Maximum(); //! We divide the number of appearances of each JSON field by the auto-detection sample size //! If the average over the fields of an object is less than this threshold, //! we default to the MAP type with value type of merged field types double field_appearance_threshold = 0.1; //! The maximum number of files we sample to sample sample_size rows idx_t maximum_sample_files = 32; //! Whether we auto-detect and convert JSON strings to integers bool convert_strings_to_integers = false; //! If a struct contains more fields than this threshold with at least 80% similar types, //! we infer it as MAP type idx_t map_inference_threshold = 200; //! User-provided list of names (in order) vector name_list; //! User-provided list of types (in order) vector sql_type_list; //! Forced date/timestamp formats string date_format; string timestamp_format; }; } // namespace duckdb