should be it
This commit is contained in:
148
external/duckdb/extension/parquet/include/reader/variant/variant_binary_decoder.hpp
vendored
Normal file
148
external/duckdb/extension/parquet/include/reader/variant/variant_binary_decoder.hpp
vendored
Normal file
@@ -0,0 +1,148 @@
|
||||
#pragma once
|
||||
|
||||
#include "duckdb/common/types/string_type.hpp"
|
||||
#include "duckdb/common/types/value.hpp"
|
||||
#include "reader/variant/variant_value.hpp"
|
||||
|
||||
using namespace duckdb_yyjson;
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
//! ------------ Metadata ------------
|
||||
|
||||
//! Fields decoded from the header byte of a Variant metadata buffer (see FromHeaderByte).
//! Every field carries a default so that a default-constructed header never exposes
//! indeterminate values.
struct VariantMetadataHeader {
public:
	//! Build a header from a raw header byte (defined out of line)
	static VariantMetadataHeader FromHeaderByte(uint8_t byte);

public:
	//! The version of the protocol used (only '1' supported for now)
	//! 0 is only a placeholder meaning "not decoded yet"
	uint8_t version = 0;
	//! Number of bytes per dictionary size and offset field
	uint8_t offset_size = 0;
	//! Whether dictionary strings are sorted and unique
	bool sorted_strings = false;
};
|
||||
|
||||
struct VariantMetadata {
|
||||
public:
|
||||
explicit VariantMetadata(const string_t &metadata);
|
||||
|
||||
public:
|
||||
const string_t &metadata;
|
||||
|
||||
public:
|
||||
VariantMetadataHeader header;
|
||||
const_data_ptr_t offsets;
|
||||
const_data_ptr_t bytes;
|
||||
|
||||
//! The json object keys have to be null-terminated
|
||||
//! But we don't receive them null-terminated
|
||||
vector<string> strings;
|
||||
};
|
||||
|
||||
//! ------------ Value ------------
|
||||
|
||||
//! Basic type of a Variant value.
//! INVALID must stay last: VariantBasicTypeFromByte rejects every byte >= INVALID.
enum class VariantBasicType : uint8_t {
	PRIMITIVE = 0,
	SHORT_STRING = 1,
	OBJECT = 2,
	ARRAY = 3,
	//! Sentinel, one past the last supported basic type
	INVALID
};
|
||||
|
||||
//! Primitive type ids of a Variant value.
//! INVALID must stay last: VariantPrimitiveTypeFromByte rejects every byte >= INVALID.
enum class VariantPrimitiveType : uint8_t {
	//! Null and booleans
	NULL_TYPE = 0,
	BOOLEAN_TRUE = 1,
	BOOLEAN_FALSE = 2,

	//! Integers
	INT8 = 3,
	INT16 = 4,
	INT32 = 5,
	INT64 = 6,

	//! Double and decimals
	DOUBLE = 7,
	DECIMAL4 = 8,
	DECIMAL8 = 9,
	DECIMAL16 = 10,

	//! Date and microsecond timestamps
	DATE = 11,
	TIMESTAMP_MICROS = 12,
	TIMESTAMP_NTZ_MICROS = 13,

	//! Float, binary and string
	FLOAT = 14,
	BINARY = 15,
	STRING = 16,

	//! Time, nanosecond timestamps and UUID
	TIME_NTZ_MICROS = 17,
	TIMESTAMP_NANOS = 18,
	TIMESTAMP_NTZ_NANOS = 19,
	UUID = 20,

	//! Sentinel, one past the last supported primitive type
	INVALID
};
|
||||
|
||||
struct VariantValueMetadata {
|
||||
public:
|
||||
VariantValueMetadata() {
|
||||
}
|
||||
|
||||
public:
|
||||
static VariantValueMetadata FromHeaderByte(uint8_t byte);
|
||||
static VariantBasicType VariantBasicTypeFromByte(uint8_t byte) {
|
||||
if (byte >= static_cast<uint8_t>(VariantBasicType::INVALID)) {
|
||||
throw NotImplementedException("Variant BasicType (%d) is not supported", byte);
|
||||
}
|
||||
return static_cast<VariantBasicType>(byte);
|
||||
}
|
||||
|
||||
static VariantPrimitiveType VariantPrimitiveTypeFromByte(uint8_t byte) {
|
||||
if (byte >= static_cast<uint8_t>(VariantPrimitiveType::INVALID)) {
|
||||
throw NotImplementedException("Variant PrimitiveType (%d) is not supported", byte);
|
||||
}
|
||||
return static_cast<VariantPrimitiveType>(byte);
|
||||
}
|
||||
|
||||
public:
|
||||
VariantBasicType basic_type;
|
||||
|
||||
public:
|
||||
//! Primitive Type header
|
||||
VariantPrimitiveType primitive_type;
|
||||
|
||||
public:
|
||||
//! Short String header
|
||||
uint8_t string_size;
|
||||
|
||||
public:
|
||||
//! Object header | Array header
|
||||
|
||||
//! Size in bytes for each 'field_offset' entry
|
||||
uint32_t field_offset_size;
|
||||
//! Size in bytes for each 'field_id' entry
|
||||
uint32_t field_id_size;
|
||||
//! Whether the number of elements is encoded in 1 byte (false) or 4 bytes (true)
|
||||
bool is_large;
|
||||
};
|
||||
|
||||
struct VariantDecodeResult {
|
||||
public:
|
||||
VariantDecodeResult() = default;
|
||||
~VariantDecodeResult() {
|
||||
if (doc) {
|
||||
yyjson_mut_doc_free(doc);
|
||||
}
|
||||
if (data) {
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
yyjson_mut_doc *doc = nullptr;
|
||||
char *data = nullptr;
|
||||
};
|
||||
|
||||
class VariantBinaryDecoder {
|
||||
public:
|
||||
VariantBinaryDecoder() = delete;
|
||||
|
||||
public:
|
||||
static VariantValue Decode(const VariantMetadata &metadata, const_data_ptr_t data);
|
||||
|
||||
public:
|
||||
static VariantValue PrimitiveTypeDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
|
||||
static VariantValue ShortStringDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
|
||||
static VariantValue ObjectDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
|
||||
const_data_ptr_t data);
|
||||
static VariantValue ArrayDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
|
||||
const_data_ptr_t data);
|
||||
};
|
||||
|
||||
} // namespace duckdb
|
||||
24
external/duckdb/extension/parquet/include/reader/variant/variant_shredded_conversion.hpp
vendored
Normal file
24
external/duckdb/extension/parquet/include/reader/variant/variant_shredded_conversion.hpp
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "reader/variant/variant_value.hpp"
|
||||
#include "reader/variant/variant_binary_decoder.hpp"
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
class VariantShreddedConversion {
|
||||
public:
|
||||
VariantShreddedConversion() = delete;
|
||||
|
||||
public:
|
||||
static vector<VariantValue> Convert(Vector &metadata, Vector &group, idx_t offset, idx_t length, idx_t total_size,
|
||||
bool is_field);
|
||||
static vector<VariantValue> ConvertShreddedLeaf(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
|
||||
idx_t length, idx_t total_size, const bool is_field);
|
||||
static vector<VariantValue> ConvertShreddedArray(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
|
||||
idx_t length, idx_t total_size, const bool is_field);
|
||||
static vector<VariantValue> ConvertShreddedObject(Vector &metadata, Vector &value, Vector &typed_value,
|
||||
idx_t offset, idx_t length, idx_t total_size,
|
||||
const bool is_field);
|
||||
};
|
||||
|
||||
} // namespace duckdb
|
||||
54
external/duckdb/extension/parquet/include/reader/variant/variant_value.hpp
vendored
Normal file
54
external/duckdb/extension/parquet/include/reader/variant/variant_value.hpp
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
#pragma once
|
||||
|
||||
#include "duckdb/common/map.hpp"
|
||||
#include "duckdb/common/vector.hpp"
|
||||
#include "duckdb/common/types/value.hpp"
|
||||
|
||||
#include "yyjson.hpp"
|
||||
|
||||
using namespace duckdb_yyjson;
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
//! Kind of a VariantValue
enum class VariantValueType : uint8_t {
	PRIMITIVE,
	OBJECT,
	ARRAY,
	//! Kind of a default-constructed VariantValue
	MISSING
};
|
||||
|
||||
struct VariantValue {
|
||||
public:
|
||||
VariantValue() : value_type(VariantValueType::MISSING) {
|
||||
}
|
||||
explicit VariantValue(VariantValueType type) : value_type(type) {
|
||||
}
|
||||
explicit VariantValue(Value &&val) : value_type(VariantValueType::PRIMITIVE), primitive_value(std::move(val)) {
|
||||
}
|
||||
// Delete copy constructor and copy assignment operator
|
||||
VariantValue(const VariantValue &) = delete;
|
||||
VariantValue &operator=(const VariantValue &) = delete;
|
||||
|
||||
// Default move constructor and move assignment operator
|
||||
VariantValue(VariantValue &&) noexcept = default;
|
||||
VariantValue &operator=(VariantValue &&) noexcept = default;
|
||||
|
||||
public:
|
||||
bool IsNull() const {
|
||||
return value_type == VariantValueType::PRIMITIVE && primitive_value.IsNull();
|
||||
}
|
||||
bool IsMissing() const {
|
||||
return value_type == VariantValueType::MISSING;
|
||||
}
|
||||
|
||||
public:
|
||||
void AddChild(const string &key, VariantValue &&val);
|
||||
void AddItem(VariantValue &&val);
|
||||
|
||||
public:
|
||||
yyjson_mut_val *ToJSON(ClientContext &context, yyjson_mut_doc *doc) const;
|
||||
|
||||
public:
|
||||
VariantValueType value_type;
|
||||
//! FIXME: how can we get a deterministic child order for a partially shredded object?
|
||||
map<string, VariantValue> object_children;
|
||||
vector<VariantValue> array_items;
|
||||
Value primitive_value;
|
||||
};
|
||||
|
||||
} // namespace duckdb
|
||||
Reference in New Issue
Block a user