should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,148 @@
#pragma once
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/common/types/value.hpp"
#include "reader/variant/variant_value.hpp"
using namespace duckdb_yyjson;
namespace duckdb {
//! ------------ Metadata ------------
//! Decoded form of the header byte of a Variant metadata buffer.
//! Every field carries a default so that a default-constructed header never holds indeterminate values.
struct VariantMetadataHeader {
	//! Builds a header from the raw header byte (defined out of line)
	static VariantMetadataHeader FromHeaderByte(uint8_t byte);

	//! The version of the protocol used (only '1' supported for now); 0 until populated
	uint8_t version = 0;
	//! Number of bytes per dictionary size and offset field; 0 until populated
	uint8_t offset_size = 0;
	//! Whether dictionary strings are sorted and unique
	bool sorted_strings = false;
};
//! Metadata section of a Variant value, constructed from the raw metadata string.
struct VariantMetadata {
	//! Defined out of line
	explicit VariantMetadata(const string_t &metadata);

	//! Stored as a reference, not a copy: the referenced string_t has to outlive this object
	const string_t &metadata;
	//! Header of the metadata buffer
	VariantMetadataHeader header;
	//! NOTE(review): presumably the start of the offset table inside 'metadata' - confirm in the constructor
	const_data_ptr_t offsets;
	//! NOTE(review): presumably the start of the dictionary string bytes inside 'metadata' - confirm in the constructor
	const_data_ptr_t bytes;
	//! Owned copies of the dictionary strings: json object keys have to be null-terminated,
	//! and the strings we receive are not
	vector<string> strings;
};
//! ------------ Value ------------
//! Basic type tag of a Variant value
enum class VariantBasicType : uint8_t {
	PRIMITIVE = 0,
	SHORT_STRING = 1,
	OBJECT = 2,
	ARRAY = 3,
	//! Sentinel, one past the last valid tag
	INVALID
};
//! Type id of a primitive Variant value.
//! The numeric values are explicit; INVALID is the sentinel one past the last valid id.
enum class VariantPrimitiveType : uint8_t {
	//! Null and booleans
	NULL_TYPE = 0,
	BOOLEAN_TRUE = 1,
	BOOLEAN_FALSE = 2,

	//! Integers
	INT8 = 3,
	INT16 = 4,
	INT32 = 5,
	INT64 = 6,

	//! Floating point and decimals
	DOUBLE = 7,
	DECIMAL4 = 8,
	DECIMAL8 = 9,
	DECIMAL16 = 10,

	//! Date and microsecond timestamps
	DATE = 11,
	TIMESTAMP_MICROS = 12,
	TIMESTAMP_NTZ_MICROS = 13,

	FLOAT = 14,

	//! Binary and string payloads
	BINARY = 15,
	STRING = 16,

	//! Time and nanosecond timestamps
	TIME_NTZ_MICROS = 17,
	TIMESTAMP_NANOS = 18,
	TIMESTAMP_NTZ_NANOS = 19,

	UUID = 20,

	//! Sentinel, must stay last
	INVALID
};
//! Decoded form of the header byte of a Variant value.
//! The fields are grouped per basic type (primitive / short string / object or array).
//! Every field carries a default, so the groups that a given header does not populate still hold
//! well-defined values instead of indeterminate ones.
struct VariantValueMetadata {
	VariantValueMetadata() {
	}

	//! Builds the metadata from the raw value header byte (defined out of line)
	static VariantValueMetadata FromHeaderByte(uint8_t byte);

	//! Converts a raw byte to a VariantBasicType.
	//! Throws NotImplementedException if the byte is not below the INVALID sentinel.
	static VariantBasicType VariantBasicTypeFromByte(uint8_t byte) {
		if (byte >= static_cast<uint8_t>(VariantBasicType::INVALID)) {
			throw NotImplementedException("Variant BasicType (%d) is not supported", byte);
		}
		return static_cast<VariantBasicType>(byte);
	}

	//! Converts a raw byte to a VariantPrimitiveType.
	//! Throws NotImplementedException if the byte is not below the INVALID sentinel.
	static VariantPrimitiveType VariantPrimitiveTypeFromByte(uint8_t byte) {
		if (byte >= static_cast<uint8_t>(VariantPrimitiveType::INVALID)) {
			throw NotImplementedException("Variant PrimitiveType (%d) is not supported", byte);
		}
		return static_cast<VariantPrimitiveType>(byte);
	}

	VariantBasicType basic_type = VariantBasicType::INVALID;

	//! Primitive Type header
	VariantPrimitiveType primitive_type = VariantPrimitiveType::INVALID;

	//! Short String header
	uint8_t string_size = 0;

	//! Object header | Array header
	//! Size in bytes for each 'field_offset' entry
	uint32_t field_offset_size = 0;
	//! Size in bytes for each 'field_id' entry
	uint32_t field_id_size = 0;
	//! Whether the number of elements is encoded in 1 byte (false) or 4 bytes (true)
	bool is_large = false;
};
struct VariantDecodeResult {
public:
VariantDecodeResult() = default;
~VariantDecodeResult() {
if (doc) {
yyjson_mut_doc_free(doc);
}
if (data) {
free(data);
}
}
public:
yyjson_mut_doc *doc = nullptr;
char *data = nullptr;
};
//! Collection of static routines that turn binary Variant data into a VariantValue.
//! Holds no state and cannot be instantiated (the default constructor is deleted).
class VariantBinaryDecoder {
public:
	VariantBinaryDecoder() = delete;

	//! Decodes the value found at 'data', using 'metadata' for the dictionary
	static VariantValue Decode(const VariantMetadata &metadata, const_data_ptr_t data);

	//! Decoders per basic type; 'value_metadata' is the already decoded value header.
	//! NOTE(review): presumably these are dispatched to from Decode - confirm in the implementation
	static VariantValue PrimitiveTypeDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
	static VariantValue ShortStringDecode(const VariantValueMetadata &value_metadata, const_data_ptr_t data);
	static VariantValue ObjectDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
	                                 const_data_ptr_t data);
	static VariantValue ArrayDecode(const VariantMetadata &metadata, const VariantValueMetadata &value_metadata,
	                                const_data_ptr_t data);
};
} // namespace duckdb

View File

@@ -0,0 +1,24 @@
#pragma once
#include "reader/variant/variant_value.hpp"
#include "reader/variant/variant_binary_decoder.hpp"
namespace duckdb {
//! Collection of static routines that convert shredded Variant vectors into VariantValue objects.
//! Holds no state and cannot be instantiated (the default constructor is deleted).
//! All declarations take 'is_field' as a plain 'bool': top-level const on a by-value parameter is not part
//! of the function type, so definitions that spell it 'const bool' still match.
class VariantShreddedConversion {
public:
	VariantShreddedConversion() = delete;

	//! Converts the range [offset, offset + length) of 'group'.
	//! NOTE(review): the exact meaning of 'total_size' and 'is_field' is not visible here - confirm in the
	//! implementation
	static vector<VariantValue> Convert(Vector &metadata, Vector &group, idx_t offset, idx_t length, idx_t total_size,
	                                    bool is_field);

	//! Conversion routines that take the 'value' and 'typed_value' vectors separately.
	//! NOTE(review): presumably selected by the type of 'typed_value' - confirm in the implementation
	static vector<VariantValue> ConvertShreddedLeaf(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
	                                                idx_t length, idx_t total_size, bool is_field);
	static vector<VariantValue> ConvertShreddedArray(Vector &metadata, Vector &value, Vector &typed_value, idx_t offset,
	                                                 idx_t length, idx_t total_size, bool is_field);
	static vector<VariantValue> ConvertShreddedObject(Vector &metadata, Vector &value, Vector &typed_value,
	                                                  idx_t offset, idx_t length, idx_t total_size, bool is_field);
};
} // namespace duckdb

View File

@@ -0,0 +1,54 @@
#pragma once
#include "duckdb/common/map.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/types/value.hpp"
#include "yyjson.hpp"
using namespace duckdb_yyjson;
namespace duckdb {
//! Kind of content held by a VariantValue
enum class VariantValueType : uint8_t {
	PRIMITIVE,
	OBJECT,
	ARRAY,
	MISSING
};
//! Move-only tree node for a decoded Variant value.
//! 'value_type' tells which of the payload members (primitive_value / object_children / array_items) is in use.
struct VariantValue {
	//! A default-constructed value is MISSING
	VariantValue() : value_type(VariantValueType::MISSING) {
	}
	//! Creates an empty value of the given kind
	explicit VariantValue(VariantValueType type) : value_type(type) {
	}
	//! Creates a PRIMITIVE value that takes ownership of 'val'
	explicit VariantValue(Value &&val) : value_type(VariantValueType::PRIMITIVE), primitive_value(std::move(val)) {
	}

	// Copying is disabled, moving is allowed
	VariantValue(const VariantValue &) = delete;
	VariantValue &operator=(const VariantValue &) = delete;
	VariantValue(VariantValue &&) noexcept = default;
	VariantValue &operator=(VariantValue &&) noexcept = default;

	//! True only for a PRIMITIVE whose Value is NULL; objects, arrays and MISSING are never null
	bool IsNull() const {
		if (value_type != VariantValueType::PRIMITIVE) {
			return false;
		}
		return primitive_value.IsNull();
	}
	//! True if no value is present
	bool IsMissing() const {
		return value_type == VariantValueType::MISSING;
	}

	//! Defined out of line
	void AddChild(const string &key, VariantValue &&val);
	//! Defined out of line
	void AddItem(VariantValue &&val);

	//! Defined out of line; takes the yyjson document as 'doc'
	yyjson_mut_val *ToJSON(ClientContext &context, yyjson_mut_doc *doc) const;

	VariantValueType value_type;
	//! FIXME: how can we get a deterministic child order for a partially shredded object?
	map<string, VariantValue> object_children;
	vector<VariantValue> array_items;
	Value primitive_value;
};
} // namespace duckdb