should be it
This commit is contained in:
105
external/duckdb/extension/parquet/writer/boolean_column_writer.cpp
vendored
Normal file
105
external/duckdb/extension/parquet/writer/boolean_column_writer.cpp
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
#include "writer/boolean_column_writer.hpp"
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
class BooleanStatisticsState : public ColumnWriterStatistics {
|
||||
public:
|
||||
BooleanStatisticsState() : min(true), max(false) {
|
||||
}
|
||||
|
||||
bool min;
|
||||
bool max;
|
||||
|
||||
public:
|
||||
bool HasStats() override {
|
||||
return !(min && !max);
|
||||
}
|
||||
|
||||
string GetMin() override {
|
||||
return GetMinValue();
|
||||
}
|
||||
string GetMax() override {
|
||||
return GetMaxValue();
|
||||
}
|
||||
string GetMinValue() override {
|
||||
return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
|
||||
}
|
||||
string GetMaxValue() override {
|
||||
return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
|
||||
}
|
||||
};
|
||||
|
||||
class BooleanWriterPageState : public ColumnWriterPageState {
|
||||
public:
|
||||
uint8_t byte = 0;
|
||||
uint8_t byte_pos = 0;
|
||||
};
|
||||
|
||||
BooleanColumnWriter::BooleanColumnWriter(ParquetWriter &writer, const ParquetColumnSchema &column_schema,
|
||||
vector<string> schema_path_p, bool can_have_nulls)
|
||||
: PrimitiveColumnWriter(writer, column_schema, std::move(schema_path_p), can_have_nulls) {
|
||||
}
|
||||
|
||||
unique_ptr<ColumnWriterStatistics> BooleanColumnWriter::InitializeStatsState() {
|
||||
return make_uniq<BooleanStatisticsState>();
|
||||
}
|
||||
|
||||
void BooleanColumnWriter::WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p,
|
||||
ColumnWriterPageState *state_p, Vector &input_column, idx_t chunk_start,
|
||||
idx_t chunk_end) {
|
||||
auto &stats = stats_p->Cast<BooleanStatisticsState>();
|
||||
auto &state = state_p->Cast<BooleanWriterPageState>();
|
||||
const auto &mask = FlatVector::Validity(input_column);
|
||||
|
||||
const auto *const ptr = FlatVector::GetData<bool>(input_column);
|
||||
if (stats.max && !stats.min && mask.AllValid()) {
|
||||
// Fast path: stats have already been set, and there's no NULLs
|
||||
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
||||
const auto &val = ptr[r];
|
||||
state.byte |= val << state.byte_pos;
|
||||
if (++state.byte_pos == 8) {
|
||||
temp_writer.Write(state.byte);
|
||||
state.byte = 0;
|
||||
state.byte_pos = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
||||
if (!mask.RowIsValid(r)) {
|
||||
continue;
|
||||
}
|
||||
const auto &val = ptr[r];
|
||||
|
||||
stats.max |= val;
|
||||
stats.min &= val;
|
||||
state.byte |= val << state.byte_pos;
|
||||
|
||||
if (++state.byte_pos == 8) {
|
||||
temp_writer.Write(state.byte);
|
||||
state.byte = 0;
|
||||
state.byte_pos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unique_ptr<ColumnWriterPageState> BooleanColumnWriter::InitializePageState(PrimitiveColumnWriterState &state,
|
||||
idx_t page_idx) {
|
||||
return make_uniq<BooleanWriterPageState>();
|
||||
}
|
||||
|
||||
void BooleanColumnWriter::FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) {
|
||||
auto &state = state_p->Cast<BooleanWriterPageState>();
|
||||
if (state.byte_pos > 0) {
|
||||
temp_writer.Write<uint8_t>(state.byte);
|
||||
state.byte = 0;
|
||||
state.byte_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
idx_t BooleanColumnWriter::GetRowSize(const Vector &vector, const idx_t index,
|
||||
const PrimitiveColumnWriterState &state) const {
|
||||
return sizeof(bool);
|
||||
}
|
||||
|
||||
} // namespace duckdb
|
||||
Reference in New Issue
Block a user