#pragma once namespace duckdb { class StripeStreams { public: virtual ~StripeStreams() = default; /** * get column selector for current stripe reading session * @return column selector will hold column projection info */ virtual const dwio::common::ColumnSelector &getColumnSelector() const = 0; // Get row reader options virtual const dwio::common::RowReaderOptclass StripeStreams { public: virtual ~StripeStreams() = default; /** * get column selector for current stripe reading session * @return column selector will hold column projection info */ virtual const dwio::common::ColumnSelector &getColumnSelector() const = 0; // Get row reader options virtual const dwio::common::RowReaderOptions &getRowReaderOptions() const = 0; /** * Get the encoding for the given column for this stripe. */ virtual const proto::ColumnEncoding &getEncoding(const EncodingKey &) const = 0; /** * Get the stream for the given column/kind in this stripe. * @param streamId stream identifier object * @param throwIfNotFound fail if a stream is required and not found * @return the new stream */ virtual unique_ptr getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0; /** * visit all streams of given node and execute visitor logic * return number of streams visited */ virtual uint32_t visitStreamsOfNode(uint32_t node, std::function visitor) const = 0; /** * Get the value of useVInts for the given column in this stripe. * Defaults to true. * @param streamId stream identifier */ virtual bool getUseVInts(const StreamIdentifier &streamId) const = 0; /** * Get the memory pool for this reader. */ virtual memory::MemoryPool &getMemoryPool() const = 0; /** * Get the RowGroupIndex. * @return a vector of RowIndex belonging to the stripe */ virtual unique_ptr getRowGroupIndex(const StreamIdentifier &si) const = 0; /** * Get stride index provider which is used by string dictionary reader to * get the row index stride index where next() happens */ virtual const StrideIndexProvider &getStrideIndexProvider() const = 0; } ions &getRowReaderOptions() const = 0; /** * Get the encoding for the given column for this stripe. */ virtual const proto::ColumnEncoding &getEncoding(const EncodingKey &) const = 0; /** * Get the stream for the given column/kind in this stripe. * @param streamId stream identifier object * @param throwIfNotFound fail if a stream is required and not found * @return the new stream */ virtual unique_ptr getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0; /** * visit all streams of given node and execute visitor logic * return number of streams visited */ virtual uint32_t visitStreamsOfNode(uint32_t node, std::function visitor) const = 0; /** * Get the value of useVInts for the given column in this stripe. * Defaults to true. * @param streamId stream identifier */ virtual bool getUseVInts(const StreamIdentifier &streamId) const = 0; /** * Get the memory pool for this reader. */ virtual memory::MemoryPool &getMemoryPool() const = 0; /** * Get the RowGroupIndex. * @return a vector of RowIndex belonging to the stripe */ virtual unique_ptr getRowGroupIndex(const StreamIdentifier &si) const = 0; /** * Get stride index provider which is used by string dictionary reader to * get the row index stride index where next() happens */ virtual const StrideIndexProvider &getStrideIndexProvider() const = 0; }; class ColumnReader { public: ColumnReader(const EncodingKey &ek, StripeStreams &stripe); virtual ~ColumnReader() = default; /** * Skip number of specified rows. * @param numValues the number of values to skip * @return the number of non-null values skipped */ virtual uint64_t skip(uint64_t numValues); /** * Read the next group of values into a RowVector. * @param numValues the number of values to read * @param vector to read into */ virtual void next(uint64_t numValues, VectorPtr &result, const uint64_t *nulls = nullptr) = 0; }; class SelectiveColumnReader : public ColumnReader { public: static constexpr uint64_t kStringBufferSize = 16 * 1024; SelectiveColumnReader(const EncodingKey &ek, StripeStreams &stripe, common::ScanSpec *scanSpec); /** * Read the next group of values into a RowVector. * @param numValues the number of values to read * @param vector to read into */ void next(uint64_t /*numValues*/, VectorPtr & /*result*/, const uint64_t * /*incomingNulls*/) override { DATALIB_CHECK(false) << "next() is only defined in SelectiveStructColumnReader"; } // Creates a reader for the given stripe. static unique_ptr build(const std::shared_ptr &requestedType, const std::shared_ptr &dataType, StripeStreams &stripe, common::ScanSpec *scanSpec, uint32_t sequence = 0); // Seeks to offset and reads the rows in 'rows' and applies // filters and value processing as given by 'scanSpec supplied at // construction. 'offset' is relative to start of stripe. 'rows' are // relative to 'offset', so that row 0 is the 'offset'th row from // start of stripe. 'rows' is expected to stay constant // between this and the next call to read. virtual void read(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls) = 0; // Extracts the values at 'rows' into '*result'. May rewrite or // reallocate '*result'. 'rows' must be the same set or a subset of // 'rows' passed to the last 'read(). virtual void getValues(RowSet rows, VectorPtr *result) = 0; // Returns the rows that were selected/visited by the last // read(). If 'this' has no filter, returns 'rows' passed to last // read(). const RowSet outputRows() const { if (scanSpec_->hasFilter()) { return outputRows_; } return inputRows_; } // Advances to 'offset', so that the next item to be read is the // offset-th from the start of stripe. void seekTo(vector_size_t offset, bool readsNullsOnly); // The below functions are called from ColumnVisitor to fill the result set. inline void addOutputRow(vector_size_t row) { outputRows_.push_back(row); } template inline void addNull() { DATALIB_DCHECK(rawResultNulls_ && rawValues_ && (numValues_ + 1) * sizeof(T) < rawSize_); anyNulls_ = true; bits::setBit(rawResultNulls_, numValues_); reinterpret_cast(rawValues_)[numValues_] = T(); numValues_++; } template inline void addValue(const T value) { // @lint-ignore-every HOWTOEVEN ConstantArgumentPassByValue static_assert(std::is_pod::value, "General case of addValue is only for primitive types"); DATALIB_DCHECK(rawValues_ && (numValues _ + 1) * sizeof(T) < rawSize_); reinterpret_cast(rawValues_)[numValues_] = value; numValues_++; } void dropResults(vector_size_t count) { outputRows_.resize(outputRows_.size() - count); numValues_ -= count; } common::ScanSpec *scanSpec() const { return scanSpec_; } auto readOffset() const { return readOffset_; } void setReadOffset(vector_size_t readOffset) { readOffset_ = readOffset; } protected: static constexpr int8_t kNoValueSize = -1; template void ensureValuesCapacity(vector_size_t numRows); void prepareNulls(vector_size_t numRows, bool needNulls); template void filterNulls(RowSet rows, bool isNull, bool extractValues); template void prepareRead(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls); void setOutputRows(RowSet rows) { outputRows_.resize(rows.size()); if (!rows.size()) { return; } memcpy(outputRows_.data(), &rows[0], rows.size() * sizeof(vector_size_t)); } template void getFlatValues(RowSet rows, VectorPtr *result); template void compactScalarValues(RowSet rows); void addStringValue(folly::StringPiece value); // Specification of filters, value extraction, pruning etc. The // spec is assigned at construction and the contents may change at // run time based on adaptation. Owned by caller. common::ScanSpec *const scanSpec_; // Row number after last read row, relative to stripe start. vector_size_t readOffset_ = 0; // The rows to process in read(). References memory supplied by // caller. The values must remain live until the next call to read(). RowSet inputRows_; // Rows passing the filter in readWithVisitor. Must stay // constant between consecutive calls to read(). vector outputRows_; // The row number corresponding to each element in 'values_' vector valueRows_; // The set of all nulls in the range of read(). Created when first // needed and then reused. Not returned to callers. BufferPtr nullsInReadRange_; // Nulls buffer for readWithVisitor. Not set if no nulls. 'numValues' // is the index of the first non-set bit. BufferPtr resultNulls_; uint64_t *rawResultNulls_ = nullptr; // Buffer for gathering scalar values in readWithVisitor. BufferPtr values_; // Writable content in 'values' void *rawValues_ = nullptr; vector_size_t numValues_ = 0; // Size of fixed width value in 'rawValues'. For integers, values // are read at 64 bit width and can be compacted or extracted at a // different width. int8_t valueSize_ = kNoValueSize; // Buffers backing the StringViews in 'values' when reading strings. vector stringBuffers_; // Writable contents of 'stringBuffers_.back()'. char *rawStringBuffer_ = nullptr; // Total writable bytes in 'rawStringBuffer_'. int32_t rawStringSize_ = 0; // Number of written bytes in 'rawStringBuffer_'. uint32_t rawStringUsed_ = 0; // True if last read() added any nulls. bool anyNulls_ = false; // True if all values in scope for last read() are null. bool allNull_ = false; }; struct ExtractValues { static constexpr bool kSkipNulls = false; bool acceptsNulls() const { return true; } template void addValue(vector_size_t /*rowIndex*/, V /*value*/) { } void addNull(vector_size_t /*rowIndex*/) { } }; class Filter { protected: Filter(bool deterministic, bool nullAllowed, FilterKind kind) : nullAllowed_(nullAllowed), deterministic_(deterministic), kind_(kind) { } public: virtual ~Filter() = default; // Templates parametrized on filter need to know determinism at compile // time. If this is false, deterministic() will be consulted at // runtime. static constexpr bool deterministic = true; FilterKind kind() const { return kind_; } virtual unique_ptr clone() const = 0; /** * A filter becomes non-deterministic when applies to nested column, * e.g. a[1] > 10 is non-deterministic because > 10 filter applies only to * some positions, e.g. first entry in a set of entries that correspond to a * single top-level position. */ virtual bool isDeterministic() const { return deterministic_; } /** * When a filter applied to a nested column fails, the whole top-level * position should fail. To enable this functionality, the filter keeps track * of the boundaries of top-level positions and allows the caller to find out * where the current top-level position started and how far it continues. * @return number of positions from the start of the current top-level * position up to the current position (excluding current position) */ virtual int getPrecedingPositionsToFail() const { return 0; } /** * @return number of positions remaining until the end of the current * top-level position */ virtual int getSucceedingPositionsToFail() const { return 0; } virtual bool testNull() const { return nullAllowed_; } /** * Used to apply is [not] null filters to complex types, e.g. * a[1] is null AND a[3] is not null, where a is an array(array(T)). * * In these case, the exact values are not known, but it is known whether they * are null or not. Furthermore, for some positions only nulls are allowed * (a[1] is null), for others only non-nulls (a[3] is not null), and for the * rest both are allowed (a[2] and a[N], where N > 3). */ virtual bool testNonNull() const { DWIO_RAISE("not supported"); } virtual bool testInt64(int64_t /* unused */) const { DWIO_RAISE("not supported"); } virtual bool testDouble(double /* unused */) const { DWIO_RAISE("not supported"); } virtual bool testFloat(float /* unused */) const { DWIO_RAISE("not supported"); } virtual bool testBool(bool /* unused */) const { DWIO_RAISE("not supported"); } virtual bool testBytes(const char * /* unused */, int32_t /* unused */) const { DWIO_RAISE("not supported"); } /** * Filters like string equality and IN, as well as conditions on cardinality * of lists and maps can be at least partly decided by looking at lengths * alone. If this is false, then no further checks are needed. If true, * eventual filters on the data itself need to be evaluated. */ virtual bool testLength(int32_t /* unused */) const { DWIO_RAISE("not supported"); } protected: const bool nullAllowed_; private: const bool deterministic_; const FilterKind kind_; }; // Template parameter for controlling filtering and action on a set of rows. template class ColumnVisitor { public: using FilterType = TFilter; static constexpr bool dense = isDense; ColumnVisitor(TFilter &filter, SelectiveColumnReader *reader, const RowSet &rows, ExtractValues values) : filter_(filter), reader_(reader), allowNulls_(!TFilter::deterministic || filter.testNull()), rows_(&rows[0]), numRows_(rows.size()), rowIndex_(0), values_(values) { } bool allowNulls() { if (ExtractValues::kSkipNulls && TFilter::deterministic) { return false; } return allowNulls_ && values_.acceptsNulls(); } vector_size_t start() { return isDense ? 0 : rowAt(0); } // Tests for a null value and processes it. If the value is not // null, returns 0 and has no effect. If the value is null, advances // to the next non-null value in 'rows_'. Returns the number of // values (not including nulls) to skip to get to the next non-null. // If there is no next non-null in 'rows_', sets 'atEnd'. If 'atEnd' // is set and a non-zero skip is returned, the caller must perform // the skip before returning. FOLLY_ALWAYS_INLINE vector_size_t checkAndSkipNulls(const uint64_t *nulls, vector_size_t ¤t, bool &atEnd) { auto testRow = currentRow(); // Check that the caller and the visitor are in sync about current row. DATALIB_DCHECK(current == testRow); uint32_t nullIndex = testRow >> 6; uint64_t nullWord = nulls[nullIndex]; if (!nullWord) { return 0; } uint8_t nullBit = testRow & 63; if ((nullWord & (1UL << nullBit)) == 0) { return 0; } // We have a null. We find the next non-null. if (++rowIndex_ >= numRows_) { atEnd = true; return 0; } auto rowOfNullWord = testRow - nullBit; if (isDense) { if (nullBit == 63) { nullBit = 0; rowOfNullWord += 64; nullWord = nulls[++nullIndex]; } else { ++nullBit; // set all the bits below the row to null. nullWord |= f4d::bits::lowMask(nullBit); } for (;;) { auto nextNonNull = count_trailing_zeros(~nullWord); if (rowOfNullWord + nextNonNull >= numRows_) { // Nulls all the way to the end. atEnd = true; return 0; } if (nextNonNull < 64) { DATALIB_CHECK(rowIndex_ <= rowOfNullWord + nextNonNull); rowIndex_ = rowOfNullWord + nextNonNull; current = currentRow(); return 0; } rowOfNullWord += 64; nullWord = nulls[++nullIndex]; } } else { // Sparse row numbers. We find the first non-null and count // how many non-nulls on rows not in 'rows_' we skipped. int32_t toSkip = 0; nullWord |= f4d::bits::lowMask(nullBit); for (;;) { testRow = currentRow(); while (testRow >= rowOfNullWord + 64) { toSkip += __builtin_popcountll(~nullWord); nullWord = nulls[++nullIndex]; rowOfNullWord += 64; } // testRow is inside nullWord. See if non-null. nullBit = testRow & 63; if ((nullWord & (1UL << nullBit)) == 0) { toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(nullBit)); current = testRow; return toSkip; } if (++rowIndex_ >= numRows_) { // We end with a null. Add the non-nulls below the final null. toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(testRow - rowOfNullWord)); atEnd = true; return toSkip; } } } } vector_size_t processNull(bool &atEnd) { vector_size_t previous = currentRow(); if (filter_.testNull()) { filterPassedForNull(); } else { filterFailed(); } if (++rowIndex_ >= numRows_) { atEnd = true; return rows_[numRows_ - 1] - previous; } if (TFilter::deterministic && isDense) { return 0; } return currentRow() - previous - 1; } FOLLY_ALWAYS_INLINE vector_size_t process(T value, bool &atEnd) { if (!TFilter::deterministic) { auto previous = currentRow(); if (common::applyFilter(filter_, value)) { filterPassed(value); } else { filterFailed(); } if (++rowIndex_ >= numRows_) { atEnd = true; return rows_[numRows_ - 1] - previous; } return currentRow() - previous - 1; } // The filter passes or fails and we go to the next row if any. if (common::applyFilter(filter_, value)) { filterPassed(value); } else { filterFailed(); } if (++rowIndex_ >= numRows_) { atEnd = true; return 0; } if (isDense) { return 0; } return currentRow() - rows_[rowIndex_ - 1] - 1; } inline vector_size_t rowAt(vector_size_t index) { if (isDense) { return index; } return rows_[index]; } vector_size_t currentRow() { if (isDense) { return rowIndex_; } return rows_[rowIndex_]; } vector_size_t numRows() { return numRows_; } void filterPassed(T value) { addResult(value); if (!std::is_same::value) { addOutputRow(currentRow()); } } inline void filterPassedForNull() { addNull(); if (!std::is_same::value) { addOutputRow(currentRow()); } } FOLLY_ALWAYS_INLINE void filterFailed(); inline void addResult(T value); inline void addNull(); inline void addOutputRow(vector_size_t row); protected: TFilter &filter_; SelectiveColumnReader *reader_; const bool allowNulls_; const vector_size_t *rows_; vector_size_t numRows_; vector_size_t rowIndex_; ExtractValues values_; }; } // namespace duckdb