should be it
This commit is contained in:
3775
external/duckdb/third_party/brotli/enc/backward_references.cpp
vendored
Normal file
3775
external/duckdb/third_party/brotli/enc/backward_references.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
36
external/duckdb/third_party/brotli/enc/backward_references.h
vendored
Normal file
36
external/duckdb/third_party/brotli/enc/backward_references.h
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find backward reference copies. */
|
||||
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/context.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "command.h"
|
||||
#include "brotli_hash.h"
|
||||
#include "quality.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* "commands" points to the next output command to write to, "*num_commands" is
|
||||
initially the total amount of commands output by previous
|
||||
CreateBackwardReferences calls, and must be incremented by the amount written
|
||||
by this call. */
|
||||
BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
||||
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
||||
Command* commands, size_t* num_commands, size_t* num_literals);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
|
||||
935
external/duckdb/third_party/brotli/enc/backward_references_hq.cpp
vendored
Normal file
935
external/duckdb/third_party/brotli/enc/backward_references_hq.cpp
vendored
Normal file
@@ -0,0 +1,935 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find backward reference copies. */
|
||||
|
||||
#include "backward_references_hq.h"
|
||||
|
||||
#include <string.h> /* memcpy, memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "command.h"
|
||||
#include "compound_dictionary.h"
|
||||
#include "encoder_dict.h"
|
||||
#include "fast_log.h"
|
||||
#include "find_match_length.h"
|
||||
#include "literal_cost.h"
|
||||
#include "memory.h"
|
||||
#include "brotli_params.h"
|
||||
#include "prefix.h"
|
||||
#include "quality.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544

/* Initial node cost used by BrotliInitZopfliNodes; ~= 2 ^ 127. */
static const float kInfinity = 1.7e38f;

/* For short distance code j: the distance cache slot it is derived from. */
static const uint32_t kDistanceCacheIndex[] = {
  0, 1, 2, 3,
  0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1,
};

/* For short distance code j: the delta added to the cached distance. */
static const int kDistanceCacheOffset[] = {
  0, 0, 0, 0,
  -1, 1, -2, 2, -3, 3,
  -1, 1, -2, 2, -3, 3
};
|
||||
|
||||
/* Overwrites each of the |length| entries of |array| with a blank node:
   length 1, distance 0, dcode_insert_length 0 and cost kInfinity. */
void duckdb_brotli::BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
  ZopfliNode blank;
  blank.length = 1;
  blank.distance = 0;
  blank.dcode_insert_length = 0;
  blank.u.cost = kInfinity;

  size_t n;
  for (n = 0; n != length; ++n) {
    array[n] = blank;
  }
}
|
||||
|
||||
/* Returns the copy length kept in the low 25 bits of |self->length|. */
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
  const uint32_t packed = self->length;
  return packed & 0x1FFFFFF;
}
|
||||
|
||||
/* Returns the length code of the node: copy length + 9 minus the modifier
   kept in the bits of |self->length| above bit 24. */
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
  const uint32_t copy_len = ZopfliNodeCopyLength(self);
  const uint32_t len_modifier = self->length >> 25;
  return copy_len + 9u - len_modifier;
}
|
||||
|
||||
/* Returns the copy distance stored in the node. */
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
  const uint32_t copy_distance = self->distance;
  return copy_distance;
}
|
||||
|
||||
/* Returns the distance code of the node. The top 5 bits of
   |dcode_insert_length| hold (short code + 1), or 0 when no short code applies;
   in the latter case the code is derived from the copy distance. */
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
  const uint32_t short_code = self->dcode_insert_length >> 27;
  if (short_code != 0) {
    return short_code - 1;
  }
  return ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
}
|
||||
|
||||
/* Returns the total command length: the copy length plus the insert length
   kept in the low 27 bits of |dcode_insert_length|. */
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
  const uint32_t insert_len = self->dcode_insert_length & 0x7FFFFFF;
  return ZopfliNodeCopyLength(self) + insert_len;
}
|
||||
|
||||
/* Temporary data for ZopfliCostModelSetFromCommands. */
|
||||
typedef struct ZopfliCostModelArena {
|
||||
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
||||
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
|
||||
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
||||
} ZopfliCostModelArena;
|
||||
|
||||
/* Histogram based cost model for zopflification. */
|
||||
typedef struct ZopfliCostModel {
|
||||
/* The insert and copy length symbols. */
|
||||
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
float* cost_dist_;
|
||||
uint32_t distance_histogram_size;
|
||||
/* Cumulative costs of literals per position in the stream. */
|
||||
float* literal_costs_;
|
||||
float min_cost_cmd_;
|
||||
size_t num_bytes_;
|
||||
|
||||
/* Temporary data. */
|
||||
union {
|
||||
size_t literal_histograms[3 * 256];
|
||||
ZopfliCostModelArena arena;
|
||||
};
|
||||
} ZopfliCostModel;
|
||||
|
||||
/* Records the sizes in |self| and allocates its two cost arrays through |m|:
   num_bytes + 2 literal cost slots and one distance cost slot per symbol of
   dist->alphabet_size_limit. Callers detect failure via BROTLI_IS_OOM(m). */
static void InitZopfliCostModel(
    MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
    size_t num_bytes) {
  self->num_bytes_ = num_bytes;
  self->distance_histogram_size = dist->alphabet_size_limit;

  self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
  self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);

  if (BROTLI_IS_OOM(m)) {
    return;
  }
}
|
||||
|
||||
/* Releases the arrays allocated by InitZopfliCostModel. */
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
  /* Literal costs first, then distance costs. */
  BROTLI_FREE(m, self->literal_costs_);
  BROTLI_FREE(m, self->cost_dist_);
}
|
||||
|
||||
static void SetCost(const uint32_t* histogram, size_t histogram_size,
|
||||
BROTLI_BOOL literal_histogram, float* cost) {
|
||||
size_t sum = 0;
|
||||
size_t missing_symbol_sum;
|
||||
float log2sum;
|
||||
float missing_symbol_cost;
|
||||
size_t i;
|
||||
for (i = 0; i < histogram_size; i++) {
|
||||
sum += histogram[i];
|
||||
}
|
||||
log2sum = (float)FastLog2(sum);
|
||||
missing_symbol_sum = sum;
|
||||
if (!literal_histogram) {
|
||||
for (i = 0; i < histogram_size; i++) {
|
||||
if (histogram[i] == 0) missing_symbol_sum++;
|
||||
}
|
||||
}
|
||||
missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
|
||||
for (i = 0; i < histogram_size; i++) {
|
||||
if (histogram[i] == 0) {
|
||||
cost[i] = missing_symbol_cost;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Shannon bits for this symbol. */
|
||||
cost[i] = log2sum - (float)FastLog2(histogram[i]);
|
||||
|
||||
/* Cannot be coded with less than 1 bit */
|
||||
if (cost[i] < 1) cost[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Builds the cost model from an existing command sequence.
   Walks |commands| starting at |position| - |last_insert_len| in the ring
   buffer, counts literal, command and distance symbols, converts the counts
   into costs with SetCost, stores the cheapest command cost, and fills
   |literal_costs_| with cumulative literal costs for the |num_bytes_| bytes
   starting at |position|. */
static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
                                           size_t position,
                                           const uint8_t* ringbuffer,
                                           size_t ringbuffer_mask,
                                           const Command* commands,
                                           size_t num_commands,
                                           size_t last_insert_len) {
  ZopfliCostModelArena* const arena = &self->arena;
  float* const cost_cmd = self->cost_cmd_;
  size_t cursor = position - last_insert_len;
  size_t n;

  memset(arena->histogram_literal, 0, sizeof(arena->histogram_literal));
  memset(arena->histogram_cmd, 0, sizeof(arena->histogram_cmd));
  memset(arena->histogram_dist, 0, sizeof(arena->histogram_dist));

  /* Gather symbol statistics from the commands. */
  for (n = 0; n < num_commands; ++n) {
    const Command* cmd = &commands[n];
    const size_t insert_len = cmd->insert_len_;
    const size_t copy_len = CommandCopyLen(cmd);
    const size_t dist_symbol = cmd->dist_prefix_ & 0x3FF;
    const size_t cmd_symbol = cmd->cmd_prefix_;
    size_t b;

    arena->histogram_cmd[cmd_symbol]++;
    /* Only command symbols >= 128 contribute to the distance histogram. */
    if (cmd_symbol >= 128) arena->histogram_dist[dist_symbol]++;

    for (b = 0; b < insert_len; ++b) {
      const uint8_t literal = ringbuffer[(cursor + b) & ringbuffer_mask];
      arena->histogram_literal[literal]++;
    }

    cursor += insert_len + copy_len;
  }

  SetCost(arena->histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
          arena->cost_literal);
  SetCost(arena->histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
          cost_cmd);
  SetCost(arena->histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
          self->cost_dist_);

  {
    float cheapest_cmd = kInfinity;
    for (n = 0; n < BROTLI_NUM_COMMAND_SYMBOLS; ++n) {
      cheapest_cmd = BROTLI_MIN(float, cheapest_cmd, cost_cmd[n]);
    }
    self->min_cost_cmd_ = cheapest_cmd;
  }

  {
    /* Running sum of literal costs. |carry| keeps whatever part of the
       addend was not reflected in the stored difference of two consecutive
       sums, and feeds it into the next step. */
    float* const sums = self->literal_costs_;
    const size_t count = self->num_bytes_;
    float carry = 0.0;
    sums[0] = 0.0;
    for (n = 0; n < count; ++n) {
      carry += arena->cost_literal[ringbuffer[(position + n) & ringbuffer_mask]];
      sums[n + 1] = sums[n] + carry;
      carry -= sums[n + 1] - sums[n];
    }
  }
}
|
||||
|
||||
/* Builds the cost model without a prior command sequence.
   Literal costs come from BrotliEstimateBitCostsForLiterals and are turned
   into cumulative sums in place; command and distance symbol costs are set to
   FastLog2(11 + i) and FastLog2(20 + i) respectively, and the minimum command
   cost to FastLog2(11). */
static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
                                               size_t position,
                                               const uint8_t* ringbuffer,
                                               size_t ringbuffer_mask) {
  float* const sums = self->literal_costs_;
  float* const dist_costs = self->cost_dist_;
  float* const cmd_costs = self->cost_cmd_;
  const size_t count = self->num_bytes_;
  float carry = 0.0;
  size_t n;

  /* Per-byte estimates land in sums[1..count]. */
  BrotliEstimateBitCostsForLiterals(position, count, ringbuffer_mask,
                                    ringbuffer, self->literal_histograms,
                                    &sums[1]);

  /* Convert the per-byte estimates into a running sum. |carry| keeps the part
     of the addend not reflected in the stored difference of consecutive
     sums. */
  sums[0] = 0.0;
  for (n = 0; n < count; ++n) {
    carry += sums[n + 1];
    sums[n + 1] = sums[n] + carry;
    carry -= sums[n + 1] - sums[n];
  }

  for (n = 0; n < BROTLI_NUM_COMMAND_SYMBOLS; ++n) {
    cmd_costs[n] = (float)FastLog2(11 + (uint32_t)n);
  }
  for (n = 0; n < self->distance_histogram_size; ++n) {
    dist_costs[n] = (float)FastLog2(20 + (uint32_t)n);
  }
  self->min_cost_cmd_ = (float)FastLog2(11);
}
|
||||
|
||||
/* Returns the modelled cost of command symbol |cmdcode|. */
static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
    const ZopfliCostModel* self, uint16_t cmdcode) {
  const float* const table = self->cost_cmd_;
  return table[cmdcode];
}
|
||||
|
||||
/* Returns the modelled cost of distance symbol |distcode|. */
static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
    const ZopfliCostModel* self, size_t distcode) {
  const float* const table = self->cost_dist_;
  return table[distcode];
}
|
||||
|
||||
/* Returns the difference of the cumulative literal costs at |to| and |from|,
   i.e. the cost of the literals between the two positions. */
static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
    const ZopfliCostModel* self, size_t from, size_t to) {
  const float* const sums = self->literal_costs_;
  return sums[to] - sums[from];
}
|
||||
|
||||
/* Returns the smallest command symbol cost stored in the model. */
static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
    const ZopfliCostModel* self) {
  const float cheapest = self->min_cost_cmd_;
  return cheapest;
}
|
||||
|
||||
/* REQUIRES: len >= 2, start_pos <= pos */
|
||||
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
|
||||
/* Maintains the "ZopfliNode array invariant". */
|
||||
static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
|
||||
size_t start_pos, size_t len, size_t len_code, size_t dist,
|
||||
size_t short_code, float cost) {
|
||||
ZopfliNode* next = &nodes[pos + len];
|
||||
next->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
|
||||
next->distance = (uint32_t)dist;
|
||||
next->dcode_insert_length = (uint32_t)(
|
||||
(short_code << 27) | (pos - start_pos));
|
||||
next->u.cost = cost;
|
||||
}
|
||||
|
||||
/* Snapshot of a candidate command start position, as filled in by
   EvaluateNode. */
typedef struct PosData {
  /* Position of the node, relative to the block start. */
  size_t pos;
  /* Last four distances in effect at |pos|. */
  int distance_cache[4];
  /* Node cost minus the cumulative literal cost up to |pos|. */
  float costdiff;
  /* Node cost. */
  float cost;
} PosData;
|
||||
|
||||
/* Maintains the smallest 8 cost difference together with their positions */
|
||||
typedef struct StartPosQueue {
|
||||
PosData q_[8];
|
||||
size_t idx_;
|
||||
} StartPosQueue;
|
||||
|
||||
/* Empties the queue by resetting its push counter; slots are not cleared. */
static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
  self->idx_ = 0;
}
|
||||
|
||||
static size_t StartPosQueueSize(const StartPosQueue* self) {
|
||||
return BROTLI_MIN(size_t, self->idx_, 8);
|
||||
}
|
||||
|
||||
/* Inserts |*posdata| into the queue and restores ascending |costdiff| order.
   The new element is written into the ring slot selected by the complemented
   push counter and then moved forward by adjacent swaps while it is more
   expensive than its successor. */
static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
  PosData* const slots = self->q_;
  size_t cursor = ~self->idx_ & 7;
  size_t step;

  ++self->idx_;
  const size_t count = StartPosQueueSize(self);

  slots[cursor] = *posdata;

  /* In a list of |count| items at most |count - 1| adjacent comparisons /
     swaps are needed. */
  for (step = 1; step < count; ++step, ++cursor) {
    const size_t here = cursor & 7;
    const size_t after = (cursor + 1) & 7;
    if (slots[here].costdiff > slots[after].costdiff) {
      BROTLI_SWAP(PosData, slots, here, after);
    }
  }
}
|
||||
|
||||
/* Returns the k-th entry in queue order; the ring slot is (k - idx_) mod 8. */
static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
  const size_t slot = (k - self->idx_) & 7;
  return &self->q_[slot];
}
|
||||
|
||||
/* Returns the minimum possible copy length that can improve the cost of any */
|
||||
/* future position. */
|
||||
static size_t ComputeMinimumCopyLength(const float start_cost,
|
||||
const ZopfliNode* nodes,
|
||||
const size_t num_bytes,
|
||||
const size_t pos) {
|
||||
/* Compute the minimum possible cost of reaching any future position. */
|
||||
float min_cost = start_cost;
|
||||
size_t len = 2;
|
||||
size_t next_len_bucket = 4;
|
||||
size_t next_len_offset = 10;
|
||||
while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
|
||||
/* We already reached (pos + len) with no more cost than the minimum
|
||||
possible cost of reaching anything from this pos, so there is no point in
|
||||
looking for lengths <= len. */
|
||||
++len;
|
||||
if (len == next_len_offset) {
|
||||
/* We reached the next copy length code bucket, so we add one more
|
||||
extra bit to the minimum cost. */
|
||||
min_cost += 1.0f;
|
||||
next_len_offset += next_len_bucket;
|
||||
next_len_bucket *= 2;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/* REQUIRES: nodes[pos].cost < kInfinity
|
||||
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
||||
static uint32_t ComputeDistanceShortcut(const size_t block_start,
|
||||
const size_t pos,
|
||||
const size_t max_backward_limit,
|
||||
const size_t gap,
|
||||
const ZopfliNode* nodes) {
|
||||
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
|
||||
const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF;
|
||||
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
|
||||
/* Since |block_start + pos| is the end position of the command, the copy part
|
||||
starts from |block_start + pos - clen|. Distances that are greater than
|
||||
this or greater than |max_backward_limit| + |gap| are static dictionary
|
||||
references, and do not update the last distances.
|
||||
Also distance code 0 (last distance) does not update the last distances. */
|
||||
if (pos == 0) {
|
||||
return 0;
|
||||
} else if (dist + clen <= block_start + pos + gap &&
|
||||
dist <= max_backward_limit + gap &&
|
||||
ZopfliNodeDistanceCode(&nodes[pos]) > 0) {
|
||||
return (uint32_t)pos;
|
||||
} else {
|
||||
return nodes[pos - clen - ilen].u.shortcut;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fills in dist_cache[0..3] with the last four distances (as defined by
|
||||
Section 4. of the Spec) that would be used at (block_start + pos) if we
|
||||
used the shortest path of commands from block_start, computed from
|
||||
nodes[0..pos]. The last four distances at block_start are in
|
||||
starting_dist_cache[0..3].
|
||||
REQUIRES: nodes[pos].cost < kInfinity
|
||||
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
||||
static void ComputeDistanceCache(const size_t pos,
|
||||
const int* starting_dist_cache,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache) {
|
||||
int idx = 0;
|
||||
size_t p = nodes[pos].u.shortcut;
|
||||
while (idx < 4 && p > 0) {
|
||||
const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF;
|
||||
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
|
||||
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
|
||||
dist_cache[idx++] = (int)dist;
|
||||
/* Because of prerequisite, p >= clen + ilen >= 2. */
|
||||
p = nodes[p - clen - ilen].u.shortcut;
|
||||
}
|
||||
for (; idx < 4; ++idx) {
|
||||
dist_cache[idx] = *starting_dist_cache++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
|
||||
is eligible. */
|
||||
static void EvaluateNode(
|
||||
const size_t block_start, const size_t pos, const size_t max_backward_limit,
|
||||
const size_t gap, const int* starting_dist_cache,
|
||||
const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
|
||||
/* Save cost, because ComputeDistanceCache invalidates it. */
|
||||
float node_cost = nodes[pos].u.cost;
|
||||
nodes[pos].u.shortcut = ComputeDistanceShortcut(
|
||||
block_start, pos, max_backward_limit, gap, nodes);
|
||||
if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
|
||||
PosData posdata;
|
||||
posdata.pos = pos;
|
||||
posdata.cost = node_cost;
|
||||
posdata.costdiff = node_cost -
|
||||
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
||||
ComputeDistanceCache(
|
||||
pos, starting_dist_cache, nodes, posdata.distance_cache);
|
||||
StartPosQueuePush(queue, &posdata);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns longest copy length. */
|
||||
static size_t UpdateNodes(
|
||||
const size_t num_bytes, const size_t block_start, const size_t pos,
|
||||
const uint8_t* ringbuffer, const size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, const size_t max_backward_limit,
|
||||
const int* starting_dist_cache, const size_t num_matches,
|
||||
const BackwardMatch* matches, const ZopfliCostModel* model,
|
||||
StartPosQueue* queue, ZopfliNode* nodes) {
|
||||
const size_t stream_offset = params->stream_offset;
|
||||
const size_t cur_ix = block_start + pos;
|
||||
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
||||
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
|
||||
const size_t dictionary_start = BROTLI_MIN(size_t,
|
||||
cur_ix + stream_offset, max_backward_limit);
|
||||
const size_t max_len = num_bytes - pos;
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
const size_t max_iters = MaxZopfliCandidates(params);
|
||||
size_t min_len;
|
||||
size_t result = 0;
|
||||
size_t k;
|
||||
const CompoundDictionary* addon = ¶ms->dictionary.compound;
|
||||
size_t gap = addon->total_size;
|
||||
|
||||
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
|
||||
starting_dist_cache, model, queue, nodes);
|
||||
|
||||
{
|
||||
const PosData* posdata = StartPosQueueAt(queue, 0);
|
||||
float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
|
||||
ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
|
||||
min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
|
||||
}
|
||||
|
||||
/* Go over the command starting positions in order of increasing cost
|
||||
difference. */
|
||||
for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
|
||||
const PosData* posdata = StartPosQueueAt(queue, k);
|
||||
const size_t start = posdata->pos;
|
||||
const uint16_t inscode = GetInsertLengthCode(pos - start);
|
||||
const float start_costdiff = posdata->costdiff;
|
||||
const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
|
||||
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
||||
|
||||
/* Look for last distance matches using the distance cache from this
|
||||
starting position. */
|
||||
size_t best_len = min_len - 1;
|
||||
size_t j = 0;
|
||||
for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
|
||||
const size_t idx = kDistanceCacheIndex[j];
|
||||
const size_t backward =
|
||||
(size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
|
||||
size_t prev_ix = cur_ix - backward;
|
||||
size_t len = 0;
|
||||
uint8_t continuation = ringbuffer[cur_ix_masked + best_len];
|
||||
if (cur_ix_masked + best_len > ringbuffer_mask) {
|
||||
break;
|
||||
}
|
||||
if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
|
||||
/* Word dictionary -> ignore. */
|
||||
continue;
|
||||
}
|
||||
if (backward <= max_distance) {
|
||||
/* Regular backward reference. */
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
|
||||
prev_ix &= ringbuffer_mask;
|
||||
if (prev_ix + best_len > ringbuffer_mask ||
|
||||
continuation != ringbuffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
||||
&ringbuffer[cur_ix_masked],
|
||||
max_len);
|
||||
} else if (backward > dictionary_start) {
|
||||
size_t d = 0;
|
||||
size_t offset;
|
||||
size_t limit;
|
||||
const uint8_t* source;
|
||||
offset = dictionary_start + 1 + addon->total_size - 1;
|
||||
while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
|
||||
source = addon->chunk_source[d];
|
||||
offset = offset - addon->chunk_offsets[d] - backward;
|
||||
limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
|
||||
limit = limit > max_len ? max_len : limit;
|
||||
if (best_len >= limit ||
|
||||
continuation != source[offset + best_len]) {
|
||||
continue;
|
||||
}
|
||||
len = FindMatchLengthWithLimit(&source[offset],
|
||||
&ringbuffer[cur_ix_masked],
|
||||
limit);
|
||||
} else {
|
||||
/* "Gray" area. It is addressable by decoder, but this encoder
|
||||
instance does not have that data -> should not touch it. */
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const float dist_cost = base_cost +
|
||||
ZopfliCostModelGetDistanceCost(model, j);
|
||||
size_t l;
|
||||
for (l = best_len + 1; l <= len; ++l) {
|
||||
const uint16_t copycode = GetCopyLengthCode(l);
|
||||
const uint16_t cmdcode =
|
||||
CombineLengthCodes(inscode, copycode, j == 0);
|
||||
const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
|
||||
(float)GetCopyExtra(copycode) +
|
||||
ZopfliCostModelGetCommandCost(model, cmdcode);
|
||||
if (cost < nodes[pos + l].u.cost) {
|
||||
UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
|
||||
result = BROTLI_MAX(size_t, result, l);
|
||||
}
|
||||
best_len = l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* At higher iterations look only for new last distance matches, since
|
||||
looking only for new command start positions with the same distances
|
||||
does not help much. */
|
||||
if (k >= 2) continue;
|
||||
|
||||
{
|
||||
/* Loop through all possible copy lengths at this position. */
|
||||
size_t len = min_len;
|
||||
for (j = 0; j < num_matches; ++j) {
|
||||
BackwardMatch match = matches[j];
|
||||
size_t dist = match.distance;
|
||||
BROTLI_BOOL is_dictionary_match =
|
||||
TO_BROTLI_BOOL(dist > dictionary_start + gap);
|
||||
/* We already tried all possible last distance matches, so we can use
|
||||
normal distance code here. */
|
||||
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
|
||||
uint16_t dist_symbol;
|
||||
uint32_t distextra;
|
||||
uint32_t distnumextra;
|
||||
float dist_cost;
|
||||
size_t max_match_len;
|
||||
PrefixEncodeCopyDistance(
|
||||
dist_code, params->dist.num_direct_distance_codes,
|
||||
params->dist.distance_postfix_bits, &dist_symbol, &distextra);
|
||||
distnumextra = dist_symbol >> 10;
|
||||
dist_cost = base_cost + (float)distnumextra +
|
||||
ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
|
||||
|
||||
/* Try all copy lengths up until the maximum copy length corresponding
|
||||
to this distance. If the distance refers to the static dictionary, or
|
||||
the maximum length is long enough, try only one maximum length. */
|
||||
max_match_len = BackwardMatchLength(&match);
|
||||
if (len < max_match_len &&
|
||||
(is_dictionary_match || max_match_len > max_zopfli_len)) {
|
||||
len = max_match_len;
|
||||
}
|
||||
for (; len <= max_match_len; ++len) {
|
||||
const size_t len_code =
|
||||
is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
|
||||
const uint16_t copycode = GetCopyLengthCode(len_code);
|
||||
const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
|
||||
const float cost = dist_cost + (float)GetCopyExtra(copycode) +
|
||||
ZopfliCostModelGetCommandCost(model, cmdcode);
|
||||
if (cost < nodes[pos + len].u.cost) {
|
||||
UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
|
||||
result = BROTLI_MAX(size_t, result, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static size_t ComputeShortestPathFromNodes(size_t num_bytes,
|
||||
ZopfliNode* nodes) {
|
||||
size_t index = num_bytes;
|
||||
size_t num_commands = 0;
|
||||
while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 &&
|
||||
nodes[index].length == 1) --index;
|
||||
nodes[index].u.next = BROTLI_UINT32_MAX;
|
||||
while (index != 0) {
|
||||
size_t len = ZopfliNodeCommandLength(&nodes[index]);
|
||||
index -= len;
|
||||
nodes[index].u.next = (uint32_t)len;
|
||||
num_commands++;
|
||||
}
|
||||
return num_commands;
|
||||
}
|
||||
|
||||
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
||||
void duckdb_brotli::BrotliZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
|
||||
size_t* last_insert_len, const BrotliEncoderParams* params,
|
||||
Command* commands, size_t* num_literals) {
|
||||
const size_t stream_offset = params->stream_offset;
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
size_t pos = 0;
|
||||
uint32_t offset = nodes[0].u.next;
|
||||
size_t i;
|
||||
size_t gap = params->dictionary.compound.total_size;
|
||||
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
|
||||
const ZopfliNode* next = &nodes[pos + offset];
|
||||
size_t copy_length = ZopfliNodeCopyLength(next);
|
||||
size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
|
||||
pos += insert_length;
|
||||
offset = next->u.next;
|
||||
if (i == 0) {
|
||||
insert_length += *last_insert_len;
|
||||
*last_insert_len = 0;
|
||||
}
|
||||
{
|
||||
size_t distance = ZopfliNodeCopyDistance(next);
|
||||
size_t len_code = ZopfliNodeLengthCode(next);
|
||||
size_t dictionary_start = BROTLI_MIN(size_t,
|
||||
block_start + pos + stream_offset, max_backward_limit);
|
||||
BROTLI_BOOL is_dictionary =
|
||||
TO_BROTLI_BOOL(distance > dictionary_start + gap);
|
||||
size_t dist_code = ZopfliNodeDistanceCode(next);
|
||||
InitCommand(&commands[i], ¶ms->dist, insert_length,
|
||||
copy_length, (int)len_code - (int)copy_length, dist_code);
|
||||
|
||||
if (!is_dictionary && dist_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = (int)distance;
|
||||
}
|
||||
}
|
||||
|
||||
*num_literals += insert_length;
|
||||
pos += copy_length;
|
||||
}
|
||||
*last_insert_len += num_bytes - pos;
|
||||
}
|
||||
|
||||
static size_t ZopfliIterate(size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
|
||||
const ZopfliCostModel* model, const uint32_t* num_matches,
|
||||
const BackwardMatch* matches, ZopfliNode* nodes) {
|
||||
const size_t stream_offset = params->stream_offset;
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
StartPosQueue queue;
|
||||
size_t cur_match_pos = 0;
|
||||
size_t i;
|
||||
nodes[0].length = 0;
|
||||
nodes[0].u.cost = 0;
|
||||
InitStartPosQueue(&queue);
|
||||
for (i = 0; i + 3 < num_bytes; i++) {
|
||||
size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
|
||||
ringbuffer_mask, params, max_backward_limit, dist_cache,
|
||||
num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
|
||||
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
||||
cur_match_pos += num_matches[i];
|
||||
if (num_matches[i] == 1 &&
|
||||
BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
|
||||
skip = BROTLI_MAX(size_t,
|
||||
BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
|
||||
}
|
||||
if (skip > 1) {
|
||||
skip--;
|
||||
while (skip) {
|
||||
i++;
|
||||
if (i + 3 >= num_bytes) break;
|
||||
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
|
||||
dist_cache, model, &queue, nodes);
|
||||
cur_match_pos += num_matches[i];
|
||||
skip--;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
||||
}
|
||||
|
||||
/* Merges two match lists (|src1| with |len1| items, |src2| with |len2| items)
   into |dst|. An item of |src1| is emitted first when its length is smaller,
   or when the lengths are equal and its distance is smaller; otherwise the
   item of |src2| goes first. Whatever remains of either list is appended. */
static void MergeMatches(BackwardMatch* dst,
    BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
  while (len1 != 0 && len2 != 0) {
    const size_t first_len = BackwardMatchLength(src1);
    const size_t second_len = BackwardMatchLength(src2);
    if (first_len < second_len ||
        (first_len == second_len && src1->distance < src2->distance)) {
      *dst++ = *src1++;
      --len1;
    } else {
      *dst++ = *src2++;
      --len2;
    }
  }
  for (; len1 != 0; --len1) {
    *dst++ = *src1++;
  }
  for (; len2 != 0; --len2) {
    *dst++ = *src2++;
  }
}
|
||||
|
||||
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
||||
size_t duckdb_brotli::BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
||||
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
|
||||
const size_t stream_offset = params->stream_offset;
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
StartPosQueue queue;
|
||||
BackwardMatch* BROTLI_RESTRICT matches =
|
||||
BROTLI_ALLOC(m, BackwardMatch, 2 * (MAX_NUM_MATCHES_H10 + 64));
|
||||
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
|
||||
position + num_bytes - StoreLookaheadH10() + 1 : position;
|
||||
size_t i;
|
||||
const CompoundDictionary* addon = ¶ms->dictionary.compound;
|
||||
size_t gap = addon->total_size;
|
||||
size_t lz_matches_offset =
|
||||
(addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
|
||||
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
|
||||
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
|
||||
return 0;
|
||||
}
|
||||
nodes[0].length = 0;
|
||||
nodes[0].u.cost = 0;
|
||||
InitZopfliCostModel(m, model, ¶ms->dist, num_bytes);
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
ZopfliCostModelSetFromLiteralCosts(
|
||||
model, position, ringbuffer, ringbuffer_mask);
|
||||
InitStartPosQueue(&queue);
|
||||
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
|
||||
const size_t pos = position + i;
|
||||
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
||||
const size_t dictionary_start = BROTLI_MIN(size_t,
|
||||
pos + stream_offset, max_backward_limit);
|
||||
size_t skip;
|
||||
size_t num_matches;
|
||||
int dict_id = 0;
|
||||
if (params->dictionary.contextual.context_based) {
|
||||
uint8_t p1 = pos >= 1 ?
|
||||
ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
|
||||
uint8_t p2 = pos >= 2 ?
|
||||
ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
|
||||
dict_id = params->dictionary.contextual.context_map[
|
||||
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
|
||||
}
|
||||
num_matches = FindAllMatchesH10(&hasher->privat._H10,
|
||||
params->dictionary.contextual.dict[dict_id],
|
||||
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
|
||||
dictionary_start + gap, params, &matches[lz_matches_offset]);
|
||||
if (addon->num_chunks != 0) {
|
||||
size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
|
||||
ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
|
||||
dictionary_start, params->dist.max_distance,
|
||||
&matches[lz_matches_offset - 64], 64);
|
||||
MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
|
||||
&matches[lz_matches_offset], num_matches);
|
||||
num_matches += cd_matches;
|
||||
}
|
||||
if (num_matches > 0 &&
|
||||
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
|
||||
matches[0] = matches[num_matches - 1];
|
||||
num_matches = 1;
|
||||
}
|
||||
skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
params, max_backward_limit, dist_cache, num_matches, matches, model,
|
||||
&queue, nodes);
|
||||
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
||||
if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
|
||||
skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
|
||||
}
|
||||
if (skip > 1) {
|
||||
/* Add the tail of the copy to the hasher. */
|
||||
StoreRangeH10(&hasher->privat._H10,
|
||||
ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
|
||||
size_t, pos + skip, store_end));
|
||||
skip--;
|
||||
while (skip) {
|
||||
i++;
|
||||
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
|
||||
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
|
||||
dist_cache, model, &queue, nodes);
|
||||
skip--;
|
||||
}
|
||||
}
|
||||
}
|
||||
CleanupZopfliCostModel(m, model);
|
||||
BROTLI_FREE(m, model);
|
||||
BROTLI_FREE(m, matches);
|
||||
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
||||
}
|
||||
|
||||
/* Allocates and initializes a node table with num_bytes + 1 entries, runs
   BrotliZopfliComputeShortestPath() on it, adds the returned count to
   |*num_commands| and lets BrotliZopfliCreateCommands() emit into |commands|.
   Returns early, without creating commands, when the memory manager reports
   an out-of-memory condition. */
void duckdb_brotli::BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
    ContextLut literal_context_lut, const BrotliEncoderParams* params,
    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
    Command* commands, size_t* num_commands, size_t* num_literals) {
  const size_t node_count = num_bytes + 1;
  ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, node_count);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) {
    return;
  }
  BrotliInitZopfliNodes(nodes, node_count);
  {
    const size_t path_commands = BrotliZopfliComputeShortestPath(m, num_bytes,
        position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
        dist_cache, hasher, nodes);
    *num_commands += path_commands;
  }
  if (BROTLI_IS_OOM(m)) {
    return;
  }
  BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
      last_insert_len, params, commands, num_literals);
  BROTLI_FREE(m, nodes);
}
|
||||
|
||||
/* Two-pass Zopfli backward reference search.

   First all matches for the block are gathered into one growing |matches|
   array, with |num_matches[i]| recording how many belong to position i. Then
   ZopfliIterate() + BrotliZopfliCreateCommands() run twice over those stored
   matches: the first time with a cost model set from literal costs, the
   second time with a cost model set from the commands produced by the first
   run. Before each run the output state (|*num_commands|, |*num_literals|,
   |*last_insert_len|, |dist_cache|) is reset to its value on entry.

   Returns early, without producing commands, when the memory manager reports
   an out-of-memory condition. */
void duckdb_brotli::BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
    ContextLut literal_context_lut, const BrotliEncoderParams* params,
    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
    Command* commands, size_t* num_commands, size_t* num_literals) {
  const size_t stream_offset = params->stream_offset;
  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
  uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
  size_t matches_size = 4 * num_bytes;
  /* Upper bound passed to StoreRangeH10() when skipped positions are added
     to the hasher. */
  const size_t store_end = num_bytes >= StoreLookaheadH10() ?
      position + num_bytes - StoreLookaheadH10() + 1 : position;
  size_t cur_match_pos = 0;
  size_t i;
  size_t orig_num_literals;
  size_t orig_last_insert_len;
  int orig_dist_cache[4];
  size_t orig_num_commands;
  ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
  ZopfliNode* nodes;
  BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
  const CompoundDictionary* addon = &params->dictionary.compound;
  size_t gap = addon->total_size;
  /* With compound dictionary chunks attached, H10 matches are first written
     |shadow_matches| slots past |cur_match_pos|, the 64 slots right below
     them receive the compound dictionary matches, and MergeMatches() writes
     the merged result at |cur_match_pos|. */
  size_t shadow_matches =
      (addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
      BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
    return;
  }
  for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
    const size_t pos = position + i;
    size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
    size_t dictionary_start = BROTLI_MIN(size_t,
        pos + stream_offset, max_backward_limit);
    size_t max_length = num_bytes - i;
    size_t num_found_matches;
    size_t cur_match_end;
    size_t j;
    int dict_id = 0;
    if (params->dictionary.contextual.context_based) {
      /* Pick the dictionary from the context formed by the two preceding
         bytes (0 stands in for bytes before the start of the data). */
      uint8_t p1 = pos >= 1 ?
          ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
      uint8_t p2 = pos >= 2 ?
          ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
      dict_id = params->dictionary.contextual.context_map[
          BROTLI_CONTEXT(p1, p2, literal_context_lut)];
    }
    /* Ensure that we have enough free slots. */
    BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
        cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
    if (BROTLI_IS_OOM(m)) return;
    num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
        params->dictionary.contextual.dict[dict_id],
        ringbuffer, ringbuffer_mask, pos, max_length,
        max_distance, dictionary_start + gap, params,
        &matches[cur_match_pos + shadow_matches]);
    if (addon->num_chunks != 0) {
      size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
          ringbuffer, ringbuffer_mask, pos, 3, max_length,
          dictionary_start, params->dist.max_distance,
          &matches[cur_match_pos + shadow_matches - 64], 64);
      MergeMatches(&matches[cur_match_pos],
          &matches[cur_match_pos + shadow_matches - 64], cd_matches,
          &matches[cur_match_pos + shadow_matches], num_found_matches);
      num_found_matches += cd_matches;
    }
    cur_match_end = cur_match_pos + num_found_matches;
    /* Debug check: match lengths must be non-decreasing. */
    for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
      BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
          BackwardMatchLength(&matches[j + 1]));
    }
    num_matches[i] = (uint32_t)num_found_matches;
    if (num_found_matches > 0) {
      const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
      if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
        /* Very long match: keep only this one, record zero matches for the
           positions it covers and jump over them. */
        const size_t skip = match_len - 1;
        matches[cur_match_pos++] = matches[cur_match_end - 1];
        num_matches[i] = 1;
        /* Add the tail of the copy to the hasher. */
        StoreRangeH10(&hasher->privat._H10,
            ringbuffer, ringbuffer_mask, pos + 1,
            BROTLI_MIN(size_t, pos + match_len, store_end));
        memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
        i += skip;
      } else {
        cur_match_pos = cur_match_end;
      }
    }
  }
  /* Remember the output state so that every pass below starts from it. */
  orig_num_literals = *num_literals;
  orig_last_insert_len = *last_insert_len;
  memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
  orig_num_commands = *num_commands;
  nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
  InitZopfliCostModel(m, model, &params->dist, num_bytes);
  if (BROTLI_IS_OOM(m)) return;
  for (i = 0; i < 2; i++) {
    BrotliInitZopfliNodes(nodes, num_bytes + 1);
    if (i == 0) {
      ZopfliCostModelSetFromLiteralCosts(
          model, position, ringbuffer, ringbuffer_mask);
    } else {
      /* Second pass: derive the costs from the commands of the first pass
         (their count is read before |*num_commands| is reset below). */
      ZopfliCostModelSetFromCommands(model, position, ringbuffer,
          ringbuffer_mask, commands, *num_commands - orig_num_commands,
          orig_last_insert_len);
    }
    *num_commands = orig_num_commands;
    *num_literals = orig_num_literals;
    *last_insert_len = orig_last_insert_len;
    memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
    *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
        ringbuffer_mask, params, gap, dist_cache, model, num_matches, matches,
        nodes);
    BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
        last_insert_len, params, commands, num_literals);
  }
  CleanupZopfliCostModel(m, model);
  BROTLI_FREE(m, model);
  BROTLI_FREE(m, nodes);
  BROTLI_FREE(m, matches);
  BROTLI_FREE(m, num_matches);
}
|
||||
|
||||
|
||||
92
external/duckdb/third_party/brotli/enc/backward_references_hq.h
vendored
Normal file
92
external/duckdb/third_party/brotli/enc/backward_references_hq.h
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find backward reference copies. */
|
||||
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/context.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "command.h"
|
||||
#include "brotli_hash.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
|
||||
size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
||||
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
||||
Command* commands, size_t* num_commands, size_t* num_literals);
|
||||
|
||||
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
|
||||
size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
||||
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
|
||||
Command* commands, size_t* num_commands, size_t* num_literals);
|
||||
|
||||
/* Per-position record of the Zopfli search; the search functions operate on
   an array nodes[0..num_bytes] of these. */
typedef struct ZopfliNode {
|
||||
/* Best length to get up to this byte (not including this byte itself);
|
||||
the highest 7 bits are used to reconstruct the length code. */
|
||||
uint32_t length;
|
||||
/* Distance associated with the length. */
|
||||
uint32_t distance;
|
||||
/* Number of literal inserts before this copy; highest 5 bits contain
|
||||
distance short code + 1 (or zero if no short code). */
|
||||
uint32_t dcode_insert_length;
|
||||
|
||||
/* This union holds information used by dynamic programming. During the
|
||||
forward pass |cost| is used to store the goal function. When a node is
|
||||
processed its |cost| is invalidated in favor of |shortcut|. On the path
|
||||
back-tracing pass |next| is assigned the offset to the next node on the
   path. */
|
||||
union {
|
||||
/* Smallest cost to get to this byte from the beginning, as found so far. */
|
||||
float cost;
|
||||
/* Offset to the next node on the path. Equal to command_length() of the
|
||||
next node on the path. For the last node it equals BROTLI_UINT32_MAX. */
|
||||
uint32_t next;
|
||||
/* Node position that provides next distance for distance cache. */
|
||||
uint32_t shortcut;
|
||||
} u;
|
||||
} ZopfliNode;
|
||||
|
||||
BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
|
||||
|
||||
/* Computes the shortest path of commands from position to at most
|
||||
position + num_bytes.
|
||||
|
||||
The return value is the number of commands found; for nodes on the path,
|
||||
u.next is the length of the next command (copy length plus insert length).
|
||||
Note that the sum of the lengths of all commands can be less than num_bytes.
|
||||
|
||||
On return, the nodes[0..num_bytes] array will have the following
|
||||
"ZopfliNode array invariant":
|
||||
For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
||||
(1) nodes[i].copy_length() >= 2
|
||||
(2) nodes[i].command_length() <= i and
|
||||
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
|
||||
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
|
||||
MemoryManager* m, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
ContextLut literal_context_lut, const BrotliEncoderParams* params,
|
||||
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
|
||||
|
||||
BROTLI_INTERNAL void BrotliZopfliCreateCommands(
|
||||
const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
|
||||
int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
|
||||
Command* commands, size_t* num_literals);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
|
||||
410
external/duckdb/third_party/brotli/enc/bit_cost.cpp
vendored
Normal file
410
external/duckdb/third_party/brotli/enc/bit_cost.cpp
vendored
Normal file
@@ -0,0 +1,410 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions to estimate the bit cost of Huffman trees. */
|
||||
|
||||
#include "bit_cost.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "fast_log.h"
|
||||
#include "histogram.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
#define FN(X) duckdb_brotli:: X ## Literal
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Estimates the bit cost of |histogram|. Histograms with a zero total count
   or with one to four used symbols are priced by fixed formulas; everything
   else is priced as the entropy of the symbols plus an estimate for storing
   the code lengths. */
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
  static const double kOneSymbolHistogramCost = 12;
  static const double kTwoSymbolHistogramCost = 20;
  static const double kThreeSymbolHistogramCost = 28;
  static const double kFourSymbolHistogramCost = 37;
  const size_t alphabet_size = FN(HistogramDataSize)();
  size_t used[5];
  int num_used = 0;
  double cost = 0.0;
  size_t pos;

  if (histogram->total_count_ == 0) {
    return kOneSymbolHistogramCost;
  }

  /* Record the indices of the first used symbols; scanning stops at the
     fifth one because the special cases below cover at most four. */
  for (pos = 0; pos < alphabet_size && num_used <= 4; ++pos) {
    if (histogram->data_[pos] > 0) {
      used[num_used++] = pos;
    }
  }

  switch (num_used) {
    case 1:
      return kOneSymbolHistogramCost;
    case 2:
      return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
    case 3: {
      const uint32_t a = histogram->data_[used[0]];
      const uint32_t b = histogram->data_[used[1]];
      const uint32_t c = histogram->data_[used[2]];
      uint32_t largest = a;
      if (b > largest) largest = b;
      if (c > largest) largest = c;
      return (kThreeSymbolHistogramCost +
              2 * (a + b + c) - largest);
    }
    case 4: {
      uint32_t sorted[4];
      uint32_t tail_sum;
      uint32_t largest;
      size_t k;
      for (k = 0; k < 4; ++k) {
        sorted[k] = histogram->data_[used[k]];
      }
      /* Order the four counts from largest to smallest. */
      for (k = 1; k < 4; ++k) {
        const uint32_t value = sorted[k];
        size_t slot = k;
        while (slot > 0 && sorted[slot - 1] < value) {
          sorted[slot] = sorted[slot - 1];
          --slot;
        }
        sorted[slot] = value;
      }
      tail_sum = sorted[2] + sorted[3];
      largest = BROTLI_MAX(uint32_t, tail_sum, sorted[0]);
      return (kFourSymbolHistogramCost +
              3 * tail_sum + 2 * (sorted[0] + sorted[1]) - largest);
    }
    default:
      break;
  }

  {
    /* Accumulate the entropy of the histogram and, at the same time, a
       simplified histogram of the code length codes in which zero runs use
       the repeat code 17 but the non-zero repeat code 16 is never used. */
    size_t deepest = 1;
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
    const double log2total = FastLog2(histogram->total_count_);
    pos = 0;
    while (pos < alphabet_size) {
      if (histogram->data_[pos] > 0) {
        /* -log2(P(symbol)) = log2(total_count) - log2(count(symbol)) */
        double log2p = log2total - FastLog2(histogram->data_[pos]);
        /* The bit depth is approximated by round(-log2(P(symbol))),
           capped at 15. */
        size_t depth = (size_t)(log2p + 0.5);
        cost += histogram->data_[pos] * log2p;
        if (depth > 15) {
          depth = 15;
        }
        if (depth > deepest) {
          deepest = depth;
        }
        ++depth_histo[depth];
        ++pos;
        continue;
      }
      {
        /* Measure the run of zeros starting here and account for it with
           code length codes 0 and 17. */
        size_t run_end = pos + 1;
        uint32_t reps;
        while (run_end < alphabet_size && histogram->data_[run_end] == 0) {
          ++run_end;
        }
        reps = (uint32_t)(run_end - pos);
        pos = run_end;
        if (pos == alphabet_size) {
          /* The trailing zero run is encoded only implicitly: no cost. */
          break;
        }
        if (reps < 3) {
          depth_histo[0] += reps;
        } else {
          for (reps -= 2; reps > 0; reps >>= 3) {
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
            /* The 3 extra bits of the 17 code length code. */
            cost += 3;
          }
        }
      }
    }
    /* Estimated encoding cost of the code length code histogram. */
    cost += (double)(18 + 2 * deepest);
    /* Entropy of the code length code histogram. */
    cost += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
  }
  return cost;
}
|
||||
|
||||
#undef HistogramType
|
||||
#undef FN
|
||||
|
||||
#define FN(X) duckdb_brotli:: X ## Command
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Estimates the bit cost of |histogram|. Histograms with a zero total count
   or with one to four used symbols are priced by fixed formulas; everything
   else is priced as the entropy of the symbols plus an estimate for storing
   the code lengths. */
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
  static const double kOneSymbolHistogramCost = 12;
  static const double kTwoSymbolHistogramCost = 20;
  static const double kThreeSymbolHistogramCost = 28;
  static const double kFourSymbolHistogramCost = 37;
  const size_t alphabet_size = FN(HistogramDataSize)();
  size_t used[5];
  int num_used = 0;
  double cost = 0.0;
  size_t pos;

  if (histogram->total_count_ == 0) {
    return kOneSymbolHistogramCost;
  }

  /* Record the indices of the first used symbols; scanning stops at the
     fifth one because the special cases below cover at most four. */
  for (pos = 0; pos < alphabet_size && num_used <= 4; ++pos) {
    if (histogram->data_[pos] > 0) {
      used[num_used++] = pos;
    }
  }

  switch (num_used) {
    case 1:
      return kOneSymbolHistogramCost;
    case 2:
      return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
    case 3: {
      const uint32_t a = histogram->data_[used[0]];
      const uint32_t b = histogram->data_[used[1]];
      const uint32_t c = histogram->data_[used[2]];
      uint32_t largest = a;
      if (b > largest) largest = b;
      if (c > largest) largest = c;
      return (kThreeSymbolHistogramCost +
              2 * (a + b + c) - largest);
    }
    case 4: {
      uint32_t sorted[4];
      uint32_t tail_sum;
      uint32_t largest;
      size_t k;
      for (k = 0; k < 4; ++k) {
        sorted[k] = histogram->data_[used[k]];
      }
      /* Order the four counts from largest to smallest. */
      for (k = 1; k < 4; ++k) {
        const uint32_t value = sorted[k];
        size_t slot = k;
        while (slot > 0 && sorted[slot - 1] < value) {
          sorted[slot] = sorted[slot - 1];
          --slot;
        }
        sorted[slot] = value;
      }
      tail_sum = sorted[2] + sorted[3];
      largest = BROTLI_MAX(uint32_t, tail_sum, sorted[0]);
      return (kFourSymbolHistogramCost +
              3 * tail_sum + 2 * (sorted[0] + sorted[1]) - largest);
    }
    default:
      break;
  }

  {
    /* Accumulate the entropy of the histogram and, at the same time, a
       simplified histogram of the code length codes in which zero runs use
       the repeat code 17 but the non-zero repeat code 16 is never used. */
    size_t deepest = 1;
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
    const double log2total = FastLog2(histogram->total_count_);
    pos = 0;
    while (pos < alphabet_size) {
      if (histogram->data_[pos] > 0) {
        /* -log2(P(symbol)) = log2(total_count) - log2(count(symbol)) */
        double log2p = log2total - FastLog2(histogram->data_[pos]);
        /* The bit depth is approximated by round(-log2(P(symbol))),
           capped at 15. */
        size_t depth = (size_t)(log2p + 0.5);
        cost += histogram->data_[pos] * log2p;
        if (depth > 15) {
          depth = 15;
        }
        if (depth > deepest) {
          deepest = depth;
        }
        ++depth_histo[depth];
        ++pos;
        continue;
      }
      {
        /* Measure the run of zeros starting here and account for it with
           code length codes 0 and 17. */
        size_t run_end = pos + 1;
        uint32_t reps;
        while (run_end < alphabet_size && histogram->data_[run_end] == 0) {
          ++run_end;
        }
        reps = (uint32_t)(run_end - pos);
        pos = run_end;
        if (pos == alphabet_size) {
          /* The trailing zero run is encoded only implicitly: no cost. */
          break;
        }
        if (reps < 3) {
          depth_histo[0] += reps;
        } else {
          for (reps -= 2; reps > 0; reps >>= 3) {
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
            /* The 3 extra bits of the 17 code length code. */
            cost += 3;
          }
        }
      }
    }
    /* Estimated encoding cost of the code length code histogram. */
    cost += (double)(18 + 2 * deepest);
    /* Entropy of the code length code histogram. */
    cost += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
  }
  return cost;
}
|
||||
|
||||
#undef HistogramType
|
||||
#undef FN
|
||||
|
||||
#define FN(X) duckdb_brotli:: X ## Distance
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Estimates the bit cost of |histogram|. Histograms with a zero total count
   or with one to four used symbols are priced by fixed formulas; everything
   else is priced as the entropy of the symbols plus an estimate for storing
   the code lengths. */
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
  static const double kOneSymbolHistogramCost = 12;
  static const double kTwoSymbolHistogramCost = 20;
  static const double kThreeSymbolHistogramCost = 28;
  static const double kFourSymbolHistogramCost = 37;
  const size_t alphabet_size = FN(HistogramDataSize)();
  size_t used[5];
  int num_used = 0;
  double cost = 0.0;
  size_t pos;

  if (histogram->total_count_ == 0) {
    return kOneSymbolHistogramCost;
  }

  /* Record the indices of the first used symbols; scanning stops at the
     fifth one because the special cases below cover at most four. */
  for (pos = 0; pos < alphabet_size && num_used <= 4; ++pos) {
    if (histogram->data_[pos] > 0) {
      used[num_used++] = pos;
    }
  }

  switch (num_used) {
    case 1:
      return kOneSymbolHistogramCost;
    case 2:
      return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
    case 3: {
      const uint32_t a = histogram->data_[used[0]];
      const uint32_t b = histogram->data_[used[1]];
      const uint32_t c = histogram->data_[used[2]];
      uint32_t largest = a;
      if (b > largest) largest = b;
      if (c > largest) largest = c;
      return (kThreeSymbolHistogramCost +
              2 * (a + b + c) - largest);
    }
    case 4: {
      uint32_t sorted[4];
      uint32_t tail_sum;
      uint32_t largest;
      size_t k;
      for (k = 0; k < 4; ++k) {
        sorted[k] = histogram->data_[used[k]];
      }
      /* Order the four counts from largest to smallest. */
      for (k = 1; k < 4; ++k) {
        const uint32_t value = sorted[k];
        size_t slot = k;
        while (slot > 0 && sorted[slot - 1] < value) {
          sorted[slot] = sorted[slot - 1];
          --slot;
        }
        sorted[slot] = value;
      }
      tail_sum = sorted[2] + sorted[3];
      largest = BROTLI_MAX(uint32_t, tail_sum, sorted[0]);
      return (kFourSymbolHistogramCost +
              3 * tail_sum + 2 * (sorted[0] + sorted[1]) - largest);
    }
    default:
      break;
  }

  {
    /* Accumulate the entropy of the histogram and, at the same time, a
       simplified histogram of the code length codes in which zero runs use
       the repeat code 17 but the non-zero repeat code 16 is never used. */
    size_t deepest = 1;
    uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
    const double log2total = FastLog2(histogram->total_count_);
    pos = 0;
    while (pos < alphabet_size) {
      if (histogram->data_[pos] > 0) {
        /* -log2(P(symbol)) = log2(total_count) - log2(count(symbol)) */
        double log2p = log2total - FastLog2(histogram->data_[pos]);
        /* The bit depth is approximated by round(-log2(P(symbol))),
           capped at 15. */
        size_t depth = (size_t)(log2p + 0.5);
        cost += histogram->data_[pos] * log2p;
        if (depth > 15) {
          depth = 15;
        }
        if (depth > deepest) {
          deepest = depth;
        }
        ++depth_histo[depth];
        ++pos;
        continue;
      }
      {
        /* Measure the run of zeros starting here and account for it with
           code length codes 0 and 17. */
        size_t run_end = pos + 1;
        uint32_t reps;
        while (run_end < alphabet_size && histogram->data_[run_end] == 0) {
          ++run_end;
        }
        reps = (uint32_t)(run_end - pos);
        pos = run_end;
        if (pos == alphabet_size) {
          /* The trailing zero run is encoded only implicitly: no cost. */
          break;
        }
        if (reps < 3) {
          depth_histo[0] += reps;
        } else {
          for (reps -= 2; reps > 0; reps >>= 3) {
            ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
            /* The 3 extra bits of the 17 code length code. */
            cost += 3;
          }
        }
      }
    }
    /* Estimated encoding cost of the code length code histogram. */
    cost += (double)(18 + 2 * deepest);
    /* Entropy of the code length code histogram. */
    cost += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
  }
  return cost;
}
|
||||
|
||||
#undef HistogramType
|
||||
#undef FN
|
||||
|
||||
|
||||
60
external/duckdb/third_party/brotli/enc/bit_cost.h
vendored
Normal file
60
external/duckdb/third_party/brotli/enc/bit_cost.h
vendored
Normal file
@@ -0,0 +1,60 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions to estimate the bit cost of Huffman trees. */
|
||||
|
||||
#ifndef BROTLI_ENC_BIT_COST_H_
|
||||
#define BROTLI_ENC_BIT_COST_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "fast_log.h"
|
||||
#include "histogram.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Computes sum * log2(sum) - SUM(p * log2(p)) over the |size| counts in
   |population|, where sum is the total of all counts, and stores that total
   in |*total|. The logarithms come from FastLog2(). */
static BROTLI_INLINE double ShannonEntropy(
    const uint32_t* population, size_t size, size_t* total) {
  const uint32_t* const stop = population + size;
  size_t running_total = 0;
  double entropy = 0;
  /* With an odd count, handle one element up front so that the loop below
     always consumes elements in pairs. */
  if (size & 1) {
    const size_t lone = *population++;
    running_total += lone;
    entropy -= (double)lone * FastLog2(lone);
  }
  while (population < stop) {
    const size_t first = *population++;
    running_total += first;
    entropy -= (double)first * FastLog2(first);
    {
      const size_t second = *population++;
      running_total += second;
      entropy -= (double)second * FastLog2(second);
    }
  }
  if (running_total) {
    entropy += (double)running_total * FastLog2(running_total);
  }
  *total = running_total;
  return entropy;
}
|
||||
|
||||
/* ShannonEntropy() of |population|, raised to the total count whenever the
   entropy is smaller than that (at least one bit per literal is needed). */
static BROTLI_INLINE double BitsEntropy(
    const uint32_t* population, size_t size) {
  size_t total;
  const double entropy = ShannonEntropy(population, size, &total);
  const double one_bit_each = (double)total;
  return (entropy < one_bit_each) ? one_bit_each : entropy;
}
|
||||
|
||||
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
|
||||
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
|
||||
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_BIT_COST_H_ */
|
||||
1653
external/duckdb/third_party/brotli/enc/block_splitter.cpp
vendored
Normal file
1653
external/duckdb/third_party/brotli/enc/block_splitter.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
48
external/duckdb/third_party/brotli/enc/block_splitter.h
vendored
Normal file
48
external/duckdb/third_party/brotli/enc/block_splitter.h
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Block split point selection utilities. */
|
||||
|
||||
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
#define BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "command.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Result of splitting a sequence into blocks: |types| and |lengths| are
   parallel arrays holding |num_blocks| values each. */
typedef struct BlockSplit {
|
||||
size_t num_types; /* Amount of distinct types */
|
||||
size_t num_blocks; /* Amount of values in types and length */
|
||||
uint8_t* types;
|
||||
uint32_t* lengths;
|
||||
|
||||
/* NOTE(review): presumably the allocated capacities of |types| and
   |lengths| - confirm against the block splitter implementation. */
size_t types_alloc_size;
|
||||
size_t lengths_alloc_size;
|
||||
} BlockSplit;
|
||||
|
||||
BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
|
||||
BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
|
||||
BlockSplit* self);
|
||||
|
||||
BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
const BrotliEncoderParams* params,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
|
||||
1431
external/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp
vendored
Normal file
1431
external/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
85
external/duckdb/third_party/brotli/enc/brotli_bit_stream.h
vendored
Normal file
85
external/duckdb/third_party/brotli/enc/brotli_bit_stream.h
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
/* Copyright 2014 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions to convert brotli-related data structures into the
|
||||
brotli bit stream. The functions here operate under
|
||||
assumption that there is enough space in the storage, i.e., there are
|
||||
no out-of-range checks anywhere.
|
||||
|
||||
These functions do bit addressing into a byte array. The byte array
|
||||
is called "storage" and the index to the bit is called storage_ix
|
||||
in function arguments. */
|
||||
|
||||
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/context.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "command.h"
|
||||
#include "entropy_encode.h"
|
||||
#include "memory.h"
|
||||
#include "metablock.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* All Store functions here will use a storage_ix, which is always the bit
|
||||
position for the current storage. */
|
||||
|
||||
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
|
||||
HuffmanTree* tree, const uint32_t* histogram, const size_t histogram_total,
|
||||
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
/* REQUIRES: length > 0 */
|
||||
/* REQUIRES: length <= (1 << 24) */
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
|
||||
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
||||
uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
|
||||
const BrotliEncoderParams* params, ContextType literal_context_mode,
|
||||
const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
/* Stores the meta-block without doing any block splitting, just collects
|
||||
one histogram per block category and uses that for entropy coding.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
|
||||
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
||||
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
|
||||
const Command* commands, size_t n_commands,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
/* Same as above, but uses static prefix codes for histograms with only a few
|
||||
symbols, and uses static code length prefix codes for all other histograms.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
|
||||
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
|
||||
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
|
||||
const Command* commands, size_t n_commands,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
/* This is for storing uncompressed blocks (simple raw storage of
|
||||
bytes-as-bytes).
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
|
||||
BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
|
||||
size_t position, size_t mask, size_t len,
|
||||
size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
|
||||
|
||||
#if defined(BROTLI_TEST)
|
||||
void GetBlockLengthPrefixCodeForTest(uint32_t, size_t*, uint32_t*, uint32_t*);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
|
||||
4352
external/duckdb/third_party/brotli/enc/brotli_hash.h
vendored
Normal file
4352
external/duckdb/third_party/brotli/enc/brotli_hash.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
47
external/duckdb/third_party/brotli/enc/brotli_params.h
vendored
Normal file
47
external/duckdb/third_party/brotli/enc/brotli_params.h
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Parameters for the Brotli encoder with chosen quality levels. */
|
||||
|
||||
#ifndef BROTLI_ENC_PARAMS_H_
|
||||
#define BROTLI_ENC_PARAMS_H_
|
||||
|
||||
#include <brotli/encode.h>
|
||||
|
||||
#include "encoder_dict.h"
|
||||
|
||||
/* Hasher configuration; embedded in BrotliEncoderParams as `hasher`. */
typedef struct BrotliHasherParams {
  int type;
  int bucket_bits;
  int block_bits;
  int num_last_distances_to_check;
} BrotliHasherParams;
|
||||
|
||||
/* Distance coding configuration; embedded in BrotliEncoderParams as `dist`.
   `distance_postfix_bits` and `num_direct_distance_codes` are the two values
   the command helpers pass to the distance prefix encoder / decoder. */
typedef struct BrotliDistanceParams {
  uint32_t distance_postfix_bits;
  uint32_t num_direct_distance_codes;
  uint32_t alphabet_size_max;
  uint32_t alphabet_size_limit;
  size_t max_distance;
} BrotliDistanceParams;
|
||||
|
||||
/* Encoding parameters */
|
||||
typedef struct BrotliEncoderParams {
|
||||
BrotliEncoderMode mode;
|
||||
int quality;
|
||||
int lgwin;
|
||||
int lgblock;
|
||||
size_t stream_offset;
|
||||
size_t size_hint;
|
||||
BROTLI_BOOL disable_literal_context_modeling;
|
||||
BROTLI_BOOL large_window;
|
||||
BrotliHasherParams hasher;
|
||||
BrotliDistanceParams dist;
|
||||
/* TODO(eustas): rename to BrotliShared... */
|
||||
duckdb_brotli::SharedEncoderDictionary dictionary;
|
||||
} BrotliEncoderParams;
|
||||
|
||||
#endif /* BROTLI_ENC_PARAMS_H_ */
|
||||
1025
external/duckdb/third_party/brotli/enc/cluster.cpp
vendored
Normal file
1025
external/duckdb/third_party/brotli/enc/cluster.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1017
external/duckdb/third_party/brotli/enc/cluster.h
vendored
Normal file
1017
external/duckdb/third_party/brotli/enc/cluster.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
24
external/duckdb/third_party/brotli/enc/command.cpp
vendored
Normal file
24
external/duckdb/third_party/brotli/enc/command.cpp
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "command.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Insert / copy length code tables, indexed by length code (24 entries each).
   Within each Base/Extra pair, Base[i + 1] == Base[i] + (1 << Extra[i]) for
   every code but the last, i.e. code i covers the lengths
   [Base[i], Base[i] + 2^Extra[i]). */

/* Smallest insert length represented by each insert length code. */
const uint32_t duckdb_brotli::kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {
    0,   1,   2,   3,    4,    5,    6,     8,
    10,  14,  18,  26,   34,   50,   66,    98,
    130, 194, 322, 578,  1090, 2114, 6210,  22594};

/* Number of extra bits attached to each insert length code. */
const uint32_t duckdb_brotli::kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {
    0, 0, 0, 0, 0, 0, 1,  1,
    2, 2, 3, 3, 4, 4, 5,  5,
    6, 7, 8, 9, 10, 12, 14, 24};

/* Smallest copy length represented by each copy length code. */
const uint32_t duckdb_brotli::kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {
    2,  3,   4,   5,   6,   7,   8,    9,
    10, 12,  14,  18,  22,  30,  38,   54,
    70, 102, 134, 198, 326, 582, 1094, 2118};

/* Number of extra bits attached to each copy length code. */
const uint32_t duckdb_brotli::kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 2, 2, 3, 3, 4, 4,
    5, 5, 6, 7, 8, 9, 10, 24};
|
||||
|
||||
|
||||
187
external/duckdb/third_party/brotli/enc/command.h
vendored
Normal file
187
external/duckdb/third_party/brotli/enc/command.h
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* This class models a sequence of literals and a backward reference copy. */
|
||||
|
||||
#ifndef BROTLI_ENC_COMMAND_H_
|
||||
#define BROTLI_ENC_COMMAND_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "fast_log.h"
|
||||
#include "brotli_params.h"
|
||||
#include "prefix.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
BROTLI_INTERNAL extern const uint32_t
|
||||
kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES];
|
||||
BROTLI_INTERNAL extern const uint32_t
|
||||
kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES];
|
||||
BROTLI_INTERNAL extern const uint32_t
|
||||
kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES];
|
||||
BROTLI_INTERNAL extern const uint32_t
|
||||
kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES];
|
||||
|
||||
/* Maps an insert length to its insert length code (0..23). The range
   boundaries 6, 130, 2114, 6210 and 22594 are entries of kBrotliInsBase. */
static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
  if (insertlen >= 22594) return 23u;
  if (insertlen >= 6210) return 22u;
  if (insertlen >= 2114) return 21u;
  if (insertlen >= 130) {
    /* One code per power of two. */
    return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
  }
  if (insertlen >= 6) {
    /* Two codes per power of two. */
    const size_t biased = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(biased) - 1u;
    return (uint16_t)((nbits << 1) + (biased >> nbits) + 2);
  }
  /* Lengths 0..5 are their own code. */
  return (uint16_t)insertlen;
}
|
||||
|
||||
/* Maps a copy length to its copy length code. The range boundaries 10, 134
   and 2118 are entries of kBrotliCopyBase. The first branch computes
   copylen - 2, so callers are expected to pass copylen >= 2. */
static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
  if (copylen >= 2118) return 23u;
  if (copylen >= 134) {
    /* One code per power of two. */
    return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
  }
  if (copylen >= 10) {
    /* Two codes per power of two. */
    const size_t biased = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(biased) - 1u;
    return (uint16_t)((nbits << 1) + (biased >> nbits) + 4);
  }
  return (uint16_t)(copylen - 2);
}
|
||||
|
||||
/* Merges an insert length code and a copy length code into one command
   prefix symbol. The low 6 bits always hold (inscode & 7) << 3 | (copycode & 7);
   the remaining bits select the cell of the insert-and-copy table. */
static BROTLI_INLINE uint16_t CombineLengthCodes(
    uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
  const uint16_t low_bits =
      (uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));
  uint32_t cell;

  if (use_last_distance && inscode < 8u && copycode < 16u) {
    /* Symbols 0..127: commands that reuse the last distance. */
    if (copycode < 8u) return low_bits;
    return (uint16_t)(low_bits | 64u);
  }

  /* Specification: 5 Encoding of ... (last table) */
  /* cell = 2 * index, where index is in range [0..8] */
  cell = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
  /* All values in specification are K * 64,
     where   K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
         i + 1 = [1, 2, 3, 4, 5, 6, 7, 8, 9],
     K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] = D.
     All values in D require only 2 bits to encode.
     Magic constant is shifted 6 bits left, to avoid final multiplication. */
  cell = (cell << 5u) + 0x40u + ((0x520D40u >> cell) & 0xC0u);
  return (uint16_t)(cell | low_bits);
}
|
||||
|
||||
/* Writes to *code the command prefix symbol for the given insert length,
   copy length and "reuses last distance" flag. */
static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
                                        BROTLI_BOOL use_last_distance,
                                        uint16_t* code) {
  *code = CombineLengthCodes(GetInsertLengthCode(insertlen),
                             GetCopyLengthCode(copylen),
                             use_last_distance);
}
|
||||
|
||||
/* Returns the kBrotliInsBase entry for `inscode` (no bounds check). */
static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
  const uint32_t base = kBrotliInsBase[inscode];
  return base;
}
|
||||
|
||||
/* Returns the kBrotliInsExtra entry for `inscode` (no bounds check). */
static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
  const uint32_t extra_bits = kBrotliInsExtra[inscode];
  return extra_bits;
}
|
||||
|
||||
/* Returns the kBrotliCopyBase entry for `copycode` (no bounds check). */
static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
  const uint32_t base = kBrotliCopyBase[copycode];
  return base;
}
|
||||
|
||||
/* Returns the kBrotliCopyExtra entry for `copycode` (no bounds check). */
static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
  const uint32_t extra_bits = kBrotliCopyExtra[copycode];
  return extra_bits;
}
|
||||
|
||||
/* One encoder command: a run of inserted literals followed by a backward
   reference copy. */
typedef struct Command {
  uint32_t insert_len_;
  /* Stores copy_len in the low 25 bits and (copy_code - copy_len) in the
     high 7 bits. */
  uint32_t copy_len_;
  /* Stores distance extra bits. */
  uint32_t dist_extra_;
  /* Command prefix symbol, as produced by GetLengthCode. */
  uint16_t cmd_prefix_;
  /* Stores distance code in low 10 bits
     and number of extra bits in high 6 bits. */
  uint16_t dist_prefix_;
} Command;
|
||||
|
||||
/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
|
||||
static BROTLI_INLINE void InitCommand(Command* self,
|
||||
const BrotliDistanceParams* dist, size_t insertlen,
|
||||
size_t copylen, int copylen_code_delta, size_t distance_code) {
|
||||
/* Don't rely on signed int representation, use honest casts. */
|
||||
uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
|
||||
self->insert_len_ = (uint32_t)insertlen;
|
||||
self->copy_len_ = (uint32_t)(copylen | (delta << 25));
|
||||
/* The distance prefix and extra bits are stored in this Command as if
|
||||
npostfix and ndirect were 0, they are only recomputed later after the
|
||||
clustering if needed. */
|
||||
PrefixEncodeCopyDistance(
|
||||
distance_code, dist->num_direct_distance_codes,
|
||||
dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
|
||||
GetLengthCode(
|
||||
insertlen, (size_t)((int)copylen + copylen_code_delta),
|
||||
TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
|
||||
}
|
||||
|
||||
/* Fills in an insert-only command: copy length 0 with a length-code delta
   of 4 in the upper bits of copy_len_, so the prefix is computed for a copy
   length of 4. */
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
  GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
  self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
  self->dist_extra_ = 0;
  self->copy_len_ = 4 << 25;
  self->insert_len_ = (uint32_t)insertlen;
}
|
||||
|
||||
/* Rebuilds the distance code from the (dist_prefix_, dist_extra_) pair stored
   in the command, using the postfix / direct-code settings in `dist`. */
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
    const Command* self, const BrotliDistanceParams* dist) {
  const uint32_t dcode = self->dist_prefix_ & 0x3FFu;
  /* Codes below this value are short or direct codes and stand for
     themselves. */
  const uint32_t first_coded =
      BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes;

  if (dcode < first_coded) {
    return dcode;
  }

  {
    const uint32_t nbits = self->dist_prefix_ >> 10;
    const uint32_t extra = self->dist_extra_;
    const uint32_t npostfix = dist->distance_postfix_bits;
    const uint32_t postfix_mask = (1U << npostfix) - 1U;
    const uint32_t relative = dcode - first_coded;
    const uint32_t hcode = relative >> npostfix;
    const uint32_t lcode = relative & postfix_mask;
    const uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
    return ((offset + extra) << npostfix) + lcode + first_coded;
  }
}
|
||||
|
||||
/* Returns the distance context (0..3) derived from the command prefix:
   the low three bits of the prefix when they are at most 2 and the prefix
   lies in 64-symbol cell 0, 2, 4 or 7; otherwise 3. */
static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
  const uint32_t cell = self->cmd_prefix_ >> 6;
  const uint32_t copy_low = self->cmd_prefix_ & 7;

  if (copy_low > 2) return 3;
  switch (cell) {
    case 0:
    case 2:
    case 4:
    case 7:
      return copy_low;
    default:
      return 3;
  }
}
|
||||
|
||||
/* Returns the real copy length: the low 25 bits of copy_len_. */
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
  const uint32_t copy_len_mask = 0x1FFFFFF;
  return self->copy_len_ & copy_len_mask;
}
|
||||
|
||||
/* Returns the copy length that was used to pick the length code: the real
   copy length plus the signed 7-bit delta kept in the top bits of copy_len_. */
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
  const uint32_t modifier = self->copy_len_ >> 25;
  /* Replicate bit 6 into bit 7 so the 7-bit value sign-extends as int8_t. */
  const uint8_t widened = (uint8_t)(modifier | ((modifier & 0x40) << 1));
  const int32_t delta = (int8_t)widened;
  const int32_t copy_len = (int32_t)(self->copy_len_ & 0x1FFFFFF);
  return (uint32_t)(copy_len + delta);
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_COMMAND_H_ */
|
||||
209
external/duckdb/third_party/brotli/enc/compound_dictionary.cpp
vendored
Normal file
209
external/duckdb/third_party/brotli/enc/compound_dictionary.cpp
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "compound_dictionary.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Builds a "lean" prepared dictionary (it references `source`, it does not
   copy it) for the given hash-table geometry.

   m            - memory manager used for both the scratch and the result
                  allocation.
   source       - dictionary bytes; must outlive the returned object, since
                  only a pointer to them is stored.
   source_size  - number of bytes in `source`.
   bucket_bits  - log2 of the number of hash buckets.
   slot_bits    - log2 of the number of slots; bucket `b` belongs to slot
                  `b & (num_slots - 1)`.
   hash_bits    - number of low bits of each 64-bit load that are hashed.
   bucket_limit - maximum number of positions kept per bucket.

   Returns NULL when the parameters are out of range or on allocation
   failure; otherwise a PreparedDictionary to be released with
   DestroyPreparedDictionary. */
static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
    const uint8_t* source, size_t source_size, uint32_t bucket_bits,
    uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
  uint32_t num_slots;
  uint32_t num_buckets;
  uint32_t hash_shift;
  uint64_t hash_mask;
  uint32_t slot_mask;
  size_t alloc_size;
  uint8_t* flat = NULL;
  PreparedDictionary* result = NULL;
  uint16_t* num = NULL;
  uint32_t* bucket_heads = NULL;
  uint32_t* next_bucket = NULL;
  uint32_t* slot_offsets = NULL;
  uint16_t* heads = NULL;
  uint32_t* items = NULL;
  uint8_t** source_ref = NULL;
  uint32_t i;
  uint32_t* slot_size = NULL;
  uint32_t* slot_limit = NULL;
  uint32_t total_items = 0;

  /* Validate everything BEFORE it is used as a shift count below. */
  if (slot_bits > 16) return NULL;
  if (slot_bits > bucket_bits) return NULL;
  if (bucket_bits - slot_bits >= 16) return NULL;
  /* bucket_bits == 0 would make hash_shift 64; hash_bits of 0 or above 64
     would make the hash_mask shift count out of range. */
  if (bucket_bits == 0) return NULL;
  if (hash_bits == 0 || hash_bits > 64) return NULL;
  /* Positions are indexed with a uint32_t and source_size is stored in a
     uint32_t field, so larger sources cannot be represented.
     NOTE(review): bit 31 of an item doubles as the end-of-chain flag set at
     the bottom of this function; whether sources of 2 GiB and more should
     also be rejected depends on how the reader decodes items - confirm. */
  if ((uint64_t)source_size > (uint64_t)0xFFFFFFFFu) return NULL;

  /* Step 1: create "bloated" hasher. */
  num_slots = 1u << slot_bits;
  num_buckets = 1u << bucket_bits;
  hash_shift = 64u - bucket_bits;
  hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
  slot_mask = num_slots - 1;

  /* Scratch layout: slot_size[num_slots], slot_limit[num_slots],
     num[num_buckets], bucket_heads[num_buckets], next_bucket[source_size]. */
  alloc_size = (sizeof(uint32_t) << slot_bits) +
               (sizeof(uint32_t) << slot_bits) +
               (sizeof(uint16_t) << bucket_bits) +
               (sizeof(uint32_t) << bucket_bits);
  /* Guard the per-position term against size_t wrap-around (reachable when
     size_t is 32 bits wide). */
  if (source_size > ((~(size_t)0) - alloc_size) / sizeof(uint32_t)) {
    return NULL;
  }
  alloc_size += sizeof(uint32_t) * source_size;

  flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;

  slot_size = (uint32_t*)flat;
  slot_limit = (uint32_t*)(&slot_size[num_slots]);
  num = (uint16_t*)(&slot_limit[num_slots]);
  bucket_heads = (uint32_t*)(&num[num_buckets]);
  next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
  memset(num, 0, num_buckets * sizeof(num[0]));

  /* Thread every position that has 8 readable bytes onto the front of its
     bucket's chain; num[key] counts entries, saturating at bucket_limit. */
  /* TODO(eustas): apply custom "store" order. */
  for (i = 0; i + 7 < source_size; ++i) {
    const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
        kPreparedDictionaryHashMul64Long;
    const uint32_t key = (uint32_t)(h >> hash_shift);
    uint16_t count = num[key];
    next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
    bucket_heads[key] = i;
    count++;
    if (count > bucket_limit) count = bucket_limit;
    num[key] = count;
  }

  /* Step 2: find slot limits. For each slot, lower the per-bucket limit
     until every bucket of the slot starts at an offset below 0xFFFF. */
  for (i = 0; i < num_slots; ++i) {
    BROTLI_BOOL overflow = BROTLI_FALSE;
    slot_limit[i] = bucket_limit;
    while (BROTLI_TRUE) {
      uint32_t limit = slot_limit[i];
      size_t j;
      uint32_t count = 0;
      overflow = BROTLI_FALSE;
      for (j = i; j < num_buckets; j += num_slots) {
        uint32_t size = num[j];
        /* Last chain may span behind 64K limit; overflow happens only if
           we are about to use 0xFFFF+ as item offset. */
        if (count >= 0xFFFF) {
          overflow = BROTLI_TRUE;
          break;
        }
        if (size > limit) size = limit;
        count += size;
      }
      if (!overflow) {
        slot_size[i] = count;
        total_items += count;
        break;
      }
      slot_limit[i]--;
    }
  }

  /* Step 3: transfer data to "slim" hasher. Result layout: header,
     slot_offsets[num_slots], heads[num_buckets], items[total_items],
     then one pointer to the source bytes. */
  alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
      (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
      sizeof(uint8_t*);

  result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
    BROTLI_FREE(m, flat);
    return NULL;
  }
  slot_offsets = (uint32_t*)(&result[1]);
  heads = (uint16_t*)(&slot_offsets[num_slots]);
  items = (uint32_t*)(&heads[num_buckets]);
  source_ref = (uint8_t**)(&items[total_items]);

  result->magic = kLeanPreparedDictionaryMagic;
  result->num_items = total_items;
  result->source_size = (uint32_t)source_size;
  result->hash_bits = hash_bits;
  result->bucket_bits = bucket_bits;
  result->slot_bits = slot_bits;
  BROTLI_UNALIGNED_STORE_PTR(source_ref, source);

  /* Prefix-sum the slot sizes into slot offsets; slot_size is then reused
     as the per-slot write cursor. */
  total_items = 0;
  for (i = 0; i < num_slots; ++i) {
    slot_offsets[i] = total_items;
    total_items += slot_size[i];
    slot_size[i] = 0;
  }
  for (i = 0; i < num_buckets; ++i) {
    uint32_t slot = i & slot_mask;
    uint32_t count = num[i];
    uint32_t pos;
    size_t j;
    size_t cursor = slot_size[slot];
    if (count > slot_limit[slot]) count = slot_limit[slot];
    if (count == 0) {
      /* 0xFFFF marks an empty bucket. */
      heads[i] = 0xFFFF;
      continue;
    }
    /* heads[] is relative to the slot start; items[] is absolute. */
    heads[i] = (uint16_t)cursor;
    cursor += slot_offsets[slot];
    slot_size[slot] += count;
    pos = bucket_heads[i];
    for (j = 0; j < count; j++) {
      items[cursor++] = pos;
      pos = next_bucket[pos];
    }
    /* The top bit flags the last item of a bucket's chain. */
    items[cursor - 1] |= 0x80000000;
  }

  BROTLI_FREE(m, flat);
  return result;
}
|
||||
|
||||
/* Creates a lean prepared dictionary over `source`, choosing the hash-table
   geometry from the source size: starting from 17 bucket bits and 7 slot
   bits, both grow by one each time the covered volume (initially
   16 << 17 bytes) doubles, until the volume reaches source_size or
   bucket_bits reaches 22. */
PreparedDictionary* duckdb_brotli::CreatePreparedDictionary(MemoryManager* m,
    const uint8_t* source, size_t source_size) {
  const uint32_t max_bucket_bits = 22;
  const uint32_t hash_bits = 40;
  const uint16_t bucket_limit = 32;
  uint32_t bucket_bits = 17;
  uint32_t slot_bits = 7;
  size_t volume;

  /* Tune parameters to fit dictionary size. */
  for (volume = 16u << bucket_bits;
       volume < source_size && bucket_bits < max_bucket_bits;
       volume <<= 1) {
    bucket_bits++;
    slot_bits++;
  }

  return CreatePreparedDictionaryWithParams(m,
      source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
}
|
||||
|
||||
/* Releases a prepared dictionary through the memory manager `m`.
   A NULL dictionary is accepted and ignored. */
void duckdb_brotli::DestroyPreparedDictionary(MemoryManager* m,
    PreparedDictionary* dictionary) {
  if (dictionary != NULL) {
    BROTLI_FREE(m, dictionary);
  }
}
|
||||
|
||||
/* Appends `dictionary` as the next chunk of `compound`.
   Returns BROTLI_FALSE, leaving `compound` untouched, when the compound
   dictionary already holds SHARED_BROTLI_MAX_COMPOUND_DICTS chunks or when
   `dictionary` is NULL; returns BROTLI_TRUE otherwise. */
BROTLI_BOOL duckdb_brotli::AttachPreparedDictionary(
    CompoundDictionary* compound, const PreparedDictionary* dictionary) {
  size_t chunk;

  if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
    return BROTLI_FALSE;
  }
  if (dictionary == NULL) {
    return BROTLI_FALSE;
  }

  chunk = compound->num_chunks;
  compound->total_size += dictionary->source_size;
  compound->chunks[chunk] = dictionary;
  /* chunk_offsets[chunk + 1] is the running total after this chunk. */
  compound->chunk_offsets[chunk + 1] = compound->total_size;

  {
    /* Walk the variable-size arrays that follow the header to reach the
       tail, where the source bytes (or a pointer to them) are kept. */
    uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
    uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
    uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
    const void* tail = (void*)&items[dictionary->num_items];

    if (dictionary->magic != kPreparedDictionaryMagic) {
      /* dictionary->magic == kLeanPreparedDictionaryMagic:
         the tail holds a pointer to the source. */
      compound->chunk_source[chunk] =
          (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
    } else {
      /* "Fat" dictionary: the source bytes are stored inline at the tail. */
      compound->chunk_source[chunk] = (const uint8_t*)tail;
    }
  }

  compound->num_chunks++;
  return BROTLI_TRUE;
}
|
||||
75
external/duckdb/third_party/brotli/enc/compound_dictionary.h
vendored
Normal file
75
external/duckdb/third_party/brotli/enc/compound_dictionary.h
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
|
||||
#define BROTLI_ENC_PREPARED_DICTIONARY_H_
|
||||
|
||||
#include <brotli/shared_dictionary.h>
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "memory.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* "Fat" prepared dictionary, could be cooked outside of C implementation,
|
||||
* e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */
|
||||
static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
|
||||
|
||||
static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
|
||||
|
||||
static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
|
||||
|
||||
/* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility
|
||||
* of caller of "prepare dictionary" to keep the LZ77 data while prepared
|
||||
* dictionary is in use. */
|
||||
static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3;
|
||||
|
||||
static const uint64_t kPreparedDictionaryHashMul64Long = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
|
||||
|
||||
/* Fixed-size header of a prepared dictionary. The variable-size arrays
   listed below follow it in the same allocation, in this order. */
typedef struct PreparedDictionary {
  uint32_t magic;        /* One of the k*PreparedDictionaryMagic values. */
  uint32_t num_items;    /* Number of entries in items[]. */
  uint32_t source_size;  /* Size of the dictionary source, in bytes. */
  uint32_t hash_bits;
  uint32_t bucket_bits;
  uint32_t slot_bits;

  /* --- Dynamic size members --- */

  /* uint32_t slot_offsets[1 << slot_bits]; */
  /* uint16_t heads[1 << bucket_bits]; */
  /* uint32_t items[variable]; */

  /* [maybe] uint8_t* source_ref, depending on magic. */
  /* [maybe] uint8_t source[source_size], depending on magic. */
} PreparedDictionary;
|
||||
|
||||
BROTLI_INTERNAL PreparedDictionary *CreatePreparedDictionary(duckdb_brotli::MemoryManager *m, const uint8_t *source,
|
||||
size_t source_size);
|
||||
|
||||
BROTLI_INTERNAL void DestroyPreparedDictionary(duckdb_brotli::MemoryManager *m, PreparedDictionary *dictionary);
|
||||
|
||||
typedef struct CompoundDictionary {
|
||||
/* LZ77 prefix, compound dictionary */
|
||||
size_t num_chunks;
|
||||
size_t total_size;
|
||||
/* Client instances. */
|
||||
const PreparedDictionary *chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
|
||||
const uint8_t *chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
|
||||
size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
|
||||
|
||||
size_t num_prepared_instances_;
|
||||
/* Owned instances. */
|
||||
PreparedDictionary *prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
|
||||
} CompoundDictionary;
|
||||
|
||||
BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(CompoundDictionary *compound,
|
||||
const PreparedDictionary *dictionary);
|
||||
|
||||
}
|
||||
#endif /* BROTLI_ENC_PREPARED_DICTIONARY_H_ */
|
||||
796
external/duckdb/third_party/brotli/enc/compress_fragment.cpp
vendored
Normal file
796
external/duckdb/third_party/brotli/enc/compress_fragment.cpp
vendored
Normal file
@@ -0,0 +1,796 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses one-pass processing: when we find a backward
|
||||
match, we immediately emit the corresponding command and literal codes to
|
||||
the bit stream.
|
||||
|
||||
Adapted from the CompressFragment() function in
|
||||
https://github.com/google/snappy/blob/master/snappy.cc */
|
||||
|
||||
#include "compress_fragment.h"
|
||||
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "brotli_bit_stream.h"
|
||||
#include "entropy_encode.h"
|
||||
#include "fast_log.h"
|
||||
#include "find_match_length.h"
|
||||
#include "write_bits.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Largest backward distance for an 18-bit window. The whole expansion is
   parenthesised so the cast cannot bind differently depending on the
   expression the macro is used in. */
#define MAX_DISTANCE ((long)BROTLI_MAX_BACKWARD_LIMIT(18))
|
||||
|
||||
/* kHashMul32 multiplier has these properties:
|
||||
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
* No long streaks of ones or zeros.
|
||||
* There is no effort to ensure that it is a prime, the oddity is enough
|
||||
for this use.
|
||||
* The number has been tuned heuristically against compression benchmarks. */
|
||||
static const uint32_t kHashMul32 = 0x1E35A7BD;
|
||||
|
||||
/* Hashes the 8 bytes loaded from `p`: the loaded word is shifted left by 24
   bits (so only its low 40 bits contribute), multiplied by kHashMul32, and
   the top (64 - shift) bits of the product are returned. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t window = BROTLI_UNALIGNED_LOAD64LE(p) << 24;
  const uint64_t mixed = window * kHashMul32;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
/* Same hash as Hash(), computed from an already loaded 64-bit word `v`
   after dropping its `offset` lowest bytes. `offset` must be in [0, 3]. */
static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  BROTLI_DCHECK(offset >= 0);
  BROTLI_DCHECK(offset <= 3);
  {
    const uint64_t window = (v >> (8 * offset)) << 24;
    const uint64_t mixed = window * kHashMul32;
    return (uint32_t)(mixed >> shift);
  }
}
|
||||
|
||||
/* Returns BROTLI_TRUE when the first five bytes at p1 and p2 are equal:
   one 32-bit comparison followed by a comparison of the fifth byte. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  if (BrotliUnalignedRead32(p1) != BrotliUnalignedRead32(p2)) {
    return BROTLI_FALSE;
  }
  return TO_BROTLI_BOOL(p1[4] == p2[4]);
}
|
||||
|
||||
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
|
||||
of the "input" string and stores it into the bit stream.
|
||||
Note that the prefix code here is built from the pre-LZ77 input, therefore
|
||||
we can only approximate the statistics of the actual literal stream.
|
||||
Moreover, for long inputs we build a histogram from a sample of the input
|
||||
and thus have to assign a non-zero depth for each literal.
|
||||
Returns estimated compression ratio millibytes/char for encoding given input
|
||||
with generated code. */
|
||||
static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
|
||||
const uint8_t* input,
|
||||
const size_t input_size,
|
||||
uint8_t depths[256],
|
||||
uint16_t bits[256],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
|
||||
size_t histogram_total;
|
||||
size_t i;
|
||||
memset(histogram, 0, sizeof(s->histogram));
|
||||
|
||||
if (input_size < (1 << 15)) {
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = input_size;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We weigh the first 11 samples with weight 3 to account for the
|
||||
balancing effect of the LZ77 phase on the histogram. */
|
||||
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
} else {
|
||||
static const size_t kSampleRate = 29;
|
||||
for (i = 0; i < input_size; i += kSampleRate) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We add 1 to each population count to avoid 0 bit depths (since this is
|
||||
only a sample and we don't know if the symbol appears or not), and we
|
||||
weigh the first 11 samples with weight 3 to account for the balancing
|
||||
effect of the LZ77 phase on the histogram (more frequent symbols are
|
||||
more likely to be in backward references instead as literals). */
|
||||
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
}
|
||||
BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
|
||||
/* max_bits = */ 8,
|
||||
depths, bits, storage_ix, storage);
|
||||
{
|
||||
size_t literal_ratio = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
|
||||
}
|
||||
/* Estimated encoding ratio, millibytes per symbol. */
|
||||
return (literal_ratio * 125) / histogram_total;
|
||||
}
|
||||
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const uint32_t* const histogram = s->cmd_histo;
|
||||
uint8_t* const depth = s->cmd_depth;
|
||||
uint16_t* const bits = s->cmd_bits;
|
||||
uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
|
||||
uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
|
||||
/* TODO(eustas): do only once on initialization. */
|
||||
memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
|
||||
|
||||
BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
|
||||
BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
memcpy(tmp_depth, depth, 24);
|
||||
memcpy(tmp_depth + 24, depth + 40, 8);
|
||||
memcpy(tmp_depth + 32, depth + 24, 8);
|
||||
memcpy(tmp_depth + 40, depth + 48, 8);
|
||||
memcpy(tmp_depth + 48, depth + 32, 8);
|
||||
memcpy(tmp_depth + 56, depth + 56, 8);
|
||||
BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
|
||||
memcpy(bits, tmp_bits, 48);
|
||||
memcpy(bits + 24, tmp_bits + 32, 16);
|
||||
memcpy(bits + 32, tmp_bits + 48, 16);
|
||||
memcpy(bits + 40, tmp_bits + 24, 16);
|
||||
memcpy(bits + 48, tmp_bits + 40, 16);
|
||||
memcpy(bits + 56, tmp_bits + 56, 16);
|
||||
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
size_t i;
|
||||
memset(tmp_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(tmp_depth, depth, 8);
|
||||
memcpy(tmp_depth + 64, depth + 8, 8);
|
||||
memcpy(tmp_depth + 128, depth + 16, 8);
|
||||
memcpy(tmp_depth + 192, depth + 24, 8);
|
||||
memcpy(tmp_depth + 384, depth + 32, 8);
|
||||
for (i = 0; i < 8; ++i) {
|
||||
tmp_depth[128 + 8 * i] = depth[40 + i];
|
||||
tmp_depth[256 + 8 * i] = depth[48 + i];
|
||||
tmp_depth[448 + 8 * i] = depth[56 + i];
|
||||
}
|
||||
/* TODO(eustas): could/should full-length machinery be avoided? */
|
||||
BrotliStoreHuffmanTree(
|
||||
tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
|
||||
}
|
||||
BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* REQUIRES: insertlen < 6210 */
|
||||
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (insertlen < 6) {
|
||||
const size_t code = insertlen + 40;
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (insertlen < 130) {
|
||||
const size_t tail = insertlen - 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t inscode = (nbits << 1) + prefix + 42;
|
||||
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
|
||||
++histo[inscode];
|
||||
} else if (insertlen < 2114) {
|
||||
const size_t tail = insertlen - 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 50;
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
|
||||
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
|
||||
++histo[61];
|
||||
}
|
||||
}
|
||||
|
||||
/* Writes the prefix code and extra bits for a long insert length: symbol 62
   with 14 extra bits (offset 6210) below 22594, otherwise symbol 63 with 24
   extra bits (offset 22594). The emitted symbol is counted in "histo". */
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  const int is_medium = insertlen < 22594;
  const size_t sym = is_medium ? 62 : 63;
  const size_t extra_nbits = is_medium ? 14 : 24;
  const size_t offset = is_medium ? 6210 : 22594;
  BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
  BrotliWriteBits(extra_nbits, insertlen - offset, storage_ix, storage);
  ++histo[sym];
}
|
||||
|
||||
/* Writes the prefix code (plus extra bits, if any) for a copy length that is
   followed by an explicitly coded distance, and counts the emitted symbol in
   "histo". */
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    /* Direct mapping, no extra bits. */
    const size_t sym = copylen + 14;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    ++histo[sym];
    return;
  }
  if (copylen < 134) {
    /* Two symbols per power of two. */
    const size_t rest = copylen - 6;
    const uint32_t extra_nbits = Log2FloorNonZero(rest) - 1u;
    const size_t top = rest >> extra_nbits;
    const size_t sym = (extra_nbits << 1) + top + 20;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    BrotliWriteBits(extra_nbits, rest - (top << extra_nbits),
                    storage_ix, storage);
    ++histo[sym];
    return;
  }
  if (copylen < 2118) {
    /* One symbol per power of two. */
    const size_t rest = copylen - 70;
    const uint32_t extra_nbits = Log2FloorNonZero(rest);
    const size_t sym = extra_nbits + 28;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    BrotliWriteBits(extra_nbits, rest - ((size_t)1 << extra_nbits),
                    storage_ix, storage);
    ++histo[sym];
    return;
  }
  /* Everything longer uses symbol 39 with 24 extra bits. */
  BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
  BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
  ++histo[39];
}
|
||||
|
||||
/* Writes the prefix code (plus extra bits, if any) for a copy length, and
   counts the emitted symbols in "histo". For copy lengths of 72 and above
   the length symbol is followed by symbol 64, which is written and counted
   as well. */
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    /* Direct mapping, no extra bits. */
    const size_t sym = copylen - 4;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    ++histo[sym];
    return;
  }
  if (copylen < 72) {
    /* Two symbols per power of two. */
    const size_t rest = copylen - 8;
    const uint32_t extra_nbits = Log2FloorNonZero(rest) - 1;
    const size_t top = rest >> extra_nbits;
    const size_t sym = (extra_nbits << 1) + top + 4;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    BrotliWriteBits(extra_nbits, rest - (top << extra_nbits),
                    storage_ix, storage);
    ++histo[sym];
    return;
  }
  if (copylen < 136) {
    /* Fixed 5 extra bits, then symbol 64. */
    const size_t rest = copylen - 8;
    const size_t sym = (rest >> 5) + 30;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    BrotliWriteBits(5, rest & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[sym];
    ++histo[64];
    return;
  }
  if (copylen < 2120) {
    /* One symbol per power of two, then symbol 64. */
    const size_t rest = copylen - 72;
    const uint32_t extra_nbits = Log2FloorNonZero(rest);
    const size_t sym = extra_nbits + 28;
    BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
    BrotliWriteBits(extra_nbits, rest - ((size_t)1 << extra_nbits),
                    storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[sym];
    ++histo[64];
    return;
  }
  /* Symbol 39 with 24 extra bits, then symbol 64. */
  BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
  BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
  BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
  ++histo[39];
  ++histo[64];
}
|
||||
|
||||
/* Writes the prefix code and extra bits for "distance" and counts the emitted
   symbol (index 80 and above) in "histo". */
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t biased = distance + 3;
  const uint32_t extra_nbits = Log2FloorNonZero(biased) - 1u;
  /* Second-highest bit of the biased distance picks one of two symbols. */
  const size_t half = (biased >> extra_nbits) & 1;
  const size_t bucket_start = (2 + half) << extra_nbits;
  const size_t sym = 2 * (extra_nbits - 1) + half + 80;
  BrotliWriteBits(depth[sym], bits[sym], storage_ix, storage);
  BrotliWriteBits(extra_nbits, biased - bucket_start, storage_ix, storage);
  ++histo[sym];
}
|
||||
|
||||
/* Writes "len" bytes starting at "input" to the bit stream, each encoded with
   the literal prefix code given by "depth" (code lengths) and "bits"
   (code words). */
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t pos = 0;
  while (pos < len) {
    const uint8_t symbol = input[pos];
    BrotliWriteBits(depth[symbol], bits[symbol], storage_ix, storage);
    ++pos;
  }
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* Overwrites "n_bits" bits of "array", starting at bit position "pos", with
   the low "n_bits" bits of "bits" (least significant bit first). All other
   bits of the touched bytes are preserved. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  while (n_bits > 0) {
    const size_t index = pos >> 3;
    /* Bits below the write position within this byte stay untouched. */
    const size_t keep_low = pos & 7;
    const size_t room = 8 - keep_low;
    const size_t take = (n_bits < room) ? n_bits : room;
    const size_t end_bit = keep_low + take;
    /* Mask selecting everything outside [keep_low, end_bit). */
    const uint32_t keep_mask =
        (~((1u << end_bit) - 1u)) | ((1u << keep_low) - 1u);
    const uint32_t kept = array[index] & keep_mask;
    const uint32_t fresh = bits & ((1u << take) - 1u);
    array[index] = (uint8_t)((fresh << keep_low) | kept);
    n_bits -= take;
    bits >>= take;
    pos += take;
  }
}
|
||||
|
||||
/* Moves the write position to "new_storage_ix": clears the bits at and above
   that position in the byte containing it (so later writes can OR into it)
   and stores the new position in "*storage_ix". */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  uint8_t* const partial_byte = &storage[new_storage_ix >> 3];
  const size_t keep_mask = (1u << (new_storage_ix & 7)) - 1;
  *partial_byte &= (uint8_t)keep_mask;
  *storage_ix = new_storage_ix;
}
|
||||
|
||||
/* Samples every 43rd byte of "data" into s->histogram and computes a score
   from the sample size, the sampled counts and the code lengths in "depths".
   Returns BROTLI_TRUE when the score is non-negative; the caller uses this to
   decide whether to extend the current meta-block with this data. */
static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
    const uint8_t* data, size_t len, const uint8_t* depths) {
  uint32_t* BROTLI_RESTRICT const sample = s->histogram;
  static const size_t kSampleRate = 43;
  const size_t num_samples = (len + kSampleRate - 1) / kSampleRate;
  double score;
  size_t pos;
  size_t sym;

  memset(sample, 0, sizeof(s->histogram));
  for (pos = 0; pos < len; pos += kSampleRate) {
    ++sample[data[pos]];
  }

  /* The constant 200 biases the decision towards merging. */
  score = (FastLog2(num_samples) + 0.5) * (double)num_samples + 200;
  for (sym = 0; sym < 256; ++sym) {
    score -= (double)sample[sym] * (depths[sym] + FastLog2(sample[sym]));
  }
  return TO_BROTLI_BOOL(score >= 0.0);
}
|
||||
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 980
|
||||
|
||||
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
|
||||
const uint8_t* metablock_start, const uint8_t* next_emit,
|
||||
const size_t insertlen, const size_t literal_ratio) {
|
||||
const size_t compressed = (size_t)(next_emit - metablock_start);
|
||||
if (compressed * 50 > insertlen) {
|
||||
return BROTLI_FALSE;
|
||||
} else {
|
||||
return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
|
||||
}
|
||||
}
|
||||
|
||||
/* Discards everything written since bit position "storage_ix_start" and
   stores the bytes in ["begin", "end") as one uncompressed meta-block:
   header, padding to the next byte boundary, then the raw bytes. The byte
   following the copied data is zeroed so that subsequent bit writes can OR
   into it. On return "*storage_ix" points just past the copied bytes. */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Round up to a byte boundary. The mask must be size_t-wide: "~7u" is a
     32-bit value that zero-extends and would clear the upper half of a
     64-bit bit position. */
  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}
|
||||
|
||||
/* Initial counts for the combined command (0..63) and distance (64..127)
   histogram; copied into the arena's cmd_histo before each block is
   processed. The table is only ever read, hence const. */
static const uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
|
||||
|
||||
/* One-pass compression of "input" into one or more meta-blocks appended to
   "storage" at bit position "*storage_ix".

   "table" is a hash table with (1 << table_bits) entries mapping a hash of
   the bytes at a position to that position's offset from the start of the
   input. Literal and command prefix codes live in the arena "s"; command
   statistics gathered while encoding a block are used to build the command
   code of the following meta-block and, when "is_last" is false, the code
   saved in s->cmd_code for the next call. */
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits,
    size_t* storage_ix, uint8_t* storage) {
  uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
  uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
  uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
  uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
  uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
  const uint8_t* ip_end;

  /* First byte not yet covered by an emitted command. Everything between
     "next_emit" and the next copy (or the end of the input) is emitted as
     literals. */
  const uint8_t* next_emit = input;
  /* Start of the whole fragment; hash table entries are offsets from it. */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Bit position of the MLEN field (3 bits into the header), remembered so
     the length can be patched if the meta-block is extended later. */
  size_t mlen_storage_ix = *storage_ix + 3;

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);

  {
    /* Copy the pre-compressed command and distance prefix codes: first the
       whole bytes, then the remaining (numbits & 7) bits. */
    size_t k;
    for (k = 0; k < (s->cmd_code_numbits >> 3); ++k) {
      BrotliWriteBits(8, s->cmd_code[k], storage_ix, storage);
    }
  }
  BrotliWriteBits(s->cmd_code_numbits & 7,
                  s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);

 emit_commands:
  /* Reset the command/distance histogram to its seed. Statistics collected
     while processing this block feed the prefix codes of the next one. */
  memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* The last block keeps a 16-byte margin so that all distances stay at
       most window size - 16. Other blocks only need a 5-byte margin so that
       a copy never runs past the block. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: scan forward looking for a 5-byte match; bail out to
         emit_remainder when the input is nearly exhausted.

         Match skipping heuristic: "skip" counts probes since the last match
         and the step between probes is skip >> 5, so after 32 probes without
         a match every other byte is examined, after 32 more every third
         byte, and so on. A found match resets the step to one byte. This
         costs a little on compressible data (~5% performance, ~0.1% density)
         but is a large win on incompressible data such as JPEG. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t stride = skip++ >> 5;
        BROTLI_DCHECK(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + stride;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        /* Try the most recent distance first. */
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate) && BROTLI_PREDICT_TRUE(candidate < ip)) {
          table[hash] = (int)(ip - base_ip);
          break;
        }
        /* Otherwise probe the hash table and record the current position. */
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* The distance check is kept out of the hot loop; an out-of-range
         candidate simply resumes the search. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: emit the match together with the pending literals, then keep
         emitting back-to-back matches for as long as one starts exactly
         where the previous copy ended. */

      {
        /* A 5-byte match starts at ip; bytes in [next_emit, ip) are still
           pending as literals. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
                    (int)(next_emit - base_ip), (unsigned long)insert, 2));
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          /* Rewrite the whole meta-block so far as uncompressed and start a
             fresh meta-block at the match. */
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          /* Same distance as before: emit symbol 64 instead of a full
             distance code. */
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);
        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), (int)distance,
                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
                    (int)(base - base_ip) + 2, (int)distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* Before continuing at ip, seed "table" with the three positions
           just before ip (inside the copy), then look up and replace the
           entry for ip itself. The store order matters when hashes collide. */
        {
          uint64_t window = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(window, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(window, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(window, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(window, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* A 5-byte match starts at ip and there are no pending literals. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), 0, (int)matched,
                    (int)(base - base_ip), (int)last_distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* Same table refresh as after the first match of a run. */
        {
          uint64_t window = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(window, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(window, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(window, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(window, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide whether to extend the current meta-block with the next block
     instead of closing it with a final insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(s, input, block_size, lit_depth)) {
    BROTLI_DCHECK(total_block_size > (1 << 16));
    /* Patch MLEN in place. This works because both the old and the new size
       are stored in 5 nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit whatever is left of the block as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
                (int)(next_emit - base_ip), (unsigned long)insert, 2));
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* More input left: open a new meta-block (header, literal code, command
     code built from the statistics gathered so far) and continue. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Remember where MLEN is stored so it can be patched on extension. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* Not the last fragment: rebuild the command and distance prefix codes
       and save their compressed form in the arena for the next call. */
    s->cmd_code[0] = 0;
    s->cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
  }
}
|
||||
|
||||
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
|
||||
|
||||
#define BAKE_METHOD_PARAM_(B) \
|
||||
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
|
||||
BrotliOnePassArena* s, const uint8_t* input, size_t input_size, \
|
||||
BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
|
||||
BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B, \
|
||||
storage_ix, storage); \
|
||||
}
|
||||
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
|
||||
#undef BAKE_METHOD_PARAM_
|
||||
|
||||
/* Compresses "input" into "storage" starting at bit position "*storage_ix",
   dispatching on log2("table_size") to the matching specialization.

   An empty input writes only the empty last meta-block. If the compressed
   output ends up larger than a single uncompressed meta-block would be, it is
   replaced by one. When "is_last" is set, an empty last meta-block is
   appended and the position is padded to a byte boundary. */
void duckdb_brotli::BrotliCompressFragmentFast(
    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    BROTLI_DCHECK(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    /* Byte-align with a size_t-wide mask; "~7u" is only 32 bits wide and
       would clear the upper half of a 64-bit bit position. */
    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
    return;
  }

  switch (table_bits) {
#define CASE_(B)                                                     \
    case B:                                                          \
      BrotliCompressFragmentFastImpl ## B(                           \
          s, input, input_size, is_last, table, storage_ix, storage);\
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    /* Same size_t-wide byte alignment as in the empty-input path. */
    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  }
}

#undef FOR_TABLE_BITS_
|
||||
|
||||
|
||||
82
external/duckdb/third_party/brotli/enc/compress_fragment.h
vendored
Normal file
82
external/duckdb/third_party/brotli/enc/compress_fragment.h
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses one-pass processing: when we find a backward
|
||||
match, we immediately emit the corresponding command and literal codes to
|
||||
the bit stream. */
|
||||
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "entropy_encode.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
typedef struct BrotliOnePassArena {
|
||||
uint8_t lit_depth[256];
|
||||
uint16_t lit_bits[256];
|
||||
|
||||
/* Command and distance prefix codes (each 64 symbols, stored back-to-back)
|
||||
used for the next block. The command prefix code is over a smaller alphabet
|
||||
with the following 64 symbols:
|
||||
0 - 15: insert length code 0, copy length code 0 - 15, same distance
|
||||
16 - 39: insert length code 0, copy length code 0 - 23
|
||||
40 - 63: insert length code 0 - 23, copy length code 0
|
||||
Note that symbols 16 and 40 represent the same code in the full alphabet,
|
||||
but we do not use either of them. */
|
||||
uint8_t cmd_depth[128];
|
||||
uint16_t cmd_bits[128];
|
||||
uint32_t cmd_histo[128];
|
||||
|
||||
/* The compressed form of the command and distance prefix codes for the next
|
||||
block. */
|
||||
uint8_t cmd_code[512];
|
||||
size_t cmd_code_numbits;
|
||||
|
||||
HuffmanTree tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
|
||||
uint32_t histogram[256];
|
||||
uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
uint16_t tmp_bits[64];
|
||||
} BrotliOnePassArena;
|
||||
|
||||
/* Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
meta-blocks, and updates the "*storage_ix" bit position.
|
||||
|
||||
If "is_last" is 1, emits an additional empty last meta-block.
|
||||
|
||||
"cmd_depth" and "cmd_bits" contain the command and distance prefix codes
|
||||
(see comment in encode.h) used for the encoding of this input fragment.
|
||||
If "is_last" is 0, they are updated to reflect the statistics
|
||||
of this input fragment, to be used for the encoding of the next fragment.
|
||||
|
||||
"*cmd_code_numbits" is the number of bits of the compressed representation
|
||||
of the command and distance prefix codes, and "cmd_code" is an array of
|
||||
at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
|
||||
command and distance prefix codes. If "is_last" is 0, these are also
|
||||
updated to represent the updated "cmd_depth" and "cmd_bits".
|
||||
|
||||
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
|
||||
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
|
||||
OUTPUT: maximal copy distance <= |input_size|
|
||||
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
|
||||
BROTLI_INTERNAL void BrotliCompressFragmentFast(BrotliOnePassArena* s,
|
||||
const uint8_t* input,
|
||||
size_t input_size,
|
||||
BROTLI_BOOL is_last,
|
||||
int* table, size_t table_size,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
|
||||
653
external/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp
vendored
Normal file
653
external/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp
vendored
Normal file
@@ -0,0 +1,653 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses two-pass processing: in the first pass we save
|
||||
the found backward matches and literal bytes into a buffer, and in the
|
||||
second pass we emit them into the bit stream using prefix codes built based
|
||||
on the actual command and literal byte histograms. */
|
||||
|
||||
#include "compress_fragment_two_pass.h"
|
||||
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "bit_cost.h"
|
||||
#include "brotli_bit_stream.h"
|
||||
#include "entropy_encode.h"
|
||||
#include "fast_log.h"
|
||||
#include "find_match_length.h"
|
||||
#include "write_bits.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
|
||||
|
||||
/* kHashMul32 multiplier has these properties:
|
||||
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
* No long streaks of ones or zeros.
|
||||
* There is no effort to ensure that it is a prime, the oddity is enough
|
||||
for this use.
|
||||
* The number has been tuned heuristically against compression benchmarks. */
|
||||
static const uint32_t kHashMul32 = 0x1E35A7BD;
|
||||
|
||||
/* Hashes the first "length" bytes at "p".
   An 8-byte little-endian word is loaded from "p"; the bytes beyond "length"
   are shifted out of the top so that they do not influence the result. The
   word is then multiplied by kHashMul32 and shifted right by "shift"; the
   low 32 bits of that value are returned. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
                                   size_t shift, size_t length) {
  const size_t discard_bits = (8 - length) * 8;
  const uint64_t word = BROTLI_UNALIGNED_LOAD64LE(p);
  const uint64_t mixed = (word << discard_bits) * kHashMul32;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
/* Same hash as Hash(), but computed from an already loaded 64-bit word "v":
   hashes the "length" bytes that start "offset" bytes into "v".
   REQUIRES: offset + length <= 8, so that all hashed bytes are inside "v". */
static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
                                                size_t shift, size_t length) {
  uint64_t window;
  BROTLI_DCHECK(offset <= 8 - length);
  /* Drop the bytes before "offset", then push the bytes after "length" out
     of the top of the word. */
  window = v >> (8 * offset);
  window <<= (8 - length) * 8;
  return (uint32_t)((window * kHashMul32) >> shift);
}
|
||||
|
||||
/* Returns BROTLI_TRUE when the first "length" bytes at "p1" and "p2" are
   equal. The first four bytes are compared with one 32-bit read; for any
   "length" other than 4 the bytes at index 4 and 5 are compared as well
   (callers in this file pass either 4 or 6). */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
                                         size_t length) {
  if (BrotliUnalignedRead32(p1) != BrotliUnalignedRead32(p2)) {
    return BROTLI_FALSE;
  }
  if (length == 4) {
    return BROTLI_TRUE;
  }
  return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
|
||||
/* TODO(eustas): initialize once. */
|
||||
memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
|
||||
BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
|
||||
BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
|
||||
&s->cmd_depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
|
||||
memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
|
||||
memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
|
||||
memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
|
||||
memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
|
||||
memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
|
||||
BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
|
||||
memcpy(s->cmd_bits, s->tmp_bits + 24, 16);
|
||||
memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
|
||||
memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
|
||||
memcpy(s->cmd_bits + 24, s->tmp_bits, 48);
|
||||
memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
|
||||
memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
|
||||
BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
size_t i;
|
||||
memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
|
||||
memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
|
||||
memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
|
||||
memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
|
||||
memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
|
||||
for (i = 0; i < 8; ++i) {
|
||||
s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
|
||||
s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
|
||||
s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
|
||||
}
|
||||
BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
|
||||
s->tmp_tree, storage_ix, storage);
|
||||
}
|
||||
BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
|
||||
storage);
|
||||
}
|
||||
|
||||
/* Appends one packed insert-length command to "*commands" and advances the
   cursor. The low 8 bits of the packed word hold the insert code (0..23),
   the remaining bits hold the extra-bits value for that code. */
static BROTLI_INLINE void EmitInsertLen(
    uint32_t insertlen, uint32_t** commands) {
  uint32_t packed;
  if (insertlen < 6) {
    /* Codes 0..5 carry the length directly. */
    packed = insertlen;
  } else if (insertlen < 130) {
    const uint32_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const uint32_t prefix = tail >> nbits;
    packed = ((nbits << 1) + prefix + 2) | ((tail - (prefix << nbits)) << 8);
  } else if (insertlen < 2114) {
    const uint32_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    packed = (nbits + 10) | ((tail - (1u << nbits)) << 8);
  } else if (insertlen < 6210) {
    packed = 21 | ((insertlen - 2114) << 8);
  } else if (insertlen < 22594) {
    packed = 22 | ((insertlen - 6210) << 8);
  } else {
    packed = 23 | ((insertlen - 22594) << 8);
  }
  *(*commands)++ = packed;
}
|
||||
|
||||
/* Appends one packed copy-length command to "*commands" and advances the
   cursor. The low 8 bits of the packed word hold the copy code, the
   remaining bits hold the extra-bits value for that code. */
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
  size_t packed;
  if (copylen < 10) {
    packed = copylen + 38;
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const size_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    packed = ((nbits << 1) + prefix + 44) | ((tail - (prefix << nbits)) << 8);
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const size_t nbits = Log2FloorNonZero(tail);
    packed = (nbits + 52) | ((tail - ((size_t)1 << nbits)) << 8);
  } else {
    packed = 63 | ((copylen - 2118) << 8);
  }
  *(*commands)++ = (uint32_t)packed;
}
|
||||
|
||||
/* Appends the packed copy-length command used after the distance of a match
   has already been emitted, and advances "*commands".
   Copies shorter than 72 bytes produce a single command word. Longer copies
   produce a command word followed by the code 64, which CreateCommands also
   emits when a match reuses the previous distance. */
static BROTLI_INLINE void EmitCopyLenLastDistance(
    size_t copylen, uint32_t** commands) {
  size_t packed;
  int append_code_64 = 1;
  if (copylen < 12) {
    packed = copylen + 20;
    append_code_64 = 0;
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const size_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    packed = ((nbits << 1) + prefix + 28) | ((tail - (prefix << nbits)) << 8);
    append_code_64 = 0;
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    packed = ((tail >> 5) + 54) | ((tail & 31) << 8);
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const size_t nbits = Log2FloorNonZero(tail);
    packed = (nbits + 52) | ((tail - ((size_t)1 << nbits)) << 8);
  } else {
    packed = 63 | ((copylen - 2120) << 8);
  }
  *(*commands)++ = (uint32_t)packed;
  if (append_code_64) {
    *(*commands)++ = 64;
  }
}
|
||||
|
||||
/* Appends one packed distance command to "*commands" and advances the
   cursor. The low 8 bits hold the distance code (offset by 80), the
   remaining bits hold the extra-bits value. */
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
  const uint32_t biased = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(biased) - 1;
  /* The bit right below the leading one selects between the two codes that
     share the same number of extra bits. */
  const uint32_t prefix = (biased >> nbits) & 1;
  const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
  const uint32_t extra = biased - ((2 + prefix) << nbits);
  *(*commands)++ = distcode | (extra << 8);
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* Inserts the last few positions before "ip" (the tail of the copy that has
   just ended at "ip") into "table" and returns the hash of the bytes at "ip"
   itself, so that the caller can look up the next candidate.

   For min_match == 4 the positions ip - 3, ip - 2 and ip - 1 are inserted;
   otherwise the positions ip - 5 ... ip - 1 are inserted. Every position is
   stored under the hash of the bytes that start at that very position. */
static BROTLI_INLINE uint32_t IndexCopyTail(const uint8_t* ip,
    const uint8_t* base_ip, int* table, size_t shift, size_t min_match) {
  uint64_t input_bytes;
  uint32_t cur_hash;
  uint32_t prev_hash;
  if (min_match == 4) {
    input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
    prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 3);
    prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 2);
    /* Offset 2 of the word loaded at ip - 3 is the byte at ip - 1. */
    prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 1);
  } else {
    input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
    prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 5);
    prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 4);
    prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 3);
    input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
    cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 2);
    prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    table[prev_hash] = (int)(ip - base_ip - 1);
  }
  return cur_hash;
}

/* First pass of the two-pass encoder: scans "block_size" bytes at "input"
   for backward matches of at least "min_match" bytes and records the result
   as packed command words in "*commands" and literal bytes in "*literals".
   Both cursors are advanced past what has been written.

   "input_size" is the number of bytes left in the whole fragment (including
   this block), "base_ip" is the start of the fragment that positions stored
   in "table" are relative to, and "table" has (1 << table_bits) entries. */
static BROTLI_INLINE void CreateCommands(const uint8_t* input,
    size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
    size_t table_bits, size_t min_match,
    uint8_t** literals, uint32_t** commands) {
  /* "ip" is the input pointer. */
  const uint8_t* ip = input;
  const size_t shift = 64u - table_bits;
  const uint8_t* ip_end = input + block_size;
  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;

  int last_distance = -1;
  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16 bytes margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift, min_match); ; ) {
      /* Step 1: Scan forward in the input looking for a match of at least
         "min_match" bytes. If we get close to exhausting the input then goto
         emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc.. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (ie. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;

      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        ip = next_ip;
        BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift, min_match);
        /* Try the previous distance first. While no match has been found yet
           last_distance is -1, which puts the candidate after "ip"; the
           "candidate < ip" test rejects it. */
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate, min_match)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit", and then see if we can find a next match immediately
         afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a match of at least "min_match" bytes at ip, and we need to
           emit bytes in [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = min_match + FindMatchLengthWithLimit(
            candidate + min_match, ip + min_match,
            (size_t)(ip_end - ip) - min_match);
        int distance = (int)(base - candidate);  /* > 0 */
        int insert = (int)(base - next_emit);
        ip += matched;
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitInsertLen((uint32_t)insert, commands);
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
                    (int)(next_emit - base_ip), insert, 2));
        memcpy(*literals, next_emit, (size_t)insert);
        *literals += insert;
        if (distance == last_distance) {
          /* Code 64 stands for "same distance as the previous copy". */
          **commands = 64;
          ++(*commands);
        } else {
          EmitDistance((uint32_t)distance, commands);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, commands);
        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), (int)distance,
                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
                    (int)(base - base_ip) + 2, (int)distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        {
          /* We could immediately start working at ip now, but to improve
             compression we first update "table" with the hashes of some
             positions within the last copy. */
          const uint32_t cur_hash =
              IndexCopyTail(ip, base_ip, table, shift, min_match);
          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (ip - candidate <= MAX_DISTANCE &&
             IsMatch(ip, candidate, min_match)) {
        /* We have a match of at least "min_match" bytes at ip, and no need to
           emit any literal bytes prior to ip. */
        const uint8_t* base = ip;
        size_t matched = min_match + FindMatchLengthWithLimit(
            candidate + min_match, ip + min_match,
            (size_t)(ip_end - ip) - min_match);
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, commands);
        EmitDistance((uint32_t)last_distance, commands);
        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
                    "[CompressFragment] pos = %d distance = %d\n",
                    (int)(base - base_ip), 0, (int)matched,
                    (int)(base - base_ip), (int)last_distance));

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        {
          /* Same table refresh as after the first copy. */
          const uint32_t cur_hash =
              IndexCopyTail(ip, base_ip, table, shift, min_match);
          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift, min_match);
    }
  }

emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const uint32_t insert = (uint32_t)(ip_end - next_emit);
    EmitInsertLen(insert, commands);
    BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
                (int)(next_emit - base_ip), insert, 2));
    memcpy(*literals, next_emit, insert);
    *literals += insert;
  }
}
|
||||
|
||||
/* Second pass of the two-pass encoder: builds the literal prefix code and
   the command/distance prefix codes from the recorded "literals" and
   "commands", stores the codes, and then writes every command (followed by
   the literals it inserts) to the bit stream at "*storage_ix". */
static void StoreCommands(BrotliTwoPassArena* s,
                          const uint8_t* literals, const size_t num_literals,
                          const uint32_t* commands, const size_t num_commands,
                          size_t* storage_ix, uint8_t* storage) {
  /* Number of extra bits that follow each of the 128 command/distance
     codes. */
  static const uint32_t kNumExtraBits[128] = {
      0,  0,  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
      6,  7,  8,  9,  10, 12, 14, 24, 0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  7,  8,  9,  10, 24,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
      9,  9,  10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
      17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
  };
  /* Smallest insert length represented by each of the 24 insert codes. */
  static const uint32_t kInsertOffset[24] = {
      0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
      34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
  };

  const uint8_t* next_literal = literals;
  size_t k;

  memset(s->lit_histo, 0, sizeof(s->lit_histo));
  /* TODO(eustas): is that necessary? */
  memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
  /* TODO(eustas): is that necessary? */
  memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
  memset(s->cmd_histo, 0, sizeof(s->cmd_histo));

  /* Literal statistics and literal prefix code. */
  for (k = 0; k < num_literals; ++k) {
    s->lit_histo[literals[k]] += 1;
  }
  BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
                                     /* max_bits = */ 8, s->lit_depth,
                                     s->lit_bits, storage_ix, storage);

  /* Command statistics; the code lives in the low byte of each word. */
  for (k = 0; k < num_commands; ++k) {
    const uint32_t code = commands[k] & 0xFF;
    BROTLI_DCHECK(code < 128);
    s->cmd_histo[code] += 1;
  }
  /* NOTE(review): these four symbols are always counted once more;
     presumably this keeps both histograms from degenerating to a single
     symbol - confirm against upstream. */
  s->cmd_histo[1] += 1;
  s->cmd_histo[2] += 1;
  s->cmd_histo[64] += 1;
  s->cmd_histo[84] += 1;
  BuildAndStoreCommandPrefixCode(s, storage_ix, storage);

  /* Emit the commands. Codes below 24 are insert codes and are followed by
     the literals they cover. */
  for (k = 0; k < num_commands; ++k) {
    const uint32_t word = commands[k];
    const uint32_t code = word & 0xFF;
    const uint32_t extra = word >> 8;
    BROTLI_DCHECK(code < 128);
    BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    if (code < 24) {
      const uint8_t* const run_end =
          next_literal + (kInsertOffset[code] + extra);
      while (next_literal != run_end) {
        const uint8_t lit = *next_literal++;
        BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
                        storage);
      }
    }
  }
}
|
||||
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 0.98
|
||||
#define SAMPLE_RATE 43
|
||||
|
||||
/* Decides whether a block is worth storing in compressed form.
   Returns BROTLI_TRUE right away when fewer than MIN_RATIO of the input
   bytes ended up as literals. Otherwise every SAMPLE_RATE-th input byte is
   histogrammed (into s->lit_histo) and the block is accepted only when the
   entropy of that sample is below the scaled MIN_RATIO budget. */
static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
    const uint8_t* input, size_t input_size, size_t num_literals) {
  const double corpus_size = (double)input_size;
  double max_total_bit_cost;
  size_t pos;

  if ((double)num_literals < MIN_RATIO * corpus_size) {
    return BROTLI_TRUE;
  }

  max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
  memset(s->lit_histo, 0, sizeof(s->lit_histo));
  for (pos = 0; pos < input_size; pos += SAMPLE_RATE) {
    s->lit_histo[input[pos]] += 1;
  }
  return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
}
|
||||
|
||||
/* Moves the write position back to bit "new_storage_ix": the bits at and
   above that position in the byte that contains it are cleared, and
   "*storage_ix" is set to "new_storage_ix". Later bytes are left as is. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  uint8_t* const partial_byte = &storage[new_storage_ix >> 3];
  const unsigned kept_bits = (unsigned)(new_storage_ix & 7);
  *partial_byte = (uint8_t)(*partial_byte & ((1u << kept_bits) - 1u));
  *storage_ix = new_storage_ix;
}
|
||||
|
||||
/* Writes "input_size" bytes of "input" as one uncompressed meta-block:
   header, padding up to the next byte boundary, then the raw bytes.
   The byte that follows the copied data is zeroed; presumably so that
   later bit writes can OR into it - confirm against BrotliWriteBits. */
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
                                      size_t* storage_ix, uint8_t* storage) {
  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
  /* Round up to a whole byte. The mask is built in size_t: "~7u" is a
     32-bit value that zero-extends and would clear bits 32..63 of the
     position on targets where size_t is 64 bits wide. */
  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  memcpy(&storage[*storage_ix >> 3], input, input_size);
  *storage_ix += input_size << 3;
  storage[*storage_ix >> 3] = 0;
}
|
||||
|
||||
/* Compresses "input" block by block (at most
   kCompressFragmentTwoPassBlockSize bytes each). For every block the
   commands and literals are collected into "command_buf" / "literal_buf"
   first; the block is then written either as a compressed meta-block or,
   when ShouldCompress() rejects it, as an uncompressed one.
   "is_last" is not used here. */
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    int* table, size_t table_bits, size_t min_match,
    size_t* storage_ix, uint8_t* storage) {
  /* Start of the first block; positions and distances are relative to it. */
  const uint8_t* const base_ip = input;
  BROTLI_UNUSED(is_last);

  while (input_size > 0) {
    const size_t block_size =
        BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
    uint32_t* cmd_cursor = command_buf;
    uint8_t* lit_cursor = literal_buf;
    size_t literal_count;

    CreateCommands(input, block_size, input_size, base_ip, table,
                   table_bits, min_match, &lit_cursor, &cmd_cursor);
    literal_count = (size_t)(lit_cursor - literal_buf);

    if (!ShouldCompress(s, input, block_size, literal_count)) {
      /* Since we did not find many backward references and the entropy of
         the data is close to 8 bits, we can simply emit an uncompressed block.
         This makes compression speed of uncompressible data about 3x faster. */
      EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
    } else {
      const size_t command_count = (size_t)(cmd_cursor - command_buf);
      BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
      /* No block splits, no contexts. */
      BrotliWriteBits(13, 0, storage_ix, storage);
      StoreCommands(s, literal_buf, literal_count, command_buf, command_count,
                    storage_ix, storage);
    }

    input += block_size;
    input_size -= block_size;
  }
}
|
||||
|
||||
#define FOR_TABLE_BITS_(X) \
|
||||
X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
|
||||
|
||||
#define BAKE_METHOD_PARAM_(B) \
|
||||
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \
|
||||
BrotliTwoPassArena* s, const uint8_t* input, size_t input_size, \
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \
|
||||
int* table, size_t* storage_ix, uint8_t* storage) { \
|
||||
size_t min_match = (B <= 15) ? 4 : 6; \
|
||||
BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
|
||||
literal_buf, table, B, min_match, storage_ix, storage); \
|
||||
}
|
||||
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
|
||||
#undef BAKE_METHOD_PARAM_
|
||||
|
||||
/* Public entry point of the two-pass fragment compressor.
   Dispatches on log2("table_size") to the matching specialised
   implementation, falls back to a single uncompressed meta-block when the
   compressed output turned out larger than that, and, when "is_last" is
   set, appends an empty last meta-block and pads to a byte boundary. */
void duckdb_brotli::BrotliCompressFragmentTwoPass(
    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);
  switch (table_bits) {
#define CASE_(B)                                      \
    case B:                                           \
      BrotliCompressFragmentTwoPassImpl ## B(         \
          s, input, input_size, is_last, command_buf, \
          literal_buf, table, storage_ix, storage);   \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    RewindBitPosition(initial_storage_ix, storage_ix, storage);
    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    /* Round up to a whole byte. The mask is built in size_t: "~7u" is a
       32-bit value that zero-extends and would clear bits 32..63 of the
       position on targets where size_t is 64 bits wide. */
    *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  }
}
|
||||
|
||||
#undef FOR_TABLE_BITS_
|
||||
|
||||
|
||||
68
external/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h
vendored
Normal file
68
external/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses two-pass processing: in the first pass we save
|
||||
the found backward matches and literal bytes into a buffer, and in the
|
||||
second pass we emit them into the bit stream using prefix codes built based
|
||||
on the actual command and literal byte histograms. */
|
||||
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "entropy_encode.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* TODO(eustas): turn to macro. */
|
||||
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
|
||||
|
||||
typedef struct BrotliTwoPassArena {
|
||||
uint32_t lit_histo[256];
|
||||
uint8_t lit_depth[256];
|
||||
uint16_t lit_bits[256];
|
||||
|
||||
uint32_t cmd_histo[128];
|
||||
uint8_t cmd_depth[128];
|
||||
uint16_t cmd_bits[128];
|
||||
|
||||
/* BuildAndStoreCommandPrefixCode */
|
||||
HuffmanTree tmp_tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
|
||||
uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
uint16_t tmp_bits[64];
|
||||
} BrotliTwoPassArena;
|
||||
|
||||
/* Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
meta-blocks, and updates the "*storage_ix" bit position.
|
||||
|
||||
If "is_last" is 1, emits an additional empty last meta-block.
|
||||
|
||||
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
|
||||
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
|
||||
REQUIRES: "command_buf" and "literal_buf" point to at least
|
||||
kCompressFragmentTwoPassBlockSize long arrays.
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is a power of two
|
||||
OUTPUT: maximal copy distance <= |input_size|
|
||||
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
|
||||
BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(BrotliTwoPassArena* s,
|
||||
const uint8_t* input,
|
||||
size_t input_size,
|
||||
BROTLI_BOOL is_last,
|
||||
uint32_t* command_buf,
|
||||
uint8_t* literal_buf,
|
||||
int* table,
|
||||
size_t table_size,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
|
||||
1844
external/duckdb/third_party/brotli/enc/dictionary_hash.cpp
vendored
Normal file
1844
external/duckdb/third_party/brotli/enc/dictionary_hash.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
21
external/duckdb/third_party/brotli/enc/dictionary_hash.h
vendored
Normal file
21
external/duckdb/third_party/brotli/enc/dictionary_hash.h
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Hash table on the 4-byte prefixes of static dictionary words. */
|
||||
|
||||
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
|
||||
#define BROTLI_ENC_DICTIONARY_HASH_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
extern const uint16_t kStaticDictionaryHashWords[32768];
|
||||
extern const uint8_t kStaticDictionaryHashLengths[32768];
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */
|
||||
1990
external/duckdb/third_party/brotli/enc/encode.cpp
vendored
Normal file
1990
external/duckdb/third_party/brotli/enc/encode.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
636
external/duckdb/third_party/brotli/enc/encoder_dict.cpp
vendored
Normal file
636
external/duckdb/third_party/brotli/enc/encoder_dict.cpp
vendored
Normal file
@@ -0,0 +1,636 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "encoder_dict.h"
|
||||
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "../common/shared_dictionary_internal.h"
|
||||
#include "../common/transform.h"
|
||||
#include "compound_dictionary.h"
|
||||
#include "dictionary_hash.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
#include "brotli_hash.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
#define NUM_HASH_BITS 15u
|
||||
#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
|
||||
|
||||
/* Resets |trie| to the empty state: no node pool is owned and the embedded
   root node carries neither a payload nor any sub-nodes. */
static void BrotliTrieInit(BrotliTrie* trie) {
  BrotliTrieNode* root = &trie->root;

  /* Nothing has been allocated for the pool yet. */
  trie->pool = 0;
  trie->pool_size = 0;
  trie->pool_capacity = 0;

  /* Root node: no payload, no children. */
  root->sub = 0;
  root->idx_ = 0;
  root->len_ = 0;
  root->single = 0;
}
|
||||
|
||||
/* Hands the node pool of |trie| back to the memory manager |m|. The trie
   structure itself is not modified. */
static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
  BrotliTrieNode* nodes = trie->pool;
  BrotliFree(m, nodes);
}
|
||||
|
||||
/* Initializes to RFC 7932 static dictionary / transforms. */
|
||||
static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
|
||||
dict->words = BrotliGetDictionary();
|
||||
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
|
||||
|
||||
dict->hash_table_words = kStaticDictionaryHashWords;
|
||||
dict->hash_table_lengths = kStaticDictionaryHashLengths;
|
||||
dict->buckets = kStaticDictionaryBuckets;
|
||||
dict->dict_words = kStaticDictionaryWords;
|
||||
|
||||
dict->cutoffTransformsCount = kCutoffTransformsCount;
|
||||
dict->cutoffTransforms = kCutoffTransforms;
|
||||
|
||||
dict->parent = 0;
|
||||
|
||||
dict->hash_table_data_words_ = 0;
|
||||
dict->hash_table_data_lengths_ = 0;
|
||||
dict->buckets_alloc_size_ = 0;
|
||||
dict->buckets_data_ = 0;
|
||||
dict->dict_words_alloc_size_ = 0;
|
||||
dict->dict_words_data_ = 0;
|
||||
dict->words_instance_ = 0;
|
||||
dict->has_words_heavy = BROTLI_FALSE;
|
||||
BrotliTrieInit(&dict->trie);
|
||||
}
|
||||
|
||||
/* Releases every heap buffer owned by |dict| through |m|: the custom hash
   tables, the custom bucket/word arrays, the private copy of the word
   dictionary and finally the heavy trie's node pool. */
static void BrotliDestroyEncoderDictionary(MemoryManager* m,
                                           BrotliEncoderDictionary* dict) {
  /* Listed in release order. */
  void* const owned[] = {
      dict->hash_table_data_words_,
      dict->hash_table_data_lengths_,
      dict->buckets_data_,
      dict->dict_words_data_,
      dict->words_instance_};
  size_t k;
  for (k = 0; k < sizeof(owned) / sizeof(owned[0]); ++k) {
    BrotliFree(m, owned[k]);
  }
  BrotliTrieFree(m, &dict->trie);
}
|
||||
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
/* Word length must be at least 4 bytes */
|
||||
static uint32_t Hash(const uint8_t* data, int bits) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return h >> (32 - bits);
|
||||
}
|
||||
|
||||
/* Theoretical max possible word size after transform */
|
||||
#define kTransformedBufferSize \
|
||||
(256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
|
||||
|
||||
/* To be safe buffer must have at least kTransformedBufferSize */
|
||||
static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
|
||||
const BrotliTransforms* transforms,
|
||||
const BrotliEncoderDictionary* dict,
|
||||
uint8_t* buffer, size_t* size) {
|
||||
const uint8_t* dict_word = &dict->words->data[
|
||||
dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
|
||||
*size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
|
||||
transforms, transform);
|
||||
}
|
||||
|
||||
/* Packs a word length, a transform code and a word index into a DictWord. */
static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
  DictWord entry;
  entry.idx = idx;
  entry.transform = transform;
  entry.len = len;
  return entry;
}
|
||||
|
||||
/* Appends |num| zero-initialised nodes to the pool of |trie| and returns the
   pool index of the first one, or 0 on OOM. Index 0 is never handed out: it
   is reserved as the "null" sub-node index.

   Growing the pool may move it in memory. If |keep| is non-NULL and |*keep|
   points into the pool (i.e. is not the root node), |*keep| is re-pointed at
   the same node inside the possibly relocated pool. */
static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
                                BrotliTrieNode** keep) {
  /* The root lives inside |trie| itself and never moves; only pool nodes
     need to be tracked by index. */
  const int keep_in_pool = (keep && *keep != &trie->root) ? 1 : 0;
  uint32_t saved_index = 0;
  uint32_t first_new;

  if (keep_in_pool) {
    saved_index = (uint32_t)(*keep - trie->pool);
  }

  /* Reserve slot 0 as a dummy so that no real node ever gets index 0. */
  if (trie->pool_size == 0) {
    trie->pool_size = 1;
  }

  BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
                         trie->pool_size + num);
  if (BROTLI_IS_OOM(m)) return 0;

  /* Fresh nodes start out empty. */
  memset(&trie->pool[trie->pool_size], 0, num * sizeof(*trie->pool));
  first_new = (uint32_t)trie->pool_size;
  trie->pool_size += num;

  if (keep_in_pool) {
    *keep = &trie->pool[saved_index];
  }
  return first_new;
}
|
||||
|
||||
/**
|
||||
* len and idx: payload for last node
|
||||
* word, size: the string
|
||||
* index: position in the string
|
||||
*/
|
||||
static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
|
||||
uint32_t idx, const uint8_t* word, size_t size, int index,
|
||||
BrotliTrieNode* node, BrotliTrie* trie) {
|
||||
BrotliTrieNode* child = 0;
|
||||
uint8_t c;
|
||||
if ((size_t)index == size) {
|
||||
if (!node->len_ || idx < node->idx_) {
|
||||
node->len_ = len;
|
||||
node->idx_ = idx;
|
||||
}
|
||||
return BROTLI_TRUE;
|
||||
}
|
||||
c = word[index];
|
||||
if (node->single && c != node->c) {
|
||||
BrotliTrieNode old = trie->pool[node->sub];
|
||||
uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
node->single = 0;
|
||||
node->sub = new_nodes;
|
||||
trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
|
||||
trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
|
||||
old;
|
||||
}
|
||||
if (!node->sub) {
|
||||
uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
node->single = 1;
|
||||
node->c = c;
|
||||
node->sub = new_node;
|
||||
}
|
||||
if (node->single) {
|
||||
child = &trie->pool[node->sub];
|
||||
} else {
|
||||
if (!trie->pool[node->sub + (c >> 4)].sub) {
|
||||
uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
trie->pool[node->sub + (c >> 4)].sub = new_nodes;
|
||||
}
|
||||
child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
|
||||
}
|
||||
return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
|
||||
}
|
||||
|
||||
/* Inserts the |size|-byte string |word| with payload (|len|, |idx|) into
   |trie|, starting at the root. Returns BROTLI_FALSE on OOM. */
static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
                                 const uint8_t* word, size_t size,
                                 BrotliTrie* trie) {
  BrotliTrieNode* root = &trie->root;
  return BrotliTrieNodeAdd(m, len, idx, word, size, 0, root, trie);
}
|
||||
|
||||
/* Returns the child of |node| reached through byte |c|, or 0 (null) when
   |node| has no child for that byte.

   The definition is qualified with duckdb_brotli:: so that it defines the
   function declared inside that namespace in encoder_dict.h, like the other
   non-static definitions in this file. An unqualified definition would
   introduce a separate global function instead. */
const BrotliTrieNode* duckdb_brotli::BrotliTrieSub(const BrotliTrie* trie,
    const BrotliTrieNode* node, uint8_t c) {
  const BrotliTrieNode* high;
  if (node->single) {
    /* Exactly one child, stored directly at |sub|. */
    if (node->c == c) return &trie->pool[node->sub];
    return 0;
  }
  if (!node->sub) return 0;
  /* Two-level lookup: high nibble first, then low nibble. */
  high = &trie->pool[node->sub + (c >> 4)];
  if (!high->sub) return 0;
  return &trie->pool[high->sub + (c & 15)];
}
|
||||
|
||||
/* Follows the |size| bytes of |word| from the root of |trie|. Returns the
   node reached after the last byte, or 0 (null) as soon as a byte has no
   matching child. */
static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
                                            const uint8_t* word, size_t size) {
  const BrotliTrieNode* cursor = &trie->root;
  size_t pos = 0;
  while (pos < size) {
    cursor = BrotliTrieSub(trie, cursor, word[pos]);
    if (!cursor) return 0;
    ++pos;
  }
  return cursor;
}
|
||||
|
||||
/* Builds the bucketed word table (dict->buckets / dict->dict_words) for the
   words of |dict| under |transforms|.

   Words are first collected per hash bucket, then flattened into a single
   array in which each bucket's last entry has bit 0x80 set in its length.
   A temporary trie tracks the transformed strings seen so far, so that a
   case-transformed word is skipped when its string is already present under
   an index that is not larger.

   Returns BROTLI_FALSE on OOM, BROTLI_TRUE otherwise. */
static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
                                      const BrotliTransforms* transforms,
                                      BrotliEncoderDictionary* dict) {
  const BrotliDictionary* src = dict->words;
  DictWord** chains;   /* per-bucket growable word lists */
  size_t* chain_len;   /* number of words in each list */
  size_t* chain_cap;   /* allocated capacity of each list */
  DictWord* flat;
  uint16_t* heads;
  BrotliTrie seen;
  uint8_t buf[kTransformedBufferSize];
  size_t buf_len;
  size_t count = 0;
  uint32_t b;
  uint8_t wlen;
  uint16_t w;

  BrotliTrieInit(&seen);

  chains = (DictWord**)BrotliAllocate(m,
      sizeof(*chains) * NUM_HASH_BUCKETS);
  chain_len = (size_t*)BrotliAllocate(m,
      sizeof(*chain_len) * NUM_HASH_BUCKETS);
  chain_cap = (size_t*)BrotliAllocate(m,
      sizeof(*chain_cap) * NUM_HASH_BUCKETS);
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  memset(chains, 0, sizeof(*chains) * NUM_HASH_BUCKETS);
  memset(chain_len, 0, sizeof(*chain_len) * NUM_HASH_BUCKETS);
  memset(chain_cap, 0, sizeof(*chain_cap) * NUM_HASH_BUCKETS);

  /* Pass 1: every word under the first transform (usually identity). */
  if (transforms->num_transforms > 0) {
    for (wlen = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
         wlen <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++wlen) {
      const uint16_t n = src->size_bits_by_length[wlen] ?
          (uint16_t)(1 << src->size_bits_by_length[wlen]) : 0u;
      for (w = 0; w < n; ++w) {
        uint32_t key;
        TransformedDictionaryWord(w, wlen, 0, transforms, dict, buf,
                                  &buf_len);
        /* Words shorter than 4 bytes cannot be hashed. Stop rather than
           skip: all remaining words of this length come out equally long. */
        if (buf_len < 4) break;
        if (!BrotliTrieAdd(m, 0, w, buf, buf_len, &seen)) {
          return BROTLI_FALSE;
        }
        key = Hash(buf, NUM_HASH_BITS);
        BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, chains[key],
            chain_cap[key], chain_len[key], MakeDictWord(wlen, 0, w));
        ++count;
      }
    }
  }

  /* Pass 2: the two case transforms. These LUT transforms are only supported
     with the built-in transform list; custom transforms rely on the heavy
     trie instead. */
  if (transforms == BrotliGetTransforms()) {
    /* Transform numbers: 9 = {empty, uppercase first, empty},
                          44 = {empty, uppercase all, empty}. */
    static const int kCaseTransform[2] = {9, 44};
    for (wlen = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
         wlen <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++wlen) {
      const uint16_t n = src->size_bits_by_length[wlen] ?
          (uint16_t)(1 << src->size_bits_by_length[wlen]) : 0u;
      for (w = 0; w < n; ++w) {
        const size_t offset = src->offsets_by_length[wlen] + (size_t)wlen * w;
        const uint8_t* raw = &src->data[offset];
        BROTLI_BOOL wanted[2];
        int k = 0;
        int t;

        /* "Uppercase all" is only added for pure-ASCII words... */
        wanted[1] = BROTLI_TRUE;
        while (k < wlen) {
          if (raw[k] >= 128) wanted[1] = BROTLI_FALSE;
          ++k;
        }
        /* ...and "uppercase first" only when the first byte is ASCII. */
        wanted[0] = (raw[0] < 128) ? BROTLI_TRUE : BROTLI_FALSE;

        for (t = 0; t < 2; ++t) {
          uint32_t ix;
          const BrotliTrieNode* hit;
          if (!wanted[t]) continue;
          ix = w + (uint32_t)kCaseTransform[t] * n;
          TransformedDictionaryWord(w, wlen, kCaseTransform[t], transforms,
                                    dict, buf, &buf_len);
          hit = BrotliTrieFind(&seen, buf, buf_len);
          /* Already reachable with an index that is not larger: skip. */
          if (hit && hit->idx_ <= ix) continue;
          {
            const uint32_t key = Hash(buf, NUM_HASH_BITS);
            if (!BrotliTrieAdd(m, 0, ix, buf, buf_len, &seen)) {
              return BROTLI_FALSE;
            }
            BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, chains[key],
                chain_cap[key], chain_len[key],
                MakeDictWord(wlen,
                    (t == 0) ? BROTLI_TRANSFORM_UPPERCASE_FIRST
                             : BROTLI_TRANSFORM_UPPERCASE_ALL,
                    w));
            ++count;
          }
        }
      }
    }
  }

  /* Flatten the per-bucket lists into the arrays owned by |dict|. */
  flat = (DictWord*)BrotliAllocate(m,
      sizeof(*dict->dict_words) * (count + 1));
  heads = (uint16_t*)BrotliAllocate(m,
      sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  dict->dict_words_alloc_size_ = count + 1;
  dict->dict_words = dict->dict_words_data_ = flat;
  dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
  dict->buckets = dict->buckets_data_ = heads;

  /* Entry 0 is unused; it makes offsets start from 1, so that a bucket value
     of 0 can mean "empty". */
  flat[0] = MakeDictWord(0, 0, 0);
  count = 1;
  for (b = 0; b < NUM_HASH_BUCKETS; ++b) {
    const size_t bucket_words = chain_len[b];
    if (bucket_words == 0) {
      heads[b] = 0;
      continue;
    }
    heads[b] = (uint16_t)(count);
    memcpy(&flat[count], &chains[b][0], sizeof(flat[0]) * bucket_words);
    count += bucket_words;
    /* Flag the last word of the bucket. */
    flat[count - 1].len |= 0x80;
  }

  /* Drop the temporary structures. */
  for (b = 0; b < NUM_HASH_BUCKETS; ++b) {
    BrotliFree(m, chains[b]);
  }
  BrotliFree(m, chains);
  BrotliFree(m, chain_len);
  BrotliFree(m, chain_cap);
  BrotliTrieFree(m, &seen);

  return BROTLI_TRUE;
}
|
||||
|
||||
static void BuildDictionaryHashTable(uint16_t* hash_table_words,
|
||||
uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
|
||||
int j, len;
|
||||
/* The order of the loops is such that in case of collision, words with
|
||||
shorter length are preferred, and in case of same length, words with
|
||||
smaller index. There is only a single word per bucket. */
|
||||
/* TODO(lode): consider adding optional user-supplied frequency_map to use
|
||||
for preferred words instead, this can make the encoder better for
|
||||
quality 9 and below without affecting the decoder */
|
||||
memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
|
||||
memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
|
||||
for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
|
||||
len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
|
||||
const size_t num_words = dict->size_bits_by_length[len] ?
|
||||
(1u << dict->size_bits_by_length[len]) : 0;
|
||||
for (j = (int)num_words - 1; j >= 0; --j) {
|
||||
size_t offset = dict->offsets_by_length[len] +
|
||||
(size_t)len * (size_t)j;
|
||||
const uint8_t* word = &dict->data[offset];
|
||||
const uint32_t key = Hash(word, 14);
|
||||
int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
|
||||
BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
|
||||
hash_table_words[idx] = (uint16_t)j;
|
||||
hash_table_lengths[idx] = (uint8_t)len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Adds every (word, transform) combination of |dict| whose transformed form
   is at least 4 bytes long to dict->trie. Transforms are processed from the
   last to the first. The payload stored for each string is the untransformed
   length and the value "word index + words of that length * transform".
   Returns BROTLI_FALSE on OOM. */
static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
                                      const BrotliTransforms* transforms,
                                      BrotliEncoderDictionary* dict) {
  int t = (int)transforms->num_transforms;
  while (t-- > 0) {
    int len;
    for (len = 0; len < 32; ++len) {
      /* 1 << bits, with the "1 << 0" case (no words) mapped to 0. */
      const int num =
          (int)((1u << dict->words->size_bits_by_length[len]) & ~1u);
      int w;
      for (w = 0; w < num; ++w) {
        uint8_t transformed[kTransformedBufferSize];
        size_t size;
        TransformedDictionaryWord(
            (uint32_t)w, len, t, transforms, dict, transformed, &size);
        if (size >= 4 &&
            !BrotliTrieAdd(m, (uint8_t)len, (uint32_t)(w + num * t),
                           transformed, size, &dict->trie)) {
          return BROTLI_FALSE;
        }
      }
    }
  }
  return BROTLI_TRUE;
}
|
||||
|
||||
/* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
|
||||
the custom transforms, where possible within the limits of the
|
||||
cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
|
||||
transforms that remove the N last characters (OmitLast). */
|
||||
static void ComputeCutoffTransforms(
|
||||
const BrotliTransforms* transforms,
|
||||
uint32_t* count, uint64_t* data) {
|
||||
int i;
|
||||
/* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
|
||||
((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
|
||||
transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
|
||||
for N=9 it must be 36-99.
|
||||
TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t
|
||||
for the cutoff transforms, so that shared dictionaries can have the
|
||||
OmitLast transforms anywhere without loss. */
|
||||
*count = 0;
|
||||
*data = 0;
|
||||
for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
|
||||
int idx = transforms->cutOffTransforms[i];
|
||||
if (idx == -1) break; /* Not found */
|
||||
if (idx < (i << 2)) break; /* Too small for the encoding */
|
||||
if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
|
||||
(*count)++;
|
||||
*data |= (uint64_t)(((uint64_t)idx -
|
||||
((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes the encoder-side lookup data of |current| for its word list and
   the given |transforms|.

   If both the words and the transforms are the Brotli built-ins, nothing is
   done: |current| already points at the precomputed tables. Otherwise the
   hash tables and the cut-off transform data are rebuilt, and - only when
   |quality| is at least ZOPFLIFICATION_QUALITY - also the bucketed word
   table and, for custom transforms, the heavy trie.

   Returns BROTLI_FALSE on OOM, BROTLI_TRUE otherwise. */
static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
                                     const BrotliTransforms* transforms,
                                     BrotliEncoderDictionary* current) {
  int default_words = current->words == BrotliGetDictionary();
  int default_transforms = transforms == BrotliGetTransforms();

  if (default_words && default_transforms) {
    /* hashes are already set to Brotli defaults */
    return BROTLI_TRUE;
  }

  current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
      m, sizeof(kStaticDictionaryHashWords));
  current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
      m, sizeof(kStaticDictionaryHashLengths));
  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
  current->hash_table_words = current->hash_table_data_words_;
  current->hash_table_lengths = current->hash_table_data_lengths_;

  BuildDictionaryHashTable(current->hash_table_data_words_,
      current->hash_table_data_lengths_, current->words);

  /* Both outputs are fields of |current|, passed by address. */
  ComputeCutoffTransforms(transforms,
      &current->cutoffTransformsCount, &current->cutoffTransforms);

  /* Only compute the data for slow encoder if the requested quality is high
     enough to need it */
  if (quality >= ZOPFLIFICATION_QUALITY) {
    if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;

    /* For the built-in Brotli transforms, there is a hard-coded function to
       handle all transforms, but for custom transforms, we use the following
       large hammer instead */
    current->has_words_heavy = !default_transforms;
    if (current->has_words_heavy) {
      if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
    }
  }

  return BROTLI_TRUE;
}
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
|
||||
/* Initialises |dict| to the Brotli built-in dictionary: an empty compound
   (prefix) dictionary and a single, non-context-based encoder dictionary
   stored in the embedded instance_ field. */
void duckdb_brotli::BrotliInitSharedEncoderDictionary(
    SharedEncoderDictionary* dict) {
  CompoundDictionary* compound = &dict->compound;
  ContextualEncoderDictionary* contextual = &dict->contextual;
  BrotliEncoderDictionary* builtin = &contextual->instance_;

  dict->magic = kSharedDictionaryMagic;
  dict->max_quality = BROTLI_MAX_QUALITY;

  /* No prefix dictionaries attached. */
  compound->num_chunks = 0;
  compound->total_size = 0;
  compound->chunk_offsets[0] = 0;
  compound->num_prepared_instances_ = 0;

  /* One dictionary shared by all contexts. */
  contextual->context_based = 0;
  contextual->num_dictionaries = 1;
  contextual->instances_ = 0;
  contextual->num_instances_ = 1;  /* The instance_ field */
  contextual->dict[0] = builtin;

  InitEncoderDictionary(builtin);
  builtin->parent = contextual;
}
|
||||
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
/* TODO(eustas): make sure that tooling will warn user if not all the cutoff
|
||||
transforms are available (for low-quality encoder). */
|
||||
static BROTLI_BOOL InitCustomSharedEncoderDictionary(
|
||||
MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
|
||||
int quality, SharedEncoderDictionary* dict) {
|
||||
ContextualEncoderDictionary* contextual;
|
||||
CompoundDictionary* compound;
|
||||
BrotliEncoderDictionary* instances;
|
||||
int i;
|
||||
BrotliInitSharedEncoderDictionary(dict);
|
||||
|
||||
contextual = &dict->contextual;
|
||||
compound = &dict->compound;
|
||||
|
||||
for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
|
||||
PreparedDictionary* prepared = CreatePreparedDictionary(m,
|
||||
decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
|
||||
AttachPreparedDictionary(compound, prepared);
|
||||
/* remember for cleanup */
|
||||
compound->prepared_instances_[
|
||||
compound->num_prepared_instances_++] = prepared;
|
||||
}
|
||||
|
||||
dict->max_quality = quality;
|
||||
contextual->context_based = decoded_dict->context_based;
|
||||
if (decoded_dict->context_based) {
|
||||
memcpy(contextual->context_map, decoded_dict->context_map,
|
||||
SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
|
||||
}
|
||||
|
||||
contextual->num_dictionaries = decoded_dict->num_dictionaries;
|
||||
contextual->num_instances_ = decoded_dict->num_dictionaries;
|
||||
if (contextual->num_instances_ == 1) {
|
||||
instances = &contextual->instance_;
|
||||
} else {
|
||||
contextual->instances_ = (BrotliEncoderDictionary*)
|
||||
BrotliAllocate(m, sizeof(*contextual->instances_) *
|
||||
contextual->num_instances_);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
instances = contextual->instances_;
|
||||
}
|
||||
for (i = 0; i < (int)contextual->num_instances_; i++) {
|
||||
BrotliEncoderDictionary* current = &instances[i];
|
||||
InitEncoderDictionary(current);
|
||||
current->parent = &dict->contextual;
|
||||
if (decoded_dict->words[i] == BrotliGetDictionary()) {
|
||||
current->words = BrotliGetDictionary();
|
||||
} else {
|
||||
current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
|
||||
m, sizeof(BrotliDictionary));
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
*current->words_instance_ = *decoded_dict->words[i];
|
||||
current->words = current->words_instance_;
|
||||
}
|
||||
current->num_transforms =
|
||||
(uint32_t)decoded_dict->transforms[i]->num_transforms;
|
||||
if (!ComputeDictionary(
|
||||
m, quality, decoded_dict->transforms[i], current)) {
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
|
||||
contextual->dict[i] = current;
|
||||
}
|
||||
|
||||
return BROTLI_TRUE; /* success */
|
||||
}
|
||||
|
||||
/* Parses the serialized shared dictionary |encoded_dict| (|size| bytes) and
   initialises |dict| from it for the given |quality|. Returns BROTLI_FALSE
   when the temporary decoder-side dictionary cannot be created, when the
   serialized data cannot be attached, or when building the encoder data
   fails.

   The definition is qualified with duckdb_brotli:: so that it defines the
   function declared inside that namespace in encoder_dict.h, like the other
   non-static definitions in this file. */
BROTLI_BOOL duckdb_brotli::BrotliInitCustomSharedEncoderDictionary(
    MemoryManager* m, const uint8_t* encoded_dict, size_t size,
    int quality, SharedEncoderDictionary* dict) {
  BROTLI_BOOL success = BROTLI_FALSE;
  BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
      m->alloc_func, m->free_func, m->opaque);
  if (!decoded_dict) {  /* OOM */
    return BROTLI_FALSE;
  }
  success = BrotliSharedDictionaryAttach(
      decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
  if (success) {
    success = InitCustomSharedEncoderDictionary(m,
        decoded_dict, quality, dict);
  }
  /* The decoded form is only needed while building |dict|. */
  BrotliSharedDictionaryDestroyInstance(decoded_dict);
  return success;
}
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
|
||||
/* Releases everything owned by |dict|: the prepared prefix dictionaries
   remembered in the compound dictionary, then either the embedded encoder
   dictionary (exactly one instance) or every element of the heap-allocated
   instance array followed by the array itself (more than one instance). */
void duckdb_brotli::BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
    SharedEncoderDictionary* dict) {
  CompoundDictionary* compound = &dict->compound;
  ContextualEncoderDictionary* contextual = &dict->contextual;
  size_t k;

  for (k = 0; k < compound->num_prepared_instances_; ++k) {
    DestroyPreparedDictionary(m,
        (PreparedDictionary*)compound->prepared_instances_[k]);
  }

  if (contextual->num_instances_ == 1) {
    BrotliDestroyEncoderDictionary(m, &contextual->instance_);
    return;
  }
  if (contextual->num_instances_ > 1) {
    for (k = 0; k < contextual->num_instances_; ++k) {
      BrotliDestroyEncoderDictionary(m, &contextual->instances_[k]);
    }
    BrotliFree(m, contextual->instances_);
  }
}
|
||||
|
||||
/* Allocates a ManagedDictionary with the given allocator callbacks, sets its
   magic value, initialises its embedded memory manager with the same
   callbacks and leaves the dictionary pointer NULL. Returns NULL when the
   allocation fails. */
ManagedDictionary* duckdb_brotli::BrotliCreateManagedDictionary(
    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
  ManagedDictionary* managed = (ManagedDictionary*)BrotliBootstrapAlloc(
      sizeof(*managed), alloc_func, free_func, opaque);
  if (managed != NULL) {
    managed->magic = kManagedDictionaryMagic;
    BrotliInitMemoryManager(
        &managed->memory_manager_, alloc_func, free_func, opaque);
    managed->dictionary = NULL;
  }
  return managed;
}
|
||||
|
||||
/* Frees |dictionary| through its own embedded memory manager. A NULL
   argument is accepted and ignored. */
void duckdb_brotli::BrotliDestroyManagedDictionary(
    ManagedDictionary* dictionary) {
  if (dictionary != NULL) {
    BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
  }
}
|
||||
|
||||
/* Escalate internal functions visibility; for testing purposes only. */
|
||||
#if defined(BROTLI_TEST)
|
||||
void InitEncoderDictionaryForTest(BrotliEncoderDictionary*);
|
||||
void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) {
|
||||
InitEncoderDictionary(d);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
153
external/duckdb/third_party/brotli/enc/encoder_dict.h
vendored
Normal file
153
external/duckdb/third_party/brotli/enc/encoder_dict.h
vendored
Normal file
@@ -0,0 +1,153 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#ifndef BROTLI_ENC_ENCODER_DICT_H_
|
||||
#define BROTLI_ENC_ENCODER_DICT_H_
|
||||
|
||||
#include <brotli/shared_dictionary.h>
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "compound_dictionary.h"
|
||||
#include "memory.h"
|
||||
#include "static_dict_lut.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/*
|
||||
Dictionary hierarchy for Encoder:
|
||||
-SharedEncoderDictionary
|
||||
--CompoundDictionary
|
||||
---PreparedDictionary [up to 15x]
|
||||
= prefix dictionary with precomputed hashes
|
||||
--ContextualEncoderDictionary
|
||||
---BrotliEncoderDictionary [up to 64x]
|
||||
= for each context, precomputed static dictionary with words + transforms
|
||||
|
||||
Dictionary hierarchy from common: similar, but without precomputed hashes
|
||||
-BrotliSharedDictionary
|
||||
--BrotliDictionary [up to 64x]
|
||||
--BrotliTransforms [up to 64x]
|
||||
--const uint8_t* prefix [up to 15x]: compound dictionaries
|
||||
*/
|
||||
|
||||
/* One node of the byte-wise trie over transformed dictionary words. Children
   are referenced by index into BrotliTrie::pool; index 0 means "none". */
typedef struct BrotliTrieNode {
|
||||
uint8_t single; /* if 1, sub is the index of one single child (for byte c) instead of a per-byte child table */
|
||||
uint8_t c; /* byte leading to the single child; meaningful when single is 1 */
|
||||
uint8_t len_; /* untransformed length */
|
||||
uint32_t idx_; /* word index + num words * transform index */
|
||||
uint32_t sub; /* index of sub node(s) in the pool */
|
||||
} BrotliTrieNode;
|
||||
|
||||
/* Trie whose nodes, except the root, live in one growable array. */
typedef struct BrotliTrie {
|
||||
/* Node storage; nodes refer to each other by index into this array. */
BrotliTrieNode* pool;
|
||||
/* Number of nodes the pool allocation can hold. */
size_t pool_capacity;
|
||||
/* Number of pool slots in use. */
size_t pool_size;
|
||||
/* Root node, stored inline rather than in the pool. */
BrotliTrieNode root;
|
||||
} BrotliTrie;
|
||||
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
/* Returns the child of |node| in |trie| reached through byte |c|, or NULL
   when there is no such child. */
BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
|
||||
const BrotliTrieNode* node, uint8_t c);
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
|
||||
/* Dictionary data (words and transforms) for 1 possible context. The
   const pointers reference either the built-in tables or the buffers held in
   the trailing "allocated memory" fields. */
|
||||
typedef struct BrotliEncoderDictionary {
|
||||
/* Word list in use (built-in, or words_instance_). */
const BrotliDictionary* words;
|
||||
/* Number of transforms that go with the word list. */
uint32_t num_transforms;
|
||||
|
||||
/* cut off for fast encoder: number of usable "omit last N" transforms and
   their packed indices (6 bits per N) */
|
||||
uint32_t cutoffTransformsCount;
|
||||
uint64_t cutoffTransforms;
|
||||
|
||||
/* from dictionary_hash.h, for fast encoder */
|
||||
const uint16_t* hash_table_words;
|
||||
const uint8_t* hash_table_lengths;
|
||||
|
||||
/* from static_dict_lut.h, for slow encoder */
|
||||
const uint16_t* buckets;
|
||||
const DictWord* dict_words;
|
||||
/* Heavy version, for use by slow encoder when there are custom transforms.
   Contains every possible transformed dictionary word in a trie. It encodes
   about as fast as the non-heavy encoder but consumes a lot of memory and
   takes time to build. */
|
||||
BrotliTrie trie;
|
||||
/* Whether the trie above has been populated and should be used. */
BROTLI_BOOL has_words_heavy;
|
||||
|
||||
/* Reference to other dictionaries. */
|
||||
const struct ContextualEncoderDictionary* parent;
|
||||
|
||||
/* Allocated memory, used only when not using the Brotli defaults */
|
||||
uint16_t* hash_table_data_words_;
|
||||
uint8_t* hash_table_data_lengths_;
|
||||
size_t buckets_alloc_size_;
|
||||
uint16_t* buckets_data_;
|
||||
size_t dict_words_alloc_size_;
|
||||
DictWord* dict_words_data_;
|
||||
BrotliDictionary* words_instance_;
|
||||
} BrotliEncoderDictionary;
|
||||
|
||||
/* Dictionary data for all 64 contexts */
|
||||
typedef struct ContextualEncoderDictionary {
|
||||
/* Whether context_map is in use. */
BROTLI_BOOL context_based;
|
||||
/* Number of distinct dictionaries referenced through dict[]. */
uint8_t num_dictionaries;
|
||||
/* Per-context map; copied from the decoded shared dictionary when
   context_based is set. */
uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
|
||||
/* Pointers to the dictionaries, which are stored in instance_ or
   instances_. */
const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
|
||||
|
||||
/* If num_instances_ is 1, instance_ is used, else dynamic allocation with
   instances_ is used. */
|
||||
size_t num_instances_;
|
||||
BrotliEncoderDictionary instance_;
|
||||
BrotliEncoderDictionary* instances_;
|
||||
} ContextualEncoderDictionary;
|
||||
|
||||
/* Top-level encoder dictionary: prefix (compound) dictionaries plus the
   contextual static dictionary. */
typedef struct SharedEncoderDictionary {
|
||||
/* Magic value to distinguish this struct from PreparedDictionary for
   certain external usages. */
|
||||
uint32_t magic;
|
||||
|
||||
/* LZ77 prefix, compound dictionary */
|
||||
CompoundDictionary compound;
|
||||
|
||||
/* Custom static dictionary (optionally context-based) */
|
||||
ContextualEncoderDictionary contextual;
|
||||
|
||||
/* The maximum quality the dictionary was computed for */
|
||||
int max_quality;
|
||||
} SharedEncoderDictionary;
|
||||
|
||||
/* Heap-allocated dictionary wrapper that carries its own memory manager. */
typedef struct ManagedDictionary {
|
||||
/* Set to kManagedDictionaryMagic on creation. */
uint32_t magic;
|
||||
/* Initialised with the allocator callbacks given at creation; also used to
   free this struct itself. */
MemoryManager memory_manager_;
|
||||
/* NULL right after creation. NOTE(review): what it points to afterwards is
   decided by callers outside this file - confirm. */
uint32_t* dictionary;
|
||||
} ManagedDictionary;
|
||||
|
||||
/* Initializes |dict| to the brotli built-in dictionary: no prefix
   dictionaries and a single, non-context-based encoder dictionary. */
|
||||
BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
|
||||
SharedEncoderDictionary* dict);
|
||||
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
/* Initializes |dict| to the shared dictionary that will be parsed from
   |encoded_dict| (|size| bytes), computing lookup data suitable for encoding
   at up to |quality|. Requires that you keep the encoded_dict buffer
   around, parts of data will point to it.
   Returns BROTLI_FALSE if parsing or allocation fails. */
|
||||
BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
|
||||
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
|
||||
int quality, SharedEncoderDictionary* dict);
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
|
||||
/* Releases, through |m|, all memory owned by |dict| (prepared prefix
   dictionaries and encoder dictionary instances). |dict| itself is not
   freed. */
BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
|
||||
MemoryManager* m, SharedEncoderDictionary* dict);
|
||||
|
||||
/* Allocates and initialises an empty ManagedDictionary using the given
   allocator callbacks. Returns NULL if the allocation fails. */
BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
|
||||
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
|
||||
|
||||
/* Frees a ManagedDictionary created by BrotliCreateManagedDictionary().
   NULL is accepted and ignored. */
BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
|
||||
ManagedDictionary* dictionary);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_ENCODER_DICT_H_ */
|
||||
500
external/duckdb/third_party/brotli/enc/entropy_encode.cpp
vendored
Normal file
500
external/duckdb/third_party/brotli/enc/entropy_encode.cpp
vendored
Normal file
@@ -0,0 +1,500 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Entropy encoding (Huffman) utilities. */
|
||||
|
||||
#include "entropy_encode.h"
|
||||
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Gap sequence, largest gap first, consumed by the Shell sort branch of
   SortHuffmanTreeItems() in entropy_encode.h. */
const size_t duckdb_brotli::kBrotliShellGaps[] = {132, 57, 23, 10, 4, 1};
|
||||
|
||||
/* Walks the tree rooted at pool[p0] without recursion and stores the depth of
   every leaf in depth[leaf value].

   A node is internal when index_left_ >= 0; index_right_or_value_ is then the
   right child. Otherwise the node is a leaf and index_right_or_value_ is the
   symbol whose depth gets written.

   Returns BROTLI_FALSE as soon as a leaf would sit deeper than max_depth
   (depth[] may then be partially written), BROTLI_TRUE otherwise.
   max_depth must not exceed 15: the explicit stack holds 16 slots. */
BROTLI_BOOL duckdb_brotli::BrotliSetDepth(
    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
  int pending_right[16];  /* right child still to visit per level, or -1 */
  int level = 0;
  int node = p0;
  BROTLI_DCHECK(max_depth <= 15);
  pending_right[0] = -1;
  for (;;) {
    /* Go down the left spine, remembering the right sibling at each level. */
    while (pool[node].index_left_ >= 0) {
      ++level;
      if (level > max_depth) {
        return BROTLI_FALSE;
      }
      pending_right[level] = pool[node].index_right_or_value_;
      node = pool[node].index_left_;
    }
    /* Reached a leaf: record its depth. */
    depth[pool[node].index_right_or_value_] = (uint8_t)level;

    /* Climb back to the deepest level that still has an unvisited subtree. */
    while (level >= 0 && pending_right[level] == -1) {
      --level;
    }
    if (level < 0) {
      return BROTLI_TRUE;
    }
    node = pending_right[level];
    pending_right[level] = -1;
  }
}
|
||||
|
||||
/* Sort the root nodes, least popular first. */
|
||||
static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
|
||||
const HuffmanTree* v0, const HuffmanTree* v1) {
|
||||
if (v0->total_count_ != v1->total_count_) {
|
||||
return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
|
||||
}
|
||||
return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
|
||||
}
|
||||
|
||||
/* This function will create a Huffman tree.
|
||||
|
||||
The catch here is that the tree cannot be arbitrarily deep.
|
||||
Brotli specifies a maximum depth of 15 bits for "code trees"
|
||||
and 7 bits for "code length code trees."
|
||||
|
||||
count_limit is the value that is to be faked as the minimum value
|
||||
and this minimum value is raised until the tree matches the
|
||||
maximum length requirement.
|
||||
|
||||
This algorithm is not of excellent performance for very long data blocks,
|
||||
especially when population counts are longer than 2**tree_limit, but
|
||||
we are not planning to use this with extremely long blocks.
|
||||
|
||||
See http://en.wikipedia.org/wiki/Huffman_coding */
|
||||
void duckdb_brotli::BrotliCreateHuffmanTree(const uint32_t* data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth) {
|
||||
uint32_t count_limit;
|
||||
HuffmanTree sentinel;
|
||||
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
|
||||
/* For block sizes below 64 kB, we never need to do a second iteration
|
||||
of this loop. Probably all of our block sizes will be smaller than
|
||||
that, so this loop is mostly of academic interest. If we actually
|
||||
would need this, we would be better off with the Katajainen algorithm. */
|
||||
for (count_limit = 1; ; count_limit *= 2) {
|
||||
size_t n = 0;
|
||||
size_t i;
|
||||
size_t j;
|
||||
size_t k;
|
||||
for (i = length; i != 0;) {
|
||||
--i;
|
||||
if (data[i]) {
|
||||
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
|
||||
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
|
||||
break;
|
||||
}
|
||||
|
||||
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
|
||||
|
||||
/* The nodes are:
|
||||
[0, n): the sorted leaf nodes that we start with.
|
||||
[n]: we add a sentinel here.
|
||||
[n + 1, 2n): new parent nodes are added here, starting from
|
||||
(n+1). These are naturally in ascending order.
|
||||
[2n]: we add a sentinel at the end as well.
|
||||
There will be (2n+1) elements at the end. */
|
||||
tree[n] = sentinel;
|
||||
tree[n + 1] = sentinel;
|
||||
|
||||
i = 0; /* Points to the next leaf node. */
|
||||
j = n + 1; /* Points to the next non-leaf node. */
|
||||
for (k = n - 1; k != 0; --k) {
|
||||
size_t left, right;
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
left = i;
|
||||
++i;
|
||||
} else {
|
||||
left = j;
|
||||
++j;
|
||||
}
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
right = i;
|
||||
++i;
|
||||
} else {
|
||||
right = j;
|
||||
++j;
|
||||
}
|
||||
|
||||
{
|
||||
/* The sentinel node becomes the parent node. */
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = (int16_t)left;
|
||||
tree[j_end].index_right_or_value_ = (int16_t)right;
|
||||
|
||||
/* Add back the last sentinel node. */
|
||||
tree[j_end + 1] = sentinel;
|
||||
}
|
||||
}
|
||||
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
|
||||
/* We need to pack the Huffman tree in tree_limit bits. If this was not
|
||||
successful, add fake entities to the lowest values and retry. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Reverses the bytes of v in the half-open index range [start, end) in place.
   An empty range (end <= start) is a no-op. */
static void Reverse(uint8_t* v, size_t start, size_t end) {
  if (end <= start) {
    /* Nothing to reverse. This also keeps "--end" from wrapping around when
       end == 0, which would send the loop below far out of bounds. */
    return;
  }
  --end;  /* index of the last element of the range */
  while (start < end) {
    const uint8_t tmp = v[start];
    v[start] = v[end];
    v[end] = tmp;
    ++start;
    --end;
  }
}
|
||||
|
||||
static void BrotliWriteHuffmanTreeRepetitions(
|
||||
const uint8_t previous_value,
|
||||
const uint8_t value,
|
||||
size_t repetitions,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
BROTLI_DCHECK(repetitions > 0);
|
||||
if (previous_value != value) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions == 7) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
size_t start = *tree_size;
|
||||
repetitions -= 3;
|
||||
while (BROTLI_TRUE) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x3;
|
||||
++(*tree_size);
|
||||
repetitions >>= 2;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
static void BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
size_t repetitions,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
if (repetitions == 11) {
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
size_t start = *tree_size;
|
||||
repetitions -= 3;
|
||||
while (BROTLI_TRUE) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x7;
|
||||
++(*tree_size);
|
||||
repetitions >>= 3;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Rewrites counts[0..length) in place so that the code lengths derived from
   it are friendlier to run-length coding.

   good_for_rle is caller-provided scratch space of at least `length` bytes.
   The function returns without changing anything when fewer than 16 counts
   are non-zero. After trailing zeros are ignored it works in three steps:
     1) in histograms whose smallest non-zero count is below 4 and which have
        fewer than 6 zeros, isolated zeros between two non-zeros become 1;
        histograms with fewer than 28 non-zero counts stop here;
     2) existing runs of equal counts (5+ zeros or 7+ equal non-zeros) are
        flagged in good_for_rle so they are left alone;
     3) neighbouring counts that stay close to a running average are
        collapsed to that average (24.8 fixed point arithmetic). */
void duckdb_brotli::BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                                      uint8_t* good_for_rle) {
  const size_t streak_limit = 1240;
  size_t populated = 0;
  size_t nonzeros = 0;
  uint32_t smallest_nonzero = 1 << 30;
  uint32_t run_value;
  size_t run_length;
  size_t stride;
  size_t limit;
  size_t sum;
  size_t i;

  /* Let's make the Huffman code more compatible with RLE encoding. */
  for (i = 0; i < length; ++i) {
    if (counts[i] != 0) {
      ++populated;
    }
  }
  if (populated < 16) {
    return;
  }
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  /* All zeros. */
  }

  /* From here on counts[0..length) has no trailing zeros. */
  for (i = 0; i < length; ++i) {
    if (counts[i] == 0) {
      continue;
    }
    ++nonzeros;
    if (counts[i] < smallest_nonzero) {
      smallest_nonzero = counts[i];
    }
  }
  if (nonzeros < 5) {
    /* Small histogram will model it well. */
    return;
  }
  if (smallest_nonzero < 4 && length - nonzeros < 6) {
    /* Few zeros and some tiny counts: fill isolated single-zero holes. */
    for (i = 1; i < length - 1; ++i) {
      if (counts[i] == 0 && counts[i - 1] != 0 && counts[i + 1] != 0) {
        counts[i] = 1;
      }
    }
  }
  if (nonzeros < 28) {
    return;
  }

  /* 2) Flag the population counts that already form a run an RLE code can
        express, so that step 3 does not spoil them: any run of more than
        4 zeros, or of more than 6 equal non-zero values. */
  memset(good_for_rle, 0, length);
  run_value = counts[0];
  run_length = 0;
  for (i = 0; i <= length; ++i) {
    if (i != length && counts[i] == run_value) {
      ++run_length;
      continue;
    }
    /* The run [i - run_length, i) has just ended. */
    if ((run_value == 0 && run_length >= 5) ||
        (run_value != 0 && run_length >= 7)) {
      memset(&good_for_rle[i - run_length], 1, run_length);
    }
    run_length = 1;
    if (i != length) {
      run_value = counts[i];
    }
  }

  /* 3) Let's replace those population counts that lead to more RLE codes.
        Math here is in 24.8 fixed point representation. */
  stride = 0;
  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  sum = 0;
  for (i = 0; i <= length; ++i) {
    /* The current stride ends at the end of input, at the border of a
       protected run, or when counts[i] strays too far from the average. */
    const int stride_ends =
        i == length || good_for_rle[i] ||
        (i != 0 && good_for_rle[i - 1]) ||
        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit;
    if (stride_ends) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        /* Collapse what we have, if we have enough (4). An all-zero stride
           stays at zero; anything else becomes the rounded average, but
           never less than 1. */
        size_t collapsed = 0;
        size_t k;
        if (sum != 0) {
          collapsed = (sum + stride / 2) / stride;
          if (collapsed == 0) {
            collapsed = 1;
          }
        }
        /* counts[i] already belongs to the next stride, hence [i - stride, i). */
        for (k = stride; k != 0; --k) {
          counts[i - k] = (uint32_t)collapsed;
        }
      }
      stride = 0;
      sum = 0;
      if (i < length - 2) {
        /* All interesting strides have a count of at least 4, */
        /* at least when non-zeros. */
        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
      } else if (i < length) {
        limit = 256 * counts[i];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (i == length) {
      break;  /* last pass only flushes the final stride */
    }
    sum += counts[i];
    if (stride >= 4) {
      limit = (256 * sum + stride / 2) / stride;
    }
    if (stride == 4) {
      limit += 120;
    }
  }
}
|
||||
|
||||
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
BROTLI_BOOL* use_rle_for_non_zero,
|
||||
BROTLI_BOOL* use_rle_for_zero) {
|
||||
size_t total_reps_zero = 0;
|
||||
size_t total_reps_non_zero = 0;
|
||||
size_t count_reps_zero = 1;
|
||||
size_t count_reps_non_zero = 1;
|
||||
size_t i;
|
||||
for (i = 0; i < length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
size_t k;
|
||||
for (k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
if (reps >= 3 && value == 0) {
|
||||
total_reps_zero += reps;
|
||||
++count_reps_zero;
|
||||
}
|
||||
if (reps >= 4 && value != 0) {
|
||||
total_reps_non_zero += reps;
|
||||
++count_reps_non_zero;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
*use_rle_for_non_zero =
|
||||
TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
|
||||
*use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
|
||||
}
|
||||
|
||||
void duckdb_brotli::BrotliWriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
|
||||
size_t i;
|
||||
BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
|
||||
BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
|
||||
|
||||
/* Throw away trailing zeros. */
|
||||
size_t new_length = length;
|
||||
for (i = 0; i < length; ++i) {
|
||||
if (depth[length - i - 1] == 0) {
|
||||
--new_length;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* First gather statistics on if it is a good idea to do RLE. */
|
||||
if (length > 50) {
|
||||
/* Find RLE coding for longer codes.
|
||||
Shorter codes seem not to benefit from RLE. */
|
||||
DecideOverRleUse(depth, new_length,
|
||||
&use_rle_for_non_zero, &use_rle_for_zero);
|
||||
}
|
||||
|
||||
/* Actual RLE coding. */
|
||||
for (i = 0; i < new_length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
if ((value != 0 && use_rle_for_non_zero) ||
|
||||
(value == 0 && use_rle_for_zero)) {
|
||||
size_t k;
|
||||
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
}
|
||||
if (value == 0) {
|
||||
BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
reps, tree_size, tree, extra_bits_data);
|
||||
} else {
|
||||
BrotliWriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the low num_bits bits of `bits` in reversed order (bit 0 becomes
   bit num_bits - 1, and so on). Works a nibble at a time through a lookup
   table, then drops the padding bits of the last, partially used nibble.
   Callers in this file pass num_bits in [1..15]. */
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
  static const size_t kNibbleReversed[16] = {  /* Pre-reversed 4-bit values. */
    0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
    0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
  };
  size_t reversed = kNibbleReversed[bits & 0x0F];
  size_t consumed = 4;  /* number of input bits folded in so far */
  while (consumed < num_bits) {
    bits = (uint16_t)(bits >> 4);
    reversed = (reversed << 4) | kNibbleReversed[bits & 0x0F];
    consumed += 4;
  }
  /* (0 - num_bits) & 3 is how far num_bits falls short of a multiple of 4. */
  return (uint16_t)(reversed >> ((0 - num_bits) & 0x03));
}
|
||||
|
||||
/* 0..15 are values for bits */
|
||||
#define MAX_HUFFMAN_BITS 16
|
||||
|
||||
void duckdb_brotli::BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
|
||||
size_t len,
|
||||
uint16_t* bits) {
|
||||
/* In Brotli, all bit depths are [1..15]
|
||||
0 bit depth means that the symbol does not exist. */
|
||||
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
|
||||
uint16_t next_code[MAX_HUFFMAN_BITS];
|
||||
size_t i;
|
||||
int code = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
bl_count[0] = 0;
|
||||
next_code[0] = 0;
|
||||
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
|
||||
code = (code + bl_count[i - 1]) << 1;
|
||||
next_code[i] = (uint16_t)code;
|
||||
}
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (depth[i]) {
|
||||
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
119
external/duckdb/third_party/brotli/enc/entropy_encode.h
vendored
Normal file
119
external/duckdb/third_party/brotli/enc/entropy_encode.h
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Entropy encoding (Huffman) utilities. */
|
||||
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* A node of a Huffman tree. */
|
||||
typedef struct HuffmanTree {
|
||||
/* Population count of a leaf, or the sum of both children's counts for an
   internal node. */
uint32_t total_count_;
|
||||
/* Index of the left child inside the node array; negative for a leaf. */
int16_t index_left_;
|
||||
/* Internal node: index of the right child inside the node array.
   Leaf: the symbol value this leaf stands for. */
int16_t index_right_or_value_;
|
||||
} HuffmanTree;
|
||||
|
||||
/* Fills in all three fields of *self: `count` goes to total_count_, `left`
   to index_left_ and `right` to index_right_or_value_. */
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
    int16_t left, int16_t right) {
  self->index_right_or_value_ = right;
  self->index_left_ = left;
  self->total_count_ = count;
}
|
||||
|
||||
/* Returns 1 if assignment of depths succeeded, otherwise 0. */
|
||||
BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
|
||||
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
|
||||
|
||||
/* This function will create a Huffman tree.
|
||||
|
||||
The (data,length) contains the population counts.
|
||||
The tree_limit is the maximum bit depth of the Huffman codes.
|
||||
|
||||
The depth contains the tree, i.e., how many bits are used for
|
||||
the symbol.
|
||||
|
||||
The actual Huffman tree is constructed in the tree[] array, which has to
|
||||
be at least 2 * length + 1 long.
|
||||
|
||||
See http://en.wikipedia.org/wiki/Huffman_coding */
|
||||
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth);
|
||||
|
||||
/* Change the population counts in a way that the consequent
|
||||
Huffman tree compression, especially its RLE-part will be more
|
||||
likely to compress this data more efficiently.
|
||||
|
||||
length contains the size of the histogram.
|
||||
counts contains the population counts.
|
||||
good_for_rle is a buffer of at least length size */
|
||||
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
|
||||
size_t length, uint32_t* counts, uint8_t* good_for_rle);
|
||||
|
||||
/* Write a Huffman tree from bit depths into the bit-stream representation
|
||||
of a Huffman tree. The generated Huffman tree is to be compressed once
|
||||
more using a Huffman tree */
|
||||
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
|
||||
size_t num,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data);
|
||||
|
||||
/* Get the actual bit values for a tree of bit depths. */
|
||||
BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
|
||||
size_t len,
|
||||
uint16_t* bits);
|
||||
|
||||
BROTLI_INTERNAL extern const size_t kBrotliShellGaps[6];
|
||||
/* Input size optimized Shell sort. */
|
||||
typedef BROTLI_BOOL (*HuffmanTreeComparator)(
|
||||
const HuffmanTree*, const HuffmanTree*);
|
||||
static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
|
||||
const size_t n, HuffmanTreeComparator comparator) {
|
||||
if (n < 13) {
|
||||
/* Insertion sort. */
|
||||
size_t i;
|
||||
for (i = 1; i < n; ++i) {
|
||||
HuffmanTree tmp = items[i];
|
||||
size_t k = i;
|
||||
size_t j = i - 1;
|
||||
while (comparator(&tmp, &items[j])) {
|
||||
items[k] = items[j];
|
||||
k = j;
|
||||
if (!j--) break;
|
||||
}
|
||||
items[k] = tmp;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
/* Shell sort. */
|
||||
int g = n < 57 ? 2 : 0;
|
||||
for (; g < 6; ++g) {
|
||||
size_t gap = kBrotliShellGaps[g];
|
||||
size_t i;
|
||||
for (i = gap; i < n; ++i) {
|
||||
size_t j = i;
|
||||
HuffmanTree tmp = items[i];
|
||||
for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
|
||||
items[j] = items[j - gap];
|
||||
}
|
||||
items[j] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
|
||||
538
external/duckdb/third_party/brotli/enc/entropy_encode_static.h
vendored
Normal file
538
external/duckdb/third_party/brotli/enc/entropy_encode_static.h
vendored
Normal file
@@ -0,0 +1,538 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Static entropy codes used for faster meta-block encoding. */
|
||||
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "write_bits.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
static const uint8_t kCodeLengthDepth[18] = {
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
|
||||
};
|
||||
|
||||
static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
};
|
||||
|
||||
static const uint8_t kStaticDistanceCodeDepth[64] = {
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
};
|
||||
|
||||
/* GENERATED CODE START */
|
||||
static const uint32_t kCodeLengthBits[18] = {
|
||||
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
|
||||
};
|
||||
|
||||
/* Writes the fixed 40-bit pattern 0x000000FF55555554 to the bit stream via
   BrotliWriteBits(); judging by the name, this is the pre-encoded form of the
   static code-length code defined by the tables above. */
static BROTLI_INLINE void StoreStaticCodeLengthCode(
    size_t* storage_ix, uint8_t* storage) {
  const uint64_t pattern = BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u);
  BrotliWriteBits(40, pattern, storage_ix, storage);
}
|
||||
|
||||
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
|
||||
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
|
||||
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
|
||||
0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
|
||||
0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
|
||||
0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
|
||||
0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
|
||||
0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
|
||||
0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
|
||||
0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
|
||||
0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
|
||||
0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
|
||||
0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
|
||||
0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
|
||||
0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
|
||||
0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
|
||||
0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
|
||||
0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
|
||||
0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
|
||||
0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
|
||||
0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
|
||||
0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
|
||||
0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
|
||||
0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
|
||||
0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
|
||||
0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
|
||||
0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
|
||||
0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
|
||||
0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
|
||||
0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
|
||||
0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
|
||||
0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
|
||||
0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
|
||||
0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
|
||||
0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
|
||||
0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
|
||||
0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
|
||||
0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
|
||||
0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
|
||||
0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
|
||||
0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
|
||||
0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
|
||||
0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
|
||||
0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
|
||||
0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
|
||||
0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
|
||||
0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
|
||||
0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
|
||||
0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
|
||||
0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
|
||||
0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
|
||||
0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
|
||||
0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
|
||||
0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
|
||||
0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
|
||||
0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
|
||||
0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
|
||||
0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
|
||||
0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
|
||||
0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
|
||||
0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
|
||||
0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
|
||||
0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
|
||||
0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
|
||||
0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
|
||||
0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
|
||||
0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
|
||||
0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
|
||||
0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
|
||||
0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
|
||||
0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
|
||||
0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
|
||||
0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
|
||||
0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
|
||||
0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
|
||||
0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
|
||||
0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
|
||||
0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
|
||||
0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
|
||||
0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
|
||||
0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
|
||||
0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
|
||||
0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
|
||||
0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
|
||||
0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
|
||||
0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
|
||||
0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
|
||||
0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
|
||||
0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
|
||||
0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
|
||||
0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
|
||||
0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
|
||||
0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
|
||||
0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
|
||||
0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
|
||||
0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
|
||||
0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
|
||||
0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
|
||||
0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
|
||||
0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
|
||||
0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
|
||||
0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
|
||||
0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
|
||||
0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
|
||||
0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
|
||||
0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
|
||||
0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
|
||||
0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
|
||||
0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
|
||||
0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
|
||||
0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
|
||||
0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
|
||||
0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
|
||||
0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
|
||||
0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
|
||||
0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
|
||||
0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
|
||||
0x06f9cb87, 0x08f9cb87,
|
||||
};
|
||||
|
||||
static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
};
|
||||
|
||||
static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
|
||||
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
|
||||
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
|
||||
0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
|
||||
0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
|
||||
0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
|
||||
0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
|
||||
0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
|
||||
0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
|
||||
0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
|
||||
0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
|
||||
0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
|
||||
0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
|
||||
0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
|
||||
0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
|
||||
0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
|
||||
0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
|
||||
0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
|
||||
0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
|
||||
0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
|
||||
0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
|
||||
0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
|
||||
0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
|
||||
0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
|
||||
0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
|
||||
0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
|
||||
0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
|
||||
0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
|
||||
0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
|
||||
0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
|
||||
0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
|
||||
0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
|
||||
0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
|
||||
0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
|
||||
0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
|
||||
0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
|
||||
0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
|
||||
0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
|
||||
0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
|
||||
0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
|
||||
0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
|
||||
0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
|
||||
0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
|
||||
0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
|
||||
0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
|
||||
0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
|
||||
0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
|
||||
0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
|
||||
0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
|
||||
0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
|
||||
0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
|
||||
0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
|
||||
0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
|
||||
0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
|
||||
0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
|
||||
0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
|
||||
0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
|
||||
0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
|
||||
0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
|
||||
0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
|
||||
0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
|
||||
0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
|
||||
0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
|
||||
0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
|
||||
0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
|
||||
0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
|
||||
0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
|
||||
0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
|
||||
0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
|
||||
0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
|
||||
0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
|
||||
0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
|
||||
0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
|
||||
0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
|
||||
0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
|
||||
0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
|
||||
0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
|
||||
0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
|
||||
0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
|
||||
0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
|
||||
0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
|
||||
0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
|
||||
0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
|
||||
0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
|
||||
0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
|
||||
0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
|
||||
0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
|
||||
0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
|
||||
0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
|
||||
0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
|
||||
0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
|
||||
0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
|
||||
0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
|
||||
0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
|
||||
0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
|
||||
0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
|
||||
0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
|
||||
0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
|
||||
0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
|
||||
0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
|
||||
0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
|
||||
0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
|
||||
0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
|
||||
0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
|
||||
0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
|
||||
0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
|
||||
0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
|
||||
0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
|
||||
0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
|
||||
0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
|
||||
0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
|
||||
0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
|
||||
0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
|
||||
0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
|
||||
0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
|
||||
0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
|
||||
0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
|
||||
0x2baeb6db, 0x3baeb6db,
|
||||
};
|
||||
|
||||
static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
};
|
||||
|
||||
static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0, 256, 128, 384, 64, 320, 192, 448,
|
||||
32, 288, 160, 416, 96, 352, 224, 480,
|
||||
16, 272, 144, 400, 80, 336, 208, 464,
|
||||
48, 304, 176, 432, 112, 368, 240, 496,
|
||||
8, 264, 136, 392, 72, 328, 200, 456,
|
||||
40, 296, 168, 424, 104, 360, 232, 488,
|
||||
24, 280, 152, 408, 88, 344, 216, 472,
|
||||
56, 312, 184, 440, 120, 376, 248, 504,
|
||||
4, 260, 132, 388, 68, 324, 196, 452,
|
||||
36, 292, 164, 420, 100, 356, 228, 484,
|
||||
20, 276, 148, 404, 84, 340, 212, 468,
|
||||
52, 308, 180, 436, 116, 372, 244, 500,
|
||||
12, 268, 140, 396, 76, 332, 204, 460,
|
||||
44, 300, 172, 428, 108, 364, 236, 492,
|
||||
28, 284, 156, 412, 92, 348, 220, 476,
|
||||
60, 316, 188, 444, 124, 380, 252, 508,
|
||||
2, 258, 130, 386, 66, 322, 194, 450,
|
||||
34, 290, 162, 418, 98, 354, 226, 482,
|
||||
18, 274, 146, 402, 82, 338, 210, 466,
|
||||
50, 306, 178, 434, 114, 370, 242, 498,
|
||||
10, 266, 138, 394, 74, 330, 202, 458,
|
||||
42, 298, 170, 426, 106, 362, 234, 490,
|
||||
26, 282, 154, 410, 90, 346, 218, 474,
|
||||
58, 314, 186, 442, 122, 378, 250, 506,
|
||||
6, 262, 134, 390, 70, 326, 198, 454,
|
||||
38, 294, 166, 422, 102, 358, 230, 486,
|
||||
22, 278, 150, 406, 86, 342, 214, 470,
|
||||
54, 310, 182, 438, 118, 374, 246, 502,
|
||||
14, 270, 142, 398, 78, 334, 206, 462,
|
||||
46, 302, 174, 430, 110, 366, 238, 494,
|
||||
30, 286, 158, 414, 94, 350, 222, 478,
|
||||
62, 318, 190, 446, 126, 382, 254, 510,
|
||||
1, 257, 129, 385, 65, 321, 193, 449,
|
||||
33, 289, 161, 417, 97, 353, 225, 481,
|
||||
17, 273, 145, 401, 81, 337, 209, 465,
|
||||
49, 305, 177, 433, 113, 369, 241, 497,
|
||||
9, 265, 137, 393, 73, 329, 201, 457,
|
||||
41, 297, 169, 425, 105, 361, 233, 489,
|
||||
25, 281, 153, 409, 89, 345, 217, 473,
|
||||
57, 313, 185, 441, 121, 377, 249, 505,
|
||||
5, 261, 133, 389, 69, 325, 197, 453,
|
||||
37, 293, 165, 421, 101, 357, 229, 485,
|
||||
21, 277, 149, 405, 85, 341, 213, 469,
|
||||
53, 309, 181, 437, 117, 373, 245, 501,
|
||||
13, 269, 141, 397, 77, 333, 205, 461,
|
||||
45, 301, 173, 429, 109, 365, 237, 493,
|
||||
29, 285, 157, 413, 93, 349, 221, 477,
|
||||
61, 317, 189, 445, 125, 381, 253, 509,
|
||||
3, 259, 131, 387, 67, 323, 195, 451,
|
||||
35, 291, 163, 419, 99, 355, 227, 483,
|
||||
19, 275, 147, 403, 83, 339, 211, 467,
|
||||
51, 307, 179, 435, 115, 371, 243, 499,
|
||||
11, 267, 139, 395, 75, 331, 203, 459,
|
||||
43, 299, 171, 427, 107, 363, 235, 491,
|
||||
27, 283, 155, 411, 91, 347, 219, 475,
|
||||
59, 315, 187, 443, 123, 379, 251, 507,
|
||||
7, 1031, 519, 1543, 263, 1287, 775, 1799,
|
||||
135, 1159, 647, 1671, 391, 1415, 903, 1927,
|
||||
71, 1095, 583, 1607, 327, 1351, 839, 1863,
|
||||
199, 1223, 711, 1735, 455, 1479, 967, 1991,
|
||||
39, 1063, 551, 1575, 295, 1319, 807, 1831,
|
||||
167, 1191, 679, 1703, 423, 1447, 935, 1959,
|
||||
103, 1127, 615, 1639, 359, 1383, 871, 1895,
|
||||
231, 1255, 743, 1767, 487, 1511, 999, 2023,
|
||||
23, 1047, 535, 1559, 279, 1303, 791, 1815,
|
||||
151, 1175, 663, 1687, 407, 1431, 919, 1943,
|
||||
87, 1111, 599, 1623, 343, 1367, 855, 1879,
|
||||
215, 1239, 727, 1751, 471, 1495, 983, 2007,
|
||||
55, 1079, 567, 1591, 311, 1335, 823, 1847,
|
||||
183, 1207, 695, 1719, 439, 1463, 951, 1975,
|
||||
119, 1143, 631, 1655, 375, 1399, 887, 1911,
|
||||
247, 1271, 759, 1783, 503, 1527, 1015, 2039,
|
||||
15, 1039, 527, 1551, 271, 1295, 783, 1807,
|
||||
143, 1167, 655, 1679, 399, 1423, 911, 1935,
|
||||
79, 1103, 591, 1615, 335, 1359, 847, 1871,
|
||||
207, 1231, 719, 1743, 463, 1487, 975, 1999,
|
||||
47, 1071, 559, 1583, 303, 1327, 815, 1839,
|
||||
175, 1199, 687, 1711, 431, 1455, 943, 1967,
|
||||
111, 1135, 623, 1647, 367, 1391, 879, 1903,
|
||||
239, 1263, 751, 1775, 495, 1519, 1007, 2031,
|
||||
31, 1055, 543, 1567, 287, 1311, 799, 1823,
|
||||
159, 1183, 671, 1695, 415, 1439, 927, 1951,
|
||||
95, 1119, 607, 1631, 351, 1375, 863, 1887,
|
||||
223, 1247, 735, 1759, 479, 1503, 991, 2015,
|
||||
63, 1087, 575, 1599, 319, 1343, 831, 1855,
|
||||
191, 1215, 703, 1727, 447, 1471, 959, 1983,
|
||||
127, 1151, 639, 1663, 383, 1407, 895, 1919,
|
||||
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
|
||||
};
|
||||
|
||||
/* Writes the fixed bit pattern describing the static command Huffman tree to
   the output: one 56-bit word followed by three zero bits.
   storage_ix and storage are forwarded unchanged to BrotliWriteBits. */
static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
    size_t* storage_ix, uint8_t* storage) {
  const uint64_t tree_header = BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U);
  BrotliWriteBits(56, tree_header, storage_ix, storage);
  BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
}
|
||||
|
||||
/* Bit patterns of the 64 static distance codes. Entry i is the value of i
   with its low 6 bits reversed. */
static const uint16_t kStaticDistanceCodeBits[64] = {
   0, 32, 16, 48,  8, 40, 24, 56,
   4, 36, 20, 52, 12, 44, 28, 60,
   2, 34, 18, 50, 10, 42, 26, 58,
   6, 38, 22, 54, 14, 46, 30, 62,
   1, 33, 17, 49,  9, 41, 25, 57,
   5, 37, 21, 53, 13, 45, 29, 61,
   3, 35, 19, 51, 11, 43, 27, 59,
   7, 39, 23, 55, 15, 47, 31, 63,
};
|
||||
|
||||
/* Writes the fixed 28-bit pattern describing the static distance Huffman tree
   to the output. storage_ix and storage are forwarded unchanged to
   BrotliWriteBits. */
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
    size_t* storage_ix, uint8_t* storage) {
  const uint64_t tree_header = 0x0369DC03u;
  BrotliWriteBits(28, tree_header, storage_ix, storage);
}
|
||||
/* GENERATED CODE END */
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
|
||||
101
external/duckdb/third_party/brotli/enc/fast_log.cpp
vendored
Normal file
101
external/duckdb/third_party/brotli/enc/fast_log.cpp
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "fast_log.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
|
||||
const double duckdb_brotli::kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE] = {
|
||||
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
|
||||
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
|
||||
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
|
||||
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
|
||||
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
|
||||
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
|
||||
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
|
||||
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
|
||||
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
|
||||
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
|
||||
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
|
||||
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
|
||||
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
|
||||
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
|
||||
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
|
||||
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
|
||||
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
|
||||
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
|
||||
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
|
||||
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
|
||||
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
|
||||
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
|
||||
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
|
||||
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
|
||||
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
|
||||
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
|
||||
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
|
||||
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
|
||||
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
|
||||
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
|
||||
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
|
||||
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
|
||||
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
|
||||
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
|
||||
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
|
||||
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
|
||||
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
|
||||
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
|
||||
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
|
||||
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
|
||||
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
|
||||
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
|
||||
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
|
||||
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
|
||||
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
|
||||
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
|
||||
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
|
||||
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
|
||||
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
|
||||
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
|
||||
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
|
||||
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
|
||||
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
|
||||
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
|
||||
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
|
||||
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
|
||||
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
|
||||
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
|
||||
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
|
||||
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
|
||||
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
|
||||
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
|
||||
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
|
||||
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
|
||||
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
|
||||
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
|
||||
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
|
||||
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
|
||||
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
|
||||
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
|
||||
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
|
||||
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
|
||||
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
|
||||
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
|
||||
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
|
||||
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
|
||||
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
|
||||
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
|
||||
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
|
||||
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
|
||||
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
|
||||
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
|
||||
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
|
||||
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
|
||||
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
|
||||
7.9943534368588578f
|
||||
};
|
||||
|
||||
|
||||
63
external/duckdb/third_party/brotli/enc/fast_log.h
vendored
Normal file
63
external/duckdb/third_party/brotli/enc/fast_log.h
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Utilities for fast computation of logarithms. */
|
||||
|
||||
#ifndef BROTLI_ENC_FAST_LOG_H_
|
||||
#define BROTLI_ENC_FAST_LOG_H_
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Returns floor(log2(n)) for a non-zero n.
   When BROTLI_BSR32 is available the value is narrowed to 32 bits before the
   call, so only the low 32 bits of n are taken into account on that path.
   The portable path counts how many right shifts are needed to reach zero
   (and therefore yields 0 for n == 0). */
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
#if defined(BROTLI_BSR32)
  const uint32_t low_word = (uint32_t)n;
  return BROTLI_BSR32(low_word);
#else
  uint32_t log = 0;
  for (n >>= 1; n != 0; n >>= 1) {
    ++log;
  }
  return log;
#endif
}
|
||||
|
||||
#define BROTLI_LOG2_TABLE_SIZE 256
|
||||
|
||||
/* A lookup table for small values of log2(int) to be used in entropy
|
||||
computation. */
|
||||
BROTLI_INTERNAL extern const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE];
|
||||
|
||||
/* Visual Studio 2012 and Android API levels < 18 do not have the log2()
|
||||
* function defined, so we use log() and a multiplication instead. */
|
||||
#if !defined(BROTLI_HAVE_LOG2)
|
||||
#if ((defined(_MSC_VER) && _MSC_VER <= 1700) || \
|
||||
(defined(__ANDROID_API__) && __ANDROID_API__ < 18))
|
||||
#define BROTLI_HAVE_LOG2 0
|
||||
#else
|
||||
#define BROTLI_HAVE_LOG2 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define LOG_2_INV 1.4426950408889634
|
||||
|
||||
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
|
||||
static BROTLI_INLINE double FastLog2(size_t v) {
|
||||
if (v < BROTLI_LOG2_TABLE_SIZE) {
|
||||
return kBrotliLog2Table[v];
|
||||
}
|
||||
#if !(BROTLI_HAVE_LOG2)
|
||||
return log((double)v) * LOG_2_INV;
|
||||
#else
|
||||
return log2((double)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_FAST_LOG_H_ */
|
||||
68
external/duckdb/third_party/brotli/enc/find_match_length.h
vendored
Normal file
68
external/duckdb/third_party/brotli/enc/find_match_length.h
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find maximal matching prefixes of strings. */
|
||||
|
||||
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Separate implementation for little-endian 64-bit targets, for speed. */
|
||||
#if defined(BROTLI_TZCNT64) && BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN
|
||||
/* Returns the length of the common prefix of s1 and s2, examining at most
   limit bytes of each.
   While at least 8 bytes remain, both inputs are loaded as little-endian
   64-bit words and XORed; on the first non-zero XOR, the number of trailing
   zero bits divided by 8 gives the count of equal leading bytes in that word.
   Any remaining tail of fewer than 8 bytes is compared byte by byte. */
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
                                                     const uint8_t* s2,
                                                     size_t limit) {
  size_t matched = 0;
  while (limit >= 8) {
    const uint64_t diff = BROTLI_UNALIGNED_LOAD64LE(s2 + matched) ^
                          BROTLI_UNALIGNED_LOAD64LE(s1 + matched);
    if (diff != 0) {
      const size_t equal_bits = (size_t)BROTLI_TZCNT64(diff);
      return matched + (equal_bits >> 3);
    }
    matched += 8;
    limit -= 8;
  }
  while (limit != 0 && s1[matched] == s2[matched]) {
    --limit;
    ++matched;
  }
  return matched;
}
|
||||
#else
|
||||
/* Returns the length of the common prefix of s1 and s2, examining at most
   limit bytes of each.
   The inputs are first compared 32 bits at a time for as long as at least
   4 bytes remain and the words are equal; the rest (including the word that
   differed, if any) is then compared byte by byte.
   All bounds are tracked as size_t offsets, so no pointer outside
   [s2, s2 + limit] is ever formed, even when limit < 4. */
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
                                                     const uint8_t* s2,
                                                     size_t limit) {
  size_t matched = 0;
  /* matched never exceeds limit, so limit - matched cannot wrap around. */
  while (limit - matched >= 4 &&
         BrotliUnalignedRead32(s2 + matched) ==
             BrotliUnalignedRead32(s1 + matched)) {
    matched += 4;
  }
  while (matched < limit && s1[matched] == s2[matched]) {
    ++matched;
  }
  return matched;
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
|
||||
96
external/duckdb/third_party/brotli/enc/histogram.cpp
vendored
Normal file
96
external/duckdb/third_party/brotli/enc/histogram.cpp
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Build per-context histograms of literals, commands and distance codes. */
|
||||
|
||||
#include "histogram.h"
|
||||
|
||||
#include "../common/context.h"
|
||||
#include "block_splitter.h"
|
||||
#include "command.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
typedef struct BlockSplitIterator {
|
||||
const BlockSplit* split_; /* Not owned. */
|
||||
size_t idx_;
|
||||
size_t type_;
|
||||
size_t length_;
|
||||
} BlockSplitIterator;
|
||||
|
||||
/* Positions "self" at the first block of "split". The starting type is 0 and
   the remaining length is the first block's length, or 0 when the split has
   no lengths array. */
static void InitBlockSplitIterator(BlockSplitIterator* self,
                                   const BlockSplit* split) {
  size_t first_length = 0;
  if (split->lengths) {
    first_length = split->lengths[0];
  }
  self->split_ = split;
  self->idx_ = 0;
  self->type_ = 0;
  self->length_ = first_length;
}
|
||||
|
||||
/* Consumes one symbol. When the current block is exhausted, first moves to
   the next block and loads its type and length. */
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
  if (self->length_ == 0) {
    const size_t next = self->idx_ + 1;
    self->idx_ = next;
    self->type_ = self->split_->types[next];
    self->length_ = self->split_->lengths[next];
  }
  self->length_ -= 1;
}
|
||||
|
||||
void duckdb_brotli::BrotliBuildHistogramsWithContext(
|
||||
const Command* cmds, const size_t num_commands,
|
||||
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
||||
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
|
||||
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
||||
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
||||
HistogramCommand* insert_and_copy_histograms,
|
||||
HistogramDistance* copy_dist_histograms) {
|
||||
size_t pos = start_pos;
|
||||
BlockSplitIterator literal_it;
|
||||
BlockSplitIterator insert_and_copy_it;
|
||||
BlockSplitIterator dist_it;
|
||||
size_t i;
|
||||
|
||||
InitBlockSplitIterator(&literal_it, literal_split);
|
||||
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
|
||||
InitBlockSplitIterator(&dist_it, dist_split);
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const Command* cmd = &cmds[i];
|
||||
size_t j;
|
||||
BlockSplitIteratorNext(&insert_and_copy_it);
|
||||
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
|
||||
cmd->cmd_prefix_);
|
||||
/* TODO(eustas): unwrap iterator blocks. */
|
||||
for (j = cmd->insert_len_; j != 0; --j) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&literal_it);
|
||||
context = literal_it.type_;
|
||||
if (context_modes) {
|
||||
ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
|
||||
context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
|
||||
BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
|
||||
}
|
||||
HistogramAddLiteral(&literal_histograms[context],
|
||||
ringbuffer[pos & mask]);
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = ringbuffer[pos & mask];
|
||||
++pos;
|
||||
}
|
||||
pos += CommandCopyLen(cmd);
|
||||
if (CommandCopyLen(cmd)) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd->cmd_prefix_ >= 128) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&dist_it);
|
||||
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
|
||||
CommandDistanceContext(cmd);
|
||||
HistogramAddDistance(©_dist_histograms[context],
|
||||
cmd->dist_prefix_ & 0x3FF);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
210
external/duckdb/third_party/brotli/enc/histogram.h
vendored
Normal file
210
external/duckdb/third_party/brotli/enc/histogram.h
vendored
Normal file
@@ -0,0 +1,210 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Models the histograms of literals, commands and distance codes. */
|
||||
|
||||
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
||||
#define BROTLI_ENC_HISTOGRAM_H_
|
||||
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/context.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "block_splitter.h"
|
||||
#include "command.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
|
||||
#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
|
||||
#define DataType uint8_t
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: Histogram, DATA_SIZE, DataType */
|
||||
|
||||
/* A simple container for histograms of data in blocks. */
|
||||
|
||||
typedef struct FN(Histogram) {
|
||||
uint32_t data_[DATA_SIZE];
|
||||
size_t total_count_;
|
||||
double bit_cost_;
|
||||
} FN(Histogram);
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
|
||||
memset(self->data_, 0, sizeof(self->data_));
|
||||
self->total_count_ = 0;
|
||||
self->bit_cost_ = HUGE_VAL;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(ClearHistograms)(
|
||||
FN(Histogram)* array, size_t length) {
|
||||
size_t i;
|
||||
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
|
||||
++self->data_[val];
|
||||
++self->total_count_;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
|
||||
const DataType* p, size_t n) {
|
||||
self->total_count_ += n;
|
||||
n += 1;
|
||||
while (--n) ++self->data_[*p++];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
|
||||
const FN(Histogram)* v) {
|
||||
size_t i;
|
||||
self->total_count_ += v->total_count_;
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
self->data_[i] += v->data_[i];
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
|
||||
#undef DataType
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#define DataType uint16_t
|
||||
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: Histogram, DATA_SIZE, DataType */
|
||||
|
||||
/* A simple container for histograms of data in blocks. */
|
||||
|
||||
typedef struct FN(Histogram) {
|
||||
uint32_t data_[DATA_SIZE];
|
||||
size_t total_count_;
|
||||
double bit_cost_;
|
||||
} FN(Histogram);
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
|
||||
memset(self->data_, 0, sizeof(self->data_));
|
||||
self->total_count_ = 0;
|
||||
self->bit_cost_ = HUGE_VAL;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(ClearHistograms)(
|
||||
FN(Histogram)* array, size_t length) {
|
||||
size_t i;
|
||||
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
|
||||
++self->data_[val];
|
||||
++self->total_count_;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
|
||||
const DataType* p, size_t n) {
|
||||
self->total_count_ += n;
|
||||
n += 1;
|
||||
while (--n) ++self->data_[*p++];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
|
||||
const FN(Histogram)* v) {
|
||||
size_t i;
|
||||
self->total_count_ += v->total_count_;
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
self->data_[i] += v->data_[i];
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: Histogram, DATA_SIZE, DataType */
|
||||
|
||||
/* A simple container for histograms of data in blocks. */
|
||||
|
||||
typedef struct FN(Histogram) {
|
||||
uint32_t data_[DATA_SIZE];
|
||||
size_t total_count_;
|
||||
double bit_cost_;
|
||||
} FN(Histogram);
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
|
||||
memset(self->data_, 0, sizeof(self->data_));
|
||||
self->total_count_ = 0;
|
||||
self->bit_cost_ = HUGE_VAL;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(ClearHistograms)(
|
||||
FN(Histogram)* array, size_t length) {
|
||||
size_t i;
|
||||
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
|
||||
++self->data_[val];
|
||||
++self->total_count_;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
|
||||
const DataType* p, size_t n) {
|
||||
self->total_count_ += n;
|
||||
n += 1;
|
||||
while (--n) ++self->data_[*p++];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
|
||||
const FN(Histogram)* v) {
|
||||
size_t i;
|
||||
self->total_count_ += v->total_count_;
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
self->data_[i] += v->data_[i];
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
|
||||
#undef DataType
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
|
||||
const Command* cmds, const size_t num_commands,
|
||||
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
||||
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
|
||||
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
||||
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
||||
HistogramCommand* insert_and_copy_histograms,
|
||||
HistogramDistance* copy_dist_histograms);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
|
||||
176
external/duckdb/third_party/brotli/enc/literal_cost.cpp
vendored
Normal file
176
external/duckdb/third_party/brotli/enc/literal_cost.cpp
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Literal cost model to allow backward reference replacement to be efficient.
|
||||
*/
|
||||
|
||||
#include "literal_cost.h"
|
||||
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "fast_log.h"
|
||||
#include "utf8_util.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
|
||||
if (c < 128) {
|
||||
return 0; /* Next one is the 'Byte 1' again. */
|
||||
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
|
||||
return BROTLI_MIN(size_t, 1, clamp);
|
||||
} else {
|
||||
/* Let's decide over the last byte if this ends the sequence. */
|
||||
if (last < 0xE0) {
|
||||
return 0; /* Completed two or three byte coding. */
|
||||
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
|
||||
return BROTLI_MIN(size_t, 2, clamp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t* data) {
|
||||
size_t counts[3] = { 0 };
|
||||
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
|
||||
size_t last_c = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++counts[UTF8Position(last_c, c, 2)];
|
||||
last_c = c;
|
||||
}
|
||||
if (counts[2] < 500) {
|
||||
max_utf8 = 1;
|
||||
}
|
||||
if (counts[1] + counts[2] < 25) {
|
||||
max_utf8 = 0;
|
||||
}
|
||||
return max_utf8;
|
||||
}
|
||||
|
||||
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t* data,
|
||||
size_t* histogram, float* cost) {
|
||||
/* max_utf8 is 0 (normal ASCII single byte modeling),
|
||||
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
|
||||
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
||||
size_t window_half = 495;
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
size_t in_window_utf8[3] = { 0 };
|
||||
size_t i;
|
||||
memset(histogram, 0, 3 * 256 * sizeof(histogram[0]));
|
||||
|
||||
{ /* Bootstrap histograms. */
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++histogram[256 * utf8_pos + c];
|
||||
++in_window_utf8[utf8_pos];
|
||||
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
last_c = c;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
size_t c =
|
||||
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
|
||||
size_t last_c =
|
||||
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
--histogram[256 * utf8_pos2 + data[(pos + i - window_half) & mask]];
|
||||
--in_window_utf8[utf8_pos2];
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
/* Add a byte in the future. */
|
||||
size_t c = data[(pos + i + window_half - 1) & mask];
|
||||
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
++histogram[256 * utf8_pos2 + data[(pos + i + window_half) & mask]];
|
||||
++in_window_utf8[utf8_pos2];
|
||||
}
|
||||
{
|
||||
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
||||
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
||||
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
size_t masked_pos = (pos + i) & mask;
|
||||
size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
|
||||
double lit_cost;
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
||||
lit_cost += 0.02905;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
/* Make the first bytes more expensive -- seems to help, not sure why.
|
||||
Perhaps because the entropy source is changing its properties
|
||||
rapidly in the beginning of the file, perhaps because the beginning
|
||||
of the data is a statistical "anomaly". */
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void duckdb_brotli::BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t* data,
|
||||
size_t* histogram, float* cost) {
|
||||
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, histogram, cost);
|
||||
return;
|
||||
} else {
|
||||
size_t window_half = 2000;
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
size_t i;
|
||||
memset(histogram, 0, 256 * sizeof(histogram[0]));
|
||||
|
||||
/* Bootstrap histogram. */
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
++histogram[data[(pos + i) & mask]];
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t histo;
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
--histogram[data[(pos + i - window_half) & mask]];
|
||||
--in_window;
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
/* Add a byte in the future. */
|
||||
++histogram[data[(pos + i + window_half) & mask]];
|
||||
++in_window;
|
||||
}
|
||||
histo = histogram[data[(pos + i) & mask]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
{
|
||||
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
||||
lit_cost += 0.029;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
28
external/duckdb/third_party/brotli/enc/literal_cost.h
vendored
Normal file
28
external/duckdb/third_party/brotli/enc/literal_cost.h
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Literal cost model to allow backward reference replacement to be efficient.
|
||||
*/
|
||||
|
||||
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
||||
#define BROTLI_ENC_LITERAL_COST_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
|
||||
ring-buffer (data, mask) will take entropy coded and writes these estimates
|
||||
to the cost[0..len) array. */
|
||||
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
|
||||
size_t pos, size_t len, size_t mask, const uint8_t* data, size_t* histogram,
|
||||
float* cost);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
|
||||
190
external/duckdb/third_party/brotli/enc/memory.cpp
vendored
Normal file
190
external/duckdb/third_party/brotli/enc/memory.cpp
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Memory manager for the encoder: allocation and release of buffers, with
   optional tracking of outstanding pointers so they can be wiped out. */
|
||||
|
||||
#include "memory.h"
|
||||
|
||||
#include <stdlib.h> /* exit, free, malloc */
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
#define MAX_NEW_ALLOCATED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 2)
|
||||
#define MAX_NEW_FREED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 2)
|
||||
#define MAX_PERM_ALLOCATED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 1)
|
||||
|
||||
#define PERM_ALLOCATED_OFFSET 0
|
||||
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
|
||||
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
|
||||
|
||||
/* Initializes "m". When alloc_func is NULL the default allocator pair is
   installed and "opaque" is ignored; otherwise the supplied callbacks and
   opaque pointer are stored. In tracking builds the bookkeeping counters are
   reset as well. */
void duckdb_brotli::BrotliInitMemoryManager(
    MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
    void* opaque) {
  if (alloc_func) {
    m->alloc_func = alloc_func;
    m->free_func = free_func;
    m->opaque = opaque;
  } else {
    m->alloc_func = duckdb_brotli::BrotliDefaultAllocFunc;
    m->free_func = duckdb_brotli::BrotliDefaultFreeFunc;
    m->opaque = 0;
  }
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
  m->is_oom = BROTLI_FALSE;
  m->perm_allocated = 0;
  m->new_allocated = 0;
  m->new_freed = 0;
#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
}
|
||||
|
||||
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
|
||||
/* Allocates "n" bytes through the manager's callback; terminates the process
   with EXIT_FAILURE if the allocation fails. */
void* duckdb_brotli::BrotliAllocate(MemoryManager* m, size_t n) {
  void* const block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    exit(EXIT_FAILURE);
  }
  return block;
}
|
||||
|
||||
/* Releases "p" through the manager's free callback. */
void duckdb_brotli::BrotliFree(MemoryManager* m, void* p) {
  void* const cookie = m->opaque;
  m->free_func(cookie, p);
}
|
||||
|
||||
/* Nothing is tracked in exit-on-OOM builds, so there is nothing to release. */
void duckdb_brotli::BrotliWipeOutMemoryManager(MemoryManager* m) {
  BROTLI_UNUSED(m);
}
|
||||
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
/* Sorts items[0..n) into ascending address order, in place, with a Shell
   sort over a fixed gap sequence. */
void SortPointers(void** items, const size_t n) {
  /* TODO(eustas): fine-tune for "many slots" case */
  static const size_t kGaps[] = {23, 10, 4, 1};
  const size_t num_gaps = sizeof(kGaps) / sizeof(kGaps[0]);
  size_t pass;
  for (pass = 0; pass < num_gaps; ++pass) {
    const size_t stride = kGaps[pass];
    size_t cur;
    /* Gapped insertion sort for this stride. */
    for (cur = stride; cur < n; ++cur) {
      void* const moving = items[cur];
      size_t hole = cur;
      while (hole >= stride && moving < items[hole - stride]) {
        items[hole] = items[hole - stride];
        hole -= stride;
      }
      items[hole] = moving;
    }
  }
}
|
||||
|
||||
/* Given two ascending pointer arrays, removes every pointer that occurs in
   both (pairwise), compacting the survivors to the front of each array.
   Returns the number of removed pairs; the caller shrinks both lengths by
   that amount. */
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
  size_t a_in = 0;
  size_t b_in = 0;
  size_t a_out = 0;
  size_t b_out = 0;
  size_t matched = 0;
  while (a_in < a_len && b_in < b_len) {
    void* const lhs = a[a_in];
    void* const rhs = b[b_in];
    if (lhs < rhs) {
      a[a_out++] = lhs;
      ++a_in;
    } else if (rhs < lhs) {
      b[b_out++] = rhs;
      ++b_in;
    } else {
      /* Same pointer on both sides: drop it from both. */
      ++a_in;
      ++b_in;
      ++matched;
    }
  }
  /* One side is exhausted; keep whatever is left on the other. */
  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
  return matched;
}
|
||||
|
||||
/* Reconciles the three pointer lists kept in m->pointers:
     1. sorts the "new allocated" and "new freed" lists and cancels pointers
        present in both;
     2. cancels any remaining freed pointers against the "perm allocated"
        list (afterwards no freed pointer may remain);
     3. appends the surviving new allocations to the perm list and re-sorts
        it, leaving the "new allocated" list empty. */
static void CollectGarbagePointers(MemoryManager* m) {
  void** const perm = m->pointers + PERM_ALLOCATED_OFFSET;
  void** const fresh = m->pointers + NEW_ALLOCATED_OFFSET;
  void** const freed = m->pointers + NEW_FREED_OFFSET;
  size_t cancelled;

  SortPointers(fresh, m->new_allocated);
  SortPointers(freed, m->new_freed);
  cancelled = Annihilate(fresh, m->new_allocated, freed, m->new_freed);
  m->new_allocated -= cancelled;
  m->new_freed -= cancelled;

  if (m->new_freed != 0) {
    cancelled = Annihilate(perm, m->perm_allocated, freed, m->new_freed);
    m->perm_allocated -= cancelled;
    m->new_freed -= cancelled;
    BROTLI_DCHECK(m->new_freed == 0);
  }

  if (m->new_allocated != 0) {
    BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
    memcpy(perm + m->perm_allocated, fresh,
           sizeof(void*) * m->new_allocated);
    m->perm_allocated += m->new_allocated;
    m->new_allocated = 0;
    SortPointers(perm, m->perm_allocated);
  }
}
|
||||
|
||||
/* Allocates "n" bytes through the manager's callback and records the result
   in the "new allocated" list. On failure sets m->is_oom and returns NULL. */
void* duckdb_brotli::BrotliAllocate(MemoryManager* m, size_t n) {
  void* const block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    m->is_oom = BROTLI_TRUE;
    return NULL;
  }
  /* Make room in the "new allocated" list before recording the pointer. */
  if (m->new_allocated == MAX_NEW_ALLOCATED) {
    CollectGarbagePointers(m);
  }
  m->pointers[NEW_ALLOCATED_OFFSET + m->new_allocated] = block;
  m->new_allocated += 1;
  return block;
}
|
||||
|
||||
/* Releases "p" (NULL is ignored) and records it in the "new freed" list so
   the bookkeeping can later cancel it against its allocation record. */
void duckdb_brotli::BrotliFree(MemoryManager* m, void* p) {
  if (p == NULL) {
    return;
  }
  m->free_func(m->opaque, p);
  /* Make room in the "new freed" list before recording the pointer. */
  if (m->new_freed == MAX_NEW_FREED) {
    CollectGarbagePointers(m);
  }
  m->pointers[NEW_FREED_OFFSET + m->new_freed] = p;
  m->new_freed += 1;
}
|
||||
|
||||
/* Frees every allocation that is still tracked by "m". */
void duckdb_brotli::BrotliWipeOutMemoryManager(MemoryManager* m) {
  void** live;
  size_t remaining;
  CollectGarbagePointers(m);
  /* Now all unfreed pointers are in perm-allocated list. */
  live = m->pointers + PERM_ALLOCATED_OFFSET;
  for (remaining = m->perm_allocated; remaining != 0; --remaining) {
    m->free_func(m->opaque, *live);
    ++live;
  }
  m->perm_allocated = 0;
}
|
||||
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
void* duckdb_brotli::BrotliBootstrapAlloc(size_t size,
|
||||
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
|
||||
if (!alloc_func && !free_func) {
|
||||
return malloc(size);
|
||||
} else if (alloc_func && free_func) {
|
||||
return alloc_func(opaque, size);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void duckdb_brotli::BrotliBootstrapFree(void* address, MemoryManager* m) {
|
||||
if (!address) {
|
||||
/* Should not happen! */
|
||||
return;
|
||||
} else {
|
||||
/* Copy values, as those would be freed. */
|
||||
brotli_free_func free_func = m->free_func;
|
||||
void* opaque = m->opaque;
|
||||
free_func(opaque, address);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
127
external/duckdb/third_party/brotli/enc/memory.h
vendored
Normal file
127
external/duckdb/third_party/brotli/enc/memory.h
vendored
Normal file
@@ -0,0 +1,127 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Macros for memory management. */
|
||||
|
||||
#ifndef BROTLI_ENC_MEMORY_H_
|
||||
#define BROTLI_ENC_MEMORY_H_
|
||||
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
|
||||
!defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#define BROTLI_ENCODER_EXIT_ON_OOM
|
||||
#endif
|
||||
|
||||
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS (48*1024)
|
||||
#else /* BROTLI_EXPERIMENTAL */
|
||||
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS 256
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS 0
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
typedef struct MemoryManager {
|
||||
brotli_alloc_func alloc_func;
|
||||
brotli_free_func free_func;
|
||||
void* opaque;
|
||||
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
BROTLI_BOOL is_oom;
|
||||
size_t perm_allocated;
|
||||
size_t new_allocated;
|
||||
size_t new_freed;
|
||||
void* pointers[BROTLI_ENCODER_MEMORY_MANAGER_SLOTS];
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
} MemoryManager;
|
||||
|
||||
BROTLI_INTERNAL void BrotliInitMemoryManager(
|
||||
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
|
||||
void* opaque);
|
||||
|
||||
BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
|
||||
#define BROTLI_ALLOC(M, T, N) \
|
||||
((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
|
||||
|
||||
BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
|
||||
#define BROTLI_FREE(M, P) { \
|
||||
BrotliFree((M), (P)); \
|
||||
P = NULL; \
|
||||
}
|
||||
|
||||
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#define BROTLI_IS_OOM(M) (!!0)
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
/*
|
||||
BROTLI_IS_NULL is a fake check, BROTLI_IS_OOM does the heavy lifting.
|
||||
The only purpose of it is to explain static analyzers the state of things.
|
||||
NB: use ONLY together with BROTLI_IS_OOM
|
||||
AND ONLY for allocations in the current scope.
|
||||
*/
|
||||
#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#define BROTLI_IS_NULL(A) ((A) == nullptr)
|
||||
#else /* defined(__clang_analyzer__) */
|
||||
#define BROTLI_IS_NULL(A) (!!0)
|
||||
#endif /* defined(__clang_analyzer__) */
|
||||
|
||||
BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
|
||||
|
||||
/*
|
||||
Dynamically grows array capacity to at least the requested size
|
||||
M: MemoryManager
|
||||
T: data type
|
||||
A: array
|
||||
C: capacity
|
||||
R: requested size
|
||||
*/
|
||||
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
|
||||
if (C < (R)) { \
|
||||
size_t _new_size = (C == 0) ? (R) : C; \
|
||||
T* new_array; \
|
||||
while (_new_size < (R)) _new_size *= 2; \
|
||||
new_array = BROTLI_ALLOC((M), T, _new_size); \
|
||||
if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
|
||||
memcpy(new_array, A, C * sizeof(T)); \
|
||||
BROTLI_FREE((M), A); \
|
||||
A = new_array; \
|
||||
C = _new_size; \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
Appends value and dynamically grows array capacity when needed
|
||||
M: MemoryManager
|
||||
T: data type
|
||||
A: array
|
||||
C: array capacity
|
||||
S: array size
|
||||
V: value to append
|
||||
*/
|
||||
#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
|
||||
(S)++; \
|
||||
BROTLI_ENSURE_CAPACITY(M, T, A, C, S); \
|
||||
A[(S) - 1] = (V); \
|
||||
}
|
||||
|
||||
/* "Bootstrap" allocations are not tracked by memory manager; should be used
|
||||
only to allocate MemoryManager itself (or structure containing it). */
|
||||
BROTLI_INTERNAL void* BrotliBootstrapAlloc(size_t size,
|
||||
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
|
||||
BROTLI_INTERNAL void BrotliBootstrapFree(void* address, MemoryManager* m);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_MEMORY_H_ */
|
||||
1225
external/duckdb/third_party/brotli/enc/metablock.cpp
vendored
Normal file
1225
external/duckdb/third_party/brotli/enc/metablock.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
102
external/duckdb/third_party/brotli/enc/metablock.h
vendored
Normal file
102
external/duckdb/third_party/brotli/enc/metablock.h
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Algorithms for distributing the literals and commands of a metablock between
|
||||
block types and contexts. */
|
||||
|
||||
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||
#define BROTLI_ENC_METABLOCK_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/context.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "block_splitter.h"
|
||||
#include "command.h"
|
||||
#include "histogram.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
typedef struct MetaBlockSplit {
|
||||
BlockSplit literal_split;
|
||||
BlockSplit command_split;
|
||||
BlockSplit distance_split;
|
||||
uint32_t* literal_context_map;
|
||||
size_t literal_context_map_size;
|
||||
uint32_t* distance_context_map;
|
||||
size_t distance_context_map_size;
|
||||
HistogramLiteral* literal_histograms;
|
||||
size_t literal_histograms_size;
|
||||
HistogramCommand* command_histograms;
|
||||
size_t command_histograms_size;
|
||||
HistogramDistance* distance_histograms;
|
||||
size_t distance_histograms_size;
|
||||
} MetaBlockSplit;
|
||||
|
||||
/* Puts "mb" into the empty state: the three block splits are initialized and
   every array pointer / size pair is cleared. */
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
  BrotliInitBlockSplit(&mb->literal_split);
  BrotliInitBlockSplit(&mb->command_split);
  BrotliInitBlockSplit(&mb->distance_split);

  /* No context maps yet. */
  mb->literal_context_map = NULL;
  mb->distance_context_map = NULL;
  mb->literal_context_map_size = 0;
  mb->distance_context_map_size = 0;

  /* No histograms yet. */
  mb->literal_histograms = NULL;
  mb->command_histograms = NULL;
  mb->distance_histograms = NULL;
  mb->literal_histograms_size = 0;
  mb->command_histograms_size = 0;
  mb->distance_histograms_size = 0;
}
|
||||
|
||||
/* Releases everything owned by |mb| through memory manager |m|: first the
   three block splits, then the context maps, then the histogram arrays. */
static BROTLI_INLINE void DestroyMetaBlockSplit(MemoryManager* m,
                                                MetaBlockSplit* mb) {
  /* Block splits. */
  BrotliDestroyBlockSplit(m, &mb->literal_split);
  BrotliDestroyBlockSplit(m, &mb->command_split);
  BrotliDestroyBlockSplit(m, &mb->distance_split);

  /* Context maps. */
  BROTLI_FREE(m, mb->literal_context_map);
  BROTLI_FREE(m, mb->distance_context_map);

  /* Histograms. */
  BROTLI_FREE(m, mb->literal_histograms);
  BROTLI_FREE(m, mb->command_histograms);
  BROTLI_FREE(m, mb->distance_histograms);
}
|
||||
|
||||
/* Uses the slow shortest-path block splitter and does context clustering.
|
||||
The distance parameters are dynamically selected based on the commands
|
||||
which get recomputed under the new distance parameters. The new distance
|
||||
parameters are stored into *params. */
|
||||
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
BrotliEncoderParams* params,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
/* Uses a fast greedy block splitter that tries to merge current block with the
|
||||
last or the second last block and uses a static context clustering which
|
||||
is the same for all block types. */
|
||||
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
|
||||
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
||||
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
|
||||
size_t num_contexts, const uint32_t* static_context_map,
|
||||
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
|
||||
|
||||
/* Declaration only; the definition is not part of this header.
   NOTE(review): judging by the name, this post-processes the histograms held
   by |mb| - confirm against the implementation. */
BROTLI_INTERNAL void BrotliOptimizeHistograms(
    uint32_t num_distance_codes, MetaBlockSplit* mb);
|
||||
|
||||
/* Declaration only; the definition is not part of this header.
   NOTE(review): presumably fills |params| from |npostfix|, |ndirect| and
   |large_window| - confirm against the implementation. */
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliDistanceParams* params,
                                              uint32_t npostfix,
                                              uint32_t ndirect,
                                              BROTLI_BOOL large_window);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_METABLOCK_H_ */
|
||||
50
external/duckdb/third_party/brotli/enc/prefix.h
vendored
Normal file
50
external/duckdb/third_party/brotli/enc/prefix.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions for encoding of integers into prefix codes the amount of extra
|
||||
bits, and the actual values of the extra bits. */
|
||||
|
||||
#ifndef BROTLI_ENC_PREFIX_H_
|
||||
#define BROTLI_ENC_PREFIX_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_constants.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "fast_log.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* Here distance_code is an intermediate code, i.e. one of the special codes or
|
||||
the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
|
||||
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
||||
size_t num_direct_codes,
|
||||
size_t postfix_bits,
|
||||
uint16_t* code,
|
||||
uint32_t* extra_bits) {
|
||||
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
|
||||
*code = (uint16_t)distance_code;
|
||||
*extra_bits = 0;
|
||||
return;
|
||||
} else {
|
||||
size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
|
||||
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
|
||||
size_t bucket = Log2FloorNonZero(dist) - 1;
|
||||
size_t postfix_mask = (1u << postfix_bits) - 1;
|
||||
size_t postfix = dist & postfix_mask;
|
||||
size_t prefix = (dist >> bucket) & 1;
|
||||
size_t offset = (2 + prefix) << bucket;
|
||||
size_t nbits = bucket - postfix_bits;
|
||||
*code = (uint16_t)((nbits << 10) |
|
||||
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
||||
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
||||
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_PREFIX_H_ */
|
||||
202
external/duckdb/third_party/brotli/enc/quality.h
vendored
Normal file
202
external/duckdb/third_party/brotli/enc/quality.h
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Constants and formulas that affect speed-ratio trade-offs and thus define
|
||||
quality levels. */
|
||||
|
||||
#ifndef BROTLI_ENC_QUALITY_H_
|
||||
#define BROTLI_ENC_QUALITY_H_
|
||||
|
||||
#include <brotli/encode.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "brotli_params.h"
|
||||
|
||||
/* Quality levels that have a dedicated name. */
#define FAST_ONE_PASS_COMPRESSION_QUALITY           0
#define FAST_TWO_PASS_COMPRESSION_QUALITY           1
#define ZOPFLIFICATION_QUALITY                      10
#define HQ_ZOPFLIFICATION_QUALITY                   11

/* Quality thresholds; each name states what is enabled (MIN_...) or still
   used (MAX_...) at that level. */
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES        2
#define MIN_QUALITY_FOR_BLOCK_SPLIT                 4
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS     4
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS         4
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH  5
#define MIN_QUALITY_FOR_CONTEXT_MODELING            5
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING         7
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING          10

/* Below MIN_QUALITY_FOR_BLOCK_SPLIT no block splitting is done, so at most
   this many literals and commands are buffered. */
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
|
||||
|
||||
/* Returns hash-table size for quality levels 0 and 1. */
|
||||
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
|
||||
return quality == FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 15 : 1 << 17;
|
||||
}
|
||||
|
||||
/* The maximum length for which the zopflification uses distinct distances. */
|
||||
#define MAX_ZOPFLI_LEN_QUALITY_10 150
|
||||
#define MAX_ZOPFLI_LEN_QUALITY_11 325
|
||||
|
||||
/* Do not thoroughly search when a long copy is found. */
|
||||
#define BROTLI_LONG_COPY_QUICK_STEP 16384
|
||||
|
||||
static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
|
||||
return params->quality <= 10 ?
|
||||
MAX_ZOPFLI_LEN_QUALITY_10 :
|
||||
MAX_ZOPFLI_LEN_QUALITY_11;
|
||||
}
|
||||
|
||||
/* Number of best candidates to evaluate to expand Zopfli chain. */
|
||||
static BROTLI_INLINE size_t MaxZopfliCandidates(
|
||||
const BrotliEncoderParams* params) {
|
||||
return params->quality <= 10 ? 1 : 5;
|
||||
}
|
||||
|
||||
/* Forces |params| into the supported ranges:
   - quality is clamped to [BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY];
   - large_window is switched off for qualities that use static entropy codes;
   - lgwin is clamped to [BROTLI_MIN_WINDOW_BITS, max], where max depends on
     whether large_window is (still) enabled. */
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
  int quality = params->quality;
  if (quality < BROTLI_MIN_QUALITY) quality = BROTLI_MIN_QUALITY;
  if (quality > BROTLI_MAX_QUALITY) quality = BROTLI_MAX_QUALITY;
  params->quality = quality;

  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    params->large_window = BROTLI_FALSE;
  }

  if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
    params->lgwin = BROTLI_MIN_WINDOW_BITS;
    return;
  }

  /* The upper bound is evaluated after large_window may have been reset. */
  const int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS
                                             : BROTLI_MAX_WINDOW_BITS;
  if (params->lgwin > max_lgwin) {
    params->lgwin = max_lgwin;
  }
}
|
||||
|
||||
/* Returns optimized lg_block value. */
|
||||
static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
|
||||
int lgblock = params->lgblock;
|
||||
if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
|
||||
params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
|
||||
lgblock = params->lgwin;
|
||||
} else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
|
||||
lgblock = 14;
|
||||
} else if (lgblock == 0) {
|
||||
lgblock = 16;
|
||||
if (params->quality >= 9 && params->lgwin > lgblock) {
|
||||
lgblock = BROTLI_MIN(int, 18, params->lgwin);
|
||||
}
|
||||
} else {
|
||||
lgblock = BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
|
||||
BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock));
|
||||
}
|
||||
return lgblock;
|
||||
}
|
||||
|
||||
/* Returns log2 of the size of main ring buffer area.
|
||||
Allocate at least lgwin + 1 bits for the ring buffer so that the newly
|
||||
added block fits there completely and we still get lgwin bits and at least
|
||||
read_block_size_bits + 1 bits because the copy tail length needs to be
|
||||
smaller than ring-buffer size. */
|
||||
static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
|
||||
return 1 + BROTLI_MAX(int, params->lgwin, params->lgblock);
|
||||
}
|
||||
|
||||
/* Returns 2^bits, where bits is the smaller of ComputeRbBits(params) and
   BROTLI_MAX_INPUT_BLOCK_BITS. */
static BROTLI_INLINE size_t MaxMetablockSize(
    const BrotliEncoderParams* params) {
  const int rb_bits = ComputeRbBits(params);
  const int bits = BROTLI_MIN(int, rb_bits, BROTLI_MAX_INPUT_BLOCK_BITS);
  return (size_t)1 << bits;
}
|
||||
|
||||
/* When searching for backward references and have not seen matches for a long
|
||||
time, we can skip some match lookups. Unsuccessful match lookups are very
|
||||
expensive and this kind of a heuristic speeds up compression quite a lot.
|
||||
At first 8 byte strides are taken and every second byte is put to hasher.
|
||||
After 4x more literals stride by 16 bytes, every put 4-th byte to hasher.
|
||||
Applied only to qualities 2 to 9. */
|
||||
static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
|
||||
const BrotliEncoderParams* params) {
|
||||
return params->quality < 9 ? 64 : 512;
|
||||
}
|
||||
|
||||
/* Quality to hasher mapping:
|
||||
|
||||
- q02: h02 (longest_match_quickly), b16, l5
|
||||
|
||||
- q03: h03 (longest_match_quickly), b17, l5
|
||||
|
||||
- q04: h04 (longest_match_quickly), b17, l5
|
||||
- q04: h54 (longest_match_quickly), b20, l7 | for large files
|
||||
|
||||
- q05: h05 (longest_match ), b14, l4
|
||||
- q05: h06 (longest_match64 ), b15, l5 | for large files
|
||||
- q05: h40 (forgetful_chain ), b15, l4 | for small window
|
||||
|
||||
- q06: h05 (longest_match ), b14, l4
|
||||
- q06: h06 (longest_match64 ), b15, l5 | for large files
|
||||
- q06: h40 (forgetful_chain ), b15, l4 | for small window
|
||||
|
||||
- q07: h05 (longest_match ), b15, l4
|
||||
- q07: h06 (longest_match64 ), b15, l5 | for large files
|
||||
- q07: h41 (forgetful_chain ), b15, l4 | for small window
|
||||
|
||||
- q08: h05 (longest_match ), b15, l4
|
||||
- q08: h06 (longest_match64 ), b15, l5 | for large files
|
||||
- q08: h41 (forgetful_chain ), b15, l4 | for small window
|
||||
|
||||
- q09: h05 (longest_match ), b15, l4
|
||||
- q09: h06 (longest_match64 ), b15, l5 | for large files
|
||||
- q09: h42 (forgetful_chain ), b15, l4 | for small window
|
||||
|
||||
- q10: t10 (to_binary_tree ), b17, l128
|
||||
|
||||
- q11: t10 (to_binary_tree ), b17, l128
|
||||
|
||||
Where "q" is quality, "h" is hasher type, "b" is bucket bits,
|
||||
"l" is source len. */
|
||||
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
||||
BrotliHasherParams* hparams) {
|
||||
if (params->quality > 9) {
|
||||
hparams->type = 10;
|
||||
} else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
|
||||
hparams->type = 54;
|
||||
} else if (params->quality < 5) {
|
||||
hparams->type = params->quality;
|
||||
} else if (params->lgwin <= 16) {
|
||||
hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
|
||||
} else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
|
||||
hparams->type = 6;
|
||||
hparams->block_bits = params->quality - 1;
|
||||
hparams->bucket_bits = 15;
|
||||
hparams->num_last_distances_to_check =
|
||||
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
||||
} else {
|
||||
/* TODO(eustas): often previous setting (H6) is faster and denser; consider
|
||||
adding an option to use it. */
|
||||
hparams->type = 5;
|
||||
hparams->block_bits = params->quality - 1;
|
||||
hparams->bucket_bits = params->quality < 7 ? 14 : 15;
|
||||
hparams->num_last_distances_to_check =
|
||||
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
||||
}
|
||||
|
||||
if (params->lgwin > 24) {
|
||||
/* Different hashers for large window brotli: not for qualities <= 2,
|
||||
these are too fast for large window. Not for qualities >= 10: their
|
||||
hasher already works well with large window. So the changes are:
|
||||
H3 --> H35: for quality 3.
|
||||
H54 --> H55: for quality 4 with size hint > 1MB
|
||||
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
|
||||
if (hparams->type == 3) {
|
||||
hparams->type = 35;
|
||||
}
|
||||
if (hparams->type == 54) {
|
||||
hparams->type = 55;
|
||||
}
|
||||
if (hparams->type == 6) {
|
||||
hparams->type = 65;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_QUALITY_H_ */
|
||||
24
external/duckdb/third_party/brotli/enc/resolve-multi-includes.py
vendored
Executable file
24
external/duckdb/third_party/brotli/enc/resolve-multi-includes.py
vendored
Executable file
@@ -0,0 +1,24 @@
|
||||
# brotli uses an unusual C templating mechanism based on *_inc.h files.
# This does not play well with things like amalgamation, so this script
# inlines the various *_inc.h headers into the .cpp/.h files found in the
# current working directory. Files are rewritten in place.

import os
import re

# Captures the path of a quoted include: #include "path"
INCLUDE_PATTERN = re.compile(r'#include\s+"(.+)".*')

for filename in os.listdir('.'):
    if not filename.endswith(('.cpp', '.h')):
        continue

    with open(filename, 'r') as source:
        file_lines = source.readlines()
    if not any('_inc.h' in line for line in file_lines):
        continue

    # Build the whole result in memory before touching the file on disk, so
    # that a missing or unreadable include cannot leave a truncated source
    # file behind.
    resolved = []
    for line in file_lines:
        if '#include' in line and '_inc.h' in line:
            match = INCLUDE_PATTERN.search(line)
            if match is not None:
                with open(match.group(1), 'r') as included:
                    resolved.append(included.read())
                continue
            # Not a quoted include (e.g. angle brackets): keep the line as is.
        resolved.append(line)

    with open(filename, 'w') as out:
        out.write(''.join(resolved))
|
||||
164
external/duckdb/third_party/brotli/enc/ringbuffer.h
vendored
Normal file
164
external/duckdb/third_party/brotli/enc/ringbuffer.h
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Sliding window over the input data. */
|
||||
|
||||
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
||||
#define BROTLI_ENC_RINGBUFFER_H_
|
||||
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "memory.h"
|
||||
#include "quality.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* A RingBuffer(window_bits, tail_bits) holds `1 << window_bits' bytes of data
   in a circular manner; a byte is written to
     `position() % (1 << window_bits)'.
   For convenience the array also contains a second copy of the first
   `1 << tail_bits' bytes:
     buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
   and a second copy of the last two bytes:
     buffer_[-1] == buffer_[(1 << window_bits) - 1] and
     buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
typedef struct RingBuffer {
  /* Geometry; written once by RingBufferSetup().
     Size of the ring-buffer is (1 << window_bits) + tail_size_. */
  const uint32_t size_;
  const uint32_t mask_;
  const uint32_t tail_size_;
  const uint32_t total_size_;

  /* Length the buffer is currently allocated for (see
     RingBufferInitBuffer()). */
  uint32_t cur_size_;
  /* Position to write in the ring buffer. */
  uint32_t pos_;
  /* The actual allocation: the copy of the last two bytes, then the data,
     then the copy of the beginning as a tail. */
  uint8_t* data_;
  /* The start of the ring-buffer (data_ + 2). */
  uint8_t* buffer_;
} RingBuffer;
|
||||
|
||||
/* Resets the mutable part of |rb|: nothing allocated, write position 0.
   The const geometry fields are left untouched. */
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
  rb->cur_size_ = rb->pos_ = 0;
  rb->data_ = rb->buffer_ = 0;
}
|
||||
|
||||
/* Derives the geometry of |rb| from |params|: the window is
   ComputeRbBits(params) bits wide and the tail is params->lgblock bits wide.
   The geometry fields are declared const, hence the writes go through a
   cast. */
static BROTLI_INLINE void RingBufferSetup(
    const BrotliEncoderParams* params, RingBuffer* rb) {
  const int window_bits = ComputeRbBits(params);
  const int tail_bits = params->lgblock;
  const uint32_t window_size = 1u << window_bits;
  const uint32_t tail_size = 1u << tail_bits;

  *(uint32_t*)&rb->size_ = window_size;
  *(uint32_t*)&rb->mask_ = window_size - 1;
  *(uint32_t*)&rb->tail_size_ = tail_size;
  *(uint32_t*)&rb->total_size_ = window_size + tail_size;
}
|
||||
|
||||
/* Returns the allocation behind |rb| (data_) to memory manager |m|. */
static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
  BROTLI_FREE(m, rb->data_);
}
|
||||
|
||||
/* Allocates or re-allocates data_ to the given length + plus some slack
|
||||
region before and after. Fills the slack regions with zeros. */
|
||||
static BROTLI_INLINE void RingBufferInitBuffer(
|
||||
MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
|
||||
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
||||
uint8_t* new_data = BROTLI_ALLOC(
|
||||
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;
|
||||
if (rb->data_) {
|
||||
memcpy(new_data, rb->data_,
|
||||
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
|
||||
BROTLI_FREE(m, rb->data_);
|
||||
}
|
||||
rb->data_ = new_data;
|
||||
rb->cur_size_ = buflen;
|
||||
rb->buffer_ = rb->data_ + 2;
|
||||
rb->buffer_[-2] = rb->buffer_[-1] = 0;
|
||||
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
||||
rb->buffer_[rb->cur_size_ + i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mirrors a write into the tail area. Only acts when the masked write
   position lies within the first tail_size_ bytes; in that case up to
   tail_size_ - masked position bytes of |bytes| are copied to the matching
   offset behind the main window. */
static BROTLI_INLINE void RingBufferWriteTail(
    const uint8_t* bytes, size_t n, RingBuffer* rb) {
  const size_t start = rb->pos_ & rb->mask_;
  if (BROTLI_PREDICT_TRUE(start >= rb->tail_size_)) return;

  /* Just fill the tail buffer with the beginning data. */
  const size_t room = rb->tail_size_ - start;
  memcpy(&rb->buffer_[rb->size_ + start], bytes,
         BROTLI_MIN(size_t, n, room));
}
|
||||
|
||||
/* Push bytes into the ring buffer. */
|
||||
static BROTLI_INLINE void RingBufferWrite(
|
||||
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
|
||||
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
||||
/* Special case for the first write: to process the first block, we don't
|
||||
need to allocate the whole ring-buffer and we don't need the tail
|
||||
either. However, we do this memory usage optimization only if the
|
||||
first write is less than the tail size, which is also the input block
|
||||
size, otherwise it is likely that other blocks will follow and we
|
||||
will need to reallocate to the full size anyway. */
|
||||
rb->pos_ = (uint32_t)n;
|
||||
RingBufferInitBuffer(m, rb->pos_, rb);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
memcpy(rb->buffer_, bytes, n);
|
||||
return;
|
||||
}
|
||||
if (rb->cur_size_ < rb->total_size_) {
|
||||
/* Lazily allocate the full buffer. */
|
||||
RingBufferInitBuffer(m, rb->total_size_, rb);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Initialize the last two bytes to zero, so that we don't have to worry
|
||||
later when we copy the last two bytes to the first two positions. */
|
||||
rb->buffer_[rb->size_ - 2] = 0;
|
||||
rb->buffer_[rb->size_ - 1] = 0;
|
||||
/* Initialize tail; might be touched by "best_len++" optimization when
|
||||
ring buffer is "full". */
|
||||
rb->buffer_[rb->size_] = 241;
|
||||
}
|
||||
{
|
||||
const size_t masked_pos = rb->pos_ & rb->mask_;
|
||||
/* The length of the writes is limited so that we do not need to worry
|
||||
about a write */
|
||||
RingBufferWriteTail(bytes, n, rb);
|
||||
if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
|
||||
/* A single write fits. */
|
||||
memcpy(&rb->buffer_[masked_pos], bytes, n);
|
||||
} else {
|
||||
/* Split into two writes.
|
||||
Copy into the end of the buffer, including the tail buffer. */
|
||||
memcpy(&rb->buffer_[masked_pos], bytes,
|
||||
BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
|
||||
/* Copy into the beginning of the buffer */
|
||||
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
|
||||
n - (rb->size_ - masked_pos));
|
||||
}
|
||||
}
|
||||
{
|
||||
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
|
||||
uint32_t rb_pos_mask = (1u << 31) - 1;
|
||||
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
||||
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
||||
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
|
||||
if (not_first_lap) {
|
||||
/* Wrap, but preserve not-a-first-lap feature. */
|
||||
rb->pos_ |= 1u << 31;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
|
||||
106
external/duckdb/third_party/brotli/enc/state.h
vendored
Normal file
106
external/duckdb/third_party/brotli/enc/state.h
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
/* Copyright 2022 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Encoder state. */
|
||||
|
||||
#ifndef BROTLI_ENC_STATE_H_
|
||||
#define BROTLI_ENC_STATE_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "command.h"
|
||||
#include "compress_fragment.h"
|
||||
#include "compress_fragment_two_pass.h"
|
||||
#include "brotli_hash.h"
|
||||
#include "memory.h"
|
||||
#include "brotli_params.h"
|
||||
#include "ringbuffer.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* States of the encoder output stream. */
typedef enum BrotliEncoderStreamState {
  BROTLI_STREAM_PROCESSING = 0,      /* Default state. */
  BROTLI_STREAM_FLUSH_REQUESTED = 1, /* Intermediate state; after the next
                                        block is emitted, byte-padding should
                                        be performed before getting back to
                                        the default state. */
  BROTLI_STREAM_FINISHED = 2,        /* Last metablock was produced; no more
                                        input is acceptable. */
  BROTLI_STREAM_METADATA_HEAD = 3,   /* Flushing compressed block and writing
                                        meta-data block header. */
  BROTLI_STREAM_METADATA_BODY = 4    /* Writing metadata block body. */
} BrotliEncoderStreamState;
|
||||
|
||||
/* States of the "flint" handling, listed in ascending numeric order. The
   value is stored in the int8_t field flint_ of the encoder state. */
typedef enum BrotliEncoderFlintState {
  BROTLI_FLINT_DONE = -2,
  BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
  BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
  BROTLI_FLINT_NEEDS_1_BYTE = 1,
  BROTLI_FLINT_NEEDS_2_BYTES = 2
} BrotliEncoderFlintState;
|
||||
|
||||
typedef struct BrotliEncoderStateStruct {
|
||||
BrotliEncoderParams params;
|
||||
|
||||
MemoryManager memory_manager_;
|
||||
|
||||
uint64_t input_pos_;
|
||||
RingBuffer ringbuffer_;
|
||||
size_t cmd_alloc_size_;
|
||||
Command *commands_;
|
||||
size_t num_commands_;
|
||||
size_t num_literals_;
|
||||
size_t last_insert_len_;
|
||||
uint64_t last_flush_pos_;
|
||||
uint64_t last_processed_pos_;
|
||||
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
|
||||
int saved_dist_cache_[4];
|
||||
uint16_t last_bytes_;
|
||||
uint8_t last_bytes_bits_;
|
||||
/* "Flint" is a tiny uncompressed block emitted before the continuation
|
||||
block to unwire literal context from previous data. Despite being int8_t,
|
||||
field is actually BrotliEncoderFlintState enum. */
|
||||
int8_t flint_;
|
||||
uint8_t prev_byte_;
|
||||
uint8_t prev_byte2_;
|
||||
size_t storage_size_;
|
||||
uint8_t *storage_;
|
||||
|
||||
Hasher hasher_;
|
||||
|
||||
/* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
|
||||
int small_table_[1 << 10]; /* 4KiB */
|
||||
int *large_table_; /* Allocated only when needed */
|
||||
size_t large_table_size_;
|
||||
|
||||
BrotliOnePassArena *one_pass_arena_;
|
||||
BrotliTwoPassArena *two_pass_arena_;
|
||||
|
||||
/* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
|
||||
uint32_t *command_buf_;
|
||||
uint8_t *literal_buf_;
|
||||
|
||||
uint64_t total_in_;
|
||||
uint8_t *next_out_;
|
||||
size_t available_out_;
|
||||
uint64_t total_out_;
|
||||
/* Temporary buffer for padding flush bits or metadata block header / body. */
|
||||
union {
|
||||
uint64_t u64[2];
|
||||
uint8_t u8[16];
|
||||
} tiny_buf_;
|
||||
uint32_t remaining_metadata_bytes_;
|
||||
BrotliEncoderStreamState stream_state_;
|
||||
|
||||
BROTLI_BOOL is_last_block_emitted_;
|
||||
BROTLI_BOOL is_initialized_;
|
||||
} BrotliEncoderStateStruct;
|
||||
|
||||
typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal;
|
||||
#define BrotliEncoderState BrotliEncoderStateInternal
|
||||
}
|
||||
#endif // BROTLI_ENC_STATE_H_
|
||||
538
external/duckdb/third_party/brotli/enc/static_dict.cpp
vendored
Normal file
538
external/duckdb/third_party/brotli/enc/static_dict.cpp
vendored
Normal file
@@ -0,0 +1,538 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "static_dict.h"
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "../common/transform.h"
|
||||
#include "encoder_dict.h"
|
||||
#include "find_match_length.h"
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Hashes the four bytes at |data| (loaded as a little-endian 32-bit value)
   into a kDictNumBits-wide value. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
  const uint32_t product = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
  /* The multiplication mixes most into the higher bits, so the result is
     taken from there. */
  return product >> (32 - kDictNumBits);
}
|
||||
|
||||
/* Packs |distance| and |len_code| as (distance << 5) + len_code and keeps
   the smaller of that value and the current matches[len]. */
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
                                   uint32_t* matches) {
  const uint32_t candidate = (uint32_t)((distance << 5) + len_code);
  if (candidate < matches[len]) {
    matches[len] = candidate;
  }
}
|
||||
|
||||
/* Returns how many leading bytes of |data| agree with dictionary word |id|
   of length |len|; at most min(len, maxlen) bytes are compared. */
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
                                            const uint8_t* data,
                                            size_t id,
                                            size_t len,
                                            size_t maxlen) {
  const size_t word_start = dictionary->offsets_by_length[len] + len * id;
  const uint8_t* word = &dictionary->data[word_start];
  const size_t limit = BROTLI_MIN(size_t, len, maxlen);
  return FindMatchLengthWithLimit(word, data, limit);
}
|
||||
|
||||
/* Tells whether |data| starts with dictionary word |w| after applying the
   transform selected by w.transform: 0 compares against the base word, 10
   against the word with its first letter uppercased, any other value against
   the word with all letters uppercased. A word longer than |max_length|
   never matches. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
    DictWord w, const uint8_t* data, size_t max_length) {
  if (w.len > max_length) {
    return BROTLI_FALSE;
  }

  const size_t offset = dictionary->offsets_by_length[w.len] +
      (size_t)w.len * (size_t)w.idx;
  const uint8_t* dict = &dictionary->data[offset];

  if (w.transform == 0) {
    /* Match against base dictionary word. */
    const size_t matched = FindMatchLengthWithLimit(dict, data, w.len);
    return TO_BROTLI_BOOL(matched == w.len);
  }

  if (w.transform == 10) {
    /* Match against uppercase first transform.
       Note that there are only ASCII uppercase words in the lookup table. */
    const int first_is_lower = dict[0] >= 'a' && dict[0] <= 'z';
    if (!first_is_lower || (dict[0] ^ 32) != data[0]) {
      return BROTLI_FALSE;
    }
    return TO_BROTLI_BOOL(
        FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
        w.len - 1u);
  }

  /* Match against uppercase all transform.
     Note that there are only ASCII uppercase words in the lookup table. */
  for (size_t i = 0; i < w.len; ++i) {
    const int is_lower = dict[i] >= 'a' && dict[i] <= 'z';
    const int expected = is_lower ? (dict[i] ^ 32) : dict[i];
    if (expected != data[i]) return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
|
||||
|
||||
/* Finds matches for a single static dictionary */
|
||||
static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
|
||||
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
||||
size_t min_length, size_t max_length, uint32_t* matches) {
|
||||
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
||||
#if defined(BROTLI_EXPERIMENTAL)
|
||||
if (dictionary->has_words_heavy) {
|
||||
const BrotliTrieNode* node = &dictionary->trie.root;
|
||||
size_t l = 0;
|
||||
while (node && l < max_length) {
|
||||
uint8_t c;
|
||||
if (l >= min_length && node->len_) {
|
||||
AddMatch(node->idx_, l, node->len_, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
c = data[l++];
|
||||
node = BrotliTrieSub(&dictionary->trie, node, c);
|
||||
}
|
||||
return has_found_match;
|
||||
}
|
||||
#endif /* BROTLI_EXPERIMENTAL */
|
||||
{
|
||||
size_t offset = dictionary->buckets[Hash(data)];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = dictionary->dict_words[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const size_t matchlen =
|
||||
DictMatchLength(dictionary->words, data, id, l, max_length);
|
||||
const uint8_t* s;
|
||||
size_t minlen;
|
||||
size_t maxlen;
|
||||
size_t len;
|
||||
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
|
||||
if (matchlen == l) {
|
||||
AddMatch(id, l, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
|
||||
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
|
||||
if (matchlen >= l - 1) {
|
||||
AddMatch(id + 12 * n, l - 1, l, matches);
|
||||
if (l + 2 < max_length &&
|
||||
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
||||
data[l + 2] == ' ') {
|
||||
AddMatch(id + 49 * n, l + 3, l, matches);
|
||||
}
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
|
||||
minlen = min_length;
|
||||
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
||||
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
||||
for (len = minlen; len <= maxlen; ++len) {
|
||||
size_t cut = l - len;
|
||||
size_t transform_id = (cut << 2) +
|
||||
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
|
||||
AddMatch(id + transform_id * n, len, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
if (matchlen < l || l + 6 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
s = &data[l];
|
||||
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + n, l + 1, l, matches);
|
||||
if (s[1] == 'a') {
|
||||
if (s[2] == ' ') {
|
||||
AddMatch(id + 28 * n, l + 3, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 't') {
|
||||
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == 'd' && s[4] == ' ') {
|
||||
AddMatch(id + 10 * n, l + 5, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'b') {
|
||||
if (s[2] == 'y' && s[3] == ' ') {
|
||||
AddMatch(id + 38 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'i') {
|
||||
if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'f') {
|
||||
if (s[2] == 'o') {
|
||||
if (s[3] == 'r' && s[4] == ' ') {
|
||||
AddMatch(id + 25 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[2] == 'r') {
|
||||
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
||||
AddMatch(id + 37 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'o') {
|
||||
if (s[2] == 'f') {
|
||||
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'n') {
|
||||
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
||||
AddMatch(id + 80 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 't') {
|
||||
if (s[2] == 'h') {
|
||||
if (s[3] == 'e') {
|
||||
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
||||
} else if (s[3] == 'a') {
|
||||
if (s[4] == 't' && s[5] == ' ') {
|
||||
AddMatch(id + 29 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[2] == 'o') {
|
||||
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'w') {
|
||||
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
||||
AddMatch(id + 35 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + 19 * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + 21 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 20 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 31 * n, l + 2, l, matches);
|
||||
if (s[2] == 'T' && s[3] == 'h') {
|
||||
if (s[4] == 'e') {
|
||||
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
||||
} else if (s[4] == 'i') {
|
||||
if (s[5] == 's' && s[6] == ' ') {
|
||||
AddMatch(id + 75 * n, l + 7, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + 76 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 14 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\n') {
|
||||
AddMatch(id + 22 * n, l + 1, l, matches);
|
||||
if (s[1] == '\t') {
|
||||
AddMatch(id + 50 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ']') {
|
||||
AddMatch(id + 24 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + 36 * n, l + 1, l, matches);
|
||||
} else if (s[0] == ':') {
|
||||
AddMatch(id + 51 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + 57 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 70 * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 86 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'a') {
|
||||
if (s[1] == 'l' && s[2] == ' ') {
|
||||
AddMatch(id + 84 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'e') {
|
||||
if (s[1] == 'd') {
|
||||
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 'r') {
|
||||
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 's') {
|
||||
if (s[2] == 't' && s[3] == ' ') {
|
||||
AddMatch(id + 95 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'f') {
|
||||
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
||||
AddMatch(id + 90 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'i') {
|
||||
if (s[1] == 'v') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 92 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'z') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 100 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'l') {
|
||||
if (s[1] == 'e') {
|
||||
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
||||
AddMatch(id + 93 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'y') {
|
||||
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'o') {
|
||||
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
||||
AddMatch(id + 106 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
||||
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
||||
transform. */
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary->words, w, data, max_length)) {
|
||||
continue;
|
||||
}
|
||||
/* Transform "" + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 1 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms "" + kUppercase{First,All} + <suffix> */
|
||||
s = &data[l];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Transforms with prefixes " " and "." */
|
||||
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
||||
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
||||
size_t offset = dictionary->buckets[Hash(&data[1])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = dictionary->dict_words[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
|
||||
"." + BROTLI_TRANSFORM_IDENTITY + "" */
|
||||
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
|
||||
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
|
||||
*/
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
||||
} else if (is_space) {
|
||||
if (s[0] == ',') {
|
||||
AddMatch(id + 103 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 33 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 71 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 52 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 81 * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 98 * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (is_space) {
|
||||
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
|
||||
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
|
||||
transform. */
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + <suffix> */
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
|
||||
} else if (s[0] == ',') {
|
||||
if (!is_all_caps) {
|
||||
AddMatch(id + 109 * n, l + 2, l, matches);
|
||||
}
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 6) {
|
||||
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
|
||||
if ((data[1] == ' ' &&
|
||||
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
||||
(data[0] == 0xC2 && data[1] == 0xA0)) {
|
||||
size_t offset = dictionary->buckets[Hash(&data[2])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = dictionary->dict_words[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
|
||||
if (data[0] == 0xC2) {
|
||||
AddMatch(id + 102 * n, l + 2, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
||||
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
||||
AddMatch(id + t * n, l + 3, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 9) {
|
||||
/* Transforms with prefixes " the " and ".com/" */
|
||||
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
||||
data[3] == 'e' && data[4] == ' ') ||
|
||||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
||||
data[3] == 'm' && data[4] == '/')) {
|
||||
size_t offset = dictionary->buckets[Hash(&data[5])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = dictionary->dict_words[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
|
||||
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 5 < max_length) {
|
||||
const uint8_t* s = &data[l + 5];
|
||||
if (data[0] == ' ') {
|
||||
if (l + 8 < max_length &&
|
||||
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
||||
AddMatch(id + 62 * n, l + 9, l, matches);
|
||||
if (l + 12 < max_length &&
|
||||
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
||||
AddMatch(id + 73 * n, l + 13, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return has_found_match;
|
||||
}
|
||||
|
||||
/* Finds matches for one or more dictionaries, if multiple are present
|
||||
in the contextual dictionary */
|
||||
BROTLI_BOOL duckdb_brotli::BrotliFindAllStaticDictionaryMatches(
|
||||
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
|
||||
size_t min_length, size_t max_length, uint32_t* matches) {
|
||||
BROTLI_BOOL has_found_match =
|
||||
BrotliFindAllStaticDictionaryMatchesFor(
|
||||
dictionary, data, min_length, max_length, matches);
|
||||
|
||||
if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) {
|
||||
uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
|
||||
int l;
|
||||
const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0];
|
||||
if (dictionary2 == dictionary) {
|
||||
dictionary2 = dictionary->parent->dict[1];
|
||||
}
|
||||
|
||||
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
|
||||
matches2[l] = kInvalidMatch;
|
||||
}
|
||||
|
||||
has_found_match |= BrotliFindAllStaticDictionaryMatchesFor(
|
||||
dictionary2, data, min_length, max_length, matches2);
|
||||
|
||||
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
|
||||
if (matches2[l] != kInvalidMatch) {
|
||||
uint32_t dist = (uint32_t)(matches2[l] >> 5);
|
||||
uint32_t len_code = matches2[l] & 31;
|
||||
uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words->
|
||||
size_bits_by_length[len_code]) & ~1u) *
|
||||
(uint32_t)dictionary->num_transforms;
|
||||
/* TODO(lode): check for dist overflow */
|
||||
dist += skipdist;
|
||||
AddMatch(dist, (size_t)l, len_code, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
return has_found_match;
|
||||
}
|
||||
|
||||
37
external/duckdb/third_party/brotli/enc/static_dict.h
vendored
Normal file
37
external/duckdb/third_party/brotli/enc/static_dict.h
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Class to model the static dictionary. */
|
||||
|
||||
#ifndef BROTLI_ENC_STATIC_DICT_H_
|
||||
#define BROTLI_ENC_STATIC_DICT_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "../common/brotli_platform.h"
|
||||
#include "encoder_dict.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
|
||||
static const uint32_t kInvalidMatch = 0xFFFFFFF;
|
||||
|
||||
/* Matches data against static dictionary words, and for each length l,
|
||||
for which a match is found, updates matches[l] to be the minimum possible
|
||||
(distance << 5) + len_code.
|
||||
Returns 1 if matches have been found, otherwise 0.
|
||||
Prerequisites:
|
||||
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
|
||||
all elements are initialized to kInvalidMatch */
|
||||
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
const BrotliEncoderDictionary* dictionary,
|
||||
const uint8_t* data, size_t min_length, size_t max_length,
|
||||
uint32_t* matches);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
|
||||
5862
external/duckdb/third_party/brotli/enc/static_dict_lut.h
vendored
Normal file
5862
external/duckdb/third_party/brotli/enc/static_dict_lut.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
81
external/duckdb/third_party/brotli/enc/utf8_util.cpp
vendored
Normal file
81
external/duckdb/third_party/brotli/enc/utf8_util.cpp
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Heuristics for deciding about the UTF8-ness of strings. */
|
||||
|
||||
#include "utf8_util.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
using namespace duckdb_brotli;
|
||||
|
||||
/* Decodes one symbol from the start of |input|, of which |size| bytes may be
   read (|size| must be at least 1: input[0] is read unconditionally).

   On a well-formed, shortest-form 1..4 byte UTF-8 sequence, stores the code
   point in |*symbol| and returns the sequence length. The NUL byte,
   overlong encodings, truncated sequences and any other byte are treated as
   "not UTF-8": |*symbol| is set to 0x110000 | input[0] (a value above the
   Unicode code space) and 1 is returned, so the caller always advances. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  /* ASCII */
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  /* 2-byte UTF8 */
  if (size > 1u &&
      (input[0] & 0xE0) == 0xC0 &&
      (input[1] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x1F) << 6) |
               (input[1] & 0x3F));
    /* Reject overlong encodings of code points that fit in one byte. */
    if (*symbol > 0x7F) {
      return 2;
    }
  }
  /* 3-byte UTF8 */
  if (size > 2u &&
      (input[0] & 0xF0) == 0xE0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x0F) << 12) |
               ((input[1] & 0x3F) << 6) |
               (input[2] & 0x3F));
    /* Reject overlong encodings of code points that fit in two bytes. */
    if (*symbol > 0x7FF) {
      return 3;
    }
  }
  /* 4-byte UTF8 */
  if (size > 3u &&
      (input[0] & 0xF8) == 0xF0 &&
      (input[1] & 0xC0) == 0x80 &&
      (input[2] & 0xC0) == 0x80 &&
      (input[3] & 0xC0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3F) << 12) |
               ((input[2] & 0x3F) << 6) |
               (input[3] & 0x3F));
    /* Reject overlong encodings and values beyond U+10FFFF. */
    if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
      return 4;
    }
  }
  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | input[0];
  return 1;
}
|
||||
|
||||
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
|
||||
BROTLI_BOOL duckdb_brotli::BrotliIsMostlyUTF8(
|
||||
const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t i = 0;
|
||||
while (i < length) {
|
||||
int symbol;
|
||||
size_t bytes_read =
|
||||
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
|
||||
}
|
||||
|
||||
|
||||
29
external/duckdb/third_party/brotli/enc/utf8_util.h
vendored
Normal file
29
external/duckdb/third_party/brotli/enc/utf8_util.h
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Heuristics for deciding about the UTF8-ness of strings. */
|
||||
|
||||
#ifndef BROTLI_ENC_UTF8_UTIL_H_
|
||||
#define BROTLI_ENC_UTF8_UTIL_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
|
||||
/* Returns 1 if at least min_fraction of the bytes between pos and
|
||||
pos + length in the (data, mask) ring-buffer is UTF8-encoded, otherwise
|
||||
returns 0. */
|
||||
BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
|
||||
const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction);
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
|
||||
84
external/duckdb/third_party/brotli/enc/write_bits.h
vendored
Normal file
84
external/duckdb/third_party/brotli/enc/write_bits.h
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Write bits into a byte array. */
|
||||
|
||||
#ifndef BROTLI_ENC_WRITE_BITS_H_
|
||||
#define BROTLI_ENC_WRITE_BITS_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#include "../common/brotli_platform.h"
|
||||
|
||||
namespace duckdb_brotli {
|
||||
|
||||
/* This function writes bits into bytes in increasing addresses, and within
|
||||
a byte least-significant-bit first.
|
||||
|
||||
The function can write up to 56 bits in one go with WriteBits
|
||||
Example: let's assume that 3 bits (Rs below) have been written already:
|
||||
|
||||
BYTE-0 BYTE+1 BYTE+2
|
||||
|
||||
0000 0RRR 0000 0000 0000 0000
|
||||
|
||||
Now, we could write 5 or less bits in MSB by just shifting by 3
|
||||
and OR'ing to BYTE-0.
|
||||
|
||||
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
||||
and locate the rest in BYTE+1, BYTE+2, etc. */
|
||||
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
|
||||
uint64_t bits,
|
||||
size_t* BROTLI_RESTRICT pos,
|
||||
uint8_t* BROTLI_RESTRICT array) {
|
||||
BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
|
||||
(uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
|
||||
(int)*pos));
|
||||
BROTLI_DCHECK((bits >> n_bits) == 0);
|
||||
BROTLI_DCHECK(n_bits <= 56);
|
||||
#if defined(BROTLI_LITTLE_ENDIAN)
|
||||
/* This branch of the code can write up to 56 bits at a time,
|
||||
7 bits are lost by being perhaps already in *p and at least
|
||||
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
|
||||
bits are in *p and we write 57 bits, then the next write will
|
||||
access a byte that was never initialized). */
|
||||
{
|
||||
uint8_t* p = &array[*pos >> 3];
|
||||
uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
|
||||
v |= bits << (*pos & 7);
|
||||
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
|
||||
*pos += n_bits;
|
||||
}
|
||||
#else
|
||||
/* implicit & 0xFF is assumed for uint8_t arithmetics */
|
||||
{
|
||||
uint8_t* array_pos = &array[*pos >> 3];
|
||||
const size_t bits_reserved_in_first_byte = (*pos & 7);
|
||||
size_t bits_left_to_write;
|
||||
bits <<= bits_reserved_in_first_byte;
|
||||
*array_pos++ |= (uint8_t)bits;
|
||||
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
|
||||
bits_left_to_write >= 9;
|
||||
bits_left_to_write -= 8) {
|
||||
bits >>= 8;
|
||||
*array_pos++ = (uint8_t)bits;
|
||||
}
|
||||
*array_pos = 0;
|
||||
*pos += n_bits;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Zeroes the byte of |array| that holds bit position |pos|, so that bits can
   subsequently be OR'ed into it. |pos| must be byte-aligned (a multiple
   of 8). */
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
    size_t pos, uint8_t* array) {
  BROTLI_LOG(("WriteBitsPrepareStorage            %10d\n", (int)pos));
  BROTLI_DCHECK((pos & 7) == 0);
  array[pos >> 3] = 0;
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
|
||||
Reference in New Issue
Block a user