should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "command.h"
#include "brotli_hash.h"
#include "quality.h"
namespace duckdb_brotli {
/* "commands" points to the next output command to write to, "*num_commands" is
initially the total amount of commands output by previous
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
}
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */

View File

@@ -0,0 +1,935 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
#include "backward_references_hq.h"
#include <string.h> /* memcpy, memset */
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "command.h"
#include "compound_dictionary.h"
#include "encoder_dict.h"
#include "fast_log.h"
#include "find_match_length.h"
#include "literal_cost.h"
#include "memory.h"
#include "brotli_params.h"
#include "prefix.h"
#include "quality.h"
using namespace duckdb_brotli;
/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
void duckdb_brotli::BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
ZopfliNode stub;
size_t i;
stub.length = 1;
stub.distance = 0;
stub.dcode_insert_length = 0;
stub.u.cost = kInfinity;
for (i = 0; i < length; ++i) array[i] = stub;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
return self->length & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
const uint32_t modifier = self->length >> 25;
return ZopfliNodeCopyLength(self) + 9u - modifier;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
return self->distance;
}
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
const uint32_t short_code = self->dcode_insert_length >> 27;
return short_code == 0 ?
ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 :
short_code - 1;
}
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
}
/* Temporary data for ZopfliCostModelSetFromCommands. */
typedef struct ZopfliCostModelArena {
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
} ZopfliCostModelArena;
/* Histogram based cost model for zopflification. */
typedef struct ZopfliCostModel {
/* The insert and copy length symbols. */
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
float* cost_dist_;
uint32_t distance_histogram_size;
/* Cumulative costs of literals per position in the stream. */
float* literal_costs_;
float min_cost_cmd_;
size_t num_bytes_;
/* Temporary data. */
union {
size_t literal_histograms[3 * 256];
ZopfliCostModelArena arena;
};
} ZopfliCostModel;
static void InitZopfliCostModel(
MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
size_t num_bytes) {
self->num_bytes_ = num_bytes;
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);
self->distance_histogram_size = dist->alphabet_size_limit;
if (BROTLI_IS_OOM(m)) return;
}
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
BROTLI_FREE(m, self->literal_costs_);
BROTLI_FREE(m, self->cost_dist_);
}
static void SetCost(const uint32_t* histogram, size_t histogram_size,
BROTLI_BOOL literal_histogram, float* cost) {
size_t sum = 0;
size_t missing_symbol_sum;
float log2sum;
float missing_symbol_cost;
size_t i;
for (i = 0; i < histogram_size; i++) {
sum += histogram[i];
}
log2sum = (float)FastLog2(sum);
missing_symbol_sum = sum;
if (!literal_histogram) {
for (i = 0; i < histogram_size; i++) {
if (histogram[i] == 0) missing_symbol_sum++;
}
}
missing_symbol_cost = (float)FastLog2(missing_symbol_sum) + 2;
for (i = 0; i < histogram_size; i++) {
if (histogram[i] == 0) {
cost[i] = missing_symbol_cost;
continue;
}
/* Shannon bits for this symbol. */
cost[i] = log2sum - (float)FastLog2(histogram[i]);
/* Cannot be coded with less than 1 bit */
if (cost[i] < 1) cost[i] = 1;
}
}
static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
size_t last_insert_len) {
ZopfliCostModelArena* arena = &self->arena;
size_t pos = position - last_insert_len;
float min_cost_cmd = kInfinity;
size_t i;
float* cost_cmd = self->cost_cmd_;
memset(arena->histogram_literal, 0, sizeof(arena->histogram_literal));
memset(arena->histogram_cmd, 0, sizeof(arena->histogram_cmd));
memset(arena->histogram_dist, 0, sizeof(arena->histogram_dist));
for (i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = CommandCopyLen(&commands[i]);
size_t distcode = commands[i].dist_prefix_ & 0x3FF;
size_t cmdcode = commands[i].cmd_prefix_;
size_t j;
arena->histogram_cmd[cmdcode]++;
if (cmdcode >= 128) arena->histogram_dist[distcode]++;
for (j = 0; j < inslength; j++) {
arena->histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
pos += inslength + copylength;
}
SetCost(arena->histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
arena->cost_literal);
SetCost(arena->histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
cost_cmd);
SetCost(arena->histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
self->cost_dist_);
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
}
self->min_cost_cmd_ = min_cost_cmd;
{
float* literal_costs = self->literal_costs_;
float literal_carry = 0.0;
size_t num_bytes = self->num_bytes_;
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
literal_carry +=
arena->cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
literal_costs[i + 1] = literal_costs[i] + literal_carry;
literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
}
}
static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
float* literal_costs = self->literal_costs_;
float literal_carry = 0.0;
float* cost_dist = self->cost_dist_;
float* cost_cmd = self->cost_cmd_;
size_t num_bytes = self->num_bytes_;
size_t i;
BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, self->literal_histograms,
&literal_costs[1]);
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
literal_carry += literal_costs[i + 1];
literal_costs[i + 1] = literal_costs[i] + literal_carry;
literal_carry -= literal_costs[i + 1] - literal_costs[i];
}
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
for (i = 0; i < self->distance_histogram_size; ++i) {
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
self->min_cost_cmd_ = (float)FastLog2(11);
}
static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
const ZopfliCostModel* self, uint16_t cmdcode) {
return self->cost_cmd_[cmdcode];
}
static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
const ZopfliCostModel* self, size_t distcode) {
return self->cost_dist_[distcode];
}
static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
const ZopfliCostModel* self, size_t from, size_t to) {
return self->literal_costs_[to] - self->literal_costs_[from];
}
static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
const ZopfliCostModel* self) {
return self->min_cost_cmd_;
}
/* REQUIRES: len >= 2, start_pos <= pos */
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
/* Maintains the "ZopfliNode array invariant". */
static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
size_t start_pos, size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode* next = &nodes[pos + len];
next->length = (uint32_t)(len | ((len + 9u - len_code) << 25));
next->distance = (uint32_t)dist;
next->dcode_insert_length = (uint32_t)(
(short_code << 27) | (pos - start_pos));
next->u.cost = cost;
}
typedef struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
float cost;
} PosData;
/* Maintains the smallest 8 cost difference together with their positions */
typedef struct StartPosQueue {
PosData q_[8];
size_t idx_;
} StartPosQueue;
static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
self->idx_ = 0;
}
static size_t StartPosQueueSize(const StartPosQueue* self) {
return BROTLI_MIN(size_t, self->idx_, 8);
}
static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
size_t offset = ~(self->idx_++) & 7;
size_t len = StartPosQueueSize(self);
size_t i;
PosData* q = self->q_;
q[offset] = *posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (i = 1; i < len; ++i) {
if (q[offset & 7].costdiff > q[(offset + 1) & 7].costdiff) {
BROTLI_SWAP(PosData, q, offset & 7, (offset + 1) & 7);
}
++offset;
}
}
static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
return &self->q_[(k - self->idx_) & 7];
}
/* Returns the minimum possible copy length that can improve the cost of any */
/* future position. */
static size_t ComputeMinimumCopyLength(const float start_cost,
const ZopfliNode* nodes,
const size_t num_bytes,
const size_t pos) {
/* Compute the minimum possible cost of reaching any future position. */
float min_cost = start_cost;
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
/* We already reached (pos + len) with no more cost than the minimum
possible cost of reaching anything from this pos, so there is no point in
looking for lengths <= len. */
++len;
if (len == next_len_offset) {
/* We reached the next copy length code bucket, so we add one more
extra bit to the minimum cost. */
min_cost += 1.0f;
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
}
return len;
}
/* REQUIRES: nodes[pos].cost < kInfinity
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
static uint32_t ComputeDistanceShortcut(const size_t block_start,
const size_t pos,
const size_t max_backward_limit,
const size_t gap,
const ZopfliNode* nodes) {
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
const size_t ilen = nodes[pos].dcode_insert_length & 0x7FFFFFF;
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
/* Since |block_start + pos| is the end position of the command, the copy part
starts from |block_start + pos - clen|. Distances that are greater than
this or greater than |max_backward_limit| + |gap| are static dictionary
references, and do not update the last distances.
Also distance code 0 (last distance) does not update the last distances. */
if (pos == 0) {
return 0;
} else if (dist + clen <= block_start + pos + gap &&
dist <= max_backward_limit + gap &&
ZopfliNodeDistanceCode(&nodes[pos]) > 0) {
return (uint32_t)pos;
} else {
return nodes[pos - clen - ilen].u.shortcut;
}
}
/* Fills in dist_cache[0..3] with the last four distances (as defined by
Section 4. of the Spec) that would be used at (block_start + pos) if we
used the shortest path of commands from block_start, computed from
nodes[0..pos]. The last four distances at block_start are in
starting_dist_cache[0..3].
REQUIRES: nodes[pos].cost < kInfinity
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
static void ComputeDistanceCache(const size_t pos,
const int* starting_dist_cache,
const ZopfliNode* nodes,
int* dist_cache) {
int idx = 0;
size_t p = nodes[pos].u.shortcut;
while (idx < 4 && p > 0) {
const size_t ilen = nodes[p].dcode_insert_length & 0x7FFFFFF;
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
dist_cache[idx++] = (int)dist;
/* Because of prerequisite, p >= clen + ilen >= 2. */
p = nodes[p - clen - ilen].u.shortcut;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
is eligible. */
static void EvaluateNode(
const size_t block_start, const size_t pos, const size_t max_backward_limit,
const size_t gap, const int* starting_dist_cache,
const ZopfliCostModel* model, StartPosQueue* queue, ZopfliNode* nodes) {
/* Save cost, because ComputeDistanceCache invalidates it. */
float node_cost = nodes[pos].u.cost;
nodes[pos].u.shortcut = ComputeDistanceShortcut(
block_start, pos, max_backward_limit, gap, nodes);
if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
PosData posdata;
posdata.pos = pos;
posdata.cost = node_cost;
posdata.costdiff = node_cost -
ZopfliCostModelGetLiteralCosts(model, 0, pos);
ComputeDistanceCache(
pos, starting_dist_cache, nodes, posdata.distance_cache);
StartPosQueuePush(queue, &posdata);
}
}
/* Returns longest copy length. */
static size_t UpdateNodes(
const size_t num_bytes, const size_t block_start, const size_t pos,
const uint8_t* ringbuffer, const size_t ringbuffer_mask,
const BrotliEncoderParams* params, const size_t max_backward_limit,
const int* starting_dist_cache, const size_t num_matches,
const BackwardMatch* matches, const ZopfliCostModel* model,
StartPosQueue* queue, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t cur_ix = block_start + pos;
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
const size_t dictionary_start = BROTLI_MIN(size_t,
cur_ix + stream_offset, max_backward_limit);
const size_t max_len = num_bytes - pos;
const size_t max_zopfli_len = MaxZopfliLen(params);
const size_t max_iters = MaxZopfliCandidates(params);
size_t min_len;
size_t result = 0;
size_t k;
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
starting_dist_cache, model, queue, nodes);
{
const PosData* posdata = StartPosQueueAt(queue, 0);
float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
}
/* Go over the command starting positions in order of increasing cost
difference. */
for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
const PosData* posdata = StartPosQueueAt(queue, k);
const size_t start = posdata->pos;
const uint16_t inscode = GetInsertLengthCode(pos - start);
const float start_costdiff = posdata->costdiff;
const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
ZopfliCostModelGetLiteralCosts(model, 0, pos);
/* Look for last distance matches using the distance cache from this
starting position. */
size_t best_len = min_len - 1;
size_t j = 0;
for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
(size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
size_t len = 0;
uint8_t continuation = ringbuffer[cur_ix_masked + best_len];
if (cur_ix_masked + best_len > ringbuffer_mask) {
break;
}
if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
/* Word dictionary -> ignore. */
continue;
}
if (backward <= max_distance) {
/* Regular backward reference. */
if (prev_ix >= cur_ix) {
continue;
}
prev_ix &= ringbuffer_mask;
if (prev_ix + best_len > ringbuffer_mask ||
continuation != ringbuffer[prev_ix + best_len]) {
continue;
}
len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_len);
} else if (backward > dictionary_start) {
size_t d = 0;
size_t offset;
size_t limit;
const uint8_t* source;
offset = dictionary_start + 1 + addon->total_size - 1;
while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
source = addon->chunk_source[d];
offset = offset - addon->chunk_offsets[d] - backward;
limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
limit = limit > max_len ? max_len : limit;
if (best_len >= limit ||
continuation != source[offset + best_len]) {
continue;
}
len = FindMatchLengthWithLimit(&source[offset],
&ringbuffer[cur_ix_masked],
limit);
} else {
/* "Gray" area. It is addressable by decoder, but this encoder
instance does not have that data -> should not touch it. */
continue;
}
{
const float dist_cost = base_cost +
ZopfliCostModelGetDistanceCost(model, j);
size_t l;
for (l = best_len + 1; l <= len; ++l) {
const uint16_t copycode = GetCopyLengthCode(l);
const uint16_t cmdcode =
CombineLengthCodes(inscode, copycode, j == 0);
const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
(float)GetCopyExtra(copycode) +
ZopfliCostModelGetCommandCost(model, cmdcode);
if (cost < nodes[pos + l].u.cost) {
UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
result = BROTLI_MAX(size_t, result, l);
}
best_len = l;
}
}
}
/* At higher iterations look only for new last distance matches, since
looking only for new command start positions with the same distances
does not help much. */
if (k >= 2) continue;
{
/* Loop through all possible copy lengths at this position. */
size_t len = min_len;
for (j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
BROTLI_BOOL is_dictionary_match =
TO_BROTLI_BOOL(dist > dictionary_start + gap);
/* We already tried all possible last distance matches, so we can use
normal distance code here. */
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
uint16_t dist_symbol;
uint32_t distextra;
uint32_t distnumextra;
float dist_cost;
size_t max_match_len;
PrefixEncodeCopyDistance(
dist_code, params->dist.num_direct_distance_codes,
params->dist.distance_postfix_bits, &dist_symbol, &distextra);
distnumextra = dist_symbol >> 10;
dist_cost = base_cost + (float)distnumextra +
ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
the maximum length is long enough, try only one maximum length. */
max_match_len = BackwardMatchLength(&match);
if (len < max_match_len &&
(is_dictionary_match || max_match_len > max_zopfli_len)) {
len = max_match_len;
}
for (; len <= max_match_len; ++len) {
const size_t len_code =
is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
const uint16_t copycode = GetCopyLengthCode(len_code);
const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
const float cost = dist_cost + (float)GetCopyExtra(copycode) +
ZopfliCostModelGetCommandCost(model, cmdcode);
if (cost < nodes[pos + len].u.cost) {
UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
result = BROTLI_MAX(size_t, result, len);
}
}
}
}
}
return result;
}
static size_t ComputeShortestPathFromNodes(size_t num_bytes,
ZopfliNode* nodes) {
size_t index = num_bytes;
size_t num_commands = 0;
while ((nodes[index].dcode_insert_length & 0x7FFFFFF) == 0 &&
nodes[index].length == 1) --index;
nodes[index].u.next = BROTLI_UINT32_MAX;
while (index != 0) {
size_t len = ZopfliNodeCommandLength(&nodes[index]);
index -= len;
nodes[index].u.next = (uint32_t)len;
num_commands++;
}
return num_commands;
}
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
void duckdb_brotli::BrotliZopfliCreateCommands(const size_t num_bytes,
const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
size_t* last_insert_len, const BrotliEncoderParams* params,
Command* commands, size_t* num_literals) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
size_t pos = 0;
uint32_t offset = nodes[0].u.next;
size_t i;
size_t gap = params->dictionary.compound.total_size;
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
size_t insert_length = next->dcode_insert_length & 0x7FFFFFF;
pos += insert_length;
offset = next->u.next;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
{
size_t distance = ZopfliNodeCopyDistance(next);
size_t len_code = ZopfliNodeLengthCode(next);
size_t dictionary_start = BROTLI_MIN(size_t,
block_start + pos + stream_offset, max_backward_limit);
BROTLI_BOOL is_dictionary =
TO_BROTLI_BOOL(distance > dictionary_start + gap);
size_t dist_code = ZopfliNodeDistanceCode(next);
InitCommand(&commands[i], &params->dist, insert_length,
copy_length, (int)len_code - (int)copy_length, dist_code);
if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = (int)distance;
}
}
*num_literals += insert_length;
pos += copy_length;
}
*last_insert_len += num_bytes - pos;
}
static size_t ZopfliIterate(size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
const ZopfliCostModel* model, const uint32_t* num_matches,
const BackwardMatch* matches, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
const size_t max_zopfli_len = MaxZopfliLen(params);
StartPosQueue queue;
size_t cur_match_pos = 0;
size_t i;
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitStartPosQueue(&queue);
for (i = 0; i + 3 < num_bytes; i++) {
size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
ringbuffer_mask, params, max_backward_limit, dist_cache,
num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
cur_match_pos += num_matches[i];
if (num_matches[i] == 1 &&
BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
skip = BROTLI_MAX(size_t,
BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
}
if (skip > 1) {
skip--;
while (skip) {
i++;
if (i + 3 >= num_bytes) break;
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
dist_cache, model, &queue, nodes);
cur_match_pos += num_matches[i];
skip--;
}
}
}
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
static void MergeMatches(BackwardMatch* dst,
BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
while (len1 > 0 && len2 > 0) {
size_t l1 = BackwardMatchLength(src1);
size_t l2 = BackwardMatchLength(src2);
if (l1 < l2 || ((l1 == l2) && (src1->distance < src2->distance))) {
*dst++ = *src1++;
len1--;
} else {
*dst++ = *src2++;
len2--;
}
}
while (len1-- > 0) *dst++ = *src1++;
while (len2-- > 0) *dst++ = *src2++;
}
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t duckdb_brotli::BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
const size_t max_zopfli_len = MaxZopfliLen(params);
StartPosQueue queue;
BackwardMatch* BROTLI_RESTRICT matches =
BROTLI_ALLOC(m, BackwardMatch, 2 * (MAX_NUM_MATCHES_H10 + 64));
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t i;
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
size_t lz_matches_offset =
(addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
return 0;
}
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitZopfliCostModel(m, model, &params->dist, num_bytes);
if (BROTLI_IS_OOM(m)) return 0;
ZopfliCostModelSetFromLiteralCosts(
model, position, ringbuffer, ringbuffer_mask);
InitStartPosQueue(&queue);
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
const size_t dictionary_start = BROTLI_MIN(size_t,
pos + stream_offset, max_backward_limit);
size_t skip;
size_t num_matches;
int dict_id = 0;
if (params->dictionary.contextual.context_based) {
uint8_t p1 = pos >= 1 ?
ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
uint8_t p2 = pos >= 2 ?
ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
num_matches = FindAllMatchesH10(&hasher->privat._H10,
params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
dictionary_start + gap, params, &matches[lz_matches_offset]);
if (addon->num_chunks != 0) {
size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
dictionary_start, params->dist.max_distance,
&matches[lz_matches_offset - 64], 64);
MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
&matches[lz_matches_offset], num_matches);
num_matches += cd_matches;
}
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
params, max_backward_limit, dist_cache, num_matches, matches, model,
&queue, nodes);
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
}
if (skip > 1) {
/* Add the tail of the copy to the hasher. */
StoreRangeH10(&hasher->privat._H10,
ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
size_t, pos + skip, store_end));
skip--;
while (skip) {
i++;
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
dist_cache, model, &queue, nodes);
skip--;
}
}
}
CleanupZopfliCostModel(m, model);
BROTLI_FREE(m, model);
BROTLI_FREE(m, matches);
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
void duckdb_brotli::BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
*num_commands += BrotliZopfliComputeShortestPath(m, num_bytes,
position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
last_insert_len, params, commands, num_literals);
BROTLI_FREE(m, nodes);
}
void duckdb_brotli::BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
size_t matches_size = 4 * num_bytes;
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t cur_match_pos = 0;
size_t i;
size_t orig_num_literals;
size_t orig_last_insert_len;
int orig_dist_cache[4];
size_t orig_num_commands;
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
ZopfliNode* nodes;
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
size_t shadow_matches =
(addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
return;
}
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
const size_t pos = position + i;
size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t dictionary_start = BROTLI_MIN(size_t,
pos + stream_offset, max_backward_limit);
size_t max_length = num_bytes - i;
size_t num_found_matches;
size_t cur_match_end;
size_t j;
int dict_id = 0;
if (params->dictionary.contextual.context_based) {
uint8_t p1 = pos >= 1 ?
ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
uint8_t p2 = pos >= 2 ?
ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
/* Ensure that we have enough free slots. */
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, dictionary_start + gap, params,
&matches[cur_match_pos + shadow_matches]);
if (addon->num_chunks != 0) {
size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
ringbuffer, ringbuffer_mask, pos, 3, max_length,
dictionary_start, params->dist.max_distance,
&matches[cur_match_pos + shadow_matches - 64], 64);
MergeMatches(&matches[cur_match_pos],
&matches[cur_match_pos + shadow_matches - 64], cd_matches,
&matches[cur_match_pos + shadow_matches], num_found_matches);
num_found_matches += cd_matches;
}
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=
BackwardMatchLength(&matches[j + 1]));
}
num_matches[i] = (uint32_t)num_found_matches;
if (num_found_matches > 0) {
const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
const size_t skip = match_len - 1;
matches[cur_match_pos++] = matches[cur_match_end - 1];
num_matches[i] = 1;
/* Add the tail of the copy to the hasher. */
StoreRangeH10(&hasher->privat._H10,
ringbuffer, ringbuffer_mask, pos + 1,
BROTLI_MIN(size_t, pos + match_len, store_end));
memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
i += skip;
} else {
cur_match_pos = cur_match_end;
}
}
}
orig_num_literals = *num_literals;
orig_last_insert_len = *last_insert_len;
memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
orig_num_commands = *num_commands;
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
InitZopfliCostModel(m, model, &params->dist, num_bytes);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < 2; i++) {
BrotliInitZopfliNodes(nodes, num_bytes + 1);
if (i == 0) {
ZopfliCostModelSetFromLiteralCosts(
model, position, ringbuffer, ringbuffer_mask);
} else {
ZopfliCostModelSetFromCommands(model, position, ringbuffer,
ringbuffer_mask, commands, *num_commands - orig_num_commands,
orig_last_insert_len);
}
*num_commands = orig_num_commands;
*num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
*num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
ringbuffer_mask, params, gap, dist_cache, model, num_matches, matches,
nodes);
BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
last_insert_len, params, commands, num_literals);
}
CleanupZopfliCostModel(m, model);
BROTLI_FREE(m, model);
BROTLI_FREE(m, nodes);
BROTLI_FREE(m, matches);
BROTLI_FREE(m, num_matches);
}

View File

@@ -0,0 +1,92 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "command.h"
#include "brotli_hash.h"
#include "memory.h"
#include "quality.h"
namespace duckdb_brotli {
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
typedef struct ZopfliNode {
/* Best length to get up to this byte (not including this byte itself)
highest 7 bit is used to reconstruct the length code. */
uint32_t length;
/* Distance associated with the length. */
uint32_t distance;
/* Number of literal inserts before this copy; highest 5 bits contain
distance short code + 1 (or zero if no short code). */
uint32_t dcode_insert_length;
/* This union holds information used by dynamic-programming. During forward
pass |cost| it used to store the goal function. When node is processed its
|cost| is invalidated in favor of |shortcut|. On path back-tracing pass
|next| is assigned the offset to next node on the path. */
union {
/* Smallest cost to get to this byte from the beginning, as found so far. */
float cost;
/* Offset to the next node on the path. Equals to command_length() of the
next node on the path. For last node equals to BROTLI_UINT32_MAX */
uint32_t next;
/* Node position that provides next distance for distance cache. */
uint32_t shortcut;
} u;
} ZopfliNode;
BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
/* Computes the shortest path of commands from position to at most
position + num_bytes.
On return, path->size() is the number of commands found and path[i] is the
length of the i-th command (copy length plus insert length).
Note that the sum of the lengths of all commands can be less than num_bytes.
On return, the nodes[0..num_bytes] array will have the following
"ZopfliNode array invariant":
For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
(1) nodes[i].copy_length() >= 2
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
BROTLI_INTERNAL void BrotliZopfliCreateCommands(
const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
Command* commands, size_t* num_literals);
}
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */

View File

@@ -0,0 +1,410 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
#include "bit_cost.h"
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "fast_log.h"
#include "histogram.h"
using namespace duckdb_brotli;
#define FN(X) duckdb_brotli:: X ## Literal
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
const size_t data_size = FN(HistogramDataSize)();
int count = 0;
size_t s[5];
double bits = 0.0;
size_t i;
if (histogram->total_count_ == 0) {
return kOneSymbolHistogramCost;
}
for (i = 0; i < data_size; ++i) {
if (histogram->data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
}
if (count == 3) {
const uint32_t histo0 = histogram->data_[s[0]];
const uint32_t histo1 = histogram->data_[s[1]];
const uint32_t histo2 = histogram->data_[s[2]];
const uint32_t histomax =
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
uint32_t h23;
uint32_t histomax;
for (i = 0; i < 4; ++i) {
histo[i] = histogram->data_[s[i]];
}
/* Sort */
for (i = 0; i < 4; ++i) {
size_t j;
for (j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
BROTLI_SWAP(uint32_t, histo, j, i);
}
}
}
h23 = histo[2] + histo[3];
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
{
/* In this loop we compute the entropy of the histogram and simultaneously
build a simplified histogram of the code length codes where we use the
zero repeat code 17, but we don't use the non-zero repeat code 16. */
size_t max_depth = 1;
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
const double log2total = FastLog2(histogram->total_count_);
for (i = 0; i < data_size;) {
if (histogram->data_[i] > 0) {
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
= log2(total_count) - log2(count(symbol)) */
double log2p = log2total - FastLog2(histogram->data_[i]);
/* Approximate the bit depth by round(-log2(P(symbol))) */
size_t depth = (size_t)(log2p + 0.5);
bits += histogram->data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
/* Compute the run length of zeros and add the appropriate number of 0
and 17 code length codes to the code length code histogram. */
uint32_t reps = 1;
size_t k;
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == data_size) {
/* Don't add any cost for the last zero run, since these are encoded
only implicitly. */
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
/* Add the 3 extra bits for the 17 code length code. */
bits += 3;
reps >>= 3;
}
}
}
}
/* Add the estimated encoding cost of the code length code histogram. */
bits += (double)(18 + 2 * max_depth);
/* Add the entropy of the code length code histogram. */
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
}
return bits;
}
#undef HistogramType
#undef FN
#define FN(X) duckdb_brotli:: X ## Command
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
const size_t data_size = FN(HistogramDataSize)();
int count = 0;
size_t s[5];
double bits = 0.0;
size_t i;
if (histogram->total_count_ == 0) {
return kOneSymbolHistogramCost;
}
for (i = 0; i < data_size; ++i) {
if (histogram->data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
}
if (count == 3) {
const uint32_t histo0 = histogram->data_[s[0]];
const uint32_t histo1 = histogram->data_[s[1]];
const uint32_t histo2 = histogram->data_[s[2]];
const uint32_t histomax =
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
uint32_t h23;
uint32_t histomax;
for (i = 0; i < 4; ++i) {
histo[i] = histogram->data_[s[i]];
}
/* Sort */
for (i = 0; i < 4; ++i) {
size_t j;
for (j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
BROTLI_SWAP(uint32_t, histo, j, i);
}
}
}
h23 = histo[2] + histo[3];
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
{
/* In this loop we compute the entropy of the histogram and simultaneously
build a simplified histogram of the code length codes where we use the
zero repeat code 17, but we don't use the non-zero repeat code 16. */
size_t max_depth = 1;
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
const double log2total = FastLog2(histogram->total_count_);
for (i = 0; i < data_size;) {
if (histogram->data_[i] > 0) {
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
= log2(total_count) - log2(count(symbol)) */
double log2p = log2total - FastLog2(histogram->data_[i]);
/* Approximate the bit depth by round(-log2(P(symbol))) */
size_t depth = (size_t)(log2p + 0.5);
bits += histogram->data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
/* Compute the run length of zeros and add the appropriate number of 0
and 17 code length codes to the code length code histogram. */
uint32_t reps = 1;
size_t k;
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == data_size) {
/* Don't add any cost for the last zero run, since these are encoded
only implicitly. */
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
/* Add the 3 extra bits for the 17 code length code. */
bits += 3;
reps >>= 3;
}
}
}
}
/* Add the estimated encoding cost of the code length code histogram. */
bits += (double)(18 + 2 * max_depth);
/* Add the entropy of the code length code histogram. */
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
}
return bits;
}
#undef HistogramType
#undef FN
#define FN(X) duckdb_brotli:: X ## Distance
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
const size_t data_size = FN(HistogramDataSize)();
int count = 0;
size_t s[5];
double bits = 0.0;
size_t i;
if (histogram->total_count_ == 0) {
return kOneSymbolHistogramCost;
}
for (i = 0; i < data_size; ++i) {
if (histogram->data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
}
if (count == 3) {
const uint32_t histo0 = histogram->data_[s[0]];
const uint32_t histo1 = histogram->data_[s[1]];
const uint32_t histo2 = histogram->data_[s[2]];
const uint32_t histomax =
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
uint32_t h23;
uint32_t histomax;
for (i = 0; i < 4; ++i) {
histo[i] = histogram->data_[s[i]];
}
/* Sort */
for (i = 0; i < 4; ++i) {
size_t j;
for (j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
BROTLI_SWAP(uint32_t, histo, j, i);
}
}
}
h23 = histo[2] + histo[3];
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
{
/* In this loop we compute the entropy of the histogram and simultaneously
build a simplified histogram of the code length codes where we use the
zero repeat code 17, but we don't use the non-zero repeat code 16. */
size_t max_depth = 1;
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
const double log2total = FastLog2(histogram->total_count_);
for (i = 0; i < data_size;) {
if (histogram->data_[i] > 0) {
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
= log2(total_count) - log2(count(symbol)) */
double log2p = log2total - FastLog2(histogram->data_[i]);
/* Approximate the bit depth by round(-log2(P(symbol))) */
size_t depth = (size_t)(log2p + 0.5);
bits += histogram->data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
/* Compute the run length of zeros and add the appropriate number of 0
and 17 code length codes to the code length code histogram. */
uint32_t reps = 1;
size_t k;
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == data_size) {
/* Don't add any cost for the last zero run, since these are encoded
only implicitly. */
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
/* Add the 3 extra bits for the 17 code length code. */
bits += 3;
reps >>= 3;
}
}
}
}
/* Add the estimated encoding cost of the code length code histogram. */
bits += (double)(18 + 2 * max_depth);
/* Add the entropy of the code length code histogram. */
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
}
return bits;
}
#undef HistogramType
#undef FN

View File

@@ -0,0 +1,60 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "fast_log.h"
#include "histogram.h"
namespace duckdb_brotli {
static BROTLI_INLINE double ShannonEntropy(
const uint32_t* population, size_t size, size_t* total) {
size_t sum = 0;
double retval = 0;
const uint32_t* population_end = population + size;
size_t p;
if (size & 1) {
goto odd_number_of_elements_left;
}
while (population < population_end) {
p = *population++;
sum += p;
retval -= (double)p * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= (double)p * FastLog2(p);
}
if (sum) retval += (double)sum * FastLog2(sum);
*total = sum;
return retval;
}
static BROTLI_INLINE double BitsEntropy(
const uint32_t* population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < (double)sum) {
/* At least one bit per literal is needed. */
retval = (double)sum;
}
return retval;
}
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
}
#endif /* BROTLI_ENC_BIT_COST_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,48 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Block split point selection utilities. */
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "command.h"
#include "memory.h"
#include "quality.h"
namespace duckdb_brotli {
typedef struct BlockSplit {
size_t num_types; /* Amount of distinct types */
size_t num_blocks; /* Amount of values in types and length */
uint8_t* types;
uint32_t* lengths;
size_t types_alloc_size;
size_t lengths_alloc_size;
} BlockSplit;
BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
BlockSplit* self);
BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
const BrotliEncoderParams* params,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
}
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,85 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to convert brotli-related data structures into the
brotli bit stream. The functions here operate under
assumption that there is enough space in the storage, i.e., there are
no out-of-range checks anywhere.
These functions do bit addressing into a byte array. The byte array
is called "storage" and the index to the bit is called storage_ix
in function arguments. */
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <brotli/types.h>
#include "../common/context.h"
#include "../common/brotli_platform.h"
#include "command.h"
#include "entropy_encode.h"
#include "memory.h"
#include "metablock.h"
namespace duckdb_brotli {
/* All Store functions here will use a storage_ix, which is always the bit
position for the current storage. */
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
HuffmanTree* tree, const uint32_t* histogram, const size_t histogram_total,
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
uint8_t* storage);
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
const BrotliEncoderParams* params, ContextType literal_context_mode,
const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
size_t* storage_ix, uint8_t* storage);
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
const Command* commands, size_t n_commands,
size_t* storage_ix, uint8_t* storage);
/* Same as above, but uses static prefix codes for histograms with a only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
const uint8_t* input, size_t start_pos, size_t length, size_t mask,
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
const Command* commands, size_t n_commands,
size_t* storage_ix, uint8_t* storage);
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
size_t position, size_t mask, size_t len,
size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
#if defined(BROTLI_TEST)
void GetBlockLengthPrefixCodeForTest(uint32_t, size_t*, uint32_t*, uint32_t*);
#endif
}
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,47 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Parameters for the Brotli encoder with chosen quality levels. */
#ifndef BROTLI_ENC_PARAMS_H_
#define BROTLI_ENC_PARAMS_H_
#include <brotli/encode.h>
#include "encoder_dict.h"
typedef struct BrotliHasherParams {
int type;
int bucket_bits;
int block_bits;
int num_last_distances_to_check;
} BrotliHasherParams;
typedef struct BrotliDistanceParams {
uint32_t distance_postfix_bits;
uint32_t num_direct_distance_codes;
uint32_t alphabet_size_max;
uint32_t alphabet_size_limit;
size_t max_distance;
} BrotliDistanceParams;
/* Encoding parameters */
typedef struct BrotliEncoderParams {
BrotliEncoderMode mode;
int quality;
int lgwin;
int lgblock;
size_t stream_offset;
size_t size_hint;
BROTLI_BOOL disable_literal_context_modeling;
BROTLI_BOOL large_window;
BrotliHasherParams hasher;
BrotliDistanceParams dist;
/* TODO(eustas): rename to BrotliShared... */
duckdb_brotli::SharedEncoderDictionary dictionary;
} BrotliEncoderParams;
#endif /* BROTLI_ENC_PARAMS_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "command.h"
#include <brotli/types.h>
using namespace duckdb_brotli;
const uint32_t duckdb_brotli::kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594};
const uint32_t duckdb_brotli::kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24};
const uint32_t duckdb_brotli::kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {
2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18,
22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118};
const uint32_t duckdb_brotli::kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24};

View File

@@ -0,0 +1,187 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* This class models a sequence of literals and a backward reference copy. */
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "fast_log.h"
#include "brotli_params.h"
#include "prefix.h"
namespace duckdb_brotli {
BROTLI_INTERNAL extern const uint32_t
kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES];
BROTLI_INTERNAL extern const uint32_t
kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES];
BROTLI_INTERNAL extern const uint32_t
kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES];
BROTLI_INTERNAL extern const uint32_t
kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES];
static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return (uint16_t)insertlen;
} else if (insertlen < 130) {
uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
} else if (insertlen < 2114) {
return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
} else if (insertlen < 6210) {
return 21u;
} else if (insertlen < 22594) {
return 22u;
} else {
return 23u;
}
}
static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return (uint16_t)(copylen - 2);
} else if (copylen < 134) {
uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
} else if (copylen < 2118) {
return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
} else {
return 23u;
}
}
static BROTLI_INLINE uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, BROTLI_BOOL use_last_distance) {
uint16_t bits64 =
(uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3u));
if (use_last_distance && inscode < 8u && copycode < 16u) {
return (copycode < 8u) ? bits64 : (bits64 | 64u);
} else {
/* Specification: 5 Encoding of ... (last table) */
/* offset = 2 * index, where index is in range [0..8] */
uint32_t offset = 2u * ((copycode >> 3u) + 3u * (inscode >> 3u));
/* All values in specification are K * 64,
where K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
i + 1 = [1, 2, 3, 4, 5, 6, 7, 8, 9],
K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] = D.
All values in D require only 2 bits to encode.
Magic constant is shifted 6 bits left, to avoid final multiplication. */
offset = (offset << 5u) + 0x40u + ((0x520D40u >> offset) & 0xC0u);
return (uint16_t)(offset | bits64);
}
}
static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
BROTLI_BOOL use_last_distance,
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
}
static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
return kBrotliInsBase[inscode];
}
static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
return kBrotliInsExtra[inscode];
}
static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
return kBrotliCopyBase[copycode];
}
static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
return kBrotliCopyExtra[copycode];
}
typedef struct Command {
uint32_t insert_len_;
/* Stores copy_len in low 25 bits and copy_code - copy_len in high 7 bit. */
uint32_t copy_len_;
/* Stores distance extra bits. */
uint32_t dist_extra_;
uint16_t cmd_prefix_;
/* Stores distance code in low 10 bits
and number of extra bits in high 6 bits. */
uint16_t dist_prefix_;
} Command;
/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
static BROTLI_INLINE void InitCommand(Command* self,
const BrotliDistanceParams* dist, size_t insertlen,
size_t copylen, int copylen_code_delta, size_t distance_code) {
/* Don't rely on signed int representation, use honest casts. */
uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
self->insert_len_ = (uint32_t)insertlen;
self->copy_len_ = (uint32_t)(copylen | (delta << 25));
/* The distance prefix and extra bits are stored in this Command as if
npostfix and ndirect were 0, they are only recomputed later after the
clustering if needed. */
PrefixEncodeCopyDistance(
distance_code, dist->num_direct_distance_codes,
dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
GetLengthCode(
insertlen, (size_t)((int)copylen + copylen_code_delta),
TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
}
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
self->insert_len_ = (uint32_t)insertlen;
self->copy_len_ = 4 << 25;
self->dist_extra_ = 0;
self->dist_prefix_ = BROTLI_NUM_DISTANCE_SHORT_CODES;
GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
}
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
const Command* self, const BrotliDistanceParams* dist) {
if ((self->dist_prefix_ & 0x3FFu) <
BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes) {
return self->dist_prefix_ & 0x3FFu;
} else {
uint32_t dcode = self->dist_prefix_ & 0x3FFu;
uint32_t nbits = self->dist_prefix_ >> 10;
uint32_t extra = self->dist_extra_;
uint32_t postfix_mask = (1U << dist->distance_postfix_bits) - 1U;
uint32_t hcode = (dcode - dist->num_direct_distance_codes -
BROTLI_NUM_DISTANCE_SHORT_CODES) >>
dist->distance_postfix_bits;
uint32_t lcode = (dcode - dist->num_direct_distance_codes -
BROTLI_NUM_DISTANCE_SHORT_CODES) & postfix_mask;
uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
return ((offset + extra) << dist->distance_postfix_bits) + lcode +
dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES;
}
}
static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
uint32_t r = self->cmd_prefix_ >> 6;
uint32_t c = self->cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
return self->copy_len_ & 0x1FFFFFF;
}
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
uint32_t modifier = self->copy_len_ >> 25;
int32_t delta = (int8_t)((uint8_t)(modifier | ((modifier & 0x40) << 1)));
return (uint32_t)((int32_t)(self->copy_len_ & 0x1FFFFFF) + delta);
}
}
#endif /* BROTLI_ENC_COMMAND_H_ */

View File

@@ -0,0 +1,209 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "compound_dictionary.h"
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "memory.h"
#include "quality.h"
using namespace duckdb_brotli;
static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
const uint8_t* source, size_t source_size, uint32_t bucket_bits,
uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
/* Step 1: create "bloated" hasher. */
uint32_t num_slots = 1u << slot_bits;
uint32_t num_buckets = 1u << bucket_bits;
uint32_t hash_shift = 64u - bucket_bits;
uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
uint32_t slot_mask = num_slots - 1;
size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
(sizeof(uint32_t) << slot_bits) +
(sizeof(uint16_t) << bucket_bits) +
(sizeof(uint32_t) << bucket_bits) +
(sizeof(uint32_t) * source_size);
uint8_t* flat = NULL;
PreparedDictionary* result = NULL;
uint16_t* num = NULL;
uint32_t* bucket_heads = NULL;
uint32_t* next_bucket = NULL;
uint32_t* slot_offsets = NULL;
uint16_t* heads = NULL;
uint32_t* items = NULL;
uint8_t** source_ref = NULL;
uint32_t i;
uint32_t* slot_size = NULL;
uint32_t* slot_limit = NULL;
uint32_t total_items = 0;
if (slot_bits > 16) return NULL;
if (slot_bits > bucket_bits) return NULL;
if (bucket_bits - slot_bits >= 16) return NULL;
flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
slot_size = (uint32_t*)flat;
slot_limit = (uint32_t*)(&slot_size[num_slots]);
num = (uint16_t*)(&slot_limit[num_slots]);
bucket_heads = (uint32_t*)(&num[num_buckets]);
next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
memset(num, 0, num_buckets * sizeof(num[0]));
/* TODO(eustas): apply custom "store" order. */
for (i = 0; i + 7 < source_size; ++i) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
kPreparedDictionaryHashMul64Long;
const uint32_t key = (uint32_t)(h >> hash_shift);
uint16_t count = num[key];
next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
bucket_heads[key] = i;
count++;
if (count > bucket_limit) count = bucket_limit;
num[key] = count;
}
/* Step 2: find slot limits. */
for (i = 0; i < num_slots; ++i) {
BROTLI_BOOL overflow = BROTLI_FALSE;
slot_limit[i] = bucket_limit;
while (BROTLI_TRUE) {
uint32_t limit = slot_limit[i];
size_t j;
uint32_t count = 0;
overflow = BROTLI_FALSE;
for (j = i; j < num_buckets; j += num_slots) {
uint32_t size = num[j];
/* Last chain may span behind 64K limit; overflow happens only if
we are about to use 0xFFFF+ as item offset. */
if (count >= 0xFFFF) {
overflow = BROTLI_TRUE;
break;
}
if (size > limit) size = limit;
count += size;
}
if (!overflow) {
slot_size[i] = count;
total_items += count;
break;
}
slot_limit[i]--;
}
}
/* Step 3: transfer data to "slim" hasher. */
alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
(sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
sizeof(uint8_t*);
result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
BROTLI_FREE(m, flat);
return NULL;
}
slot_offsets = (uint32_t*)(&result[1]);
heads = (uint16_t*)(&slot_offsets[num_slots]);
items = (uint32_t*)(&heads[num_buckets]);
source_ref = (uint8_t**)(&items[total_items]);
result->magic = kLeanPreparedDictionaryMagic;
result->num_items = total_items;
result->source_size = (uint32_t)source_size;
result->hash_bits = hash_bits;
result->bucket_bits = bucket_bits;
result->slot_bits = slot_bits;
BROTLI_UNALIGNED_STORE_PTR(source_ref, source);
total_items = 0;
for (i = 0; i < num_slots; ++i) {
slot_offsets[i] = total_items;
total_items += slot_size[i];
slot_size[i] = 0;
}
for (i = 0; i < num_buckets; ++i) {
uint32_t slot = i & slot_mask;
uint32_t count = num[i];
uint32_t pos;
size_t j;
size_t cursor = slot_size[slot];
if (count > slot_limit[slot]) count = slot_limit[slot];
if (count == 0) {
heads[i] = 0xFFFF;
continue;
}
heads[i] = (uint16_t)cursor;
cursor += slot_offsets[slot];
slot_size[slot] += count;
pos = bucket_heads[i];
for (j = 0; j < count; j++) {
items[cursor++] = pos;
pos = next_bucket[pos];
}
items[cursor - 1] |= 0x80000000;
}
BROTLI_FREE(m, flat);
return result;
}
PreparedDictionary* duckdb_brotli::CreatePreparedDictionary(MemoryManager* m,
const uint8_t* source, size_t source_size) {
uint32_t bucket_bits = 17;
uint32_t slot_bits = 7;
uint32_t hash_bits = 40;
uint16_t bucket_limit = 32;
size_t volume = 16u << bucket_bits;
/* Tune parameters to fit dictionary size. */
while (volume < source_size && bucket_bits < 22) {
bucket_bits++;
slot_bits++;
volume <<= 1;
}
return CreatePreparedDictionaryWithParams(m,
source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
}
void duckdb_brotli::DestroyPreparedDictionary(MemoryManager* m,
PreparedDictionary* dictionary) {
if (!dictionary) return;
BROTLI_FREE(m, dictionary);
}
BROTLI_BOOL duckdb_brotli::AttachPreparedDictionary(
CompoundDictionary* compound, const PreparedDictionary* dictionary) {
size_t length = 0;
size_t index = 0;
if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
return BROTLI_FALSE;
}
if (!dictionary) return BROTLI_FALSE;
length = dictionary->source_size;
index = compound->num_chunks;
compound->total_size += length;
compound->chunks[index] = dictionary;
compound->chunk_offsets[index + 1] = compound->total_size;
{
uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
const void* tail = (void*)&items[dictionary->num_items];
if (dictionary->magic == kPreparedDictionaryMagic) {
compound->chunk_source[index] = (const uint8_t*)tail;
} else {
/* dictionary->magic == kLeanPreparedDictionaryMagic */
compound->chunk_source[index] =
(const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
}
}
compound->num_chunks++;
return BROTLI_TRUE;
}

View File

@@ -0,0 +1,75 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
#define BROTLI_ENC_PREPARED_DICTIONARY_H_
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "../common/brotli_constants.h"
#include "memory.h"
namespace duckdb_brotli {
/* "Fat" prepared dictionary, could be cooked outside of C implementation,
* e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */
static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
/* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility
* of caller of "prepare dictionary" to keep the LZ77 data while prepared
* dictionary is in use. */
static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3;
static const uint64_t kPreparedDictionaryHashMul64Long = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
typedef struct PreparedDictionary {
uint32_t magic;
uint32_t num_items;
uint32_t source_size;
uint32_t hash_bits;
uint32_t bucket_bits;
uint32_t slot_bits;
/* --- Dynamic size members --- */
/* uint32_t slot_offsets[1 << slot_bits]; */
/* uint16_t heads[1 << bucket_bits]; */
/* uint32_t items[variable]; */
/* [maybe] uint8_t* source_ref, depending on magic. */
/* [maybe] uint8_t source[source_size], depending on magic. */
} PreparedDictionary;
BROTLI_INTERNAL PreparedDictionary *CreatePreparedDictionary(duckdb_brotli::MemoryManager *m, const uint8_t *source,
size_t source_size);
BROTLI_INTERNAL void DestroyPreparedDictionary(duckdb_brotli::MemoryManager *m, PreparedDictionary *dictionary);
typedef struct CompoundDictionary {
/* LZ77 prefix, compound dictionary */
size_t num_chunks;
size_t total_size;
/* Client instances. */
const PreparedDictionary *chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
const uint8_t *chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
size_t num_prepared_instances_;
/* Owned instances. */
PreparedDictionary *prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
} CompoundDictionary;
BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(CompoundDictionary *compound,
const PreparedDictionary *dictionary);
}
#endif /* BROTLI_ENC_PREPARED_DICTIONARY */

View File

@@ -0,0 +1,796 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream.
Adapted from the CompressFragment() function in
https://github.com/google/snappy/blob/master/snappy.cc */
#include "compress_fragment.h"
#include <string.h> /* memcmp, memcpy, memset */
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "brotli_bit_stream.h"
#include "entropy_encode.h"
#include "fast_log.h"
#include "find_match_length.h"
#include "write_bits.h"
using namespace duckdb_brotli;
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
return (uint32_t)(h >> shift);
}
static BROTLI_INLINE uint32_t HashBytesAtOffset(
uint64_t v, int offset, size_t shift) {
BROTLI_DCHECK(offset >= 0);
BROTLI_DCHECK(offset <= 3);
{
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
return TO_BROTLI_BOOL(
BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
p1[4] == p2[4]);
}
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
of the "input" string and stores it into the bit stream.
Note that the prefix code here is built from the pre-LZ77 input, therefore
we can only approximate the statistics of the actual literal stream.
Moreover, for long inputs we build a histogram from a sample of the input
and thus have to assign a non-zero depth for each literal.
Returns estimated compression ratio millibytes/char for encoding given input
with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
size_t histogram_total;
size_t i;
memset(histogram, 0, sizeof(s->histogram));
if (input_size < (1 << 15)) {
for (i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
for (i = 0; i < 256; ++i) {
/* We weigh the first 11 samples with weight 3 to account for the
balancing effect of the LZ77 phase on the histogram. */
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
for (i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (i = 0; i < 256; ++i) {
/* We add 1 to each population count to avoid 0 bit depths (since this is
only a sample and we don't know if the symbol appears or not), and we
weigh the first 11 samples with weight 3 to account for the balancing
effect of the LZ77 phase on the histogram (more frequent symbols are
more likely to be in backward references instead as literals). */
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
{
size_t literal_ratio = 0;
for (i = 0; i < 256; ++i) {
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
}
/* Estimated encoding ratio, millibytes per symbol. */
return (literal_ratio * 125) / histogram_total;
}
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
size_t* storage_ix, uint8_t* storage) {
const uint32_t* const histogram = s->cmd_histo;
uint8_t* const depth = s->cmd_depth;
uint16_t* const bits = s->cmd_bits;
uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
/* TODO(eustas): do only once on initialization. */
memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
memcpy(tmp_depth, depth, 24);
memcpy(tmp_depth + 24, depth + 40, 8);
memcpy(tmp_depth + 32, depth + 24, 8);
memcpy(tmp_depth + 40, depth + 48, 8);
memcpy(tmp_depth + 48, depth + 32, 8);
memcpy(tmp_depth + 56, depth + 56, 8);
BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
memcpy(bits, tmp_bits, 48);
memcpy(bits + 24, tmp_bits + 32, 16);
memcpy(bits + 32, tmp_bits + 48, 16);
memcpy(bits + 40, tmp_bits + 24, 16);
memcpy(bits + 48, tmp_bits + 40, 16);
memcpy(bits + 56, tmp_bits + 56, 16);
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
size_t i;
memset(tmp_depth, 0, 64); /* only 64 first values were used */
memcpy(tmp_depth, depth, 8);
memcpy(tmp_depth + 64, depth + 8, 8);
memcpy(tmp_depth + 128, depth + 16, 8);
memcpy(tmp_depth + 192, depth + 24, 8);
memcpy(tmp_depth + 384, depth + 32, 8);
for (i = 0; i < 8; ++i) {
tmp_depth[128 + 8 * i] = depth[40 + i];
tmp_depth[256 + 8 * i] = depth[48 + i];
tmp_depth[448 + 8 * i] = depth[56 + i];
}
/* TODO(eustas): could/should full-length machinery be avoided? */
BrotliStoreHuffmanTree(
tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
}
BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
}
/* REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
const size_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
const size_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 50;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
++histo[code];
} else {
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[61];
}
}
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 22594) {
BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[62];
} else {
BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[63];
}
}
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 10) {
BrotliWriteBits(
depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
const size_t tail = copylen - 6;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
const size_t tail = copylen - 70;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
++histo[code];
} else {
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
++histo[39];
}
}
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 12) {
BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
const size_t tail = copylen - 8;
const uint32_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 30;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(5, tail & 31, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
const size_t tail = copylen - 72;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[39];
++histo[64];
}
}
static BROTLI_INLINE void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
const size_t d = distance + 3;
const uint32_t nbits = Log2FloorNonZero(d) - 1u;
const size_t prefix = (d >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
BrotliWriteBits(nbits, d - offset, storage_ix, storage);
++histo[distcode];
}
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256],
const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
size_t j;
for (j = 0; j < len; j++) {
const uint8_t lit = input[j];
BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
uint8_t* array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
uint32_t mask =
(~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
uint32_t unchanged_bits = array[byte_pos] & mask;
uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
array[byte_pos] =
(uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
}
}
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
const uint8_t* data, size_t len, const uint8_t* depths) {
uint32_t* BROTLI_RESTRICT const histo = s->histogram;
static const size_t kSampleRate = 43;
size_t i;
memset(histo, 0, sizeof(s->histogram));
for (i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
{
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * (double)total + 200;
for (i = 0; i < 256; ++i) {
r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
}
return TO_BROTLI_BOOL(r >= 0.0);
}
}
/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
const uint8_t* metablock_start, const uint8_t* next_emit,
const size_t insertlen, const size_t literal_ratio) {
const size_t compressed = (size_t)(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
return BROTLI_FALSE;
} else {
return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
}
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
const size_t len = (size_t)(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
static uint32_t kCmdHistoSeed[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, int* table, size_t table_bits,
size_t* storage_ix, uint8_t* storage) {
uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
const uint8_t* ip_end;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
const size_t kMinMatchLen = 5;
const uint8_t* metablock_start = input;
size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
size_t total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
size_t mlen_storage_ix = *storage_ix + 3;
size_t literal_ratio;
const uint8_t* ip;
int last_distance;
const size_t shift = 64u - table_bits;
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
{
/* Store the pre-compressed command and distance prefix codes. */
size_t i;
for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
}
}
BrotliWriteBits(s->cmd_code_numbits & 7,
s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);
emit_commands:
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
/* "ip" is the input pointer. */
ip = input;
last_distance = -1;
ip_end = input + block_size;
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
uint32_t next_hash;
for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc.. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
BROTLI_DCHECK(next_emit < ip);
trawl:
do {
uint32_t hash = next_hash;
uint32_t bytes_between_hash_lookups = skip++ >> 5;
BROTLI_DCHECK(hash == Hash(next_ip, shift));
ip = next_ip;
next_ip = ip + bytes_between_hash_lookups;
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
table[hash] = (int)(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
BROTLI_DCHECK(candidate >= base_ip);
BROTLI_DCHECK(candidate < ip);
table[hash] = (int)(ip - base_ip);
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Check copy distance. If candidate is not feasible, continue search.
Checking is done outside of hot loop to reduce overhead. */
if (ip - candidate > MAX_DISTANCE) goto trawl;
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next match
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
{
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
int distance = (int)(base - candidate); /* > 0 */
size_t insert = (size_t)(base - next_emit);
ip += matched;
BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
(int)(next_emit - base_ip), (unsigned long)insert, 2));
BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
input_size -= (size_t)(base - input);
input = base;
next_emit = input;
goto next_block;
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
}
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
EmitDistance((size_t)distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
"[CompressFragment] pos = %d insert = %d copy = %d\n"
"[CompressFragment] pos = %d distance = %d\n",
(int)(base - base_ip), (int)distance,
(int)(base - base_ip) + 2, 0, (int)matched - 2,
(int)(base - base_ip) + 2, (int)distance));
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (IsMatch(ip, candidate)) {
/* We have a 5-byte match at ip, and no need to emit any literal bytes
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
if (ip - candidate > MAX_DISTANCE) break;
ip += matched;
last_distance = (int)(base - candidate); /* > 0 */
BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
"[CompressFragment] pos = %d distance = %d\n",
(int)(base - base_ip), 0, (int)matched,
(int)(base - base_ip), (int)last_distance));
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
BROTLI_DCHECK(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(s, input, block_size, lit_depth)) {
BROTLI_DCHECK(total_block_size > (1 << 16));
/* Update the size of the current meta-block and continue emitting commands.
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size;
UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
goto emit_commands;
}
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const size_t insert = (size_t)(ip_end - next_emit);
BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
(int)(next_emit - base_ip), (unsigned long)insert, 2));
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
}
}
next_emit = ip_end;
next_block:
/* If we have more data, write a new meta-block header and prefix codes and
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
goto emit_commands;
}
if (!is_last) {
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
s->cmd_code[0] = 0;
s->cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
}
}
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
BrotliOnePassArena* s, const uint8_t* input, size_t input_size, \
BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B, \
storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
void duckdb_brotli::BrotliCompressFragmentFast(
BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
if (input_size == 0) {
BROTLI_DCHECK(is_last);
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
switch (table_bits) {
#define CASE_(B) \
case B: \
BrotliCompressFragmentFastImpl ## B( \
s, input, input_size, is_last, table, storage_ix, storage);\
break;
FOR_TABLE_BITS_(CASE_)
#undef CASE_
default: BROTLI_DCHECK(0); break;
}
/* If output is larger than single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
storage_ix, storage);
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_

View File

@@ -0,0 +1,82 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream. */
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "entropy_encode.h"
namespace duckdb_brotli {
typedef struct BrotliOnePassArena {
uint8_t lit_depth[256];
uint16_t lit_bits[256];
/* Command and distance prefix codes (each 64 symbols, stored back-to-back)
used for the next block. The command prefix code is over a smaller alphabet
with the following 64 symbols:
0 - 15: insert length code 0, copy length code 0 - 15, same distance
16 - 39: insert length code 0, copy length code 0 - 23
40 - 63: insert length code 0 - 23, copy length code 0
Note that symbols 16 and 40 represent the same code in the full alphabet,
but we do not use either of them. */
uint8_t cmd_depth[128];
uint16_t cmd_bits[128];
uint32_t cmd_histo[128];
/* The compressed form of the command and distance prefix codes for the next
block. */
uint8_t cmd_code[512];
size_t cmd_code_numbits;
HuffmanTree tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
uint32_t histogram[256];
uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t tmp_bits[64];
} BrotliOnePassArena;
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
"cmd_depth" and "cmd_bits" contain the command and distance prefix codes
(see comment in encode.h) used for the encoding of this input fragment.
If "is_last" is 0, they are updated to reflect the statistics
of this input fragment, to be used for the encoding of the next fragment.
"*cmd_code_numbits" is the number of bits of the compressed representation
of the command and distance prefix codes, and "cmd_code" is an array of
at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
command and distance prefix codes. If "is_last" is 0, these are also
updated to represent the updated "cmd_depth" and "cmd_bits".
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
OUTPUT: maximal copy distance <= |input_size|
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
BROTLI_INTERNAL void BrotliCompressFragmentFast(BrotliOnePassArena* s,
const uint8_t* input,
size_t input_size,
BROTLI_BOOL is_last,
int* table, size_t table_size,
size_t* storage_ix,
uint8_t* storage);
}
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */

View File

@@ -0,0 +1,653 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
#include "compress_fragment_two_pass.h"
#include <string.h> /* memcmp, memcpy, memset */
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "bit_cost.h"
#include "brotli_bit_stream.h"
#include "entropy_encode.h"
#include "fast_log.h"
#include "find_match_length.h"
#include "write_bits.h"
using namespace duckdb_brotli;
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
size_t shift, size_t length) {
const uint64_t h =
(BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8)) * kHashMul32;
return (uint32_t)(h >> shift);
}
static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
size_t shift, size_t length) {
BROTLI_DCHECK(offset <= 8 - length);
{
const uint64_t h = ((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
size_t length) {
if (BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2)) {
if (length == 4) return BROTLI_TRUE;
return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
}
return BROTLI_FALSE;
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
size_t* storage_ix,
uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
/* TODO(eustas): initialize once. */
memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
&s->cmd_depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
memcpy(s->cmd_bits, s->tmp_bits + 24, 16);
memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
memcpy(s->cmd_bits + 24, s->tmp_bits, 48);
memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
{
/* Create the bit length array for the full command alphabet. */
size_t i;
memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
for (i = 0; i < 8; ++i) {
s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
}
BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
s->tmp_tree, storage_ix, storage);
}
BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
storage);
}
static BROTLI_INLINE void EmitInsertLen(
uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
const uint32_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const uint32_t prefix = tail >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
const uint32_t extra = tail - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
const uint32_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const uint32_t code = nbits + 10;
const uint32_t extra = tail - (1u << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
**commands = 21 | (extra << 8);
} else if (insertlen < 22594) {
const uint32_t extra = insertlen - 6210;
**commands = 22 | (extra << 8);
} else {
const uint32_t extra = insertlen - 22594;
**commands = 23 | (extra << 8);
}
++(*commands);
}
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
**commands = (uint32_t)(copylen + 38);
} else if (copylen < 134) {
const size_t tail = copylen - 6;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else if (copylen < 2118) {
const size_t tail = copylen - 70;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = tail - ((size_t)1 << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
**commands = (uint32_t)(63 | (extra << 8));
}
++(*commands);
}
static BROTLI_INLINE void EmitCopyLenLastDistance(
size_t copylen, uint32_t** commands) {
if (copylen < 12) {
**commands = (uint32_t)(copylen + 20);
++(*commands);
} else if (copylen < 72) {
const size_t tail = copylen - 8;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 54;
const size_t extra = tail & 31;
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
const size_t tail = copylen - 72;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = tail - ((size_t)1 << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
**commands = (uint32_t)(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
uint32_t d = distance + 3;
uint32_t nbits = Log2FloorNonZero(d) - 1;
const uint32_t prefix = (d >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
uint32_t extra = d - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
static BROTLI_INLINE void CreateCommands(const uint8_t* input,
size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
size_t table_bits, size_t min_match,
uint8_t** literals, uint32_t** commands) {
/* "ip" is the input pointer. */
const uint8_t* ip = input;
const size_t shift = 64u - table_bits;
const uint8_t* ip_end = input + block_size;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
int last_distance = -1;
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
uint32_t next_hash;
for (next_hash = Hash(++ip, shift, min_match); ; ) {
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc.. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (ie. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
BROTLI_DCHECK(next_emit < ip);
trawl:
do {
uint32_t hash = next_hash;
uint32_t bytes_between_hash_lookups = skip++ >> 5;
ip = next_ip;
BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
next_ip = ip + bytes_between_hash_lookups;
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift, min_match);
candidate = ip - last_distance;
if (IsMatch(ip, candidate, min_match)) {
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
table[hash] = (int)(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
BROTLI_DCHECK(candidate >= base_ip);
BROTLI_DCHECK(candidate < ip);
table[hash] = (int)(ip - base_ip);
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
/* Check copy distance. If candidate is not feasible, continue search.
Checking is done outside of hot loop to reduce overhead. */
if (ip - candidate > MAX_DISTANCE) goto trawl;
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next match immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
{
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = min_match + FindMatchLengthWithLimit(
candidate + min_match, ip + min_match,
(size_t)(ip_end - ip) - min_match);
int distance = (int)(base - candidate); /* > 0 */
int insert = (int)(base - next_emit);
ip += matched;
BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
EmitInsertLen((uint32_t)insert, commands);
BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
(int)(next_emit - base_ip), insert, 2));
memcpy(*literals, next_emit, (size_t)insert);
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
EmitDistance((uint32_t)distance, commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
"[CompressFragment] pos = %d insert = %d copy = %d\n"
"[CompressFragment] pos = %d distance = %d\n",
(int)(base - base_ip), (int)distance,
(int)(base - base_ip) + 2, 0, (int)matched - 2,
(int)(base - base_ip) + 2, (int)distance));
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
uint64_t input_bytes;
uint32_t cur_hash;
uint32_t prev_hash;
if (min_match == 4) {
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 1);
} else {
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 1);
}
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (ip - candidate <= MAX_DISTANCE &&
IsMatch(ip, candidate, min_match)) {
/* We have a 6-byte match at ip, and no need to emit any
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = min_match + FindMatchLengthWithLimit(
candidate + min_match, ip + min_match,
(size_t)(ip_end - ip) - min_match);
ip += matched;
last_distance = (int)(base - candidate); /* > 0 */
BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
EmitDistance((uint32_t)last_distance, commands);
BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
"[CompressFragment] pos = %d distance = %d\n",
(int)(base - base_ip), 0, (int)matched,
(int)(base - base_ip), (int)last_distance));
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
uint64_t input_bytes;
uint32_t cur_hash;
uint32_t prev_hash;
if (min_match == 4) {
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 1);
} else {
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
table[prev_hash] = (int)(ip - base_ip - 1);
}
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift, min_match);
}
}
emit_remainder:
BROTLI_DCHECK(next_emit <= ip_end);
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const uint32_t insert = (uint32_t)(ip_end - next_emit);
EmitInsertLen(insert, commands);
BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
(int)(next_emit - base_ip), insert, 2));
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
static void StoreCommands(BrotliTwoPassArena* s,
const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
6, 7, 8, 9, 10, 12, 14, 24, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
};
static const uint32_t kInsertOffset[24] = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
};
size_t i;
memset(s->lit_histo, 0, sizeof(s->lit_histo));
/* TODO(eustas): is that necessary? */
memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
/* TODO(eustas): is that necessary? */
memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
memset(s->cmd_histo, 0, sizeof(s->cmd_histo));
for (i = 0; i < num_literals; ++i) {
++s->lit_histo[literals[i]];
}
BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
/* max_bits = */ 8, s->lit_depth,
s->lit_bits, storage_ix, storage);
for (i = 0; i < num_commands; ++i) {
const uint32_t code = commands[i] & 0xFF;
BROTLI_DCHECK(code < 128);
++s->cmd_histo[code];
}
s->cmd_histo[1] += 1;
s->cmd_histo[2] += 1;
s->cmd_histo[64] += 1;
s->cmd_histo[84] += 1;
BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
for (i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xFF;
const uint32_t extra = cmd >> 8;
BROTLI_DCHECK(code < 128);
BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
uint32_t j;
for (j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
storage);
++literals;
}
}
}
}
/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 0.98
#define SAMPLE_RATE 43
static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
const uint8_t* input, size_t input_size, size_t num_literals) {
double corpus_size = (double)input_size;
if ((double)num_literals < MIN_RATIO * corpus_size) {
return BROTLI_TRUE;
} else {
const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
size_t i;
memset(s->lit_histo, 0, sizeof(s->lit_histo));
for (i = 0; i < input_size; i += SAMPLE_RATE) {
++s->lit_histo[input[i]];
}
return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
}
}
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
size_t* storage_ix, uint8_t* storage) {
BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, input_size);
*storage_ix += input_size << 3;
storage[*storage_ix >> 3] = 0;
}
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_bits, size_t min_match,
size_t* storage_ix, uint8_t* storage) {
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
BROTLI_UNUSED(is_last);
while (input_size > 0) {
size_t block_size =
BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
size_t num_literals;
CreateCommands(input, block_size, input_size, base_ip, table,
table_bits, min_match, &literals, &commands);
num_literals = (size_t)(literals - literal_buf);
if (ShouldCompress(s, input, block_size, num_literals)) {
const size_t num_commands = (size_t)(commands - command_buf);
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
StoreCommands(s, literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
} else {
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression speed of uncompressible data about 3x faster. */
EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
}
input += block_size;
input_size -= block_size;
}
}
#define FOR_TABLE_BITS_(X) \
X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \
BrotliTwoPassArena* s, const uint8_t* input, size_t input_size, \
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \
int* table, size_t* storage_ix, uint8_t* storage) { \
size_t min_match = (B <= 15) ? 4 : 6; \
BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
literal_buf, table, B, min_match, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
void duckdb_brotli::BrotliCompressFragmentTwoPass(
BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
switch (table_bits) {
#define CASE_(B) \
case B: \
BrotliCompressFragmentTwoPassImpl ## B( \
s, input, input_size, is_last, command_buf, \
literal_buf, table, storage_ix, storage); \
break;
FOR_TABLE_BITS_(CASE_)
#undef CASE_
default: BROTLI_DCHECK(0); break;
}
/* If output is larger than single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
RewindBitPosition(initial_storage_ix, storage_ix, storage);
EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_

View File

@@ -0,0 +1,68 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "entropy_encode.h"
namespace duckdb_brotli {
/* TODO(eustas): turn to macro. */
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
typedef struct BrotliTwoPassArena {
uint32_t lit_histo[256];
uint8_t lit_depth[256];
uint16_t lit_bits[256];
uint32_t cmd_histo[128];
uint8_t cmd_depth[128];
uint16_t cmd_bits[128];
/* BuildAndStoreCommandPrefixCode */
HuffmanTree tmp_tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
uint16_t tmp_bits[64];
} BrotliTwoPassArena;
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
REQUIRES: "command_buf" and "literal_buf" point to at least
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two
OUTPUT: maximal copy distance <= |input_size|
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(BrotliTwoPassArena* s,
const uint8_t* input,
size_t input_size,
BROTLI_BOOL is_last,
uint32_t* command_buf,
uint8_t* literal_buf,
int* table,
size_t table_size,
size_t* storage_ix,
uint8_t* storage);
}
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Hash table on the 4-byte prefixes of static dictionary words. */
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
#define BROTLI_ENC_DICTIONARY_HASH_H_
#include <brotli/types.h>
namespace duckdb_brotli {
extern const uint16_t kStaticDictionaryHashWords[32768];
extern const uint8_t kStaticDictionaryHashLengths[32768];
}
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,636 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "encoder_dict.h"
#include <stdlib.h> /* malloc, free */
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "../common/shared_dictionary_internal.h"
#include "../common/transform.h"
#include "compound_dictionary.h"
#include "dictionary_hash.h"
#include "memory.h"
#include "quality.h"
#include "brotli_hash.h"
using namespace duckdb_brotli;
#define NUM_HASH_BITS 15u
#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
static void BrotliTrieInit(BrotliTrie* trie) {
trie->pool_capacity = 0;
trie->pool_size = 0;
trie->pool = 0;
/* Set up the root node */
trie->root.single = 0;
trie->root.len_ = 0;
trie->root.idx_ = 0;
trie->root.sub = 0;
}
static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
BrotliFree(m, trie->pool);
}
/* Initializes to RFC 7932 static dictionary / transforms. */
static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->words = BrotliGetDictionary();
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
dict->hash_table_words = kStaticDictionaryHashWords;
dict->hash_table_lengths = kStaticDictionaryHashLengths;
dict->buckets = kStaticDictionaryBuckets;
dict->dict_words = kStaticDictionaryWords;
dict->cutoffTransformsCount = kCutoffTransformsCount;
dict->cutoffTransforms = kCutoffTransforms;
dict->parent = 0;
dict->hash_table_data_words_ = 0;
dict->hash_table_data_lengths_ = 0;
dict->buckets_alloc_size_ = 0;
dict->buckets_data_ = 0;
dict->dict_words_alloc_size_ = 0;
dict->dict_words_data_ = 0;
dict->words_instance_ = 0;
dict->has_words_heavy = BROTLI_FALSE;
BrotliTrieInit(&dict->trie);
}
static void BrotliDestroyEncoderDictionary(MemoryManager* m,
BrotliEncoderDictionary* dict) {
BrotliFree(m, dict->hash_table_data_words_);
BrotliFree(m, dict->hash_table_data_lengths_);
BrotliFree(m, dict->buckets_data_);
BrotliFree(m, dict->dict_words_data_);
BrotliFree(m, dict->words_instance_);
BrotliTrieFree(m, &dict->trie);
}
#if defined(BROTLI_EXPERIMENTAL)
/* Word length must be at least 4 bytes */
static uint32_t Hash(const uint8_t* data, int bits) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - bits);
}
/* Theoretical max possible word size after transform */
#define kTransformedBufferSize \
(256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
/* To be safe buffer must have at least kTransformedBufferSize */
static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
const BrotliTransforms* transforms,
const BrotliEncoderDictionary* dict,
uint8_t* buffer, size_t* size) {
const uint8_t* dict_word = &dict->words->data[
dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
*size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
transforms, transform);
}
static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
DictWord result;
result.len = len;
result.transform = transform;
result.idx = idx;
return result;
}
static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
BrotliTrieNode** keep) {
uint32_t result;
uint32_t keep_index = 0;
if (keep && *keep != &trie->root) {
/* Optional node to keep, since address may change after re-allocating */
keep_index = (uint32_t)(*keep - trie->pool);
}
if (trie->pool_size == 0) {
/* Have a dummy node in the front. We do not want the result to be 0, it
must be at least 1, 0 represents "null pointer" */
trie->pool_size = 1;
}
BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
trie->pool_size + num);
if (BROTLI_IS_OOM(m)) return 0;
/* Init the new nodes to empty */
memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
result = (uint32_t)trie->pool_size;
trie->pool_size += num;
if (keep && *keep != &trie->root) {
*keep = trie->pool + keep_index;
}
return result;
}
/**
* len and idx: payload for last node
* word, size: the string
* index: position in the string
*/
static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
uint32_t idx, const uint8_t* word, size_t size, int index,
BrotliTrieNode* node, BrotliTrie* trie) {
BrotliTrieNode* child = 0;
uint8_t c;
if ((size_t)index == size) {
if (!node->len_ || idx < node->idx_) {
node->len_ = len;
node->idx_ = idx;
}
return BROTLI_TRUE;
}
c = word[index];
if (node->single && c != node->c) {
BrotliTrieNode old = trie->pool[node->sub];
uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
node->single = 0;
node->sub = new_nodes;
trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
old;
}
if (!node->sub) {
uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
node->single = 1;
node->c = c;
node->sub = new_node;
}
if (node->single) {
child = &trie->pool[node->sub];
} else {
if (!trie->pool[node->sub + (c >> 4)].sub) {
uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
trie->pool[node->sub + (c >> 4)].sub = new_nodes;
}
child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
}
return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
}
static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
const uint8_t* word, size_t size, BrotliTrie* trie) {
return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
}
const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
const BrotliTrieNode* node, uint8_t c) {
BrotliTrieNode* temp_node;
if (node->single) {
if (node->c == c) return &trie->pool[node->sub];
return 0;
}
if (!node->sub) return 0;
temp_node = &trie->pool[node->sub + (c >> 4)];
if (!temp_node->sub) return 0;
return &trie->pool[temp_node->sub + (c & 15)];
}
static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
const uint8_t* word, size_t size) {
const BrotliTrieNode* node = &trie->root;
size_t i;
for (i = 0; i < size; i++) {
node = BrotliTrieSub(trie, node, word[i]);
if (!node) return 0;
}
return node;
}
static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* dict) {
uint32_t i;
DictWord* dict_words;
uint16_t* buckets;
DictWord** words_by_hash;
size_t* words_by_hash_size;
size_t* words_by_hash_capacity;
BrotliTrie dedup;
uint8_t word[kTransformedBufferSize];
size_t word_size;
size_t total = 0;
uint8_t l;
uint16_t idx;
BrotliTrieInit(&dedup);
words_by_hash = (DictWord**)BrotliAllocate(m,
sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
words_by_hash_size = (size_t*)BrotliAllocate(m,
sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
words_by_hash_capacity = (size_t*)BrotliAllocate(m,
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
memset(words_by_hash_capacity, 0,
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
if (transforms->num_transforms > 0) {
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
uint16_t n = dict->words->size_bits_by_length[l] ?
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
for (idx = 0; idx < n; ++idx) {
uint32_t key;
/* First transform (usually identity) */
TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
&word_size);
/* Cannot hash words smaller than 4 bytes */
if (word_size < 4) {
/* Break instead of continue, all next words of this length will have
same length after transform */
break;
}
if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
key = Hash(word, NUM_HASH_BITS);
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, 0, idx));
++total;
}
}
}
/* These LUT transforms only supported if no custom transforms. This is
ok, we will use the heavy trie instead. */
if (transforms == BrotliGetTransforms()) {
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
uint16_t n = dict->words->size_bits_by_length[l] ?
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
for (idx = 0; idx < n; ++idx) {
int k;
BROTLI_BOOL is_ascii = BROTLI_TRUE;
size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
const uint8_t* data = &dict->words->data[offset];
for (k = 0; k < l; ++k) {
if (data[k] >= 128) is_ascii = BROTLI_FALSE;
}
if (data[0] < 128) {
int transform = 9; /* {empty, uppercase first, empty} */
uint32_t ix = idx + (uint32_t)transform * n;
const BrotliTrieNode* it;
TransformedDictionaryWord(idx, l, transform, transforms,
dict, word, &word_size);
it = BrotliTrieFind(&dedup, word, word_size);
if (!it || it->idx_ > ix) {
uint32_t key = Hash(word, NUM_HASH_BITS);
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
++total;
}
}
if (is_ascii) {
int transform = 44; /* {empty, uppercase all, empty} */
uint32_t ix = idx + (uint32_t)transform * n;
const BrotliTrieNode* it;
TransformedDictionaryWord(idx, l, transform, transforms,
dict, word, &word_size);
it = BrotliTrieFind(&dedup, word, word_size);
if (!it || it->idx_ > ix) {
uint32_t key = Hash(word, NUM_HASH_BITS);
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
++total;
}
}
}
}
}
dict_words = (DictWord*)BrotliAllocate(m,
sizeof(*dict->dict_words) * (total + 1));
buckets = (uint16_t*)BrotliAllocate(m,
sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
dict->dict_words_alloc_size_ = total + 1;
dict->dict_words = dict->dict_words_data_ = dict_words;
dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
dict->buckets = dict->buckets_data_ = buckets;
/* Unused; makes offsets start from 1. */
dict_words[0] = MakeDictWord(0, 0, 0);
total = 1;
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
size_t num_words = words_by_hash_size[i];
if (num_words > 0) {
buckets[i] = (uint16_t)(total);
memcpy(&dict_words[total], &words_by_hash[i][0],
sizeof(dict_words[0]) * num_words);
total += num_words;
dict_words[total - 1].len |= 0x80;
} else {
buckets[i] = 0;
}
}
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
BrotliFree(m, words_by_hash[i]);
}
BrotliFree(m, words_by_hash);
BrotliFree(m, words_by_hash_size);
BrotliFree(m, words_by_hash_capacity);
BrotliTrieFree(m, &dedup);
return BROTLI_TRUE;
}
static void BuildDictionaryHashTable(uint16_t* hash_table_words,
uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
int j, len;
/* The order of the loops is such that in case of collision, words with
shorter length are preferred, and in case of same length, words with
smaller index. There is only a single word per bucket. */
/* TODO(lode): consider adding optional user-supplied frequency_map to use
for preferred words instead, this can make the encoder better for
quality 9 and below without affecting the decoder */
memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
const size_t num_words = dict->size_bits_by_length[len] ?
(1u << dict->size_bits_by_length[len]) : 0;
for (j = (int)num_words - 1; j >= 0; --j) {
size_t offset = dict->offsets_by_length[len] +
(size_t)len * (size_t)j;
const uint8_t* word = &dict->data[offset];
const uint32_t key = Hash(word, 14);
int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
hash_table_words[idx] = (uint16_t)j;
hash_table_lengths[idx] = (uint8_t)len;
}
}
}
static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* dict) {
int i, j, l;
for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) {
for (l = 0; l < 32; l++) {
int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u);
for (i = 0; i < num; i++) {
uint8_t transformed[kTransformedBufferSize];
size_t size;
TransformedDictionaryWord(
(uint32_t)i, l, j, transforms, dict, transformed, &size);
if (size < 4) continue;
if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j),
transformed, size, &dict->trie)) {
return BROTLI_FALSE;
}
}
}
}
return BROTLI_TRUE;
}
/* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
the custom transforms, where possible within the limits of the
cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
transforms that remove the N last characters (OmitLast). */
static void ComputeCutoffTransforms(
const BrotliTransforms* transforms,
uint32_t* count, uint64_t* data) {
int i;
/* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
for N=9 it must be 36-99.
TODO(lode): consider a simple flexible uint8_t[10] instead of the uint64_t
for the cutoff transforms, so that shared dictionaries can have the
OmitLast transforms anywhere without loss. */
*count = 0;
*data = 0;
for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
int idx = transforms->cutOffTransforms[i];
if (idx == -1) break; /* Not found */
if (idx < (i << 2)) break; /* Too small for the encoding */
if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
(*count)++;
*data |= (uint64_t)(((uint64_t)idx -
((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
}
}
static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* current) {
int default_words = current->words == BrotliGetDictionary();
int default_transforms = transforms == BrotliGetTransforms();
if (default_words && default_transforms) {
/* hashes are already set to Brotli defaults */
return BROTLI_TRUE;
}
current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
m, sizeof(kStaticDictionaryHashWords));
current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
m, sizeof(kStaticDictionaryHashLengths));
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
current->hash_table_words = current->hash_table_data_words_;
current->hash_table_lengths = current->hash_table_data_lengths_;
BuildDictionaryHashTable(current->hash_table_data_words_,
current->hash_table_data_lengths_, current->words);
ComputeCutoffTransforms(transforms,
&current->cutoffTransformsCount, &current->cutoffTransforms);
/* Only compute the data for slow encoder if the requested quality is high
enough to need it */
if (quality >= ZOPFLIFICATION_QUALITY) {
if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;
/* For the built-in Brotli transforms, there is a hard-coded function to
handle all transforms, but for custom transforms, we use the following
large hammer instead */
current->has_words_heavy = !default_transforms;
if (current->has_words_heavy) {
if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
#endif /* BROTLI_EXPERIMENTAL */
void duckdb_brotli::BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
dict->magic = kSharedDictionaryMagic;
dict->compound.num_chunks = 0;
dict->compound.total_size = 0;
dict->compound.chunk_offsets[0] = 0;
dict->compound.num_prepared_instances_ = 0;
dict->contextual.context_based = 0;
dict->contextual.num_dictionaries = 1;
dict->contextual.instances_ = 0;
dict->contextual.num_instances_ = 1; /* The instance_ field */
dict->contextual.dict[0] = &dict->contextual.instance_;
InitEncoderDictionary(&dict->contextual.instance_);
dict->contextual.instance_.parent = &dict->contextual;
dict->max_quality = BROTLI_MAX_QUALITY;
}
#if defined(BROTLI_EXPERIMENTAL)
/* TODO(eustas): make sure that tooling will warn user if not all the cutoff
transforms are available (for low-quality encoder). */
static BROTLI_BOOL InitCustomSharedEncoderDictionary(
MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
int quality, SharedEncoderDictionary* dict) {
ContextualEncoderDictionary* contextual;
CompoundDictionary* compound;
BrotliEncoderDictionary* instances;
int i;
BrotliInitSharedEncoderDictionary(dict);
contextual = &dict->contextual;
compound = &dict->compound;
for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
PreparedDictionary* prepared = CreatePreparedDictionary(m,
decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
AttachPreparedDictionary(compound, prepared);
/* remember for cleanup */
compound->prepared_instances_[
compound->num_prepared_instances_++] = prepared;
}
dict->max_quality = quality;
contextual->context_based = decoded_dict->context_based;
if (decoded_dict->context_based) {
memcpy(contextual->context_map, decoded_dict->context_map,
SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
}
contextual->num_dictionaries = decoded_dict->num_dictionaries;
contextual->num_instances_ = decoded_dict->num_dictionaries;
if (contextual->num_instances_ == 1) {
instances = &contextual->instance_;
} else {
contextual->instances_ = (BrotliEncoderDictionary*)
BrotliAllocate(m, sizeof(*contextual->instances_) *
contextual->num_instances_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
instances = contextual->instances_;
}
for (i = 0; i < (int)contextual->num_instances_; i++) {
BrotliEncoderDictionary* current = &instances[i];
InitEncoderDictionary(current);
current->parent = &dict->contextual;
if (decoded_dict->words[i] == BrotliGetDictionary()) {
current->words = BrotliGetDictionary();
} else {
current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
m, sizeof(BrotliDictionary));
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
*current->words_instance_ = *decoded_dict->words[i];
current->words = current->words_instance_;
}
current->num_transforms =
(uint32_t)decoded_dict->transforms[i]->num_transforms;
if (!ComputeDictionary(
m, quality, decoded_dict->transforms[i], current)) {
return BROTLI_FALSE;
}
contextual->dict[i] = current;
}
return BROTLI_TRUE; /* success */
}
BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
int quality, SharedEncoderDictionary* dict) {
BROTLI_BOOL success = BROTLI_FALSE;
BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
m->alloc_func, m->free_func, m->opaque);
if (!decoded_dict) { /* OOM */
return BROTLI_FALSE;
}
success = BrotliSharedDictionaryAttach(
decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
if (success) {
success = InitCustomSharedEncoderDictionary(m,
decoded_dict, quality, dict);
}
BrotliSharedDictionaryDestroyInstance(decoded_dict);
return success;
}
#endif /* BROTLI_EXPERIMENTAL */
void duckdb_brotli::BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
SharedEncoderDictionary* dict) {
size_t i;
for (i = 0; i < dict->compound.num_prepared_instances_; i++) {
DestroyPreparedDictionary(m,
(PreparedDictionary*)dict->compound.prepared_instances_[i]);
}
if (dict->contextual.num_instances_ == 1) {
BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_);
} else if (dict->contextual.num_instances_ > 1) {
for (i = 0; i < dict->contextual.num_instances_; i++) {
BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]);
}
BrotliFree(m, dict->contextual.instances_);
}
}
ManagedDictionary* duckdb_brotli::BrotliCreateManagedDictionary(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc(
sizeof(ManagedDictionary), alloc_func, free_func, opaque);
if (result == NULL) return NULL;
result->magic = kManagedDictionaryMagic;
BrotliInitMemoryManager(
&result->memory_manager_, alloc_func, free_func, opaque);
result->dictionary = NULL;
return result;
}
void duckdb_brotli::BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
if (!dictionary) return;
BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
}
/* Escalate internal functions visibility; for testing purposes only. */
#if defined(BROTLI_TEST)
void InitEncoderDictionaryForTest(BrotliEncoderDictionary*);
void InitEncoderDictionaryForTest(BrotliEncoderDictionary* d) {
InitEncoderDictionary(d);
}
#endif

View File

@@ -0,0 +1,153 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#ifndef BROTLI_ENC_ENCODER_DICT_H_
#define BROTLI_ENC_ENCODER_DICT_H_
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "compound_dictionary.h"
#include "memory.h"
#include "static_dict_lut.h"
namespace duckdb_brotli {
/*
Dictionary hierarchy for Encoder:
-SharedEncoderDictionary
--CompoundDictionary
---PreparedDictionary [up to 15x]
= prefix dictionary with precomputed hashes
--ContextualEncoderDictionary
---BrotliEncoderDictionary [up to 64x]
= for each context, precomputed static dictionary with words + transforms
Dictionary hiearchy from common: similar, but without precomputed hashes
-BrotliSharedDictionary
--BrotliDictionary [up to 64x]
--BrotliTransforms [up to 64x]
--const uint8_t* prefix [up to 15x]: compound dictionaries
*/
typedef struct BrotliTrieNode {
uint8_t single; /* if 1, sub is a single node for c instead of 256 */
uint8_t c;
uint8_t len_; /* untransformed length */
uint32_t idx_; /* word index + num words * transform index */
uint32_t sub; /* index of sub node(s) in the pool */
} BrotliTrieNode;
typedef struct BrotliTrie {
BrotliTrieNode* pool;
size_t pool_capacity;
size_t pool_size;
BrotliTrieNode root;
} BrotliTrie;
#if defined(BROTLI_EXPERIMENTAL)
BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
const BrotliTrieNode* node, uint8_t c);
#endif /* BROTLI_EXPERIMENTAL */
/* Dictionary data (words and transforms) for 1 possible context */
typedef struct BrotliEncoderDictionary {
const BrotliDictionary* words;
uint32_t num_transforms;
/* cut off for fast encoder */
uint32_t cutoffTransformsCount;
uint64_t cutoffTransforms;
/* from dictionary_hash.h, for fast encoder */
const uint16_t* hash_table_words;
const uint8_t* hash_table_lengths;
/* from static_dict_lut.h, for slow encoder */
const uint16_t* buckets;
const DictWord* dict_words;
/* Heavy version, for use by slow encoder when there are custom transforms.
Contains every possible transformed dictionary word in a trie. It encodes
about as fast as the non-heavy encoder but consumes a lot of memory and
takes time to build. */
BrotliTrie trie;
BROTLI_BOOL has_words_heavy;
/* Reference to other dictionaries. */
const struct ContextualEncoderDictionary* parent;
/* Allocated memory, used only when not using the Brotli defaults */
uint16_t* hash_table_data_words_;
uint8_t* hash_table_data_lengths_;
size_t buckets_alloc_size_;
uint16_t* buckets_data_;
size_t dict_words_alloc_size_;
DictWord* dict_words_data_;
BrotliDictionary* words_instance_;
} BrotliEncoderDictionary;
/* Dictionary data for all 64 contexts */
typedef struct ContextualEncoderDictionary {
BROTLI_BOOL context_based;
uint8_t num_dictionaries;
uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
/* If num_instances_ is 1, instance_ is used, else dynamic allocation with
instances_ is used. */
size_t num_instances_;
BrotliEncoderDictionary instance_;
BrotliEncoderDictionary* instances_;
} ContextualEncoderDictionary;
typedef struct SharedEncoderDictionary {
/* Magic value to distinguish this struct from PreparedDictionary for
certain external usages. */
uint32_t magic;
/* LZ77 prefix, compound dictionary */
CompoundDictionary compound;
/* Custom static dictionary (optionally context-based) */
ContextualEncoderDictionary contextual;
/* The maximum quality the dictionary was computed for */
int max_quality;
} SharedEncoderDictionary;
typedef struct ManagedDictionary {
uint32_t magic;
MemoryManager memory_manager_;
uint32_t* dictionary;
} ManagedDictionary;
/* Initializes to the brotli built-in dictionary */
BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
SharedEncoderDictionary* dict);
#if defined(BROTLI_EXPERIMENTAL)
/* Initializes to shared dictionary that will be parsed from
encoded_dict. Requires that you keep the encoded_dict buffer
around, parts of data will point to it. */
BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
int quality, SharedEncoderDictionary* dict);
#endif /* BROTLI_EXPERIMENTAL */
BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
MemoryManager* m, SharedEncoderDictionary* dict);
BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
ManagedDictionary* dictionary);
}
#endif /* BROTLI_ENC_ENCODER_DICT_H_ */

View File

@@ -0,0 +1,500 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Entropy encoding (Huffman) utilities. */
#include "entropy_encode.h"
#include <string.h> /* memset */
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
using namespace duckdb_brotli;
const size_t duckdb_brotli::kBrotliShellGaps[] = {132, 57, 23, 10, 4, 1};
BROTLI_BOOL duckdb_brotli::BrotliSetDepth(
int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
int stack[16];
int level = 0;
int p = p0;
BROTLI_DCHECK(max_depth <= 15);
stack[0] = -1;
while (BROTLI_TRUE) {
if (pool[p].index_left_ >= 0) {
level++;
if (level > max_depth) return BROTLI_FALSE;
stack[level] = pool[p].index_right_or_value_;
p = pool[p].index_left_;
continue;
} else {
depth[pool[p].index_right_or_value_] = (uint8_t)level;
}
while (level >= 0 && stack[level] == -1) level--;
if (level < 0) return BROTLI_TRUE;
p = stack[level];
stack[level] = -1;
}
}
/* Sort the root nodes, least popular first. */
static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
const HuffmanTree* v0, const HuffmanTree* v1) {
if (v0->total_count_ != v1->total_count_) {
return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
}
return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
}
/* This function will create a Huffman tree.
The catch here is that the tree cannot be arbitrarily deep.
Brotli specifies a maximum depth of 15 bits for "code trees"
and 7 bits for "code length code trees."
count_limit is the value that is to be faked as the minimum value
and this minimum value is raised until the tree matches the
maximum length requirement.
This algorithm is not of excellent performance for very long data blocks,
especially when population counts are longer than 2**tree_limit, but
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
void duckdb_brotli::BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t* depth) {
uint32_t count_limit;
HuffmanTree sentinel;
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
would need this, we would be better off with the Katajainen algorithm. */
for (count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
size_t i;
size_t j;
size_t k;
for (i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
}
}
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
break;
}
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
tree[n] = sentinel;
tree[n + 1] = sentinel;
i = 0; /* Points to the next leaf node. */
j = n + 1; /* Points to the next non-leaf node. */
for (k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
{
/* The sentinel node becomes the parent node. */
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = (int16_t)left;
tree[j_end].index_right_or_value_ = (int16_t)right;
/* Add back the last sentinel node. */
tree[j_end + 1] = sentinel;
}
}
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
break;
}
}
}
static void Reverse(uint8_t* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = v[start];
v[start] = v[end];
v[end] = tmp;
++start;
--end;
}
}
static void BrotliWriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
BROTLI_DCHECK(repetitions > 0);
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions == 7) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
size_t start = *tree_size;
repetitions -= 3;
while (BROTLI_TRUE) {
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
static void BrotliWriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
size_t start = *tree_size;
repetitions -= 3;
while (BROTLI_TRUE) {
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
void duckdb_brotli::BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
/* Let's make the Huffman code more compatible with RLE encoding. */
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
}
}
if (nonzero_count < 16) {
return;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return; /* All zeros. */
}
/* Now counts[0..length - 1] does not have trailing zeros. */
{
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
if (smallest_nonzero > counts[i]) {
smallest_nonzero = counts[i];
}
}
}
if (nonzeros < 5) {
/* Small histogram will model it well. */
return;
}
if (smallest_nonzero < 4) {
size_t zeros = length - nonzeros;
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
counts[i] = 1;
}
}
}
}
if (nonzeros < 28) {
return;
}
}
/* 2) Let's mark all population counts that already can be encoded
with an RLE code. */
memset(good_for_rle, 0, length);
{
/* Let's not spoil any of the existing good RLE codes.
Mark any seq of 0's that is longer as 5 as a good_for_rle.
Mark any seq of non-0's that is longer as 7 as a good_for_rle. */
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++step;
}
}
}
/* 3) Let's replace those population counts that lead to more RLE codes.
Math here is in 24.8 fixed point representation. */
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
size_t k;
/* The stride must end, collapse what we have, if we have enough (4). */
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
/* Don't make an all zeros stride to be upgraded to ones. */
count = 0;
}
for (k = 0; k < stride; ++k) {
/* We don't want to change value at counts[i],
that is already belonging to the next stride. Thus - 1. */
counts[i - k - 1] = (uint32_t)count;
}
}
stride = 0;
sum = 0;
if (i < length - 2) {
/* All interesting strides have a count of at least 4, */
/* at least when non-zeros. */
limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
} else if (i < length) {
limit = 256 * counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (256 * sum + stride / 2) / stride;
}
if (stride == 4) {
limit += 120;
}
}
}
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
BROTLI_BOOL* use_rle_for_non_zero,
BROTLI_BOOL* use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
size_t i;
for (i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
size_t k;
for (k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
total_reps_zero += reps;
++count_reps_zero;
}
if (reps >= 4 && value != 0) {
total_reps_non_zero += reps;
++count_reps_non_zero;
}
i += reps;
}
*use_rle_for_non_zero =
TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
*use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
}
void duckdb_brotli::BrotliWriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
size_t i;
BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
/* Throw away trailing zeros. */
size_t new_length = length;
for (i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
break;
}
}
/* First gather statistics on if it is a good idea to do RLE. */
if (length > 50) {
/* Find RLE coding for longer codes.
Shorter codes seem not to benefit from RLE. */
DecideOverRleUse(depth, new_length,
&use_rle_for_non_zero, &use_rle_for_zero);
}
/* Actual RLE coding. */
for (i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
size_t k;
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
BrotliWriteHuffmanTreeRepetitionsZeros(
reps, tree_size, tree, extra_bits_data);
} else {
BrotliWriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
0x00, 0x08, 0x04, 0x0C, 0x02, 0x0A, 0x06, 0x0E,
0x01, 0x09, 0x05, 0x0D, 0x03, 0x0B, 0x07, 0x0F
};
size_t retval = kLut[bits & 0x0F];
size_t i;
for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = (uint16_t)(bits >> 4);
retval |= kLut[bits & 0x0F];
}
retval >>= ((0 - num_bits) & 0x03);
return (uint16_t)retval;
}
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16
void duckdb_brotli::BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
uint16_t* bits) {
/* In Brotli, all bit depths are [1..15]
0 bit depth means that the symbol does not exist. */
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
uint16_t next_code[MAX_HUFFMAN_BITS];
size_t i;
int code = 0;
for (i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
next_code[0] = 0;
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
code = (code + bl_count[i - 1]) << 1;
next_code[i] = (uint16_t)code;
}
for (i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
}
}
}

View File

@@ -0,0 +1,119 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Entropy encoding (Huffman) utilities. */
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
/* A node of a Huffman tree. */
typedef struct HuffmanTree {
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
} HuffmanTree;
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
int16_t left, int16_t right) {
self->total_count_ = count;
self->index_left_ = left;
self->index_right_or_value_ = right;
}
/* Returns 1 is assignment of depths succeeded, otherwise 0. */
BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
/* This function will create a Huffman tree.
The (data,length) contains the population counts.
The tree_limit is the maximum bit depth of the Huffman codes.
The depth contains the tree, i.e., how many bits are used for
the symbol.
The actual Huffman tree is constructed in the tree[] array, which has to
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t* depth);
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its RLE-part will be more
likely to compress this data more efficiently.
length contains the size of the histogram.
counts contains the population counts.
good_for_rle is a buffer of at least length size */
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
size_t length, uint32_t* counts, uint8_t* good_for_rle);
/* Write a Huffman tree from bit depths into the bit-stream representation
of a Huffman tree. The generated Huffman tree is to be compressed once
more using a Huffman tree */
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
/* Get the actual bit values for a tree of bit depths. */
BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
size_t len,
uint16_t* bits);
BROTLI_INTERNAL extern const size_t kBrotliShellGaps[6];
/* Input size optimized Shell sort. */
typedef BROTLI_BOOL (*HuffmanTreeComparator)(
const HuffmanTree*, const HuffmanTree*);
static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
const size_t n, HuffmanTreeComparator comparator) {
if (n < 13) {
/* Insertion sort. */
size_t i;
for (i = 1; i < n; ++i) {
HuffmanTree tmp = items[i];
size_t k = i;
size_t j = i - 1;
while (comparator(&tmp, &items[j])) {
items[k] = items[j];
k = j;
if (!j--) break;
}
items[k] = tmp;
}
return;
} else {
/* Shell sort. */
int g = n < 57 ? 2 : 0;
for (; g < 6; ++g) {
size_t gap = kBrotliShellGaps[g];
size_t i;
for (i = gap; i < n; ++i) {
size_t j = i;
HuffmanTree tmp = items[i];
for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
items[j] = items[j - gap];
}
items[j] = tmp;
}
}
}
}
}
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */

View File

@@ -0,0 +1,538 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Static entropy codes used for faster meta-block encoding. */
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "write_bits.h"
namespace duckdb_brotli {
static const uint8_t kCodeLengthDepth[18] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
};
static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
};
static const uint8_t kStaticDistanceCodeDepth[64] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
};
/* GENERATED CODE START */
static const uint32_t kCodeLengthBits[18] = {
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
};
static BROTLI_INLINE void StoreStaticCodeLengthCode(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
40, BROTLI_MAKE_UINT64_T(0x0000FFu, 0x55555554u), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
0x06f9cb87, 0x08f9cb87,
};
static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
};
static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
0x2baeb6db, 0x3baeb6db,
};
static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
};
static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 256, 128, 384, 64, 320, 192, 448,
32, 288, 160, 416, 96, 352, 224, 480,
16, 272, 144, 400, 80, 336, 208, 464,
48, 304, 176, 432, 112, 368, 240, 496,
8, 264, 136, 392, 72, 328, 200, 456,
40, 296, 168, 424, 104, 360, 232, 488,
24, 280, 152, 408, 88, 344, 216, 472,
56, 312, 184, 440, 120, 376, 248, 504,
4, 260, 132, 388, 68, 324, 196, 452,
36, 292, 164, 420, 100, 356, 228, 484,
20, 276, 148, 404, 84, 340, 212, 468,
52, 308, 180, 436, 116, 372, 244, 500,
12, 268, 140, 396, 76, 332, 204, 460,
44, 300, 172, 428, 108, 364, 236, 492,
28, 284, 156, 412, 92, 348, 220, 476,
60, 316, 188, 444, 124, 380, 252, 508,
2, 258, 130, 386, 66, 322, 194, 450,
34, 290, 162, 418, 98, 354, 226, 482,
18, 274, 146, 402, 82, 338, 210, 466,
50, 306, 178, 434, 114, 370, 242, 498,
10, 266, 138, 394, 74, 330, 202, 458,
42, 298, 170, 426, 106, 362, 234, 490,
26, 282, 154, 410, 90, 346, 218, 474,
58, 314, 186, 442, 122, 378, 250, 506,
6, 262, 134, 390, 70, 326, 198, 454,
38, 294, 166, 422, 102, 358, 230, 486,
22, 278, 150, 406, 86, 342, 214, 470,
54, 310, 182, 438, 118, 374, 246, 502,
14, 270, 142, 398, 78, 334, 206, 462,
46, 302, 174, 430, 110, 366, 238, 494,
30, 286, 158, 414, 94, 350, 222, 478,
62, 318, 190, 446, 126, 382, 254, 510,
1, 257, 129, 385, 65, 321, 193, 449,
33, 289, 161, 417, 97, 353, 225, 481,
17, 273, 145, 401, 81, 337, 209, 465,
49, 305, 177, 433, 113, 369, 241, 497,
9, 265, 137, 393, 73, 329, 201, 457,
41, 297, 169, 425, 105, 361, 233, 489,
25, 281, 153, 409, 89, 345, 217, 473,
57, 313, 185, 441, 121, 377, 249, 505,
5, 261, 133, 389, 69, 325, 197, 453,
37, 293, 165, 421, 101, 357, 229, 485,
21, 277, 149, 405, 85, 341, 213, 469,
53, 309, 181, 437, 117, 373, 245, 501,
13, 269, 141, 397, 77, 333, 205, 461,
45, 301, 173, 429, 109, 365, 237, 493,
29, 285, 157, 413, 93, 349, 221, 477,
61, 317, 189, 445, 125, 381, 253, 509,
3, 259, 131, 387, 67, 323, 195, 451,
35, 291, 163, 419, 99, 355, 227, 483,
19, 275, 147, 403, 83, 339, 211, 467,
51, 307, 179, 435, 115, 371, 243, 499,
11, 267, 139, 395, 75, 331, 203, 459,
43, 299, 171, 427, 107, 363, 235, 491,
27, 283, 155, 411, 91, 347, 219, 475,
59, 315, 187, 443, 123, 379, 251, 507,
7, 1031, 519, 1543, 263, 1287, 775, 1799,
135, 1159, 647, 1671, 391, 1415, 903, 1927,
71, 1095, 583, 1607, 327, 1351, 839, 1863,
199, 1223, 711, 1735, 455, 1479, 967, 1991,
39, 1063, 551, 1575, 295, 1319, 807, 1831,
167, 1191, 679, 1703, 423, 1447, 935, 1959,
103, 1127, 615, 1639, 359, 1383, 871, 1895,
231, 1255, 743, 1767, 487, 1511, 999, 2023,
23, 1047, 535, 1559, 279, 1303, 791, 1815,
151, 1175, 663, 1687, 407, 1431, 919, 1943,
87, 1111, 599, 1623, 343, 1367, 855, 1879,
215, 1239, 727, 1751, 471, 1495, 983, 2007,
55, 1079, 567, 1591, 311, 1335, 823, 1847,
183, 1207, 695, 1719, 439, 1463, 951, 1975,
119, 1143, 631, 1655, 375, 1399, 887, 1911,
247, 1271, 759, 1783, 503, 1527, 1015, 2039,
15, 1039, 527, 1551, 271, 1295, 783, 1807,
143, 1167, 655, 1679, 399, 1423, 911, 1935,
79, 1103, 591, 1615, 335, 1359, 847, 1871,
207, 1231, 719, 1743, 463, 1487, 975, 1999,
47, 1071, 559, 1583, 303, 1327, 815, 1839,
175, 1199, 687, 1711, 431, 1455, 943, 1967,
111, 1135, 623, 1647, 367, 1391, 879, 1903,
239, 1263, 751, 1775, 495, 1519, 1007, 2031,
31, 1055, 543, 1567, 287, 1311, 799, 1823,
159, 1183, 671, 1695, 415, 1439, 927, 1951,
95, 1119, 607, 1631, 351, 1375, 863, 1887,
223, 1247, 735, 1759, 479, 1503, 991, 2015,
63, 1087, 575, 1599, 319, 1343, 831, 1855,
191, 1215, 703, 1727, 447, 1471, 959, 1983,
127, 1151, 639, 1663, 383, 1407, 895, 1919,
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
56, BROTLI_MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
}
static const uint16_t kStaticDistanceCodeBits[64] = {
0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
};
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(28, 0x0369DC03u, storage_ix, storage);
}
/* GENERATED CODE END */
}
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */

View File

@@ -0,0 +1,101 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "fast_log.h"
using namespace duckdb_brotli;
/* ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
const double duckdb_brotli::kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
7.9943534368588578f
};

View File

@@ -0,0 +1,63 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Utilities for fast computation of logarithms. */
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <math.h>
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
#if defined(BROTLI_BSR32)
return BROTLI_BSR32((uint32_t)n);
#else
uint32_t result = 0;
while (n >>= 1) result++;
return result;
#endif
}
#define BROTLI_LOG2_TABLE_SIZE 256
/* A lookup table for small values of log2(int) to be used in entropy
computation. */
BROTLI_INTERNAL extern const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE];
/* Visual Studio 2012 and Android API levels < 18 do not have the log2()
* function defined, so we use log() and a multiplication instead. */
#if !defined(BROTLI_HAVE_LOG2)
#if ((defined(_MSC_VER) && _MSC_VER <= 1700) || \
(defined(__ANDROID_API__) && __ANDROID_API__ < 18))
#define BROTLI_HAVE_LOG2 0
#else
#define BROTLI_HAVE_LOG2 1
#endif
#endif
#define LOG_2_INV 1.4426950408889634
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
static BROTLI_INLINE double FastLog2(size_t v) {
if (v < BROTLI_LOG2_TABLE_SIZE) {
return kBrotliLog2Table[v];
}
#if !(BROTLI_HAVE_LOG2)
return log((double)v) * LOG_2_INV;
#else
return log2((double)v);
#endif
}
}
#endif /* BROTLI_ENC_FAST_LOG_H_ */

View File

@@ -0,0 +1,68 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find maximal matching prefixes of strings. */
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(BROTLI_TZCNT64) && BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
const uint8_t *s1_orig = s1;
for (; limit >= 8; limit -= 8) {
uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
BROTLI_UNALIGNED_LOAD64LE(s1);
s2 += 8;
if (x != 0) {
size_t matching_bits = (size_t)BROTLI_TZCNT64(x);
return (size_t)(s1 - s1_orig) + (matching_bits >> 3);
}
s1 += 8;
}
while (limit && *s1 == *s2) {
limit--;
++s2;
++s1;
}
return (size_t)(s1 - s1_orig);
}
#else
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
/* Find out how long the match is. We loop over the data 32 bits at a
time until we find a 32-bit block that doesn't match; then we find
the first non-matching bit and use that to calculate the total
length of the match. */
while (s2_ptr <= s2_limit - 4 &&
BrotliUnalignedRead32(s2_ptr) ==
BrotliUnalignedRead32(s1 + matched)) {
s2_ptr += 4;
matched += 4;
}
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
++s2_ptr;
++matched;
}
return matched;
}
#endif
}
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */

View File

@@ -0,0 +1,96 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Build per-context histograms of literals, commands and distance codes. */
#include "histogram.h"
#include "../common/context.h"
#include "block_splitter.h"
#include "command.h"
using namespace duckdb_brotli;
typedef struct BlockSplitIterator {
const BlockSplit* split_; /* Not owned. */
size_t idx_;
size_t type_;
size_t length_;
} BlockSplitIterator;
static void InitBlockSplitIterator(BlockSplitIterator* self,
const BlockSplit* split) {
self->split_ = split;
self->idx_ = 0;
self->type_ = 0;
self->length_ = split->lengths ? split->lengths[0] : 0;
}
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
if (self->length_ == 0) {
++self->idx_;
self->type_ = self->split_->types[self->idx_];
self->length_ = self->split_->lengths[self->idx_];
}
--self->length_;
}
void duckdb_brotli::BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms) {
size_t pos = start_pos;
BlockSplitIterator literal_it;
BlockSplitIterator insert_and_copy_it;
BlockSplitIterator dist_it;
size_t i;
InitBlockSplitIterator(&literal_it, literal_split);
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
InitBlockSplitIterator(&dist_it, dist_split);
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
size_t j;
BlockSplitIteratorNext(&insert_and_copy_it);
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
cmd->cmd_prefix_);
/* TODO(eustas): unwrap iterator blocks. */
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
context = literal_it.type_;
if (context_modes) {
ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
}
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += CommandCopyLen(cmd);
if (CommandCopyLen(cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd->cmd_prefix_ >= 128) {
size_t context;
BlockSplitIteratorNext(&dist_it);
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
CommandDistanceContext(cmd);
HistogramAddDistance(&copy_dist_histograms[context],
cmd->dist_prefix_ & 0x3FF);
}
}
}
}

View File

@@ -0,0 +1,210 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Models the histograms of literals, commands and distance codes. */
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <string.h> /* memset */
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/context.h"
#include "../common/brotli_platform.h"
#include "block_splitter.h"
#include "command.h"
namespace duckdb_brotli {
/* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
#define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
#define FN(X) X ## Literal
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
#define DataType uint8_t
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: Histogram, DATA_SIZE, DataType */
/* A simple container for histograms of data in blocks. */
typedef struct FN(Histogram) {
uint32_t data_[DATA_SIZE];
size_t total_count_;
double bit_cost_;
} FN(Histogram);
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
memset(self->data_, 0, sizeof(self->data_));
self->total_count_ = 0;
self->bit_cost_ = HUGE_VAL;
}
static BROTLI_INLINE void FN(ClearHistograms)(
FN(Histogram)* array, size_t length) {
size_t i;
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
}
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
++self->data_[val];
++self->total_count_;
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
const DataType* p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
}
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
const FN(Histogram)* v) {
size_t i;
self->total_count_ += v->total_count_;
for (i = 0; i < DATA_SIZE; ++i) {
self->data_[i] += v->data_[i];
}
}
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
#undef DataType
#undef DATA_SIZE
#undef FN
#define FN(X) X ## Command
#define DataType uint16_t
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: Histogram, DATA_SIZE, DataType */
/* A simple container for histograms of data in blocks. */
typedef struct FN(Histogram) {
uint32_t data_[DATA_SIZE];
size_t total_count_;
double bit_cost_;
} FN(Histogram);
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
memset(self->data_, 0, sizeof(self->data_));
self->total_count_ = 0;
self->bit_cost_ = HUGE_VAL;
}
static BROTLI_INLINE void FN(ClearHistograms)(
FN(Histogram)* array, size_t length) {
size_t i;
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
}
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
++self->data_[val];
++self->total_count_;
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
const DataType* p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
}
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
const FN(Histogram)* v) {
size_t i;
self->total_count_ += v->total_count_;
for (i = 0; i < DATA_SIZE; ++i) {
self->data_[i] += v->data_[i];
}
}
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
#undef DATA_SIZE
#undef FN
#define FN(X) X ## Distance
#define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: Histogram, DATA_SIZE, DataType */
/* A simple container for histograms of data in blocks. */
typedef struct FN(Histogram) {
uint32_t data_[DATA_SIZE];
size_t total_count_;
double bit_cost_;
} FN(Histogram);
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
memset(self->data_, 0, sizeof(self->data_));
self->total_count_ = 0;
self->bit_cost_ = HUGE_VAL;
}
static BROTLI_INLINE void FN(ClearHistograms)(
FN(Histogram)* array, size_t length) {
size_t i;
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
}
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
++self->data_[val];
++self->total_count_;
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
const DataType* p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
}
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
const FN(Histogram)* v) {
size_t i;
self->total_count_ += v->total_count_;
for (i = 0; i < DATA_SIZE; ++i) {
self->data_[i] += v->data_[i];
}
}
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
#undef DataType
#undef DATA_SIZE
#undef FN
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms);
}
#endif /* BROTLI_ENC_HISTOGRAM_H_ */

View File

@@ -0,0 +1,176 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Literal cost model to allow backward reference replacement to be efficient.
*/
#include "literal_cost.h"
#include <string.h> /* memset */
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "fast_log.h"
#include "utf8_util.h"
using namespace duckdb_brotli;
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; /* Next one is the 'Byte 1' again. */
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Let's decide over the last byte if this ends the sequence. */
if (last < 0xE0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return BROTLI_MIN(size_t, 2, clamp);
}
}
}
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t* data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
size_t i;
for (i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
++counts[UTF8Position(last_c, c, 2)];
last_c = c;
}
if (counts[2] < 500) {
max_utf8 = 1;
}
if (counts[1] + counts[2] < 25) {
max_utf8 = 0;
}
return max_utf8;
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const uint8_t* data,
size_t* histogram, float* cost) {
/* max_utf8 is 0 (normal ASCII single byte modeling),
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t window_half = 495;
size_t in_window = BROTLI_MIN(size_t, window_half, len);
size_t in_window_utf8[3] = { 0 };
size_t i;
memset(histogram, 0, 3 * 256 * sizeof(histogram[0]));
{ /* Bootstrap histograms. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[256 * utf8_pos + c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
}
/* Compute bit costs with sliding window. */
for (i = 0; i < len; ++i) {
if (i >= window_half) {
/* Remove a byte in the past. */
size_t c =
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
size_t last_c =
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[256 * utf8_pos2 + data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
/* Add a byte in the future. */
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[256 * utf8_pos2 + data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
{
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
double lit_cost;
if (histo == 0) {
histo = 1;
}
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if (i < 2000) {
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
}
cost[i] = (float)lit_cost;
}
}
}
void duckdb_brotli::BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t* data,
size_t* histogram, float* cost) {
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, histogram, cost);
return;
} else {
size_t window_half = 2000;
size_t in_window = BROTLI_MIN(size_t, window_half, len);
size_t i;
memset(histogram, 0, 256 * sizeof(histogram[0]));
/* Bootstrap histogram. */
for (i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
/* Compute bit costs with sliding window. */
for (i = 0; i < len; ++i) {
size_t histo;
if (i >= window_half) {
/* Remove a byte in the past. */
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
/* Add a byte in the future. */
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
{
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = (float)lit_cost;
}
}
}
}

View File

@@ -0,0 +1,28 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Literal cost model to allow backward reference replacement to be efficient.
*/
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
ring-buffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
size_t pos, size_t len, size_t mask, const uint8_t* data, size_t* histogram,
float* cost);
}
#endif /* BROTLI_ENC_LITERAL_COST_H_ */

View File

@@ -0,0 +1,190 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
#include "memory.h"
#include <stdlib.h> /* exit, free, malloc */
#include <string.h> /* memcpy */
#include <brotli/types.h>
#include "../common/brotli_platform.h"
using namespace duckdb_brotli;
#define MAX_NEW_ALLOCATED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 2)
#define MAX_NEW_FREED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 2)
#define MAX_PERM_ALLOCATED (BROTLI_ENCODER_MEMORY_MANAGER_SLOTS >> 1)
#define PERM_ALLOCATED_OFFSET 0
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
void duckdb_brotli::BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque) {
if (!alloc_func) {
m->alloc_func = duckdb_brotli::BrotliDefaultAllocFunc;
m->free_func = duckdb_brotli::BrotliDefaultFreeFunc;
m->opaque = 0;
} else {
m->alloc_func = alloc_func;
m->free_func = free_func;
m->opaque = opaque;
}
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
m->is_oom = BROTLI_FALSE;
m->perm_allocated = 0;
m->new_allocated = 0;
m->new_freed = 0;
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
}
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
void* duckdb_brotli::BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) exit(EXIT_FAILURE);
return result;
}
void duckdb_brotli::BrotliFree(MemoryManager* m, void* p) {
m->free_func(m->opaque, p);
}
void duckdb_brotli::BrotliWipeOutMemoryManager(MemoryManager* m) {
BROTLI_UNUSED(m);
}
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
void SortPointers(void** items, const size_t n) {
/* Shell sort. */
/* TODO(eustas): fine-tune for "many slots" case */
static const size_t gaps[] = {23, 10, 4, 1};
int g = 0;
for (; g < 4; ++g) {
size_t gap = gaps[g];
size_t i;
for (i = gap; i < n; ++i) {
size_t j = i;
void* tmp = items[i];
for (; j >= gap && tmp < items[j - gap]; j -= gap) {
items[j] = items[j - gap];
}
items[j] = tmp;
}
}
}
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
size_t a_read_index = 0;
size_t b_read_index = 0;
size_t a_write_index = 0;
size_t b_write_index = 0;
size_t annihilated = 0;
while (a_read_index < a_len && b_read_index < b_len) {
if (a[a_read_index] == b[b_read_index]) {
a_read_index++;
b_read_index++;
annihilated++;
} else if (a[a_read_index] < b[b_read_index]) {
a[a_write_index++] = a[a_read_index++];
} else {
b[b_write_index++] = b[b_read_index++];
}
}
while (a_read_index < a_len) a[a_write_index++] = a[a_read_index++];
while (b_read_index < b_len) b[b_write_index++] = b[b_read_index++];
return annihilated;
}
static void CollectGarbagePointers(MemoryManager* m) {
size_t annihilated;
SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
annihilated = Annihilate(
m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->new_allocated -= annihilated;
m->new_freed -= annihilated;
if (m->new_freed != 0) {
annihilated = Annihilate(
m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->perm_allocated -= annihilated;
m->new_freed -= annihilated;
BROTLI_DCHECK(m->new_freed == 0);
}
if (m->new_allocated != 0) {
BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
m->pointers + NEW_ALLOCATED_OFFSET,
sizeof(void*) * m->new_allocated);
m->perm_allocated += m->new_allocated;
m->new_allocated = 0;
SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
}
}
void* duckdb_brotli::BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) {
m->is_oom = BROTLI_TRUE;
return NULL;
}
if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
return result;
}
void duckdb_brotli::BrotliFree(MemoryManager* m, void* p) {
if (!p) return;
m->free_func(m->opaque, p);
if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
}
void duckdb_brotli::BrotliWipeOutMemoryManager(MemoryManager* m) {
size_t i;
CollectGarbagePointers(m);
/* Now all unfreed pointers are in perm-allocated list. */
for (i = 0; i < m->perm_allocated; ++i) {
m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
}
m->perm_allocated = 0;
}
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
void* duckdb_brotli::BrotliBootstrapAlloc(size_t size,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
if (!alloc_func && !free_func) {
return malloc(size);
} else if (alloc_func && free_func) {
return alloc_func(opaque, size);
}
return NULL;
}
void duckdb_brotli::BrotliBootstrapFree(void* address, MemoryManager* m) {
if (!address) {
/* Should not happen! */
return;
} else {
/* Copy values, as those would be freed. */
brotli_free_func free_func = m->free_func;
void* opaque = m->opaque;
free_func(opaque, address);
}
}

View File

@@ -0,0 +1,127 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Macros for memory management. */
#ifndef BROTLI_ENC_MEMORY_H_
#define BROTLI_ENC_MEMORY_H_
#include <string.h> /* memcpy */
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
!defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_ENCODER_EXIT_ON_OOM
#endif
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
#if defined(BROTLI_EXPERIMENTAL)
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS (48*1024)
#else /* BROTLI_EXPERIMENTAL */
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS 256
#endif /* BROTLI_EXPERIMENTAL */
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
#define BROTLI_ENCODER_MEMORY_MANAGER_SLOTS 0
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
typedef struct MemoryManager {
brotli_alloc_func alloc_func;
brotli_free_func free_func;
void* opaque;
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
BROTLI_BOOL is_oom;
size_t perm_allocated;
size_t new_allocated;
size_t new_freed;
void* pointers[BROTLI_ENCODER_MEMORY_MANAGER_SLOTS];
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
} MemoryManager;
BROTLI_INTERNAL void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque);
BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
#define BROTLI_ALLOC(M, T, N) \
((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
#define BROTLI_FREE(M, P) { \
BrotliFree((M), (P)); \
P = NULL; \
}
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_IS_OOM(M) (!!0)
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
/*
BROTLI_IS_NULL is a fake check, BROTLI_IS_OOM does the heavy lifting.
The only purpose of it is to explain static analyzers the state of things.
NB: use ONLY together with BROTLI_IS_OOM
AND ONLY for allocations in the current scope.
*/
#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_IS_NULL(A) ((A) == nullptr)
#else /* defined(__clang_analyzer__) */
#define BROTLI_IS_NULL(A) (!!0)
#endif /* defined(__clang_analyzer__) */
BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
/*
Dynamically grows array capacity to at least the requested size
M: MemoryManager
T: data type
A: array
C: capacity
R: requested size
*/
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
if (C < (R)) { \
size_t _new_size = (C == 0) ? (R) : C; \
T* new_array; \
while (_new_size < (R)) _new_size *= 2; \
new_array = BROTLI_ALLOC((M), T, _new_size); \
if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
memcpy(new_array, A, C * sizeof(T)); \
BROTLI_FREE((M), A); \
A = new_array; \
C = _new_size; \
} \
}
/*
Appends value and dynamically grows array capacity when needed
M: MemoryManager
T: data type
A: array
C: array capacity
S: array size
V: value to append
*/
#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
(S)++; \
BROTLI_ENSURE_CAPACITY(M, T, A, C, S); \
A[(S) - 1] = (V); \
}
/* "Bootstrap" allocations are not tracked by memory manager; should be used
only to allocate MemoryManager itself (or structure containing it). */
BROTLI_INTERNAL void* BrotliBootstrapAlloc(size_t size,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_INTERNAL void BrotliBootstrapFree(void* address, MemoryManager* m);
}
#endif /* BROTLI_ENC_MEMORY_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,102 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <brotli/types.h>
#include "../common/context.h"
#include "../common/brotli_platform.h"
#include "block_splitter.h"
#include "command.h"
#include "histogram.h"
#include "memory.h"
#include "quality.h"
namespace duckdb_brotli {
typedef struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
uint32_t* literal_context_map;
size_t literal_context_map_size;
uint32_t* distance_context_map;
size_t distance_context_map_size;
HistogramLiteral* literal_histograms;
size_t literal_histograms_size;
HistogramCommand* command_histograms;
size_t command_histograms_size;
HistogramDistance* distance_histograms;
size_t distance_histograms_size;
} MetaBlockSplit;
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
BrotliInitBlockSplit(&mb->literal_split);
BrotliInitBlockSplit(&mb->command_split);
BrotliInitBlockSplit(&mb->distance_split);
mb->literal_context_map = 0;
mb->literal_context_map_size = 0;
mb->distance_context_map = 0;
mb->distance_context_map_size = 0;
mb->literal_histograms = 0;
mb->literal_histograms_size = 0;
mb->command_histograms = 0;
mb->command_histograms_size = 0;
mb->distance_histograms = 0;
mb->distance_histograms_size = 0;
}
static BROTLI_INLINE void DestroyMetaBlockSplit(
MemoryManager* m, MetaBlockSplit* mb) {
BrotliDestroyBlockSplit(m, &mb->literal_split);
BrotliDestroyBlockSplit(m, &mb->command_split);
BrotliDestroyBlockSplit(m, &mb->distance_split);
BROTLI_FREE(m, mb->literal_context_map);
BROTLI_FREE(m, mb->distance_context_map);
BROTLI_FREE(m, mb->literal_histograms);
BROTLI_FREE(m, mb->command_histograms);
BROTLI_FREE(m, mb->distance_histograms);
}
/* Uses the slow shortest-path block splitter and does context clustering.
The distance parameters are dynamically selected based on the commands
which get recomputed under the new distance parameters. The new distance
parameters are stored into *params. */
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
BrotliEncoderParams* params,
uint8_t prev_byte,
uint8_t prev_byte2,
Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which
is the same for all block types. */
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
size_t num_contexts, const uint32_t* static_context_map,
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliDistanceParams* params,
uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window);
}
#endif /* BROTLI_ENC_METABLOCK_H_ */

View File

@@ -0,0 +1,50 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for encoding of integers into prefix codes the amount of extra
bits, and the actual values of the extra bits. */
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include <brotli/types.h>
#include "../common/brotli_constants.h"
#include "../common/brotli_platform.h"
#include "fast_log.h"
namespace duckdb_brotli {
/* Here distance_code is an intermediate code, i.e. one of the special codes or
the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
*code = (uint16_t)distance_code;
*extra_bits = 0;
return;
} else {
size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
size_t bucket = Log2FloorNonZero(dist) - 1;
size_t postfix_mask = (1u << postfix_bits) - 1;
size_t postfix = dist & postfix_mask;
size_t prefix = (dist >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = (uint16_t)((nbits << 10) |
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
}
}
}
#endif /* BROTLI_ENC_PREFIX_H_ */

View File

@@ -0,0 +1,202 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Constants and formulas that affect speed-ratio trade-offs and thus define
quality levels. */
#ifndef BROTLI_ENC_QUALITY_H_
#define BROTLI_ENC_QUALITY_H_
#include <brotli/encode.h>
#include "../common/brotli_platform.h"
#include "brotli_params.h"
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
#define ZOPFLIFICATION_QUALITY 10
#define HQ_ZOPFLIFICATION_QUALITY 11
#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4
#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
#define MIN_QUALITY_FOR_CONTEXT_MODELING 5
#define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
#define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
/* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
so we buffer at most this much literals and commands. */
#define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
/* Returns hash-table size for quality levels 0 and 1. */
static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
return quality == FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 15 : 1 << 17;
}
/* The maximum length for which the zopflification uses distinct distances. */
#define MAX_ZOPFLI_LEN_QUALITY_10 150
#define MAX_ZOPFLI_LEN_QUALITY_11 325
/* Do not thoroughly search when a long copy is found. */
#define BROTLI_LONG_COPY_QUICK_STEP 16384
static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) {
return params->quality <= 10 ?
MAX_ZOPFLI_LEN_QUALITY_10 :
MAX_ZOPFLI_LEN_QUALITY_11;
}
/* Number of best candidates to evaluate to expand Zopfli chain. */
static BROTLI_INLINE size_t MaxZopfliCandidates(
const BrotliEncoderParams* params) {
return params->quality <= 10 ? 1 : 5;
}
static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) {
params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY,
BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
params->large_window = BROTLI_FALSE;
}
if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
params->lgwin = BROTLI_MIN_WINDOW_BITS;
} else {
int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS :
BROTLI_MAX_WINDOW_BITS;
if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
}
}
/* Returns optimized lg_block value. */
static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
int lgblock = params->lgblock;
if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
lgblock = params->lgwin;
} else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
lgblock = 14;
} else if (lgblock == 0) {
lgblock = 16;
if (params->quality >= 9 && params->lgwin > lgblock) {
lgblock = BROTLI_MIN(int, 18, params->lgwin);
}
} else {
lgblock = BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock));
}
return lgblock;
}
/* Returns log2 of the size of main ring buffer area.
Allocate at least lgwin + 1 bits for the ring buffer so that the newly
added block fits there completely and we still get lgwin bits and at least
read_block_size_bits + 1 bits because the copy tail length needs to be
smaller than ring-buffer size. */
static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
return 1 + BROTLI_MAX(int, params->lgwin, params->lgblock);
}
static BROTLI_INLINE size_t MaxMetablockSize(
const BrotliEncoderParams* params) {
int bits =
BROTLI_MIN(int, ComputeRbBits(params), BROTLI_MAX_INPUT_BLOCK_BITS);
return (size_t)1 << bits;
}
/* When searching for backward references and have not seen matches for a long
time, we can skip some match lookups. Unsuccessful match lookups are very
expensive and this kind of a heuristic speeds up compression quite a lot.
At first 8 byte strides are taken and every second byte is put to hasher.
After 4x more literals stride by 16 bytes, every put 4-th byte to hasher.
Applied only to qualities 2 to 9. */
static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
const BrotliEncoderParams* params) {
return params->quality < 9 ? 64 : 512;
}
/* Quality to hasher mapping:
- q02: h02 (longest_match_quickly), b16, l5
- q03: h03 (longest_match_quickly), b17, l5
- q04: h04 (longest_match_quickly), b17, l5
- q04: h54 (longest_match_quickly), b20, l7 | for large files
- q05: h05 (longest_match ), b14, l4
- q05: h06 (longest_match64 ), b15, l5 | for large files
- q05: h40 (forgetful_chain ), b15, l4 | for small window
- q06: h05 (longest_match ), b14, l4
- q06: h06 (longest_match64 ), b15, l5 | for large files
- q06: h40 (forgetful_chain ), b15, l4 | for small window
- q07: h05 (longest_match ), b15, l4
- q07: h06 (longest_match64 ), b15, l5 | for large files
- q07: h41 (forgetful_chain ), b15, l4 | for small window
- q08: h05 (longest_match ), b15, l4
- q08: h06 (longest_match64 ), b15, l5 | for large files
- q08: h41 (forgetful_chain ), b15, l4 | for small window
- q09: h05 (longest_match ), b15, l4
- q09: h06 (longest_match64 ), b15, l5 | for large files
- q09: h42 (forgetful_chain ), b15, l4 | for small window
- q10: t10 (to_binary_tree ), b17, l128
- q11: t10 (to_binary_tree ), b17, l128
Where "q" is quality, "h" is hasher type, "b" is bucket bits,
"l" is source len. */
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
BrotliHasherParams* hparams) {
if (params->quality > 9) {
hparams->type = 10;
} else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
hparams->type = 54;
} else if (params->quality < 5) {
hparams->type = params->quality;
} else if (params->lgwin <= 16) {
hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
} else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
hparams->type = 6;
hparams->block_bits = params->quality - 1;
hparams->bucket_bits = 15;
hparams->num_last_distances_to_check =
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
} else {
/* TODO(eustas): often previous setting (H6) is faster and denser; consider
adding an option to use it. */
hparams->type = 5;
hparams->block_bits = params->quality - 1;
hparams->bucket_bits = params->quality < 7 ? 14 : 15;
hparams->num_last_distances_to_check =
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
}
if (params->lgwin > 24) {
/* Different hashers for large window brotli: not for qualities <= 2,
these are too fast for large window. Not for qualities >= 10: their
hasher already works well with large window. So the changes are:
H3 --> H35: for quality 3.
H54 --> H55: for quality 4 with size hint > 1MB
H6 --> H65: for qualities 5, 6, 7, 8, 9. */
if (hparams->type == 3) {
hparams->type = 35;
}
if (hparams->type == 54) {
hparams->type = 55;
}
if (hparams->type == 6) {
hparams->type = 65;
}
}
}
#endif /* BROTLI_ENC_QUALITY_H_ */

View File

@@ -0,0 +1,24 @@
# brotli uses a weird c templating mechanism using _inc.h files
# this does not play well with things like amalagamation
# this script inlines the variuos headers
import os
import re
for filename in os.listdir('.'):
if not (filename.endswith('.cpp') or filename.endswith('.h')):
continue
file_lines = open(filename, 'r').readlines()
if '_inc.h' not in '\n'.join(file_lines):
continue
out = open (filename, 'w')
for line in file_lines:
if '#include' in line and '_inc.h' in line:
match = re.search(r'#include\s+"(.+)".*', line).group(1)
include = open(match, 'r').readlines();
out.write(''.join(include))
continue
out.write(line)

View File

@@ -0,0 +1,164 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Sliding window over the input data. */
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
#include <string.h> /* memcpy */
#include <brotli/types.h>
#include "../common/brotli_platform.h"
#include "memory.h"
#include "quality.h"
namespace duckdb_brotli {
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
`position() % (1 << window_bits)'.
For convenience, the RingBuffer array contains another copy of the
first `1 << tail_bits' bytes:
buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
and another copy of the last two bytes:
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
typedef struct RingBuffer {
/* Size of the ring-buffer is (1 << window_bits) + tail_size_. */
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
/* Position to write in the ring buffer. */
uint32_t pos_;
/* The actual ring buffer containing the copy of the last two bytes, the data,
and the copy of the beginning as a tail. */
uint8_t* data_;
/* The start of the ring-buffer. */
uint8_t* buffer_;
} RingBuffer;
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
rb->cur_size_ = 0;
rb->pos_ = 0;
rb->data_ = 0;
rb->buffer_ = 0;
}
static BROTLI_INLINE void RingBufferSetup(
const BrotliEncoderParams* params, RingBuffer* rb) {
int window_bits = ComputeRbBits(params);
int tail_bits = params->lgblock;
*(uint32_t*)&rb->size_ = 1u << window_bits;
*(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
*(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
*(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
}
static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
BROTLI_FREE(m, rb->data_);
}
/* Allocates or re-allocates data_ to the given length + plus some slack
region before and after. Fills the slack regions with zeros. */
static BROTLI_INLINE void RingBufferInitBuffer(
MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
uint8_t* new_data = BROTLI_ALLOC(
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
size_t i;
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;
if (rb->data_) {
memcpy(new_data, rb->data_,
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
BROTLI_FREE(m, rb->data_);
}
rb->data_ = new_data;
rb->cur_size_ = buflen;
rb->buffer_ = rb->data_ + 2;
rb->buffer_[-2] = rb->buffer_[-1] = 0;
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
rb->buffer_[rb->cur_size_ + i] = 0;
}
}
static BROTLI_INLINE void RingBufferWriteTail(
const uint8_t* bytes, size_t n, RingBuffer* rb) {
const size_t masked_pos = rb->pos_ & rb->mask_;
if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
/* Just fill the tail buffer with the beginning data. */
const size_t p = rb->size_ + masked_pos;
memcpy(&rb->buffer_[p], bytes,
BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
}
}
/* Push bytes into the ring buffer. */
static BROTLI_INLINE void RingBufferWrite(
MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
if (rb->pos_ == 0 && n < rb->tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ring-buffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
rb->pos_ = (uint32_t)n;
RingBufferInitBuffer(m, rb->pos_, rb);
if (BROTLI_IS_OOM(m)) return;
memcpy(rb->buffer_, bytes, n);
return;
}
if (rb->cur_size_ < rb->total_size_) {
/* Lazily allocate the full buffer. */
RingBufferInitBuffer(m, rb->total_size_, rb);
if (BROTLI_IS_OOM(m)) return;
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
rb->buffer_[rb->size_ - 2] = 0;
rb->buffer_[rb->size_ - 1] = 0;
/* Initialize tail; might be touched by "best_len++" optimization when
ring buffer is "full". */
rb->buffer_[rb->size_] = 241;
}
{
const size_t masked_pos = rb->pos_ & rb->mask_;
/* The length of the writes is limited so that we do not need to worry
about a write */
RingBufferWriteTail(bytes, n, rb);
if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
/* A single write fits. */
memcpy(&rb->buffer_[masked_pos], bytes, n);
} else {
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
memcpy(&rb->buffer_[masked_pos], bytes,
BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
/* Copy into the beginning of the buffer */
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
n - (rb->size_ - masked_pos));
}
}
{
BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
uint32_t rb_pos_mask = (1u << 31) - 1;
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
if (not_first_lap) {
/* Wrap, but preserve not-a-first-lap feature. */
rb->pos_ |= 1u << 31;
}
}
}
}
#endif /* BROTLI_ENC_RINGBUFFER_H_ */

View File

@@ -0,0 +1,106 @@
/* Copyright 2022 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Encoder state. */
#ifndef BROTLI_ENC_STATE_H_
#define BROTLI_ENC_STATE_H_
#include <brotli/types.h>
#include "command.h"
#include "compress_fragment.h"
#include "compress_fragment_two_pass.h"
#include "brotli_hash.h"
#include "memory.h"
#include "brotli_params.h"
#include "ringbuffer.h"
namespace duckdb_brotli {
typedef enum BrotliEncoderStreamState {
/* Default state. */
BROTLI_STREAM_PROCESSING = 0,
/* Intermediate state; after next block is emitted, byte-padding should be
performed before getting back to default state. */
BROTLI_STREAM_FLUSH_REQUESTED = 1,
/* Last metablock was produced; no more input is acceptable. */
BROTLI_STREAM_FINISHED = 2,
/* Flushing compressed block and writing meta-data block header. */
BROTLI_STREAM_METADATA_HEAD = 3,
/* Writing metadata block body. */
BROTLI_STREAM_METADATA_BODY = 4
} BrotliEncoderStreamState;
typedef enum BrotliEncoderFlintState {
BROTLI_FLINT_NEEDS_2_BYTES = 2,
BROTLI_FLINT_NEEDS_1_BYTE = 1,
BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
BROTLI_FLINT_DONE = -2
} BrotliEncoderFlintState;
typedef struct BrotliEncoderStateStruct {
BrotliEncoderParams params;
MemoryManager memory_manager_;
uint64_t input_pos_;
RingBuffer ringbuffer_;
size_t cmd_alloc_size_;
Command *commands_;
size_t num_commands_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
int saved_dist_cache_[4];
uint16_t last_bytes_;
uint8_t last_bytes_bits_;
/* "Flint" is a tiny uncompressed block emitted before the continuation
block to unwire literal context from previous data. Despite being int8_t,
field is actually BrotliEncoderFlintState enum. */
int8_t flint_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
uint8_t *storage_;
Hasher hasher_;
/* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
int small_table_[1 << 10]; /* 4KiB */
int *large_table_; /* Allocated only when needed */
size_t large_table_size_;
BrotliOnePassArena *one_pass_arena_;
BrotliTwoPassArena *two_pass_arena_;
/* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
uint32_t *command_buf_;
uint8_t *literal_buf_;
uint64_t total_in_;
uint8_t *next_out_;
size_t available_out_;
uint64_t total_out_;
/* Temporary buffer for padding flush bits or metadata block header / body. */
union {
uint64_t u64[2];
uint8_t u8[16];
} tiny_buf_;
uint32_t remaining_metadata_bytes_;
BrotliEncoderStreamState stream_state_;
BROTLI_BOOL is_last_block_emitted_;
BROTLI_BOOL is_initialized_;
} BrotliEncoderStateStruct;
typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal;
#define BrotliEncoderState BrotliEncoderStateInternal
}
#endif // BROTLI_ENC_STATE_H_

View File

@@ -0,0 +1,538 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "static_dict.h"
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "../common/transform.h"
#include "encoder_dict.h"
#include "find_match_length.h"
using namespace duckdb_brotli;
static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kDictNumBits);
}
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = (uint32_t)((distance << 5) + len_code);
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
}
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = dictionary->offsets_by_length[len] + len * id;
return FindMatchLengthWithLimit(&dictionary->data[offset], data,
BROTLI_MIN(size_t, len, maxlen));
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) {
return BROTLI_FALSE;
} else {
const size_t offset = dictionary->offsets_by_length[w.len] +
(size_t)w.len * (size_t)w.idx;
const uint8_t* dict = &dictionary->data[offset];
if (w.transform == 0) {
/* Match against base dictionary word. */
return
TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
} else if (w.transform == 10) {
/* Match against uppercase first transform.
Note that there are only ASCII uppercase words in the lookup table. */
return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
/* Match against uppercase all transform.
Note that there are only ASCII uppercase words in the lookup table. */
size_t i;
for (i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
} else {
if (dict[i] != data[i]) return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
}
}
/* Finds matches for a single static dictionary */
static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
#if defined(BROTLI_EXPERIMENTAL)
if (dictionary->has_words_heavy) {
const BrotliTrieNode* node = &dictionary->trie.root;
size_t l = 0;
while (node && l < max_length) {
uint8_t c;
if (l >= min_length && node->len_) {
AddMatch(node->idx_, l, node->len_, matches);
has_found_match = BROTLI_TRUE;
}
c = data[l++];
node = BrotliTrieSub(&dictionary->trie, node, c);
}
return has_found_match;
}
#endif /* BROTLI_EXPERIMENTAL */
{
size_t offset = dictionary->buckets[Hash(data)];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const size_t matchlen =
DictMatchLength(dictionary->words, data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
size_t len;
/* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
has_found_match = BROTLI_TRUE;
}
/* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
"" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
has_found_match = BROTLI_TRUE;
}
/* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
minlen = min_length;
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
for (len = minlen; len <= maxlen; ++len) {
size_t cut = l - len;
size_t transform_id = (cut << 2) +
(size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
AddMatch(id + transform_id * n, len, l, matches);
has_found_match = BROTLI_TRUE;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
s = &data[l];
/* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
if (s[2] == ' ') {
AddMatch(id + 28 * n, l + 3, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
} else if (s[2] == 't') {
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == 'd' && s[4] == ' ') {
AddMatch(id + 10 * n, l + 5, l, matches);
}
}
} else if (s[1] == 'b') {
if (s[2] == 'y' && s[3] == ' ') {
AddMatch(id + 38 * n, l + 4, l, matches);
}
} else if (s[1] == 'i') {
if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
}
} else if (s[1] == 'f') {
if (s[2] == 'o') {
if (s[3] == 'r' && s[4] == ' ') {
AddMatch(id + 25 * n, l + 5, l, matches);
}
} else if (s[2] == 'r') {
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
AddMatch(id + 37 * n, l + 6, l, matches);
}
}
} else if (s[1] == 'o') {
if (s[2] == 'f') {
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
}
} else if (s[1] == 'n') {
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
AddMatch(id + 80 * n, l + 5, l, matches);
}
} else if (s[1] == 't') {
if (s[2] == 'h') {
if (s[3] == 'e') {
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
} else if (s[3] == 'a') {
if (s[4] == 't' && s[5] == ' ') {
AddMatch(id + 29 * n, l + 6, l, matches);
}
}
} else if (s[2] == 'o') {
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
}
} else if (s[1] == 'w') {
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
AddMatch(id + 35 * n, l + 6, l, matches);
}
}
} else if (s[0] == '"') {
AddMatch(id + 19 * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + 21 * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 20 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 31 * n, l + 2, l, matches);
if (s[2] == 'T' && s[3] == 'h') {
if (s[4] == 'e') {
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
} else if (s[4] == 'i') {
if (s[5] == 's' && s[6] == ' ') {
AddMatch(id + 75 * n, l + 7, l, matches);
}
}
}
}
} else if (s[0] == ',') {
AddMatch(id + 76 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 14 * n, l + 2, l, matches);
}
} else if (s[0] == '\n') {
AddMatch(id + 22 * n, l + 1, l, matches);
if (s[1] == '\t') {
AddMatch(id + 50 * n, l + 2, l, matches);
}
} else if (s[0] == ']') {
AddMatch(id + 24 * n, l + 1, l, matches);
} else if (s[0] == '\'') {
AddMatch(id + 36 * n, l + 1, l, matches);
} else if (s[0] == ':') {
AddMatch(id + 51 * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + 57 * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 70 * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 86 * n, l + 2, l, matches);
}
} else if (s[0] == 'a') {
if (s[1] == 'l' && s[2] == ' ') {
AddMatch(id + 84 * n, l + 3, l, matches);
}
} else if (s[0] == 'e') {
if (s[1] == 'd') {
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
} else if (s[1] == 'r') {
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
} else if (s[1] == 's') {
if (s[2] == 't' && s[3] == ' ') {
AddMatch(id + 95 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'f') {
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
AddMatch(id + 90 * n, l + 4, l, matches);
}
} else if (s[0] == 'i') {
if (s[1] == 'v') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 92 * n, l + 4, l, matches);
}
} else if (s[1] == 'z') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 100 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'l') {
if (s[1] == 'e') {
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
AddMatch(id + 93 * n, l + 5, l, matches);
}
} else if (s[1] == 'y') {
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
}
} else if (s[0] == 'o') {
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
AddMatch(id + 106 * n, l + 4, l, matches);
}
}
} else {
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
transform. */
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
if (!IsMatch(dictionary->words, w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 1 >= max_length) {
continue;
}
/* Transforms "" + kUppercase{First,All} + <suffix> */
s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
}
}
}
}
}
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
size_t offset = dictionary->buckets[Hash(&data[1])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const uint8_t* s;
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
"." + BROTLI_TRANSFORM_IDENTITY + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
"." + BROTLI_TRANSFORM_IDENTITY + <suffix>
*/
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
} else if (is_space) {
if (s[0] == ',') {
AddMatch(id + 103 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 33 * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 71 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 52 * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 81 * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 98 * n, l + 3, l, matches);
}
}
}
} else if (is_space) {
/* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
transform. */
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
const uint8_t* s;
if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kUppercase{First,All} + <suffix> */
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (!is_all_caps) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
}
}
}
}
}
if (max_length >= 6) {
/* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xC2 && data[1] == 0xA0)) {
size_t offset = dictionary->buckets[Hash(&data[2])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
if (data[0] == 0xC2) {
AddMatch(id + 102 * n, l + 2, l, matches);
has_found_match = BROTLI_TRUE;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
has_found_match = BROTLI_TRUE;
}
}
}
}
}
if (max_length >= 9) {
/* Transforms with prefixes " the " and ".com/" */
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
size_t offset = dictionary->buckets[Hash(&data[5])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = dictionary->dict_words[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
if (l + 8 < max_length &&
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
AddMatch(id + 62 * n, l + 9, l, matches);
if (l + 12 < max_length &&
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
AddMatch(id + 73 * n, l + 13, l, matches);
}
}
}
}
}
}
}
}
return has_found_match;
}
/* Finds matches for one or more dictionaries, if multiple are present
in the contextual dictionary */
BROTLI_BOOL duckdb_brotli::BrotliFindAllStaticDictionaryMatches(
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match =
BrotliFindAllStaticDictionaryMatchesFor(
dictionary, data, min_length, max_length, matches);
if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) {
uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
int l;
const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0];
if (dictionary2 == dictionary) {
dictionary2 = dictionary->parent->dict[1];
}
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
matches2[l] = kInvalidMatch;
}
has_found_match |= BrotliFindAllStaticDictionaryMatchesFor(
dictionary2, data, min_length, max_length, matches2);
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
if (matches2[l] != kInvalidMatch) {
uint32_t dist = (uint32_t)(matches2[l] >> 5);
uint32_t len_code = matches2[l] & 31;
uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words->
size_bits_by_length[len_code]) & ~1u) *
(uint32_t)dictionary->num_transforms;
/* TODO(lode): check for dist overflow */
dist += skipdist;
AddMatch(dist, (size_t)l, len_code, matches);
}
}
}
return has_found_match;
}

View File

@@ -0,0 +1,37 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Class to model the static dictionary. */
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
#include <brotli/types.h>
#include "../common/dictionary.h"
#include "../common/brotli_platform.h"
#include "encoder_dict.h"
namespace duckdb_brotli {
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
static const uint32_t kInvalidMatch = 0xFFFFFFF;
/* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible
(distance << 5) + len_code.
Returns 1 if matches have been found, otherwise 0.
Prerequisites:
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliEncoderDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);
}
#endif /* BROTLI_ENC_STATIC_DICT_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,81 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Heuristics for deciding about the UTF8-ness of strings. */
#include "utf8_util.h"
#include <brotli/types.h>
using namespace duckdb_brotli;
static size_t BrotliParseAsUTF8(
int* symbol, const uint8_t* input, size_t size) {
/* ASCII */
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
/* 2-byte UTF8 */
if (size > 1u &&
(input[0] & 0xE0) == 0xC0 &&
(input[1] & 0xC0) == 0x80) {
*symbol = (((input[0] & 0x1F) << 6) |
(input[1] & 0x3F));
if (*symbol > 0x7F) {
return 2;
}
}
/* 3-byte UFT8 */
if (size > 2u &&
(input[0] & 0xF0) == 0xE0 &&
(input[1] & 0xC0) == 0x80 &&
(input[2] & 0xC0) == 0x80) {
*symbol = (((input[0] & 0x0F) << 12) |
((input[1] & 0x3F) << 6) |
(input[2] & 0x3F));
if (*symbol > 0x7FF) {
return 3;
}
}
/* 4-byte UFT8 */
if (size > 3u &&
(input[0] & 0xF8) == 0xF0 &&
(input[1] & 0xC0) == 0x80 &&
(input[2] & 0xC0) == 0x80 &&
(input[3] & 0xC0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
((input[1] & 0x3F) << 12) |
((input[2] & 0x3F) << 6) |
(input[3] & 0x3F));
if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) {
return 4;
}
}
/* Not UTF8, emit a special symbol above the UTF8-code space */
*symbol = 0x110000 | input[0];
return 1;
}
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
BROTLI_BOOL duckdb_brotli::BrotliIsMostlyUTF8(
const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
size_t bytes_read =
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
}

View File

@@ -0,0 +1,29 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Heuristics for deciding about the UTF8-ness of strings. */
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
static const double kMinUTF8Ratio = 0.75;
/* Returns 1 if at least min_fraction of the bytes between pos and
pos + length in the (data, mask) ring-buffer is UTF8-encoded, otherwise
returns 0. */
BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
}
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */

View File

@@ -0,0 +1,84 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <brotli/types.h>
#include "../common/brotli_platform.h"
namespace duckdb_brotli {
/* This function writes bits into bytes in increasing addresses, and within
a byte least-significant-bit first.
The function can write up to 56 bits in one go with WriteBits
Example: let's assume that 3 bits (Rs below) have been written already:
BYTE-0 BYTE+1 BYTE+2
0000 0RRR 0000 0000 0000 0000
Now, we could write 5 or less bits in MSB by just shifting by 3
and OR'ing to BYTE-0.
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
and locate the rest in BYTE+1, BYTE+2, etc. */
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
uint64_t bits,
size_t* BROTLI_RESTRICT pos,
uint8_t* BROTLI_RESTRICT array) {
BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
(uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
(int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
#if defined(BROTLI_LITTLE_ENDIAN)
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
bits are in *p and we write 57 bits, then the next write will
access a byte that was never initialized). */
{
uint8_t* p = &array[*pos >> 3];
uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
*pos += n_bits;
}
#else
/* implicit & 0xFF is assumed for uint8_t arithmetics */
{
uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= (uint8_t)bits;
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = (uint8_t)bits;
}
*array_pos = 0;
*pos += n_bits;
}
#endif
}
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
size_t pos, uint8_t* array) {
BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
BROTLI_DCHECK((pos & 7) == 0);
array[pos >> 3] = 0;
}
}
#endif /* BROTLI_ENC_WRITE_BITS_H_ */