/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Block split point selection utilities. */

#include "block_splitter.h"

#include <string.h>  /* memcpy, memset */

#include "../common/platform.h"
#include "bit_cost.h"
#include "cluster.h"
#include "command.h"
#include "fast_log.h"
#include "histogram.h"
#include "memory.h"
#include "quality.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/* Tuning parameters. The per-category values (histogram limits, switch costs,
   stride lengths, symbols per histogram) are handed to the SplitByteVector*
   instantiations from BrotliSplitBlock below. The remaining constants are not
   referenced directly in this file; presumably they are consumed by the
   template in block_splitter_inc.h. */
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kDistanceStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;

/* Returns the sum of insert_len_ over the first num_commands entries of cmds,
   i.e. the total number of literal bytes those commands insert. */
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
  /* Count how many we have. */
  size_t total_length = 0;
  size_t i;
  for (i = 0; i < num_commands; ++i) {
    total_length += cmds[i].insert_len_;
  }
  return total_length;
}

/* Gathers the literal bytes of all commands into one contiguous array.

   data is addressed as a circular buffer of (mask + 1) bytes: reading starts
   at (offset & mask) and wraps to index 0 after index mask. For every command
   its insert_len_ bytes are appended to literals, then the read position is
   advanced past the command's copy length as well.

   literals must have room for CountLiterals(cmds, num_commands) bytes. */
static void CopyLiteralsToByteArray(const Command* cmds,
                                    const size_t num_commands,
                                    const uint8_t* data,
                                    const size_t offset,
                                    const size_t mask,
                                    uint8_t* literals) {
  size_t pos = 0;
  size_t from_pos = offset & mask;
  size_t i;
  for (i = 0; i < num_commands; ++i) {
    size_t insert_len = cmds[i].insert_len_;
    if (from_pos + insert_len > mask) {
      /* The insert run reaches the end of the buffer: copy the part up to and
         including index mask first, then continue from index 0. */
      size_t head_size = mask + 1 - from_pos;
      memcpy(literals + pos, data + from_pos, head_size);
      from_pos = 0;
      pos += head_size;
      insert_len -= head_size;
    }
    if (insert_len > 0) {
      memcpy(literals + pos, data + from_pos, insert_len);
      pos += insert_len;
    }
    /* Skip the bytes covered by the copy part of the command. */
    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
  }
}

/* Multiplicative pseudo-random generator: updates *seed in place and returns
   the new value. */
static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
  *seed *= 16807U;
  return *seed;
}

/* Returns FastLog2(count), or the fixed value -2.0 when count is zero. */
static BROTLI_INLINE double BitCost(size_t count) {
  return count == 0 ? -2.0 : FastLog2(count);
}

#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16

/* block_splitter_inc.h is included three times with different FN / DataType
   settings. FN appends the category suffix to identifiers, so each inclusion
   presumably emits one family of functions (e.g. SplitByteVectorLiteral,
   SplitByteVectorCommand, SplitByteVectorDistance, which are called below). */
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN

#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef FN

/* DataType is intentionally left defined as uint16_t for the Distance
   instantiation. */
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "block_splitter_inc.h"
#undef DataType
#undef FN

/* Resets self to an empty split: no types, no blocks, no allocated arrays. */
void BrotliInitBlockSplit(BlockSplit* self) {
  self->num_types = 0;
  self->num_blocks = 0;
  self->types = NULL;
  self->lengths = NULL;
  self->types_alloc_size = 0;
  self->lengths_alloc_size = 0;
}

/* Releases the types and lengths arrays of self through memory manager m. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}

/* Extracts literals, command distance and prefix codes, then applies
   SplitByteVector to create partitioning.

   Three independent splits are produced, each from a temporary array that is
   allocated through m and freed again before the next stage:
     - literal_split:         from the literal bytes of all commands;
     - insert_and_copy_split: from cmd_prefix_ of every command;
     - dist_split:            from the distance prefixes of the commands that
                              pass the filter in the third stage.

   data / pos / mask describe the input bytes as in CopyLiteralsToByteArray.
   The function returns early, leaving later splits untouched, when an
   allocation fails or m reports an out-of-memory condition.

   NOTE(review): on those early returns the temporary array of the current
   stage is not passed to BROTLI_FREE; presumably the memory manager reclaims
   outstanding allocations after OOM -- confirm against memory.h. */
void BrotliSplitBlock(MemoryManager* m,
                      const Command* cmds,
                      const size_t num_commands,
                      const uint8_t* data,
                      const size_t pos,
                      const size_t mask,
                      const BrotliEncoderParams* params,
                      BlockSplit* literal_split,
                      BlockSplit* insert_and_copy_split,
                      BlockSplit* dist_split) {
  {
    size_t literals_count = CountLiterals(cmds, num_commands);
    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
    /* Create a continuous array of literals. */
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
    /* Create the block split on the array of literals.
     * Literal histograms can have alphabet size up to 256.
     * Though, to accommodate context modeling, less than half of maximum size
     * is allowed. */
    SplitByteVectorLiteral(
        m, literals, literals_count,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
        literal_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, literals);
    /* NB: this might be a good place for injecting extra splitting without
     *     increasing encoder complexity; however, output partition would be
     *     less optimal than one produced with forced splitting inside
     *     SplitByteVector (FindBlocks / ClusterBlocks). */
  }

  {
    /* Compute prefix codes for commands. */
    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
    for (i = 0; i < num_commands; ++i) {
      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
    }
    /* Create the block split on the array of command prefixes. */
    SplitByteVectorCommand(
        m, insert_and_copy_codes, num_commands,
        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kCommandBlockSwitchCost, params,
        insert_and_copy_split);
    if (BROTLI_IS_OOM(m)) return;
    /* TODO(eustas): reuse for distances? */
    BROTLI_FREE(m, insert_and_copy_codes);
  }

  {
    /* Create a continuous array of distance prefixes. */
    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t j = 0;  /* Number of distance prefixes collected so far. */
    size_t i;
    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
    for (i = 0; i < num_commands; ++i) {
      const Command* cmd = &cmds[i];
      /* Only commands with a non-zero copy length and cmd_prefix_ >= 128
         contribute a distance symbol; presumably the others carry no explicit
         distance code -- confirm against command.h. Only the low 10 bits of
         dist_prefix_ are kept. */
      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
        distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
      }
    }
    /* Create the block split on the array of distance prefixes.
       Note that the histogram limit used here is kMaxCommandHistograms; there
       is no dedicated distance limit among the constants above. */
    SplitByteVectorDistance(
        m, distance_prefixes, j,
        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
        kDistanceStrideLength, kDistanceBlockSwitchCost, params,
        dist_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, distance_prefixes);
  }
}

#if defined(BROTLI_TEST)
/* Externally visible wrappers around the static helpers; compiled only when
   BROTLI_TEST is defined. Each is preceded by its own prototype. */
size_t CountLiteralsForTest(const Command*, const size_t);
size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
  return CountLiterals(cmds, num_commands);
}

void CopyLiteralsToByteArrayForTest(const Command*,
    const size_t, const uint8_t*, const size_t, const size_t, uint8_t*);
void CopyLiteralsToByteArrayForTest(const Command* cmds,
    const size_t num_commands, const uint8_t* data, const size_t offset,
    const size_t mask, uint8_t* literals) {
  CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
}
#endif

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif