diff options
Diffstat (limited to 'src/vli.c')
| -rw-r--r-- | src/vli.c | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/src/vli.c b/src/vli.c new file mode 100644 index 0000000..98625e1 --- /dev/null +++ b/src/vli.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Variable-length integer (VLI) implementation. +// Derived from the MLIR bytecode variable-width integers. +// https://mlir.llvm.org/docs/BytecodeFormat/#variable-width-integers +// +// Prefixed little-endian varint. Trailing zero bits of the first byte specify +// how many additional bytes to read. Once read, the value is shifted right by +// the number of bytes read to discard these length-encoding bits and restore +// the original integer. +// +// xxxxxxx1 <-> 0xxxxxxx +// yyyyyyyy xxxxxx10 <-> 00yyyyyy yyxxxxxx +// zzzzzzzz yyyyyyyy xxxxx100 <-> 000zzzzz zzzyyyyy yyyxxxxx +// ... +// +// Considerably more efficient than continuation-bit variants like VLQ or +// LEB128. +// +// Two bounded versions: 32 and 64 bits. +// +// Signed integers use zigzag encoding. + +#include "vli.h" + +// Prefer hardware/compiler intrinsics; fallback to de Bruijn sequence voodoo. +static size_t count_trailing_zeros(uint32_t x) +{ +#if defined(__clang__) || defined(__GNUC__) + return (size_t)__builtin_ctz(x); +#elif defined(_MSC_VER) + unsigned long Index; + _BitScanForward(&Index, x); + return Index; +#else + static const uint8_t seq[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return seq[((x & -x) * 0x77cb531) >> 27]; +#endif +} + +size_t vli64_encode(uint8_t *p, uint64_t u) +{ + if ((u >> 7) == 0) { + p[0] = (uint8_t)(u << 1 | 0x1); + return 1; + } + uint64_t shift = u >> 7; + for (size_t bytes = 2; bytes < 9; ++bytes) { + if ((shift >>= 7) == 0) { + uint64_t encoded = (u << 1 | 0x1) << (bytes - 1); + memcpy(p, &encoded, bytes); + return bytes; + } + } + p[0] = 0; + memcpy(p + 1, &u, sizeof(u)); + return 9; +} + +size_t vli64_decode(uint8_t const *p, uint64_t *u) +{ + *u = p[0]; + if (*u & 1) { + *u >>= 1; + return 1; + } + if (*u == 0) { + memcpy(u, p + 1, sizeof(*u)); + return 1 + sizeof(*u); + } + size_t bytes = count_trailing_zeros((uint32_t)(*u)); + memcpy((uint8_t *)u + 1, p + 1, bytes); + *u >>= (bytes + 1); + return 1 + bytes; +} + +size_t vli64_encode_signed(uint8_t *p, int64_t i) +{ + return vli64_encode(p, (uint64_t)((i << 1) ^ (i >> 63))); +} + +size_t vli64_decode_signed(uint8_t const *p, int64_t *i) +{ + size_t n = vli64_decode(p, (uint64_t *)i); + uint64_t v = (uint64_t)(*i); + *i = (v >> 1) ^ -(v & 1); + return n; +} + +size_t vli32_encode(uint8_t *p, uint32_t u) +{ + if ((u >> 7) == 0) { + p[0] = (uint8_t)(u << 1 | 0x1); + return 1; + } + uint32_t shift = u >> 7; + for (size_t bytes = 2; bytes < 5; ++bytes) { + if ((shift >>= 7) == 0) { + uint32_t encoded = (u << 1 | 0x1) << (bytes - 1); + memcpy(p, &encoded, bytes); + return bytes; + } + } + p[0] = 0; + memcpy(p + 1, &u, sizeof(u)); + return 5; +} + +size_t vli32_decode(uint8_t const *p, uint32_t *u) +{ + *u = p[0]; + if (*u & 1) { + *u >>= 1; + return 1; + } + if ((*u & 0xf) == 0) { + memcpy(u, p + 1, sizeof(*u)); + return 1 + sizeof(*u); + } + size_t bytes = count_trailing_zeros(*u); + memcpy((uint8_t *)u + 1, p + 1, bytes); + *u >>= (bytes + 1); + return 1 + bytes; +} + +size_t vli32_encode_signed(uint8_t *p, int32_t i) +{ + return vli32_encode(p, (uint32_t)((i << 1) ^ (i >> 31))); +} + +size_t vli32_decode_signed(uint8_t const *p, int32_t *i) +{ + size_t n = vli32_decode(p, (uint32_t *)i); + uint32_t v = (uint32_t)(*i); + *i = (v >> 1) ^ -(v & 1); + return n; +} |
