// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // Variable-length integer (VLI) implementation. // Derived from the MLIR bytecode variable-width integers. // https://mlir.llvm.org/docs/BytecodeFormat/#variable-width-integers // // Prefixed little-endian varint. Trailing zero bits of the first byte specify // how many additional bytes to read. Once read, the value is shifted right by // the number of bytes read to discard these length-encoding bits and restore // the original integer. // // xxxxxxx1 <-> 0xxxxxxx // yyyyyyyy xxxxxx10 <-> 00yyyyyy yyxxxxxx // zzzzzzzz yyyyyyyy xxxxx100 <-> 000zzzzz zzzyyyyy yyyxxxxx // ... // // Considerably more efficient than continuation-bit variants like VLQ or // LEB128. // // Two bounded versions: 32 and 64 bits. // // Signed integers use zigzag encoding. #include "vli.h" // Prefer hardware/compiler intrinsics; fallback to de Bruijn sequence voodoo. static size_t count_trailing_zeros(uint32_t x) { #if defined(__clang__) || defined(__GNUC__) return (size_t)__builtin_ctz(x); #elif defined(_MSC_VER) unsigned long Index; _BitScanForward(&Index, x); return Index; #else static const uint8_t seq[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; return seq[((x & -x) * 0x77cb531) >> 27]; #endif } size_t vli64_encode(uint8_t *p, uint64_t u) { if ((u >> 7) == 0) { p[0] = (uint8_t)(u << 1 | 0x1); return 1; } uint64_t shift = u >> 7; for (size_t bytes = 2; bytes < 9; ++bytes) { if ((shift >>= 7) == 0) { uint64_t encoded = (u << 1 | 0x1) << (bytes - 1); memcpy(p, &encoded, bytes); return bytes; } } p[0] = 0; memcpy(p + 1, &u, sizeof(u)); return 9; } size_t vli64_decode(uint8_t const *p, uint64_t *u) { *u = p[0]; if (*u & 1) { *u >>= 1; return 1; } if (*u == 0) { memcpy(u, p + 1, sizeof(*u)); return 1 + sizeof(*u); } size_t bytes = count_trailing_zeros((uint32_t)(*u)); memcpy((uint8_t *)u + 1, p + 1, bytes); *u >>= (bytes + 1); return 1 + bytes; } size_t vli64_encode_signed(uint8_t *p, int64_t i) { return vli64_encode(p, (uint64_t)((i << 1) ^ (i >> 63))); } size_t vli64_decode_signed(uint8_t const *p, int64_t *i) { size_t n = vli64_decode(p, (uint64_t *)i); uint64_t v = (uint64_t)(*i); *i = (v >> 1) ^ -(v & 1); return n; } size_t vli32_encode(uint8_t *p, uint32_t u) { if ((u >> 7) == 0) { p[0] = (uint8_t)(u << 1 | 0x1); return 1; } uint32_t shift = u >> 7; for (size_t bytes = 2; bytes < 5; ++bytes) { if ((shift >>= 7) == 0) { uint32_t encoded = (u << 1 | 0x1) << (bytes - 1); memcpy(p, &encoded, bytes); return bytes; } } p[0] = 0; memcpy(p + 1, &u, sizeof(u)); return 5; } size_t vli32_decode(uint8_t const *p, uint32_t *u) { *u = p[0]; if (*u & 1) { *u >>= 1; return 1; } if ((*u & 0xf) == 0) { memcpy(u, p + 1, sizeof(*u)); return 1 + sizeof(*u); } size_t bytes = count_trailing_zeros(*u); memcpy((uint8_t *)u + 1, p + 1, bytes); *u >>= (bytes + 1); return 1 + bytes; } size_t vli32_encode_signed(uint8_t *p, int32_t i) { return vli32_encode(p, (uint32_t)((i << 1) ^ (i >> 31))); } size_t vli32_decode_signed(uint8_t const *p, int32_t *i) { size_t n = vli32_decode(p, (uint32_t *)i); uint32_t v = (uint32_t)(*i); *i = (v >> 1) ^ -(v & 1); return n; }