aboutsummaryrefslogtreecommitdiff
path: root/src/vli.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vli.c')
-rw-r--r--src/vli.c140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/vli.c b/src/vli.c
new file mode 100644
index 0000000..98625e1
--- /dev/null
+++ b/src/vli.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Variable-length integer (VLI) implementation.
+// Derived from the MLIR bytecode variable-width integers.
+// https://mlir.llvm.org/docs/BytecodeFormat/#variable-width-integers
+//
+// Prefixed little-endian varint. Trailing zero bits of the first byte specify
+// how many additional bytes to read. Once read, the value is shifted right by
+// the number of bytes read to discard these length-encoding bits and restore
+// the original integer.
+//
+// xxxxxxx1 <-> 0xxxxxxx
+// yyyyyyyy xxxxxx10 <-> 00yyyyyy yyxxxxxx
+// zzzzzzzz yyyyyyyy xxxxx100 <-> 000zzzzz zzzyyyyy yyyxxxxx
+// ...
+//
+// Considerably more efficient than continuation-bit variants like VLQ or
+// LEB128.
+//
+// Two bounded versions: 32 and 64 bits.
+//
+// Signed integers use zigzag encoding.
+
+#include "vli.h"
+
+// Prefer hardware/compiler intrinsics; fallback to de Bruijn sequence voodoo.
+static size_t count_trailing_zeros(uint32_t x)
+{
+#if defined(__clang__) || defined(__GNUC__)
+ return (size_t)__builtin_ctz(x);
+#elif defined(_MSC_VER)
+ unsigned long Index;
+ _BitScanForward(&Index, x);
+ return Index;
+#else
+ static const uint8_t seq[32] = {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+ return seq[((x & -x) * 0x77cb531) >> 27];
+#endif
+}
+
+size_t vli64_encode(uint8_t *p, uint64_t u)
+{
+ if ((u >> 7) == 0) {
+ p[0] = (uint8_t)(u << 1 | 0x1);
+ return 1;
+ }
+ uint64_t shift = u >> 7;
+ for (size_t bytes = 2; bytes < 9; ++bytes) {
+ if ((shift >>= 7) == 0) {
+ uint64_t encoded = (u << 1 | 0x1) << (bytes - 1);
+ memcpy(p, &encoded, bytes);
+ return bytes;
+ }
+ }
+ p[0] = 0;
+ memcpy(p + 1, &u, sizeof(u));
+ return 9;
+}
+
+size_t vli64_decode(uint8_t const *p, uint64_t *u)
+{
+ *u = p[0];
+ if (*u & 1) {
+ *u >>= 1;
+ return 1;
+ }
+ if (*u == 0) {
+ memcpy(u, p + 1, sizeof(*u));
+ return 1 + sizeof(*u);
+ }
+ size_t bytes = count_trailing_zeros((uint32_t)(*u));
+ memcpy((uint8_t *)u + 1, p + 1, bytes);
+ *u >>= (bytes + 1);
+ return 1 + bytes;
+}
+
+size_t vli64_encode_signed(uint8_t *p, int64_t i)
+{
+ return vli64_encode(p, (uint64_t)((i << 1) ^ (i >> 63)));
+}
+
+size_t vli64_decode_signed(uint8_t const *p, int64_t *i)
+{
+ size_t n = vli64_decode(p, (uint64_t *)i);
+ uint64_t v = (uint64_t)(*i);
+ *i = (v >> 1) ^ -(v & 1);
+ return n;
+}
+
+size_t vli32_encode(uint8_t *p, uint32_t u)
+{
+ if ((u >> 7) == 0) {
+ p[0] = (uint8_t)(u << 1 | 0x1);
+ return 1;
+ }
+ uint32_t shift = u >> 7;
+ for (size_t bytes = 2; bytes < 5; ++bytes) {
+ if ((shift >>= 7) == 0) {
+ uint32_t encoded = (u << 1 | 0x1) << (bytes - 1);
+ memcpy(p, &encoded, bytes);
+ return bytes;
+ }
+ }
+ p[0] = 0;
+ memcpy(p + 1, &u, sizeof(u));
+ return 5;
+}
+
+size_t vli32_decode(uint8_t const *p, uint32_t *u)
+{
+ *u = p[0];
+ if (*u & 1) {
+ *u >>= 1;
+ return 1;
+ }
+ if ((*u & 0xf) == 0) {
+ memcpy(u, p + 1, sizeof(*u));
+ return 1 + sizeof(*u);
+ }
+ size_t bytes = count_trailing_zeros(*u);
+ memcpy((uint8_t *)u + 1, p + 1, bytes);
+ *u >>= (bytes + 1);
+ return 1 + bytes;
+}
+
+size_t vli32_encode_signed(uint8_t *p, int32_t i)
+{
+ return vli32_encode(p, (uint32_t)((i << 1) ^ (i >> 31)));
+}
+
+size_t vli32_decode_signed(uint8_t const *p, int32_t *i)
+{
+ size_t n = vli32_decode(p, (uint32_t *)i);
+ uint32_t v = (uint32_t)(*i);
+ *i = (v >> 1) ^ -(v & 1);
+ return n;
+}