Skip to main content

pw_varint/
pw_varint.rs

1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15//! # pw_varint
16//!
17//! `pw_varint` provides support for encoding and decoding variable length
18//! integers.  Small values require less memory than they do with fixed
19//! encoding.  Signed integers are first zig-zag encoded to allow small
20//! negative numbers to realize the memory savings.  For more information see
21//! [Pigweed's pw_varint documentation](https://pigweed.dev/pw_varint).
22//!
23//! The encoding format used is compatible with
24//! [Protocol Buffers](https://developers.google.com/protocol-buffers/docs/encoding#varints).
25//!
26//! Encoding and decoding is provided through the [VarintEncode] and
27//! [VarintDecode] traits.
28//!
29//! # Example
30//!
31//! ```
32//! use pw_varint::{VarintEncode, VarintDecode};
33//!
34//! let mut buffer = [0u8; 64];
35//!
36//! let encoded_len = (-1i64).varint_encode(&mut buffer).unwrap();
37//!
38//! let (decoded_len, val) = i64::varint_decode(&buffer ).unwrap();
39//! ```
40
41#![cfg_attr(not(feature = "std"), no_std)]
42
43use core::num::Wrapping;
44
45use pw_status::{Error, Result};
46
47/// A trait for objects than can be decoded from a varint.
48///
49/// `pw_varint` provides implementations for [i16], [u16], [i32], [u32],
50/// [i64], and [u64].
51pub trait VarintDecode: Sized {
52    /// Decode a type from a varint encoded series of bytes.
53    ///
54    /// Signed values will be implicitly zig-zag decoded.
55    fn varint_decode(data: &[u8]) -> Result<(usize, Self)>;
56}
57
58/// A trait for objects than can be encoded into a varint.
59///
60/// `pw_varint` provides implementations for [i16], [u16], [i32], [u32],
61/// [i64], and [u64].
62pub trait VarintEncode: Sized {
63    /// Encode a type into a varint encoded series of bytes.
64    ///
65    /// Signed values will be implicitly zig-zag encoded.
66    fn varint_encode(self, data: &mut [u8]) -> Result<usize>;
67}
68
69macro_rules! unsigned_varint_impl {
70    ($t:ty) => {
71        impl VarintDecode for $t {
72            fn varint_decode(data: &[u8]) -> Result<(usize, Self)> {
73                let (data, val) = decode_u64(data)?;
74                let val = val.try_into().map_err(|_| Error::OutOfRange)?;
75                Ok((data, val))
76            }
77        }
78
79        impl VarintEncode for $t {
80            fn varint_encode(self, data: &mut [u8]) -> Result<usize> {
81                encode_u64(data, u64::from(self))
82            }
83        }
84    };
85}
86
87macro_rules! signed_varint_impl {
88    ($t:ty) => {
89        impl VarintDecode for $t {
90            fn varint_decode(data: &[u8]) -> Result<(usize, Self)> {
91                let (data, val) = decode_u64(data)?;
92                let val = zig_zag_decode(val)
93                    .try_into()
94                    .map_err(|_| Error::OutOfRange)?;
95                Ok((data, val))
96            }
97        }
98
99        impl VarintEncode for $t {
100            fn varint_encode(self, data: &mut [u8]) -> Result<usize> {
101                encode_u64(data, zig_zag_encode(i64::from(self)))
102            }
103        }
104    };
105}
106
107unsigned_varint_impl!(u8);
108unsigned_varint_impl!(u16);
109unsigned_varint_impl!(u32);
110unsigned_varint_impl!(u64);
111
112signed_varint_impl!(i8);
113signed_varint_impl!(i16);
114signed_varint_impl!(i32);
115signed_varint_impl!(i64);
116
117fn decode_u64(data: &[u8]) -> Result<(usize, u64)> {
118    let mut value: u64 = 0;
119    for (i, d) in data.iter().enumerate() {
120        value |= (u64::from(*d) & 0x7f) << (i * 7);
121
122        if (*d & 0x80) == 0 {
123            return Ok((i + 1, value));
124        }
125    }
126    Err(Error::OutOfRange)
127}
128
129fn encode_u64(data: &mut [u8], value: u64) -> Result<usize> {
130    let mut value = value;
131    for (i, d) in data.iter_mut().enumerate() {
132        let mut byte: u8 = (value & 0x7f) as u8;
133        value >>= 7;
134        if value > 0 {
135            byte |= 0x80;
136        }
137        *d = byte;
138        if value == 0 {
139            return Ok(i + 1);
140        }
141    }
142    Err(Error::OutOfRange)
143}
144
145// ZigZag encodes a signed integer. This maps small negative numbers to small,
146// unsigned positive numbers, which improves their density for LEB128 encoding.
147//
148// ZigZag encoding works by moving the sign bit from the most-significant bit to
149// the least-significant bit. For the signed k-bit integer n, the formula is
150//
151//   (n << 1) ^ (n >> (k - 1))
152//
153// See the following for a description of ZigZag encoding:
154//   https://protobuf.dev/programming-guides/encoding/#signed-ints
155fn zig_zag_encode(value: i64) -> u64 {
156    // TODO: https://pwbug.dev/524779003 - Investigate lossy casts
157    (value.cast_unsigned() << 1) ^ (value >> (i64::BITS - 1)).cast_unsigned()
158}
159
160fn zig_zag_decode(value: u64) -> i64 {
161    let value = Wrapping(value);
162    // TODO: https://pwbug.dev/524779003 - Investigate lossy casts
163    ((value >> 1) ^ (!(value & Wrapping(1)) + Wrapping(1)))
164        .0
165        .cast_signed()
166}
167
168#[cfg(test)]
169mod test {
170    use super::*;
171
172    fn success_cases_u8<T>() -> Vec<(Vec<u8>, T)>
173    where
174        T: From<u8>,
175    {
176        vec![
177            // From varint_test.cc EncodeSizeUnsigned32_SmallSingleByte.
178            (vec![0x00], 0x00.into()),
179            (vec![0x01], 0x01.into()),
180            (vec![0x02], 0x02.into()),
181            // From varint_test.cc EncodeSizeUnsigned32_LargeSingleByte.
182            (vec![0x3f], 0x3f.into()),
183            (vec![0x40], 0x40.into()),
184            (vec![0x7e], 0x7e.into()),
185            (vec![0x7f], 0x7f.into()),
186            // From varint_test.cc EncodeSizeUnsigned32_MultiByte.
187            (vec![0x80, 0x01], 128.into()),
188            (vec![0x81, 0x01], 129.into()),
189            // From https://protobuf.dev/programming-guides/encoding/.
190            (vec![0x96, 0x01], 150.into()),
191        ]
192    }
193
194    fn success_cases_i8<T>() -> Vec<(Vec<u8>, T)>
195    where
196        T: From<i8>,
197    {
198        vec![
199            // From varint_test.cc EncodeSizeSigned32_SmallSingleByte.
200            (vec![0x00], 0i8.into()),
201            (vec![0x01], (-1i8).into()),
202            (vec![0x02], 1i8.into()),
203            (vec![0x03], (-2i8).into()),
204            (vec![0x04], 2i8.into()),
205            // From varint_test.cc EncodeSizeSigned32_LargeSingleByte.
206            (vec![125], (-63i8).into()),
207            (vec![126], (63i8).into()),
208            (vec![127], (-64i8).into()),
209            // From varint_test.cc EncodeSizeSigned32_MultiByte.
210            (vec![0x80, 0x1], 64i8.into()),
211            (vec![0x81, 0x1], (-65i8).into()),
212            (vec![0x82, 0x1], 65i8.into()),
213        ]
214    }
215
216    fn success_cases_u32<T>() -> Vec<(Vec<u8>, T)>
217    where
218        T: From<u32>,
219    {
220        vec![
221            // From varint_test.cc EncodeSizeUnsigned32_MultiByte.
222            (vec![0xfe, 0xff, 0xff, 0xff, 0x0f], 0xffff_fffe.into()),
223            (vec![0xff, 0xff, 0xff, 0xff, 0x0f], 0xffff_ffff.into()),
224        ]
225    }
226
227    fn success_cases_i32<T>() -> Vec<(Vec<u8>, T)>
228    where
229        T: From<i32>,
230    {
231        vec![
232            // From varint_test.cc EncodeSizeSigned32_MultiByte.
233            (vec![0xff, 0xff, 0xff, 0xff, 0x0f], i32::MIN.into()),
234            (vec![0xfe, 0xff, 0xff, 0xff, 0x0f], i32::MAX.into()),
235        ]
236    }
237
238    #[test]
239    fn decode_test_u16() {
240        for case in success_cases_u8::<u16>() {
241            assert_eq!(u16::varint_decode(&case.0), Ok((case.0.len(), case.1)));
242        }
243
244        assert_eq!(u16::varint_decode(&[0x96]), Err(Error::OutOfRange));
245    }
246
247    #[test]
248    fn decode_test_i16() {
249        for case in success_cases_i8::<i16>() {
250            assert_eq!(i16::varint_decode(&case.0), Ok((case.0.len(), case.1)));
251        }
252
253        assert_eq!(i16::varint_decode(&[0x96]), Err(Error::OutOfRange));
254    }
255
256    #[test]
257    fn decode_test_u32() {
258        for case in success_cases_u8::<u32>()
259            .into_iter()
260            .chain(success_cases_u32::<u32>())
261        {
262            assert_eq!(u32::varint_decode(&case.0), Ok((case.0.len(), case.1)));
263        }
264
265        assert_eq!(u32::varint_decode(&[0x96]), Err(Error::OutOfRange));
266    }
267
268    #[test]
269    fn decode_test_i32() {
270        for case in success_cases_i8::<i32>()
271            .into_iter()
272            .chain(success_cases_i32::<i32>())
273        {
274            assert_eq!(i32::varint_decode(&case.0), Ok((case.0.len(), case.1)));
275        }
276
277        assert_eq!(i32::varint_decode(&[0x96]), Err(Error::OutOfRange));
278    }
279
280    #[test]
281    fn decode_test_u64() {
282        for case in success_cases_u8::<u64>()
283            .into_iter()
284            .chain(success_cases_u32::<u64>())
285        {
286            assert_eq!(u64::varint_decode(&case.0), Ok((case.0.len(), case.1)));
287        }
288
289        assert_eq!(u64::varint_decode(&[0x96]), Err(Error::OutOfRange));
290    }
291
292    #[test]
293    fn decode_test_i64() {
294        for case in success_cases_i8::<i64>()
295            .into_iter()
296            .chain(success_cases_i32::<i64>())
297        {
298            assert_eq!(i64::varint_decode(&case.0), Ok((case.0.len(), case.1)));
299        }
300
301        assert_eq!(i64::varint_decode(&[0x96]), Err(Error::OutOfRange));
302    }
303
304    #[test]
305    fn encode_test_u16() {
306        for case in success_cases_u8::<u16>() {
307            let mut buffer = [0u8; 64];
308            let len = case.1.varint_encode(&mut buffer).unwrap();
309            assert_eq!(len, case.0.len());
310            assert_eq!(&buffer[0..len], case.0);
311        }
312    }
313
314    #[test]
315    fn encode_test_i16() {
316        for case in success_cases_i8::<i16>() {
317            let mut buffer = [0u8; 64];
318            let len = case.1.varint_encode(&mut buffer).unwrap();
319            assert_eq!(len, case.0.len());
320            assert_eq!(&buffer[0..len], case.0);
321        }
322    }
323
324    #[test]
325    fn encode_test_u32() {
326        for case in success_cases_u8::<u32>()
327            .into_iter()
328            .chain(success_cases_u32::<u32>())
329        {
330            let mut buffer = [0u8; 64];
331            let len = case.1.varint_encode(&mut buffer).unwrap();
332            assert_eq!(len, case.0.len());
333            assert_eq!(&buffer[0..len], case.0);
334        }
335    }
336
337    #[test]
338    fn encode_test_i32() {
339        for case in success_cases_i8::<i32>()
340            .into_iter()
341            .chain(success_cases_i32::<i32>())
342        {
343            let mut buffer = [0u8; 64];
344            let len = case.1.varint_encode(&mut buffer).unwrap();
345            assert_eq!(len, len);
346            assert_eq!(&buffer[0..len], case.0);
347        }
348    }
349
350    #[test]
351    fn encode_test_u64() {
352        for case in success_cases_u8::<u64>()
353            .into_iter()
354            .chain(success_cases_u32::<u64>())
355        {
356            let mut buffer = [0u8; 64];
357            let len = case.1.varint_encode(&mut buffer).unwrap();
358            assert_eq!(len, case.0.len());
359            assert_eq!(&buffer[0..len], case.0);
360        }
361    }
362
363    #[test]
364    fn encode_test_i64() {
365        for case in success_cases_i8::<i64>()
366            .into_iter()
367            .chain(success_cases_i32::<i64>())
368        {
369            let mut buffer = [0u8; 64];
370            let len = case.1.varint_encode(&mut buffer).unwrap();
371            assert_eq!(len, case.0.len());
372            assert_eq!(&buffer[0..len], case.0);
373        }
374    }
375}