pw_tokenizer/
internal.rs

// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
14
15use core::cmp::min;
16
17use pw_status::{Error, Result};
18use pw_stream::{Cursor, Write};
19use pw_varint::VarintEncode;
20
21use crate::MessageWriter;
22
// `Argument` marshals a single format argument into the shape the
// tokenization engine encodes.  String data is borrowed, not copied.
//
// The derives are purely additive: they let callers and tests print, copy and
// compare arguments without affecting the encoded output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Argument<'a> {
    // A string argument; `encode_string` writes it length-prefixed.
    String(&'a str),
    // An integer argument; `tokenize_engine` writes it as a varint.
    Varint(i64),
}
29
30impl<'a> From<&'a str> for Argument<'a> {
31    fn from(val: &'a str) -> Self {
32        Self::String(val)
33    }
34}
35
36impl From<char> for Argument<'_> {
37    fn from(val: char) -> Self {
38        Self::Varint(val as i64)
39    }
40}
41
42impl From<u8> for Argument<'_> {
43    fn from(val: u8) -> Self {
44        Self::Varint(val as i64)
45    }
46}
47
48impl From<i32> for Argument<'_> {
49    fn from(val: i32) -> Self {
50        Self::Varint(val as i64)
51    }
52}
53
54impl From<u32> for Argument<'_> {
55    fn from(val: u32) -> Self {
56        Self::Varint(val as i64)
57    }
58}
59
// TODO: b/400978670 - investigate whether changing these
// 64-bit values to references saves space on 32-bit systems.
62impl From<i64> for Argument<'_> {
63    fn from(val: i64) -> Self {
64        Self::Varint(val)
65    }
66}
67
68impl From<u64> for Argument<'_> {
69    fn from(val: u64) -> Self {
70        Self::Varint(val as i64)
71    }
72}
73
74impl From<usize> for Argument<'_> {
75    fn from(val: usize) -> Self {
76        Self::Varint(val as i64)
77    }
78}
79
80// Wraps a `Cursor` so that `tokenize_to_buffer` and `tokenize_to_writer` can
81// share implementations.  It is not meant to be used outside of
82// `tokenize_to_buffer`.
83struct CursorMessageWriter<'a> {
84    cursor: Cursor<&'a mut [u8]>,
85}
86
87impl MessageWriter for CursorMessageWriter<'_> {
88    fn new() -> Self {
89        // Ensure `tokenize_to_buffer` never calls `new()`.
90        unimplemented!();
91    }
92
93    fn write(&mut self, data: &[u8]) -> Result<()> {
94        self.cursor.write_all(data)
95    }
96
97    fn remaining(&self) -> usize {
98        self.cursor.remaining()
99    }
100
101    fn finalize(self) -> Result<()> {
102        // Ensure `tokenize_to_buffer` never calls `finalize()`.
103        unimplemented!();
104    }
105}
106
107// Encode a string in Tokenizer format: length byte + data with the high bit of
108// the length byte used to signal that the string was truncated.
109pub fn encode_string<W: MessageWriter>(writer: &mut W, value: &str) -> Result<()> {
110    const MAX_STRING_LENGTH: usize = 0x7f;
111
112    let string_bytes = value.as_bytes();
113
114    // Limit the encoding to the lesser of 127 or the available space in the buffer.
115    let max_len = min(MAX_STRING_LENGTH, writer.remaining().saturating_sub(1));
116    let overflow = max_len < string_bytes.len();
117    let len = min(max_len, string_bytes.len());
118
119    // First byte of an encoded string is it's length.
120    let mut header = len as u8;
121
122    // The high bit of the first byte is used to indicate if the string was
123    // truncated.
124    if overflow {
125        header |= 0x80;
126    }
127    writer.write(&[header])?;
128
129    writer.write(&string_bytes[..len])
130}
131
132// Write out a tokenized message to an already created `MessageWriter`.
133fn tokenize_engine<W: crate::MessageWriter>(
134    writer: &mut W,
135    token: u32,
136    args: &[Argument<'_>],
137) -> Result<()> {
138    writer.write(&token.to_le_bytes()[..])?;
139    for arg in args {
140        match arg {
141            Argument::String(s) => encode_string(writer, s)?,
142            Argument::Varint(i) => {
143                let mut encode_buffer = [0u8; 10];
144                let len = i.varint_encode(&mut encode_buffer)?;
145                let encoded_slice = encode_buffer.get(..len).ok_or(Error::OutOfRange)?;
146                writer.write(encoded_slice)?;
147            }
148        }
149    }
150
151    Ok(())
152}
153
154#[inline(never)]
155pub fn tokenize_to_buffer(buffer: &mut [u8], token: u32, args: &[Argument<'_>]) -> Result<usize> {
156    let mut writer = CursorMessageWriter {
157        cursor: Cursor::new(buffer),
158    };
159    tokenize_engine(&mut writer, token, args)?;
160    Ok(writer.cursor.position())
161}
162
163#[inline(never)]
164pub fn tokenize_to_buffer_no_args(buffer: &mut [u8], token: u32) -> Result<usize> {
165    let token_bytes = &token.to_le_bytes()[..];
166    let token_len = token_bytes.len();
167    if buffer.len() < token_len {
168        return Err(Error::OutOfRange);
169    }
170    buffer[..token_len].copy_from_slice(token_bytes);
171
172    Ok(token_len)
173}
174
175#[inline(never)]
176pub fn tokenize_to_writer<W: crate::MessageWriter>(
177    token: u32,
178    args: &[Argument<'_>],
179) -> Result<()> {
180    let mut writer = W::new();
181
182    match tokenize_engine(&mut writer, token, args) {
183        // Still finalize the writer even if the buffer
184        // is full so as to avoid loosing the entire
185        // log message.
186        Ok(_) | Err(Error::OutOfRange) =>  writer.finalize(),
187        Err(error) => Err(error),
188    }
189}
190
191#[inline(never)]
192pub fn tokenize_to_writer_no_args<W: crate::MessageWriter>(token: u32) -> Result<()> {
193    let mut writer = W::new();
194    let result = writer.write(&token.to_le_bytes()[..]);
195
196    match result {
197        // Still finalize the writer even if the buffer
198        // is full so as to avoid loosing the entire
199        // log message.
200        Ok(_) | Err(Error::OutOfRange) =>  writer.finalize(),
201        Err(error) => Err(error),
202    }
203}
204
#[cfg(test)]
mod test {
    use super::*;

    // Encodes `value` into a zeroed `BUFFER_LEN`-byte buffer and checks that
    // exactly the bytes in `expected` were written.
    fn do_string_encode_test<const BUFFER_LEN: usize>(value: &str, expected: &[u8]) {
        let mut buffer = [0u8; BUFFER_LEN];
        let mut writer = CursorMessageWriter {
            cursor: Cursor::new(&mut buffer),
        };
        encode_string(&mut writer, value).unwrap();

        // Same accessor `tokenize_to_buffer` uses; it yields a `usize`
        // directly, so no `Seek` import, `unwrap()` or cast is needed.
        let len = writer.cursor.position();
        let buffer = writer.cursor.into_inner();

        assert_eq!(len, expected.len());
        assert_eq!(&buffer[..len], expected);
    }

    #[test]
    fn test_string_encode() {
        do_string_encode_test::<64>("test", b"\x04test");
        do_string_encode_test::<4>("test", b"\x83tes");
        do_string_encode_test::<1>("test", b"\x80");

        // An empty string is just a zero header byte.
        do_string_encode_test::<64>("", b"\x00");

        // Truncates when the string does not fit.
        do_string_encode_test::<64>(
            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttest",
            b"\xbftesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttes",
        );

        // Truncates when string is over 127 bytes.
        do_string_encode_test::<1024>(
            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest",
            b"\xfftesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttes",
        );
    }
}