pw_format/lib.rs
1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15//! The `pw_format` crate is a parser used to implement proc macros that:
16//! * Understand format string argument types at compile time.
17//! * Syntax check format strings.
18//!
//! `pw_format` is written against `std` and is not intended to be
//! used in an embedded context. Some efficiency and memory are traded for a
//! more expressive interface that exposes the format string's "syntax tree"
//! to the API client.
23//!
24//! # Proc Macros
25//!
26//! The [`macros`] module provides infrastructure for implementing proc macros
27//! that take format strings as arguments.
28//!
29//! # Example
30//!
31//! ```
32//! use pw_format::{
33//! Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34//! Length, MinFieldWidth, Precision, Primitive, Style,
35//! };
36//!
37//! let format_string =
38//! FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39//!
40//! assert_eq!(format_string, FormatString {
41//! fragments: vec![
42//! FormatFragment::Literal("long double ".to_string()),
43//! FormatFragment::Conversion(ConversionSpec {
44//! argument: Argument::None,
45//! fill: ' ',
46//! alignment: Alignment::None,
47//! flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48//! min_field_width: MinFieldWidth::Fixed(4),
49//! precision: Precision::Fixed(2),
50//! length: Some(Length::LongDouble),
51//! primitive: Primitive::Float,
52//! style: Style::None,
53//! }),
54//! FormatFragment::Literal(" is ".to_string()),
55//! FormatFragment::Conversion(ConversionSpec {
56//! argument: Argument::None,
57//! fill: ' ',
58//! alignment: Alignment::Left,
59//! flags: [Flag::LeftJustify, Flag::LeadingZeros]
60//! .into_iter()
61//! .collect(),
62//! min_field_width: MinFieldWidth::Fixed(3),
63//! precision: Precision::None,
64//! length: Some(Length::Short),
65//! primitive: Primitive::Integer,
66//! style: Style::None,
67//! }),
68//! FormatFragment::Literal("%.".to_string()),
69//! ]
70//! });
71//! ```
72#![deny(missing_docs)]
73
74use std::collections::HashSet;
75
76use nom::{
77 branch::alt,
78 bytes::complete::tag,
79 character::complete::digit1,
80 combinator::{map, map_res},
81 IResult,
82};
83use quote::{quote, ToTokens};
84
85pub mod macros;
86
87mod core_fmt;
88mod printf;
89
/// The primitive type a conversion operates on (integer, float, string,
/// etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Primitive {
    /// A signed integer.
    Integer,

    /// An unsigned integer.
    Unsigned,

    /// A floating point value.
    Float,

    /// A string.
    String,

    /// A character.
    Character,

    /// A pointer.
    Pointer,

    /// A value with no type specified.
    Untyped,
}
114
/// The abstract formatting style requested by a conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
    /// No style was specified; defaults apply.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Lower case hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Lower case exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer type rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`.
    Debug,

    /// `core::fmt`'s `{:x?}`.
    HexDebug,

    /// `core::fmt`'s `{:X?}`.
    UpperHexDebug,

    /// Unsupported binary rendering.
    ///
    /// This variant only exists so that the proc macros can give useful
    /// error messages.
    Binary,
}
154
155/// Implemented for testing through the pw_format_test_macros crate.
156impl ToTokens for Style {
157 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
158 let new_tokens = match self {
159 Style::None => quote!(pw_format::Style::None),
160 Style::Octal => quote!(pw_format::Style::Octal),
161 Style::Hex => quote!(pw_format::Style::Hex),
162 Style::UpperHex => quote!(pw_format::Style::UpperHex),
163 Style::Exponential => quote!(pw_format::Style::Exponential),
164 Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
165 Style::Debug => quote!(pw_format::Style::Debug),
166 Style::HexDebug => quote!(pw_format::Style::HexDebug),
167 Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
168 Style::Pointer => quote!(pw_format::Style::Pointer),
169 Style::Binary => quote!(pw_format::Style::Binary),
170 };
171 new_tokens.to_tokens(tokens);
172 }
173}
174
/// A printf flag (the '+' in %+d).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Flag {
    /// The `-` flag.
    LeftJustify,

    /// The `+` flag.
    ForceSign,

    /// The ` ` (space) flag.
    SpaceSign,

    /// The `#` flag.
    AlternateSyntax,

    /// The `0` flag.
    LeadingZeros,
}
193
/// A printf minimum field width (the 5 in %5d).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// No field width was specified.
    None,

    /// A fixed field width.
    Fixed(u32),

    /// A variable field width passed as an argument (i.e. %*d).
    Variable,
}
206
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Precision {
    /// No precision was specified.
    None,

    /// A fixed precision.
    Fixed(u32),

    /// A variable precision passed as an argument (i.e. %.*f).
    Variable,
}
222
/// A printf length (the l in %ld).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Length {
    /// The `hh` length.
    Char,

    /// The `h` length.
    Short,

    /// The `l` length.
    Long,

    /// The `ll` length.
    LongLong,

    /// The `L` length.
    LongDouble,

    /// The `j` length.
    IntMax,

    /// The `z` length.
    Size,

    /// The `t` length.
    PointerDiff,
}
250
/// A core::fmt alignment spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment.
    None,

    /// Left alignment (`<`).
    Left,

    /// Center alignment (`^`).
    Center,

    /// Right alignment (`>`).
    Right,
}
266
/// An argument in a core::fmt style alignment spec.
///
/// i.e. the var_name in `{var_name:#0x}`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Argument {
    /// No argument.
    None,

    /// An argument referenced by position (i.e. `{0}`).
    Positional(usize),

    /// An argument referenced by name (i.e. `{var_name}`).
    Named(String),
}
281
282#[derive(Clone, Debug, PartialEq, Eq)]
283/// A printf conversion specification aka a % clause.
284pub struct ConversionSpec {
285 /// ConversionSpec's argument.
286 pub argument: Argument,
287 /// ConversionSpec's fill character.
288 pub fill: char,
289 /// ConversionSpec's field alignment.
290 pub alignment: Alignment,
291 /// ConversionSpec's set of [Flag]s.
292 pub flags: HashSet<Flag>,
293 /// ConversionSpec's minimum field width argument.
294 pub min_field_width: MinFieldWidth,
295 /// ConversionSpec's [Precision] argument.
296 pub precision: Precision,
297 /// ConversionSpec's [Length] argument.
298 pub length: Option<Length>,
299 /// ConversionSpec's [Primitive].
300 pub primitive: Primitive,
301 /// ConversionSpec's [Style].
302 pub style: Style,
303}
304
305#[derive(Clone, Debug, PartialEq, Eq)]
306/// A fragment of a printf format string.
307pub enum FormatFragment {
308 /// A literal string value.
309 Literal(String),
310
311 /// A conversion specification (i.e. %d).
312 Conversion(ConversionSpec),
313}
314
315impl FormatFragment {
316 /// Try to append `fragment` to `self`.
317 ///
318 /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
319 fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
320 let Self::Literal(literal_fragment) = &fragment else {
321 return Some(fragment);
322 };
323
324 let Self::Literal(ref mut literal_self) = self else {
325 return Some(fragment);
326 };
327
328 literal_self.push_str(literal_fragment);
329
330 None
331 }
332}
333
334#[derive(Debug, PartialEq, Eq)]
335/// A parsed printf format string.
336pub struct FormatString {
337 /// The [FormatFragment]s that comprise the [FormatString].
338 pub fragments: Vec<FormatFragment>,
339}
340
341impl FormatString {
342 /// Parses a printf style format string.
343 pub fn parse_printf(s: &str) -> Result<Self, String> {
344 // TODO: b/281858500 - Add better errors to failed parses.
345 let (rest, result) = printf::format_string(s)
346 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
347
348 // If the parser did not consume all the input, return an error.
349 if !rest.is_empty() {
350 return Err(format!(
351 "Failed to parse format string fragment: \"{rest}\""
352 ));
353 }
354
355 Ok(result)
356 }
357
358 /// Parses a core::fmt style format string.
359 pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
360 // TODO: b/281858500 - Add better errors to failed parses.
361 let (rest, result) = core_fmt::format_string(s)
362 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
363
364 // If the parser did not consume all the input, return an error.
365 if !rest.is_empty() {
366 return Err(format!("Failed to parse format string: \"{rest}\""));
367 }
368
369 Ok(result)
370 }
371
372 /// Creates a `FormatString` from a slice of fragments.
373 ///
374 /// This primary responsibility of this function is to merge literal
375 /// fragments. Adjacent literal fragments occur when a parser parses
376 /// escape sequences. Merging them here allows a
377 /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
378 pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
379 Self {
380 fragments: fragments
381 .iter()
382 .fold(Vec::<_>::new(), |mut fragments, fragment| {
383 // Collapse adjacent literal fragments.
384 let Some(last) = fragments.last_mut() else {
385 // If there are no accumulated fragments, add this one and return.
386 fragments.push((*fragment).clone());
387 return fragments;
388 };
389 if let Some(fragment) = last.try_append(fragment) {
390 // If the fragments were able to append, no more work to do
391 fragments.push((*fragment).clone());
392 };
393 fragments
394 }),
395 }
396 }
397}
398
399fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
400 map(tag("*"), |_| MinFieldWidth::Variable)(input)
401}
402
403fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
404 map_res(
405 digit1,
406 |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
407 Ok(MinFieldWidth::Fixed(value.parse()?))
408 },
409 )(input)
410}
411
412fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
413 Ok((input, MinFieldWidth::None))
414}
415
416fn width(input: &str) -> IResult<&str, MinFieldWidth> {
417 alt((variable_width, fixed_width, no_width))(input)
418}
419
420fn variable_precision(input: &str) -> IResult<&str, Precision> {
421 let (input, _) = tag(".")(input)?;
422 map(tag("*"), |_| Precision::Variable)(input)
423}
424
425fn fixed_precision(input: &str) -> IResult<&str, Precision> {
426 let (input, _) = tag(".")(input)?;
427 map_res(
428 digit1,
429 |value: &str| -> Result<Precision, std::num::ParseIntError> {
430 Ok(Precision::Fixed(value.parse()?))
431 },
432 )(input)
433}
434
435fn no_precision(input: &str) -> IResult<&str, Precision> {
436 Ok((input, Precision::None))
437}
438
439fn precision(input: &str) -> IResult<&str, Precision> {
440 alt((variable_precision, fixed_precision, no_precision))(input)
441}
442
443#[cfg(test)]
444mod tests;