pw_format/
lib.rs

1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15//! The `pw_format` crate is a parser used to implement proc macros that:
16//! * Understand format string argument types at compile time.
17//! * Syntax check format strings.
18//!
19//! `pw_format` is written against `std` and is not intended to be
20//! used in an embedded context.  Some efficiency and memory is traded for a
21//! more expressive interface that exposes the format string's "syntax tree"
22//! to the API client.
23//!
24//! # Proc Macros
25//!
26//! The [`macros`] module provides infrastructure for implementing proc macros
27//! that take format strings as arguments.
28//!
29//! # Example
30//!
31//! ```
32//! use pw_format::{
33//!     Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34//!     Length, MinFieldWidth, Precision, Primitive, Style,
35//! };
36//!
37//! let format_string =
38//!   FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39//!
40//! assert_eq!(format_string, FormatString {
41//!   fragments: vec![
42//!       FormatFragment::Literal("long double ".to_string()),
43//!       FormatFragment::Conversion(ConversionSpec {
44//!           argument: Argument::None,
45//!           fill: ' ',
46//!           alignment: Alignment::None,
47//!           flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48//!           min_field_width: MinFieldWidth::Fixed(4),
49//!           precision: Precision::Fixed(2),
50//!           length: Some(Length::LongDouble),
51//!           primitive: Primitive::Float,
52//!           style: Style::None,
53//!       }),
54//!       FormatFragment::Literal(" is ".to_string()),
55//!       FormatFragment::Conversion(ConversionSpec {
56//!           argument: Argument::None,
57//!           fill: ' ',
58//!           alignment: Alignment::Left,
59//!           flags: [Flag::LeftJustify, Flag::LeadingZeros]
60//!               .into_iter()
61//!               .collect(),
62//!           min_field_width: MinFieldWidth::Fixed(3),
63//!           precision: Precision::None,
64//!           length: Some(Length::Short),
65//!           primitive: Primitive::Integer,
66//!           style: Style::None,
67//!       }),
68//!       FormatFragment::Literal("%.".to_string()),
69//!   ]
70//! });
71//! ```
72#![deny(missing_docs)]
73
74use std::collections::HashSet;
75
76use nom::IResult;
77use nom::branch::alt;
78use nom::bytes::complete::tag;
79use nom::character::complete::digit1;
80use nom::combinator::{map, map_res};
81use quote::{ToTokens, quote};
82
83pub mod macros;
84
85mod core_fmt;
86mod printf;
87
/// The kind of value a conversion consumes (integer, float, string, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Primitive {
    /// A signed integer.
    Integer,

    /// An unsigned integer.
    Unsigned,

    /// A floating point number.
    Float,

    /// A string.
    String,

    /// A single character.
    Character,

    /// A pointer.
    Pointer,

    /// A value with no type specified.
    Untyped,
}
112
/// How a conversion's value should be rendered, independent of the format
/// string dialect it was parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
    /// No explicit style; the default rendering is used.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Lower case hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Lower case exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`.
    Debug,

    /// `core::fmt`'s `{:x?}`.
    HexDebug,

    /// `core::fmt`'s `{:X?}`.
    UpperHexDebug,

    /// Binary rendering, which is unsupported.
    ///
    /// The variant is kept so that the proc macros can give useful error
    /// messages.
    Binary,
}
152
153/// Implemented for testing through the pw_format_test_macros crate.
154impl ToTokens for Style {
155    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
156        let new_tokens = match self {
157            Style::None => quote!(pw_format::Style::None),
158            Style::Octal => quote!(pw_format::Style::Octal),
159            Style::Hex => quote!(pw_format::Style::Hex),
160            Style::UpperHex => quote!(pw_format::Style::UpperHex),
161            Style::Exponential => quote!(pw_format::Style::Exponential),
162            Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
163            Style::Debug => quote!(pw_format::Style::Debug),
164            Style::HexDebug => quote!(pw_format::Style::HexDebug),
165            Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
166            Style::Pointer => quote!(pw_format::Style::Pointer),
167            Style::Binary => quote!(pw_format::Style::Binary),
168        };
169        new_tokens.to_tokens(tokens);
170    }
171}
172
/// A single printf flag character (the '+' in %+d).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Flag {
    /// The `-` flag.
    LeftJustify,

    /// The `+` flag.
    ForceSign,

    /// The ` ` (space) flag.
    SpaceSign,

    /// The `#` flag.
    AlternateSyntax,

    /// The `0` flag.
    LeadingZeros,
}
191
/// The minimum field width of a printf conversion (the 5 in %5d).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// The conversion does not specify a field width.
    None,

    /// A fixed field width written in the format string.
    Fixed(u32),

    /// A field width supplied as an argument (i.e. %*d).
    Variable,
}
204
/// The precision of a printf conversion (the .5 in %.5d).
///
/// When the conversion is a string (%s), the precision is treated as the
/// maximum number of bytes of the string to output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Precision {
    /// The conversion does not specify a precision.
    None,

    /// A fixed precision written in the format string.
    Fixed(u32),

    /// A precision supplied as an argument (i.e. %.*f).
    Variable,
}
220
/// The length modifier of a printf conversion (the l in %ld).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Length {
    /// The `hh` modifier.
    Char,

    /// The `h` modifier.
    Short,

    /// The `l` modifier.
    Long,

    /// The `ll` modifier.
    LongLong,

    /// The `L` modifier.
    LongDouble,

    /// The `j` modifier.
    IntMax,

    /// The `z` modifier.
    Size,

    /// The `t` modifier.
    PointerDiff,
}
248
/// The alignment requested by a core::fmt format spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was specified.
    None,

    /// Align to the left (`<`).
    Left,

    /// Align to the center (`^`).
    Center,

    /// Align to the right (`>`).
    Right,
}
264
/// The argument selector of a core::fmt style format spec.
///
/// For example, `var_name` in `{var_name:#0x}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Argument {
    /// No argument was given.
    None,

    /// An argument selected by position (i.e. `{0}`).
    Positional(usize),

    /// An argument selected by name (i.e. `{var_name}`).
    Named(String),
}
279
280#[derive(Clone, Debug, PartialEq, Eq)]
281/// A printf conversion specification aka a % clause.
282pub struct ConversionSpec {
283    /// ConversionSpec's argument.
284    pub argument: Argument,
285    /// ConversionSpec's fill character.
286    pub fill: char,
287    /// ConversionSpec's field alignment.
288    pub alignment: Alignment,
289    /// ConversionSpec's set of [Flag]s.
290    pub flags: HashSet<Flag>,
291    /// ConversionSpec's minimum field width argument.
292    pub min_field_width: MinFieldWidth,
293    /// ConversionSpec's [Precision] argument.
294    pub precision: Precision,
295    /// ConversionSpec's [Length] argument.
296    pub length: Option<Length>,
297    /// ConversionSpec's [Primitive].
298    pub primitive: Primitive,
299    /// ConversionSpec's [Style].
300    pub style: Style,
301}
302
303#[derive(Clone, Debug, PartialEq, Eq)]
304/// A fragment of a printf format string.
305pub enum FormatFragment {
306    /// A literal string value.
307    Literal(String),
308
309    /// A conversion specification (i.e. %d).
310    Conversion(ConversionSpec),
311}
312
313impl FormatFragment {
314    /// Try to append `fragment` to `self`.
315    ///
316    /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
317    fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
318        let Self::Literal(literal_fragment) = &fragment else {
319            return Some(fragment);
320        };
321
322        let Self::Literal(literal_self) = self else {
323            return Some(fragment);
324        };
325
326        literal_self.push_str(literal_fragment);
327
328        None
329    }
330}
331
332#[derive(Debug, PartialEq, Eq)]
333/// A parsed printf format string.
334pub struct FormatString {
335    /// The [FormatFragment]s that comprise the [FormatString].
336    pub fragments: Vec<FormatFragment>,
337}
338
339impl FormatString {
340    /// Parses a printf style format string.
341    pub fn parse_printf(s: &str) -> Result<Self, String> {
342        // TODO: b/281858500 - Add better errors to failed parses.
343        let (rest, result) = printf::format_string(s)
344            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
345
346        // If the parser did not consume all the input, return an error.
347        if !rest.is_empty() {
348            return Err(format!(
349                "Failed to parse format string fragment: \"{rest}\""
350            ));
351        }
352
353        Ok(result)
354    }
355
356    /// Parses a core::fmt style format string.
357    pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
358        // TODO: b/281858500 - Add better errors to failed parses.
359        let (rest, result) = core_fmt::format_string(s)
360            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
361
362        // If the parser did not consume all the input, return an error.
363        if !rest.is_empty() {
364            return Err(format!("Failed to parse format string: \"{rest}\""));
365        }
366
367        Ok(result)
368    }
369
370    /// Creates a `FormatString` from a slice of fragments.
371    ///
372    /// This primary responsibility of this function is to merge literal
373    /// fragments.  Adjacent literal fragments occur when a parser parses
374    /// escape sequences.  Merging them here allows a
375    /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
376    pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
377        Self {
378            fragments: fragments
379                .iter()
380                .fold(Vec::<_>::new(), |mut fragments, fragment| {
381                    // Collapse adjacent literal fragments.
382                    let Some(last) = fragments.last_mut() else {
383                        // If there are no accumulated fragments, add this one and return.
384                        fragments.push((*fragment).clone());
385                        return fragments;
386                    };
387                    if let Some(fragment) = last.try_append(fragment) {
388                        // If the fragments were able to append, no more work to do
389                        fragments.push((*fragment).clone());
390                    };
391                    fragments
392                }),
393        }
394    }
395}
396
397fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
398    map(tag("*"), |_| MinFieldWidth::Variable)(input)
399}
400
401fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
402    map_res(
403        digit1,
404        |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
405            Ok(MinFieldWidth::Fixed(value.parse()?))
406        },
407    )(input)
408}
409
410fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
411    Ok((input, MinFieldWidth::None))
412}
413
414fn width(input: &str) -> IResult<&str, MinFieldWidth> {
415    alt((variable_width, fixed_width, no_width))(input)
416}
417
418fn variable_precision(input: &str) -> IResult<&str, Precision> {
419    let (input, _) = tag(".")(input)?;
420    map(tag("*"), |_| Precision::Variable)(input)
421}
422
423fn fixed_precision(input: &str) -> IResult<&str, Precision> {
424    let (input, _) = tag(".")(input)?;
425    map_res(
426        digit1,
427        |value: &str| -> Result<Precision, std::num::ParseIntError> {
428            Ok(Precision::Fixed(value.parse()?))
429        },
430    )(input)
431}
432
433fn no_precision(input: &str) -> IResult<&str, Precision> {
434    Ok((input, Precision::None))
435}
436
437fn precision(input: &str) -> IResult<&str, Precision> {
438    alt((variable_precision, fixed_precision, no_precision))(input)
439}
440
441#[cfg(test)]
442mod tests;