pw_format/
lib.rs

1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15//! The `pw_format` crate is a parser used to implement proc macros that:
16//! * Understand format string argument types at compile time.
17//! * Syntax check format strings.
18//!
19//! `pw_format` is written against `std` and is not intended to be
20//! used in an embedded context.  Some efficiency and memory is traded for a
21//! more expressive interface that exposes the format string's "syntax tree"
22//! to the API client.
23//!
24//! # Proc Macros
25//!
26//! The [`macros`] module provides infrastructure for implementing proc macros
27//! that take format strings as arguments.
28//!
29//! # Example
30//!
31//! ```
32//! use pw_format::{
33//!     Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34//!     Length, MinFieldWidth, Precision, Primitive, Style,
35//! };
36//!
37//! let format_string =
38//!   FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39//!
40//! assert_eq!(format_string, FormatString {
41//!   fragments: vec![
42//!       FormatFragment::Literal("long double ".to_string()),
43//!       FormatFragment::Conversion(ConversionSpec {
44//!           argument: Argument::None,
45//!           fill: ' ',
46//!           alignment: Alignment::None,
47//!           flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48//!           min_field_width: MinFieldWidth::Fixed(4),
49//!           precision: Precision::Fixed(2),
50//!           length: Some(Length::LongDouble),
51//!           primitive: Primitive::Float,
52//!           style: Style::None,
53//!       }),
54//!       FormatFragment::Literal(" is ".to_string()),
55//!       FormatFragment::Conversion(ConversionSpec {
56//!           argument: Argument::None,
57//!           fill: ' ',
58//!           alignment: Alignment::Left,
59//!           flags: [Flag::LeftJustify, Flag::LeadingZeros]
60//!               .into_iter()
61//!               .collect(),
62//!           min_field_width: MinFieldWidth::Fixed(3),
63//!           precision: Precision::None,
64//!           length: Some(Length::Short),
65//!           primitive: Primitive::Integer,
66//!           style: Style::None,
67//!       }),
68//!       FormatFragment::Literal("%.".to_string()),
69//!   ]
70//! });
71//! ```
72#![deny(missing_docs)]
73
74use std::collections::HashSet;
75
76use nom::{
77    branch::alt,
78    bytes::complete::tag,
79    character::complete::digit1,
80    combinator::{map, map_res},
81    IResult,
82};
83use quote::{quote, ToTokens};
84
85pub mod macros;
86
87mod core_fmt;
88mod printf;
89
/// Primitive type of a conversion (integer, float, string, etc.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Primitive {
    /// A signed integer.
    Integer,

    /// An unsigned integer.
    Unsigned,

    /// A floating point number.
    Float,

    /// A string.
    String,

    /// A single character.
    Character,

    /// A pointer.
    Pointer,

    /// A primitive with no type attached.
    Untyped,
}
114
/// The abstract formatting style for a conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
    /// No explicit style; the default rendering is used.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Lower case hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer type rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Unsupported binary rendering
    ///
    /// Binary output is not supported; the variant is kept so that the proc
    /// macros can give useful error messages.
    Binary,
}
154
155/// Implemented for testing through the pw_format_test_macros crate.
156impl ToTokens for Style {
157    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
158        let new_tokens = match self {
159            Style::None => quote!(pw_format::Style::None),
160            Style::Octal => quote!(pw_format::Style::Octal),
161            Style::Hex => quote!(pw_format::Style::Hex),
162            Style::UpperHex => quote!(pw_format::Style::UpperHex),
163            Style::Exponential => quote!(pw_format::Style::Exponential),
164            Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
165            Style::Debug => quote!(pw_format::Style::Debug),
166            Style::HexDebug => quote!(pw_format::Style::HexDebug),
167            Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
168            Style::Pointer => quote!(pw_format::Style::Pointer),
169            Style::Binary => quote!(pw_format::Style::Binary),
170        };
171        new_tokens.to_tokens(tokens);
172    }
173}
174
/// A printf flag (the '+' in %+d).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Flag {
    /// The `-` flag.
    LeftJustify,

    /// The `+` flag.
    ForceSign,

    /// The ` ` (space) flag.
    SpaceSign,

    /// The `#` flag.
    AlternateSyntax,

    /// The `0` flag.
    LeadingZeros,
}
193
/// A printf minimum field width (the 5 in %5d).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// The conversion does not specify a field width.
    None,

    /// Fixed field width written directly in the format string.
    Fixed(u32),

    /// Variable field width passed as an argument (i.e. %*d).
    Variable,
}
206
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Precision {
    /// The conversion does not specify a precision.
    None,

    /// Fixed precision written directly in the format string.
    Fixed(u32),

    /// Variable precision passed as an argument (i.e. %.*f).
    Variable,
}
222
/// A printf length (the l in %ld).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Length {
    /// The `hh` length modifier.
    Char,

    /// The `h` length modifier.
    Short,

    /// The `l` length modifier.
    Long,

    /// The `ll` length modifier.
    LongLong,

    /// The `L` length modifier.
    LongDouble,

    /// The `j` length modifier.
    IntMax,

    /// The `z` length modifier.
    Size,

    /// The `t` length modifier.
    PointerDiff,
}
250
/// A core::fmt alignment spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was specified.
    None,

    /// Left alignment (`<`)
    Left,

    /// Center alignment (`^`)
    Center,

    /// Right alignment (`>`)
    Right,
}
266
/// An argument in a core::fmt style alignment spec.
///
/// i.e. the var_name in `{var_name:#0x}`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Argument {
    /// The conversion names no argument.
    None,

    /// A positional argument (i.e. `{0}`), identified by its index.
    Positional(usize),

    /// A named argument (i.e. `{var_name}`), identified by its name.
    Named(String),
}
281
282#[derive(Clone, Debug, PartialEq, Eq)]
283/// A printf conversion specification aka a % clause.
284pub struct ConversionSpec {
285    /// ConversionSpec's argument.
286    pub argument: Argument,
287    /// ConversionSpec's fill character.
288    pub fill: char,
289    /// ConversionSpec's field alignment.
290    pub alignment: Alignment,
291    /// ConversionSpec's set of [Flag]s.
292    pub flags: HashSet<Flag>,
293    /// ConversionSpec's minimum field width argument.
294    pub min_field_width: MinFieldWidth,
295    /// ConversionSpec's [Precision] argument.
296    pub precision: Precision,
297    /// ConversionSpec's [Length] argument.
298    pub length: Option<Length>,
299    /// ConversionSpec's [Primitive].
300    pub primitive: Primitive,
301    /// ConversionSpec's [Style].
302    pub style: Style,
303}
304
305#[derive(Clone, Debug, PartialEq, Eq)]
306/// A fragment of a printf format string.
307pub enum FormatFragment {
308    /// A literal string value.
309    Literal(String),
310
311    /// A conversion specification (i.e. %d).
312    Conversion(ConversionSpec),
313}
314
315impl FormatFragment {
316    /// Try to append `fragment` to `self`.
317    ///
318    /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
319    fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
320        let Self::Literal(literal_fragment) = &fragment else {
321            return Some(fragment);
322        };
323
324        let Self::Literal(ref mut literal_self) = self else {
325            return Some(fragment);
326        };
327
328        literal_self.push_str(literal_fragment);
329
330        None
331    }
332}
333
334#[derive(Debug, PartialEq, Eq)]
335/// A parsed printf format string.
336pub struct FormatString {
337    /// The [FormatFragment]s that comprise the [FormatString].
338    pub fragments: Vec<FormatFragment>,
339}
340
341impl FormatString {
342    /// Parses a printf style format string.
343    pub fn parse_printf(s: &str) -> Result<Self, String> {
344        // TODO: b/281858500 - Add better errors to failed parses.
345        let (rest, result) = printf::format_string(s)
346            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
347
348        // If the parser did not consume all the input, return an error.
349        if !rest.is_empty() {
350            return Err(format!(
351                "Failed to parse format string fragment: \"{rest}\""
352            ));
353        }
354
355        Ok(result)
356    }
357
358    /// Parses a core::fmt style format string.
359    pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
360        // TODO: b/281858500 - Add better errors to failed parses.
361        let (rest, result) = core_fmt::format_string(s)
362            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
363
364        // If the parser did not consume all the input, return an error.
365        if !rest.is_empty() {
366            return Err(format!("Failed to parse format string: \"{rest}\""));
367        }
368
369        Ok(result)
370    }
371
372    /// Creates a `FormatString` from a slice of fragments.
373    ///
374    /// This primary responsibility of this function is to merge literal
375    /// fragments.  Adjacent literal fragments occur when a parser parses
376    /// escape sequences.  Merging them here allows a
377    /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
378    pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
379        Self {
380            fragments: fragments
381                .iter()
382                .fold(Vec::<_>::new(), |mut fragments, fragment| {
383                    // Collapse adjacent literal fragments.
384                    let Some(last) = fragments.last_mut() else {
385                        // If there are no accumulated fragments, add this one and return.
386                        fragments.push((*fragment).clone());
387                        return fragments;
388                    };
389                    if let Some(fragment) = last.try_append(fragment) {
390                        // If the fragments were able to append, no more work to do
391                        fragments.push((*fragment).clone());
392                    };
393                    fragments
394                }),
395        }
396    }
397}
398
399fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
400    map(tag("*"), |_| MinFieldWidth::Variable)(input)
401}
402
403fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
404    map_res(
405        digit1,
406        |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
407            Ok(MinFieldWidth::Fixed(value.parse()?))
408        },
409    )(input)
410}
411
412fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
413    Ok((input, MinFieldWidth::None))
414}
415
416fn width(input: &str) -> IResult<&str, MinFieldWidth> {
417    alt((variable_width, fixed_width, no_width))(input)
418}
419
420fn variable_precision(input: &str) -> IResult<&str, Precision> {
421    let (input, _) = tag(".")(input)?;
422    map(tag("*"), |_| Precision::Variable)(input)
423}
424
425fn fixed_precision(input: &str) -> IResult<&str, Precision> {
426    let (input, _) = tag(".")(input)?;
427    map_res(
428        digit1,
429        |value: &str| -> Result<Precision, std::num::ParseIntError> {
430            Ok(Precision::Fixed(value.parse()?))
431        },
432    )(input)
433}
434
435fn no_precision(input: &str) -> IResult<&str, Precision> {
436    Ok((input, Precision::None))
437}
438
439fn precision(input: &str) -> IResult<&str, Precision> {
440    alt((variable_precision, fixed_precision, no_precision))(input)
441}
442
443#[cfg(test)]
444mod tests;