pw_format/lib.rs
1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15//! The `pw_format` crate is a parser used to implement proc macros that:
16//! * Understand format string argument types at compile time.
17//! * Syntax check format strings.
18//!
19//! `pw_format` is written against `std` and is not intended to be
20//! used in an embedded context. Some efficiency and memory is traded for a
21//! more expressive interface that exposes the format string's "syntax tree"
22//! to the API client.
23//!
24//! # Proc Macros
25//!
26//! The [`macros`] module provides infrastructure for implementing proc macros
27//! that take format strings as arguments.
28//!
29//! # Example
30//!
31//! ```
32//! use pw_format::{
33//! Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34//! Length, MinFieldWidth, Precision, Primitive, Style,
35//! };
36//!
37//! let format_string =
38//! FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39//!
40//! assert_eq!(format_string, FormatString {
41//! fragments: vec![
42//! FormatFragment::Literal("long double ".to_string()),
43//! FormatFragment::Conversion(ConversionSpec {
44//! argument: Argument::None,
45//! fill: ' ',
46//! alignment: Alignment::None,
47//! flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48//! min_field_width: MinFieldWidth::Fixed(4),
49//! precision: Precision::Fixed(2),
50//! length: Some(Length::LongDouble),
51//! primitive: Primitive::Float,
52//! style: Style::None,
53//! }),
54//! FormatFragment::Literal(" is ".to_string()),
55//! FormatFragment::Conversion(ConversionSpec {
56//! argument: Argument::None,
57//! fill: ' ',
58//! alignment: Alignment::Left,
59//! flags: [Flag::LeftJustify, Flag::LeadingZeros]
60//! .into_iter()
61//! .collect(),
62//! min_field_width: MinFieldWidth::Fixed(3),
63//! precision: Precision::None,
64//! length: Some(Length::Short),
65//! primitive: Primitive::Integer,
66//! style: Style::None,
67//! }),
68//! FormatFragment::Literal("%.".to_string()),
69//! ]
70//! });
71//! ```
72#![deny(missing_docs)]
73
74use std::collections::HashSet;
75
76use nom::IResult;
77use nom::branch::alt;
78use nom::bytes::complete::tag;
79use nom::character::complete::digit1;
80use nom::combinator::{map, map_res};
81use quote::{ToTokens, quote};
82
83pub mod macros;
84
85mod core_fmt;
86mod printf;
87
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Primitive type of a conversion (integer, float, string, etc.)
///
/// Stored in [`ConversionSpec::primitive`], separately from the rendering
/// [`Style`] of the conversion.
pub enum Primitive {
    /// Signed integer primitive.
    ///
    /// In the crate-level example, `%-03hd` parses to this primitive.
    Integer,

    /// Unsigned integer primitive.
    Unsigned,

    /// Floating point primitive.
    ///
    /// In the crate-level example, `%+ 4.2Lf` parses to this primitive.
    Float,

    /// String primitive.
    String,

    /// Character primitive.
    Character,

    /// Pointer primitive.
    Pointer,

    /// Untyped primitive.
    Untyped,
}
112
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The abstract formatting style for a conversion.
///
/// Stored in [`ConversionSpec::style`]. The examples on each variant show
/// the printf and/or `core::fmt` spelling that the variant stands for.
pub enum Style {
    /// No style specified, use defaults.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer type rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Unsupported binary rendering
    ///
    /// This variant exists so that the proc macros can give useful error
    /// messages.
    Binary,
}
152
153/// Implemented for testing through the pw_format_test_macros crate.
154impl ToTokens for Style {
155 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
156 let new_tokens = match self {
157 Style::None => quote!(pw_format::Style::None),
158 Style::Octal => quote!(pw_format::Style::Octal),
159 Style::Hex => quote!(pw_format::Style::Hex),
160 Style::UpperHex => quote!(pw_format::Style::UpperHex),
161 Style::Exponential => quote!(pw_format::Style::Exponential),
162 Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
163 Style::Debug => quote!(pw_format::Style::Debug),
164 Style::HexDebug => quote!(pw_format::Style::HexDebug),
165 Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
166 Style::Pointer => quote!(pw_format::Style::Pointer),
167 Style::Binary => quote!(pw_format::Style::Binary),
168 };
169 new_tokens.to_tokens(tokens);
170 }
171}
172
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
/// A printf flag (the '+' in %+d).
///
/// Flags are collected into the `HashSet` held by [`ConversionSpec::flags`],
/// which is why this type derives `Hash` and `Eq`. Like the other fieldless
/// enums in this crate it is `Copy`, so flags can be passed and compared by
/// value.
pub enum Flag {
    /// `-`
    ///
    /// In the crate-level example, `%-03hd` carries this flag.
    LeftJustify,

    /// `+`
    ///
    /// In the crate-level example, `%+ 4.2Lf` carries this flag.
    ForceSign,

    /// ` `
    ///
    /// In the crate-level example, `%+ 4.2Lf` carries this flag.
    SpaceSign,

    /// `#`
    AlternateSyntax,

    /// `0`
    ///
    /// In the crate-level example, `%-03hd` carries this flag.
    LeadingZeros,
}
191
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf minimum field width (the 5 in %5d).
///
/// Stored in [`ConversionSpec::min_field_width`].
pub enum MinFieldWidth {
    /// No field width specified.
    None,

    /// Fixed field width.
    ///
    /// In the crate-level example, the `4` in `%+ 4.2Lf` parses to
    /// `MinFieldWidth::Fixed(4)`.
    Fixed(u32),

    /// Variable field width passed as an argument (i.e. %*d).
    Variable,
}
204
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
///
/// Stored in [`ConversionSpec::precision`].
pub enum Precision {
    /// No precision specified.
    None,

    /// Fixed precision.
    ///
    /// In the crate-level example, the `.2` in `%+ 4.2Lf` parses to
    /// `Precision::Fixed(2)`.
    Fixed(u32),

    /// Variable precision passed as an argument (i.e. %.*f).
    Variable,
}
220
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// A printf length (the l in %ld).
///
/// Each variant is documented with the printf length modifier it stands for.
/// Stored as an `Option` in [`ConversionSpec::length`].
pub enum Length {
    /// `hh`
    Char,

    /// `h`
    ///
    /// In the crate-level example, `%-03hd` parses to this length.
    Short,

    /// `l`
    Long,

    /// `ll`
    LongLong,

    /// `L`
    ///
    /// In the crate-level example, `%+ 4.2Lf` parses to this length.
    LongDouble,

    /// `j`
    IntMax,

    /// `z`
    Size,

    /// `t`
    PointerDiff,
}
248
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// A core::fmt alignment spec.
///
/// Stored in [`ConversionSpec::alignment`]. The printf parser populates it
/// as well: in the crate-level example `%-03hd` yields `Alignment::Left`,
/// while `%+ 4.2Lf` yields `Alignment::None`.
pub enum Alignment {
    /// No alignment
    None,

    /// Left alignment (`<`)
    Left,

    /// Center alignment (`^`)
    Center,

    /// Right alignment (`>`)
    Right,
}
264
#[derive(Clone, Debug, PartialEq, Eq)]
/// An argument reference in a core::fmt style format placeholder.
///
/// i.e. the var_name in `{var_name:#0x}`
///
/// Stored in [`ConversionSpec::argument`]. In the crate-level example, the
/// printf conversions all parse to `Argument::None`.
pub enum Argument {
    /// No argument
    None,

    /// A positional argument (i.e. `{0}`).
    Positional(usize),

    /// A named argument (i.e. `{var_name}`).
    Named(String),
}
279
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf conversion specification aka a % clause.
///
/// The same structure also carries the `core::fmt`-oriented pieces of a
/// conversion ([`Argument`], fill character and [`Alignment`]). See the
/// crate-level example for fully populated values produced by
/// [`FormatString::parse_printf`].
pub struct ConversionSpec {
    /// ConversionSpec's argument.
    pub argument: Argument,
    /// ConversionSpec's fill character.
    ///
    /// In the crate-level example, both printf conversions parse to a fill
    /// of `' '`.
    pub fill: char,
    /// ConversionSpec's field alignment.
    pub alignment: Alignment,
    /// ConversionSpec's set of [Flag]s.
    pub flags: HashSet<Flag>,
    /// ConversionSpec's minimum field width argument.
    pub min_field_width: MinFieldWidth,
    /// ConversionSpec's [Precision] argument.
    pub precision: Precision,
    /// ConversionSpec's [Length] argument.
    ///
    /// `None` when the conversion has no length.
    pub length: Option<Length>,
    /// ConversionSpec's [Primitive].
    pub primitive: Primitive,
    /// ConversionSpec's [Style].
    pub style: Style,
}
302
#[derive(Clone, Debug, PartialEq, Eq)]
/// A fragment of a printf format string.
///
/// A [`FormatString`] is a sequence of these: runs of literal text
/// interleaved with conversions.
pub enum FormatFragment {
    /// A literal string value.
    ///
    /// In the crate-level example, the trailing `%%.` of the format string
    /// ends up as the literal `"%."`.
    Literal(String),

    /// A conversion specification (i.e. %d).
    Conversion(ConversionSpec),
}
312
313impl FormatFragment {
314 /// Try to append `fragment` to `self`.
315 ///
316 /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
317 fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
318 let Self::Literal(literal_fragment) = &fragment else {
319 return Some(fragment);
320 };
321
322 let Self::Literal(literal_self) = self else {
323 return Some(fragment);
324 };
325
326 literal_self.push_str(literal_fragment);
327
328 None
329 }
330}
331
#[derive(Debug, PartialEq, Eq)]
/// A parsed printf format string.
///
/// Produced by [`FormatString::parse_printf`] or
/// [`FormatString::parse_core_fmt`].
pub struct FormatString {
    /// The [FormatFragment]s that comprise the [FormatString].
    pub fragments: Vec<FormatFragment>,
}
338
339impl FormatString {
340 /// Parses a printf style format string.
341 pub fn parse_printf(s: &str) -> Result<Self, String> {
342 // TODO: b/281858500 - Add better errors to failed parses.
343 let (rest, result) = printf::format_string(s)
344 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
345
346 // If the parser did not consume all the input, return an error.
347 if !rest.is_empty() {
348 return Err(format!(
349 "Failed to parse format string fragment: \"{rest}\""
350 ));
351 }
352
353 Ok(result)
354 }
355
356 /// Parses a core::fmt style format string.
357 pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
358 // TODO: b/281858500 - Add better errors to failed parses.
359 let (rest, result) = core_fmt::format_string(s)
360 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
361
362 // If the parser did not consume all the input, return an error.
363 if !rest.is_empty() {
364 return Err(format!("Failed to parse format string: \"{rest}\""));
365 }
366
367 Ok(result)
368 }
369
370 /// Creates a `FormatString` from a slice of fragments.
371 ///
372 /// This primary responsibility of this function is to merge literal
373 /// fragments. Adjacent literal fragments occur when a parser parses
374 /// escape sequences. Merging them here allows a
375 /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
376 pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
377 Self {
378 fragments: fragments
379 .iter()
380 .fold(Vec::<_>::new(), |mut fragments, fragment| {
381 // Collapse adjacent literal fragments.
382 let Some(last) = fragments.last_mut() else {
383 // If there are no accumulated fragments, add this one and return.
384 fragments.push((*fragment).clone());
385 return fragments;
386 };
387 if let Some(fragment) = last.try_append(fragment) {
388 // If the fragments were able to append, no more work to do
389 fragments.push((*fragment).clone());
390 };
391 fragments
392 }),
393 }
394 }
395}
396
397fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
398 map(tag("*"), |_| MinFieldWidth::Variable)(input)
399}
400
401fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
402 map_res(
403 digit1,
404 |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
405 Ok(MinFieldWidth::Fixed(value.parse()?))
406 },
407 )(input)
408}
409
410fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
411 Ok((input, MinFieldWidth::None))
412}
413
414fn width(input: &str) -> IResult<&str, MinFieldWidth> {
415 alt((variable_width, fixed_width, no_width))(input)
416}
417
418fn variable_precision(input: &str) -> IResult<&str, Precision> {
419 let (input, _) = tag(".")(input)?;
420 map(tag("*"), |_| Precision::Variable)(input)
421}
422
423fn fixed_precision(input: &str) -> IResult<&str, Precision> {
424 let (input, _) = tag(".")(input)?;
425 map_res(
426 digit1,
427 |value: &str| -> Result<Precision, std::num::ParseIntError> {
428 Ok(Precision::Fixed(value.parse()?))
429 },
430 )(input)
431}
432
433fn no_precision(input: &str) -> IResult<&str, Precision> {
434 Ok((input, Precision::None))
435}
436
437fn precision(input: &str) -> IResult<&str, Precision> {
438 alt((variable_precision, fixed_precision, no_precision))(input)
439}
440
441#[cfg(test)]
442mod tests;