Skip to main content

pw_format/
format_string.rs

1// Copyright 2023 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15use std::collections::HashSet;
16
17use quote::{quote, ToTokens};
18
19use crate::{core_fmt, printf};
20
/// The primitive value category a conversion operates on (integer, float,
/// string, etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Primitive {
    /// A signed integer value.
    Integer,

    /// An unsigned integer value.
    Unsigned,

    /// A floating point value.
    Float,

    /// A string value.
    String,

    /// A single character value.
    Character,

    /// A pointer value.
    Pointer,

    /// A value whose type is not stated by the conversion.
    Untyped,
}
45
/// The abstract rendering style requested by a conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
    /// No explicit style; the defaults apply.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Lower case hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Unsupported binary rendering.
    ///
    /// This variant exists so that the proc macros can give useful error
    /// messages.
    Binary,
}
85
86/// Implemented for testing through the pw_format_test_macros crate.
87impl ToTokens for Style {
88    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
89        let new_tokens = match self {
90            Style::None => quote!(pw_format::Style::None),
91            Style::Octal => quote!(pw_format::Style::Octal),
92            Style::Hex => quote!(pw_format::Style::Hex),
93            Style::UpperHex => quote!(pw_format::Style::UpperHex),
94            Style::Exponential => quote!(pw_format::Style::Exponential),
95            Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
96            Style::Debug => quote!(pw_format::Style::Debug),
97            Style::HexDebug => quote!(pw_format::Style::HexDebug),
98            Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
99            Style::Pointer => quote!(pw_format::Style::Pointer),
100            Style::Binary => quote!(pw_format::Style::Binary),
101        };
102        new_tokens.to_tokens(tokens);
103    }
104}
105
/// A printf flag (the '+' in %+d).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Flag {
    /// The `-` flag.
    LeftJustify,

    /// The `+` flag.
    ForceSign,

    /// The ` ` (space) flag.
    SpaceSign,

    /// The `#` flag.
    AlternateSyntax,

    /// The `0` flag.
    LeadingZeros,
}
124
/// A printf minimum field width (the 5 in %5d).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// The conversion does not specify a field width.
    None,

    /// Fixed field width.
    Fixed(u32),

    /// Variable field width passed as an argument (i.e. %*d).
    Variable,
}
137
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Precision {
    /// The conversion does not specify a precision.
    None,

    /// Fixed precision.
    Fixed(u32),

    /// Variable precision passed as an argument (i.e. %.*f).
    Variable,
}
153
/// A printf length modifier (the l in %ld).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Length {
    /// The `hh` modifier.
    Char,

    /// The `h` modifier.
    Short,

    /// The `l` modifier.
    Long,

    /// The `ll` modifier.
    LongLong,

    /// The `L` modifier.
    LongDouble,

    /// The `j` modifier.
    IntMax,

    /// The `z` modifier.
    Size,

    /// The `t` modifier.
    PointerDiff,
}
181
/// A core::fmt alignment spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was specified.
    None,

    /// Left alignment (`<`).
    Left,

    /// Center alignment (`^`).
    Center,

    /// Right alignment (`>`).
    Right,
}
197
/// An argument in a core::fmt style alignment spec.
///
/// i.e. the var_name in `{var_name:#0x}`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Argument {
    /// No argument was given.
    None,

    /// A positional argument (i.e. `{0}`).
    Positional(usize),

    /// A named argument (i.e. `{var_name}`).
    Named(String),
}
212
213/// A printf conversion specification aka a % clause.
214#[derive(Clone, Debug, PartialEq, Eq)]
215pub struct ConversionSpec {
216    /// ConversionSpec's argument.
217    pub argument: Argument,
218    /// ConversionSpec's fill character.
219    pub fill: char,
220    /// ConversionSpec's field alignment.
221    pub alignment: Alignment,
222    /// ConversionSpec's set of [Flag]s.
223    pub flags: HashSet<Flag>,
224    /// ConversionSpec's minimum field width argument.
225    pub min_field_width: MinFieldWidth,
226    /// ConversionSpec's [Precision] argument.
227    pub precision: Precision,
228    /// ConversionSpec's [Length] argument.
229    pub length: Option<Length>,
230    /// ConversionSpec's [Primitive].
231    pub primitive: Primitive,
232    /// ConversionSpec's [Style].
233    pub style: Style,
234}
235
236impl ConversionSpec {
237    /// Reconstructs the conversion specifier back to its printf format string representation (e.g., `%+05.2ld`).
238    #[must_use]
239    pub fn to_printf(&self) -> String {
240        let mut s = String::from("%");
241        if self.flags.contains(&Flag::LeftJustify) {
242            s.push('-');
243        }
244        if self.flags.contains(&Flag::ForceSign) {
245            s.push('+');
246        }
247        if self.flags.contains(&Flag::SpaceSign) {
248            s.push(' ');
249        }
250        if self.flags.contains(&Flag::AlternateSyntax) {
251            s.push('#');
252        }
253        if self.flags.contains(&Flag::LeadingZeros) {
254            s.push('0');
255        }
256
257        match self.min_field_width {
258            MinFieldWidth::None => {}
259            MinFieldWidth::Fixed(w) => s.push_str(&w.to_string()),
260            MinFieldWidth::Variable => s.push('*'),
261        }
262
263        match self.precision {
264            Precision::None => {}
265            Precision::Fixed(p) => s.push_str(&format!(".{p}")),
266            Precision::Variable => s.push_str(".*"),
267        }
268
269        if let Some(length) = self.length {
270            s.push_str(match length {
271                Length::Char => "hh",
272                Length::Short => "h",
273                Length::Long => "l",
274                Length::LongLong => "ll",
275                Length::LongDouble => "L",
276                Length::IntMax => "j",
277                Length::Size => "z",
278                Length::PointerDiff => "t",
279            });
280        }
281
282        let type_char = match (self.primitive, self.style) {
283            (Primitive::Integer, _) => 'd',
284            (Primitive::Unsigned, Style::Octal) => 'o',
285            (Primitive::Unsigned, Style::Hex) => 'x',
286            (Primitive::Unsigned, Style::UpperHex) => 'X',
287            (Primitive::Unsigned, _) => 'u',
288            (Primitive::Float, Style::Exponential) => 'e',
289            (Primitive::Float, Style::UpperExponential) => 'E',
290            (Primitive::Float, _) => 'f',
291            (Primitive::Character, _) => 'c',
292            (Primitive::String, _) => 's',
293            (Primitive::Pointer, _) => 'p',
294            (Primitive::Untyped, _) => 'v',
295        };
296        s.push(type_char);
297        s
298    }
299}
300
301/// A fragment of a printf format string.
302#[derive(Clone, Debug, PartialEq, Eq)]
303pub enum FormatFragment {
304    /// A literal string value.
305    Literal(String),
306
307    /// A conversion specification (i.e. %d).
308    Conversion(ConversionSpec),
309}
310
311impl FormatFragment {
312    /// Try to append `fragment` to `self`.
313    ///
314    /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
315    fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
316        let Self::Literal(literal_fragment) = &fragment else {
317            return Some(fragment);
318        };
319
320        let Self::Literal(literal_self) = self else {
321            return Some(fragment);
322        };
323
324        literal_self.push_str(literal_fragment);
325
326        None
327    }
328}
329
/// A decoded argument value.
#[derive(Clone, Debug, PartialEq)]
pub enum Arg {
    /// A signed integer.
    Int(i64),
    /// An unsigned integer.
    Uint(u64),
    /// A floating point number.
    Float(f64),
    /// A string.
    Str(String),
    /// A character.
    Char(char),
    /// A pointer.
    Ptr(usize),
}
346
/// The formatting convention to apply (influences defaults).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FormatStyle {
    /// Printf style defaults (e.g. %f defaults to precision 6).
    Printf,
    /// `core::fmt` style defaults.
    CoreFmt,
}
355
356/// A trait for formatting conversion specifiers that failed, were skipped, or were missing.
357pub trait FormatError {
358    /// The domain-specific error type.
359    type Error;
360
361    /// Renders a conversion specifier that failed with a domain-specific error.
362    fn format_error(&self, spec: &ConversionSpec, error: &Self::Error) -> String;
363
364    /// Renders a conversion specifier that was missing from the supplied arguments.
365    fn format_missing(&self, spec: &ConversionSpec) -> String;
366
367    /// Renders a conversion specifier that decoded successfully but failed to format (type mismatch).
368    fn format_type_error(&self, spec: &ConversionSpec, arg: &Arg) -> String;
369}
370
/// Error formatter that echoes the original conversion specifier whenever
/// formatting fails.  Its error type is `core::convert::Infallible`, which has
/// no values.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct DefaultFormatter;
375
376impl FormatError for DefaultFormatter {
377    type Error = core::convert::Infallible;
378
379    fn format_error(&self, spec: &ConversionSpec, _error: &core::convert::Infallible) -> String {
380        spec.to_printf()
381    }
382    fn format_missing(&self, spec: &ConversionSpec) -> String {
383        spec.to_printf()
384    }
385    fn format_type_error(&self, spec: &ConversionSpec, _arg: &Arg) -> String {
386        spec.to_printf()
387    }
388}
389
390/// A parsed format string.
391#[derive(Clone, Debug, PartialEq, Eq)]
392pub struct FormatString {
393    /// The [FormatFragment]s that comprise the [FormatString].
394    pub fragments: Vec<FormatFragment>,
395}
396
397impl FormatString {
398    /// Formats a parsed format string with provided arguments.
399    #[must_use]
400    pub fn format(&self, args: &[Arg], style: FormatStyle) -> String {
401        let result_args: Vec<Result<Arg, core::convert::Infallible>> =
402            args.iter().map(|arg| Ok(arg.clone())).collect();
403
404        self.format_with_errors(&result_args, style, &DefaultFormatter)
405    }
406
407    /// Formats a parsed format string with the provided argument states (Result<Arg, FE::Error>),
408    /// delegating formatting of any failures, missing arguments, or type mismatches
409    /// to the provided `FormatError` implementation.
410    pub fn format_with_errors<FE: FormatError>(
411        &self,
412        args: &[Result<Arg, FE::Error>],
413        style: FormatStyle,
414        error_formatter: &FE,
415    ) -> String {
416        let mut output = String::new();
417        let mut args_iter = args.iter();
418
419        for fragment in &self.fragments {
420            self.format_fragment(
421                fragment,
422                &mut args_iter,
423                style,
424                error_formatter,
425                &mut output,
426            );
427        }
428
429        output
430    }
431
432    fn format_fragment<'a, FE: FormatError>(
433        &self,
434        fragment: &FormatFragment,
435        args: &mut impl Iterator<Item = &'a Result<Arg, FE::Error>>,
436        style: FormatStyle,
437        error_formatter: &FE,
438        output: &mut String,
439    ) where
440        FE::Error: 'a,
441    {
442        let spec = match fragment {
443            FormatFragment::Conversion(spec) => spec,
444            FormatFragment::Literal(s) => {
445                output.push_str(s);
446                return;
447            }
448        };
449
450        let Some(decoded) = args.next() else {
451            output.push_str(&error_formatter.format_missing(spec));
452            return;
453        };
454
455        let arg = match decoded {
456            Ok(arg) => arg,
457            Err(err) => {
458                output.push_str(&error_formatter.format_error(spec, err));
459                return;
460            }
461        };
462
463        let mut formatted = String::new();
464        match self.format_value(spec, arg, style, &mut formatted) {
465            Ok(()) => output.push_str(&formatted),
466            Err(_) => {
467                output.push_str(&error_formatter.format_type_error(spec, arg));
468            }
469        }
470    }
471
472    /// Parses a printf style format string.
473    pub fn parse_printf(s: &str) -> Result<Self, String> {
474        // TODO: b/281858500 - Add better errors to failed parses.
475        let (rest, result) = printf::format_string(s)
476            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
477
478        // If the parser did not consume all the input, return an error.
479        if !rest.is_empty() {
480            return Err(format!(
481                "Failed to parse format string fragment: \"{rest}\""
482            ));
483        }
484
485        Ok(result)
486    }
487
488    /// Parses a core::fmt style format string.
489    pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
490        // TODO: b/281858500 - Add better errors to failed parses.
491        let (rest, result) = core_fmt::format_string(s)
492            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
493
494        // If the parser did not consume all the input, return an error.
495        if !rest.is_empty() {
496            return Err(format!("Failed to parse format string: \"{rest}\""));
497        }
498
499        Ok(result)
500    }
501
502    /// Creates a `FormatString` from a slice of fragments.
503    ///
504    /// This primary responsibility of this function is to merge literal
505    /// fragments.  Adjacent literal fragments occur when a parser parses
506    /// escape sequences.  Merging them here allows a
507    /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
508    pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
509        Self {
510            fragments: fragments
511                .iter()
512                .fold(Vec::<_>::new(), |mut fragments, fragment| {
513                    // Collapse adjacent literal fragments.
514                    let Some(last) = fragments.last_mut() else {
515                        // If there are no accumulated fragments, add this one and return.
516                        fragments.push((*fragment).clone());
517                        return fragments;
518                    };
519                    if let Some(fragment) = last.try_append(fragment) {
520                        // If the fragments were able to append, no more work to do
521                        fragments.push((*fragment).clone());
522                    };
523                    fragments
524                }),
525        }
526    }
527
528    fn format_value(
529        &self,
530        spec: &ConversionSpec,
531        arg: &Arg,
532        style: FormatStyle,
533        output: &mut String,
534    ) -> Result<(), String> {
535        match (spec.primitive, arg) {
536            (Primitive::Integer, Arg::Int(v)) => self.format_int(*v, spec, style, output),
537            (Primitive::Unsigned, Arg::Uint(v)) => self.format_uint(*v, spec, style, output),
538            (Primitive::Float, Arg::Float(v)) => self.format_float(*v, spec, style, output),
539            (Primitive::String, Arg::Str(v)) => self.format_str(v, spec, style, output),
540            (Primitive::Character, Arg::Char(v)) => self.format_char(*v, spec, style, output),
541            (Primitive::Pointer, Arg::Ptr(v)) => self.format_ptr(*v, spec, style, output),
542            (Primitive::Untyped, _) => self.format_untyped(spec, arg, style, output),
543            _ => Err(format!(
544                "Mismatched type: expected {:?}, got {:?}",
545                spec.primitive, arg
546            )),
547        }
548    }
549
550    fn format_untyped(
551        &self,
552        spec: &ConversionSpec,
553        arg: &Arg,
554        style: FormatStyle,
555        output: &mut String,
556    ) -> Result<(), String> {
557        match arg {
558            Arg::Int(v) => self.format_int(*v, spec, style, output),
559            Arg::Uint(v) => self.format_uint(*v, spec, style, output),
560            Arg::Float(v) => self.format_float(*v, spec, style, output),
561            Arg::Str(v) => self.format_str(v, spec, style, output),
562            Arg::Char(v) => self.format_char(*v, spec, style, output),
563            Arg::Ptr(v) => self.format_ptr(*v, spec, style, output),
564        }
565    }
566
567    fn format_int_common(
568        &self,
569        v: u64,
570        sign: &str,
571        spec: &ConversionSpec,
572        output: &mut String,
573    ) -> Result<(), String> {
574        let (base_prefix, mut value) = match spec.style {
575            Style::Hex | Style::Pointer => ("0x", format!("{:x}", v)),
576            Style::UpperHex => ("0X", format!("{:X}", v)),
577            Style::Octal => ("0", format!("{:o}", v)),
578            _ => ("", format!("{}", v)),
579        };
580
581        if let Precision::Fixed(p) = spec.precision {
582            while value.len() < p as usize {
583                value.insert(0, '0');
584            }
585        }
586
587        let mut prefix = sign.to_string();
588        if spec.flags.contains(&Flag::AlternateSyntax) || spec.style == Style::Pointer {
589            // For octal, it's possible that the value string already starts with the prefix.
590            if !value.starts_with(base_prefix) {
591                prefix.push_str(base_prefix);
592            }
593        }
594
595        let s = self.apply_width_and_alignment(&prefix, &value, spec)?;
596        output.push_str(&s);
597        Ok(())
598    }
599
600    fn sign_prefix(&self, is_negative: bool, spec: &ConversionSpec) -> &'static str {
601        if is_negative {
602            "-"
603        } else if spec.flags.contains(&Flag::ForceSign) {
604            "+"
605        } else if spec.flags.contains(&Flag::SpaceSign) {
606            " "
607        } else {
608            ""
609        }
610    }
611
612    fn format_int(
613        &self,
614        v: i64,
615        spec: &ConversionSpec,
616        _style: FormatStyle,
617        output: &mut String,
618    ) -> Result<(), String> {
619        let sign = self.sign_prefix(v < 0, spec);
620        self.format_int_common(v.unsigned_abs(), sign, spec, output)
621    }
622
623    fn format_uint(
624        &self,
625        v: u64,
626        spec: &ConversionSpec,
627        _style: FormatStyle,
628        output: &mut String,
629    ) -> Result<(), String> {
630        self.format_int_common(v, "", spec, output)
631    }
632
633    fn format_float(
634        &self,
635        v: f64,
636        spec: &ConversionSpec,
637        style: FormatStyle,
638        output: &mut String,
639    ) -> Result<(), String> {
640        let abs_v = v.abs();
641        let value = match spec.precision {
642            Precision::Fixed(p) => format!("{:.1$}", abs_v, p as usize),
643            _ => match style {
644                FormatStyle::Printf => format!("{:.6}", abs_v),
645                FormatStyle::CoreFmt => format!("{}", abs_v),
646            },
647        };
648        let prefix = self.sign_prefix(v < 0.0 || v.is_sign_negative(), spec);
649
650        let s = self.apply_width_and_alignment(prefix, &value, spec)?;
651        output.push_str(&s);
652        Ok(())
653    }
654
655    fn format_str(
656        &self,
657        v: &str,
658        spec: &ConversionSpec,
659        _style: FormatStyle,
660        output: &mut String,
661    ) -> Result<(), String> {
662        let mut value = v.to_string();
663        if let Precision::Fixed(p) = spec.precision {
664            value.truncate(p as usize);
665        }
666        let s = self.apply_width_and_alignment("", &value, spec)?;
667        output.push_str(&s);
668        Ok(())
669    }
670
671    fn format_char(
672        &self,
673        v: char,
674        spec: &ConversionSpec,
675        _style: FormatStyle,
676        output: &mut String,
677    ) -> Result<(), String> {
678        let value = v.to_string();
679        let s = self.apply_width_and_alignment("", &value, spec)?;
680        output.push_str(&s);
681        Ok(())
682    }
683
684    fn format_ptr(
685        &self,
686        v: usize,
687        spec: &ConversionSpec,
688        _style: FormatStyle,
689        output: &mut String,
690    ) -> Result<(), String> {
691        self.format_int_common(v as u64, "", spec, output)
692    }
693
694    fn apply_width_and_alignment(
695        &self,
696        prefix: &str,
697        value: &str,
698        spec: &ConversionSpec,
699    ) -> Result<String, String> {
700        // If there is no fixed field width, format w/o padding.
701        // Variable field width is unsupported for now.
702        let MinFieldWidth::Fixed(w) = spec.min_field_width else {
703            return Ok(format!("{}{}", prefix, value));
704        };
705
706        let w = w as usize;
707        let total_len = prefix.len() + value.len();
708
709        // If the value overflows the minimum field width, format w/o padding.
710        if total_len >= w {
711            return Ok(format!("{}{}", prefix, value));
712        }
713
714        let pad_len = w - total_len;
715        let ignore_zero = spec.flags.contains(&Flag::LeftJustify)
716            || (matches!(spec.precision, Precision::Fixed(_))
717                && matches!(spec.primitive, Primitive::Integer | Primitive::Unsigned));
718        let do_zero_fill = spec.flags.contains(&Flag::LeadingZeros) && !ignore_zero;
719        let is_left_aligned =
720            spec.alignment == Alignment::Left || spec.flags.contains(&Flag::LeftJustify);
721
722        let mut s = String::with_capacity(w);
723        if is_left_aligned {
724            // Left justified values are never zero filled.
725            s.push_str(prefix);
726            s.push_str(value);
727            for _ in 0..pad_len {
728                s.push(spec.fill);
729            }
730        } else {
731            if do_zero_fill {
732                // Zero fill happens after the prefix like '0x001' or '-0001'.
733                s.push_str(prefix);
734                for _ in 0..pad_len {
735                    s.push('0');
736                }
737            } else {
738                // Normal fill happens after the prefix like '  0x1' or '  -01'.
739                for _ in 0..pad_len {
740                    s.push(spec.fill);
741                }
742                s.push_str(prefix);
743            }
744            s.push_str(value);
745        }
746        Ok(s)
747    }
748}