// pw_format/format_string.rs

// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

use std::collections::HashSet;
use std::fmt::Write;

use quote::{quote, ToTokens};

use crate::{core_fmt, printf};

/// Primitive type of a conversion (integer, float, string, etc.).
///
/// Together with a [`Style`], the primitive determines which printf
/// conversion character a [`ConversionSpec`] maps to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Primitive {
    /// Signed integer primitive.
    Integer,

    /// Unsigned integer primitive.
    Unsigned,

    /// Floating point primitive.
    Float,

    /// String primitive.
    String,

    /// Character primitive.
    Character,

    /// Pointer primitive.
    Pointer,

    /// Untyped primitive.
    ///
    /// The rendering is chosen from the type of the argument that is
    /// supplied at format time rather than from the format string.
    Untyped,
}

/// The abstract formatting style for a conversion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Style {
    /// No style specified, use defaults.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer type rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Unsupported binary rendering
    ///
    /// This variant exists so that the proc macros can give useful error
    /// messages.
    Binary,
}

86/// Implemented for testing through the pw_format_test_macros crate.
87impl ToTokens for Style {
88    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
89        let new_tokens = match self {
90            Style::None => quote!(pw_format::Style::None),
91            Style::Octal => quote!(pw_format::Style::Octal),
92            Style::Hex => quote!(pw_format::Style::Hex),
93            Style::UpperHex => quote!(pw_format::Style::UpperHex),
94            Style::Exponential => quote!(pw_format::Style::Exponential),
95            Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
96            Style::Debug => quote!(pw_format::Style::Debug),
97            Style::HexDebug => quote!(pw_format::Style::HexDebug),
98            Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
99            Style::Pointer => quote!(pw_format::Style::Pointer),
100            Style::Binary => quote!(pw_format::Style::Binary),
101        };
102        new_tokens.to_tokens(tokens);
103    }
104}
105
/// A printf flag (the '+' in %+d).
///
/// Derives `Hash` so that a set of flags can be stored in a `HashSet`.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub enum Flag {
    /// `-`
    LeftJustify,

    /// `+`
    ForceSign,

    /// ` ` (a single space)
    SpaceSign,

    /// `#`
    AlternateSyntax,

    /// `0`
    LeadingZeros,
}

/// A printf minimum field width (the 5 in %5d).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// No field width specified.
    None,

    /// Fixed field width.
    Fixed(u32),

    /// Variable field width passed as an argument (i.e. %*d).
    Variable,
}

/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Precision {
    /// No precision specified.
    None,

    /// Fixed precision.
    Fixed(u32),

    /// Variable precision passed as an argument (i.e. %.*f).
    Variable,
}

/// A printf length (the l in %ld).
///
/// Each variant is documented with the modifier text it stands for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Length {
    /// `hh`
    Char,

    /// `h`
    Short,

    /// `l`
    Long,

    /// `ll`
    LongLong,

    /// `L`
    LongDouble,

    /// `j`
    IntMax,

    /// `z`
    Size,

    /// `t`
    PointerDiff,
}

/// A core::fmt alignment spec.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment specified.
    None,

    /// Left alignment (`<`)
    Left,

    /// Center alignment (`^`)
    Center,

    /// Right alignment (`>`)
    Right,
}

/// An argument in a core::fmt style format spec.
///
/// i.e. the var_name in `{var_name:#0x}`
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Argument {
    /// No argument
    None,

    /// A positional argument (i.e. `{0}`).
    Positional(usize),

    /// A named argument (i.e. `{var_name}`).
    Named(String),
}

213/// A printf conversion specification aka a % clause.
214#[derive(Clone, Debug, PartialEq, Eq)]
215pub struct ConversionSpec {
216    /// ConversionSpec's argument.
217    pub argument: Argument,
218    /// ConversionSpec's fill character.
219    pub fill: char,
220    /// ConversionSpec's field alignment.
221    pub alignment: Alignment,
222    /// ConversionSpec's set of [Flag]s.
223    pub flags: HashSet<Flag>,
224    /// ConversionSpec's minimum field width argument.
225    pub min_field_width: MinFieldWidth,
226    /// ConversionSpec's [Precision] argument.
227    pub precision: Precision,
228    /// ConversionSpec's [Length] argument.
229    pub length: Option<Length>,
230    /// ConversionSpec's [Primitive].
231    pub primitive: Primitive,
232    /// ConversionSpec's [Style].
233    pub style: Style,
234}
235
236impl ConversionSpec {
237    /// Reconstructs the conversion specifier back to its printf format string representation (e.g., `%+05.2ld`).
238    pub fn to_printf(&self) -> String {
239        let mut s = String::from("%");
240        if self.flags.contains(&Flag::LeftJustify) {
241            s.push('-');
242        }
243        if self.flags.contains(&Flag::ForceSign) {
244            s.push('+');
245        }
246        if self.flags.contains(&Flag::SpaceSign) {
247            s.push(' ');
248        }
249        if self.flags.contains(&Flag::AlternateSyntax) {
250            s.push('#');
251        }
252        if self.flags.contains(&Flag::LeadingZeros) {
253            s.push('0');
254        }
255
256        match self.min_field_width {
257            MinFieldWidth::None => {}
258            MinFieldWidth::Fixed(w) => s.push_str(&w.to_string()),
259            MinFieldWidth::Variable => s.push('*'),
260        }
261
262        match self.precision {
263            Precision::None => {}
264            Precision::Fixed(p) => s.push_str(&format!(".{p}")),
265            Precision::Variable => s.push_str(".*"),
266        }
267
268        if let Some(length) = self.length {
269            s.push_str(match length {
270                Length::Char => "hh",
271                Length::Short => "h",
272                Length::Long => "l",
273                Length::LongLong => "ll",
274                Length::LongDouble => "L",
275                Length::IntMax => "j",
276                Length::Size => "z",
277                Length::PointerDiff => "t",
278            });
279        }
280
281        let type_char = match (self.primitive, self.style) {
282            (Primitive::Integer, _) => 'd',
283            (Primitive::Unsigned, Style::Octal) => 'o',
284            (Primitive::Unsigned, Style::Hex) => 'x',
285            (Primitive::Unsigned, Style::UpperHex) => 'X',
286            (Primitive::Unsigned, _) => 'u',
287            (Primitive::Float, Style::Exponential) => 'e',
288            (Primitive::Float, Style::UpperExponential) => 'E',
289            (Primitive::Float, _) => 'f',
290            (Primitive::Character, _) => 'c',
291            (Primitive::String, _) => 's',
292            (Primitive::Pointer, _) => 'p',
293            (Primitive::Untyped, _) => 'v',
294        };
295        s.push(type_char);
296        s
297    }
298}
299
300/// A fragment of a printf format string.
301#[derive(Clone, Debug, PartialEq, Eq)]
302pub enum FormatFragment {
303    /// A literal string value.
304    Literal(String),
305
306    /// A conversion specification (i.e. %d).
307    Conversion(ConversionSpec),
308}
309
310impl FormatFragment {
311    /// Try to append `fragment` to `self`.
312    ///
313    /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
314    fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
315        let Self::Literal(literal_fragment) = &fragment else {
316            return Some(fragment);
317        };
318
319        let Self::Literal(literal_self) = self else {
320            return Some(fragment);
321        };
322
323        literal_self.push_str(literal_fragment);
324
325        None
326    }
327}
328
/// Representation of a decoded argument.
///
/// Only `PartialEq` (not `Eq`) is derived because of the `f64` payload.
#[derive(Debug, Clone, PartialEq)]
pub enum Arg {
    /// Signed integer.
    Int(i64),
    /// Unsigned integer.
    Uint(u64),
    /// Floating point number.
    Float(f64),
    /// String.
    Str(String),
    /// Character.
    Char(char),
    /// Pointer, stored as its address.
    Ptr(usize),
}

/// The style of formatting to apply (influences defaults).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatStyle {
    /// Printf style defaults (e.g. %f defaults to precision 6).
    Printf,
    /// `core::fmt` style defaults.
    CoreFmt,
}

355/// A trait for formatting conversion specifiers that failed, were skipped, or were missing.
356pub trait FormatError {
357    /// The domain-specific error type.
358    type Error;
359
360    /// Renders a conversion specifier that failed with a domain-specific error.
361    fn format_error(&self, spec: &ConversionSpec, error: &Self::Error) -> String;
362
363    /// Renders a conversion specifier that was missing from the supplied arguments.
364    fn format_missing(&self, spec: &ConversionSpec) -> String;
365
366    /// Renders a conversion specifier that decoded successfully but failed to format (type mismatch).
367    fn format_type_error(&self, spec: &ConversionSpec, arg: &Arg) -> String;
368}
369
370/// Formatter that retains the original conversion specifier when formatting fails,
371/// parameterizing over `std::convert::Infallible` (which has no error state).
372#[derive(Debug, Clone, Copy, PartialEq, Eq)]
373struct DefaultFormatter;
374
375impl FormatError for DefaultFormatter {
376    type Error = std::convert::Infallible;
377
378    fn format_error(&self, spec: &ConversionSpec, _error: &std::convert::Infallible) -> String {
379        spec.to_printf()
380    }
381    fn format_missing(&self, spec: &ConversionSpec) -> String {
382        spec.to_printf()
383    }
384    fn format_type_error(&self, spec: &ConversionSpec, _arg: &Arg) -> String {
385        spec.to_printf()
386    }
387}
388
389/// A parsed format string.
390#[derive(Clone, Debug, PartialEq, Eq)]
391pub struct FormatString {
392    /// The [FormatFragment]s that comprise the [FormatString].
393    pub fragments: Vec<FormatFragment>,
394}
395
396impl FormatString {
397    /// Formats a parsed format string with provided arguments.
398    pub fn format(&self, args: &[Arg], style: FormatStyle) -> String {
399        let result_args: Vec<Result<Arg, std::convert::Infallible>> =
400            args.iter().map(|arg| Ok(arg.clone())).collect();
401
402        self.format_with_errors(&result_args, style, &DefaultFormatter)
403    }
404
405    /// Formats a parsed format string with the provided argument states (Result<Arg, FE::Error>),
406    /// delegating formatting of any failures, missing arguments, or type mismatches
407    /// to the provided `FormatError` implementation.
408    pub fn format_with_errors<FE: FormatError>(
409        &self,
410        args: &[Result<Arg, FE::Error>],
411        style: FormatStyle,
412        error_formatter: &FE,
413    ) -> String {
414        let mut output = String::new();
415        let mut args_iter = args.iter();
416
417        for fragment in &self.fragments {
418            self.format_fragment(
419                fragment,
420                &mut args_iter,
421                style,
422                error_formatter,
423                &mut output,
424            );
425        }
426
427        output
428    }
429
430    fn format_fragment<'a, FE: FormatError>(
431        &self,
432        fragment: &FormatFragment,
433        args: &mut impl Iterator<Item = &'a Result<Arg, FE::Error>>,
434        style: FormatStyle,
435        error_formatter: &FE,
436        output: &mut String,
437    ) where
438        FE::Error: 'a,
439    {
440        let spec = match fragment {
441            FormatFragment::Conversion(spec) => spec,
442            FormatFragment::Literal(s) => {
443                output.push_str(s);
444                return;
445            }
446        };
447
448        let Some(decoded) = args.next() else {
449            output.push_str(&error_formatter.format_missing(spec));
450            return;
451        };
452
453        let arg = match decoded {
454            Ok(arg) => arg,
455            Err(err) => {
456                output.push_str(&error_formatter.format_error(spec, err));
457                return;
458            }
459        };
460
461        let mut formatted = String::new();
462        match self.format_value(spec, arg, style, &mut formatted) {
463            Ok(()) => output.push_str(&formatted),
464            Err(_) => {
465                output.push_str(&error_formatter.format_type_error(spec, arg));
466            }
467        }
468    }
469
470    /// Parses a printf style format string.
471    pub fn parse_printf(s: &str) -> Result<Self, String> {
472        // TODO: b/281858500 - Add better errors to failed parses.
473        let (rest, result) = printf::format_string(s)
474            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
475
476        // If the parser did not consume all the input, return an error.
477        if !rest.is_empty() {
478            return Err(format!(
479                "Failed to parse format string fragment: \"{rest}\""
480            ));
481        }
482
483        Ok(result)
484    }
485
486    /// Parses a core::fmt style format string.
487    pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
488        // TODO: b/281858500 - Add better errors to failed parses.
489        let (rest, result) = core_fmt::format_string(s)
490            .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
491
492        // If the parser did not consume all the input, return an error.
493        if !rest.is_empty() {
494            return Err(format!("Failed to parse format string: \"{rest}\""));
495        }
496
497        Ok(result)
498    }
499
500    /// Creates a `FormatString` from a slice of fragments.
501    ///
502    /// This primary responsibility of this function is to merge literal
503    /// fragments.  Adjacent literal fragments occur when a parser parses
504    /// escape sequences.  Merging them here allows a
505    /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
506    pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
507        Self {
508            fragments: fragments
509                .iter()
510                .fold(Vec::<_>::new(), |mut fragments, fragment| {
511                    // Collapse adjacent literal fragments.
512                    let Some(last) = fragments.last_mut() else {
513                        // If there are no accumulated fragments, add this one and return.
514                        fragments.push((*fragment).clone());
515                        return fragments;
516                    };
517                    if let Some(fragment) = last.try_append(fragment) {
518                        // If the fragments were able to append, no more work to do
519                        fragments.push((*fragment).clone());
520                    };
521                    fragments
522                }),
523        }
524    }
525
526    fn format_value(
527        &self,
528        spec: &ConversionSpec,
529        arg: &Arg,
530        style: FormatStyle,
531        output: &mut String,
532    ) -> Result<(), String> {
533        match (spec.primitive, arg) {
534            (Primitive::Integer, Arg::Int(v)) => self.format_int(*v, spec, style, output),
535            (Primitive::Unsigned, Arg::Uint(v)) => self.format_uint(*v, spec, style, output),
536            (Primitive::Float, Arg::Float(v)) => self.format_float(*v, spec, style, output),
537            (Primitive::String, Arg::Str(v)) => self.format_str(v, spec, style, output),
538            (Primitive::Character, Arg::Char(v)) => self.format_char(*v, spec, style, output),
539            (Primitive::Pointer, Arg::Ptr(v)) => self.format_ptr(*v, spec, style, output),
540            (Primitive::Untyped, _) => self.format_untyped(spec, arg, style, output),
541            _ => Err(format!(
542                "Mismatched type: expected {:?}, got {:?}",
543                spec.primitive, arg
544            )),
545        }
546    }
547
548    fn format_untyped(
549        &self,
550        spec: &ConversionSpec,
551        arg: &Arg,
552        style: FormatStyle,
553        output: &mut String,
554    ) -> Result<(), String> {
555        match arg {
556            Arg::Int(v) => self.format_int(*v, spec, style, output),
557            Arg::Uint(v) => self.format_uint(*v, spec, style, output),
558            Arg::Float(v) => self.format_float(*v, spec, style, output),
559            Arg::Str(v) => self.format_str(v, spec, style, output),
560            Arg::Char(v) => self.format_char(*v, spec, style, output),
561            Arg::Ptr(v) => self.format_ptr(*v, spec, style, output),
562        }
563    }
564
565    fn format_int_common(
566        &self,
567        v: u64,
568        sign: &str,
569        spec: &ConversionSpec,
570        output: &mut String,
571    ) -> Result<(), String> {
572        let (base_prefix, mut value) = match spec.style {
573            Style::Hex | Style::Pointer => ("0x", format!("{:x}", v)),
574            Style::UpperHex => ("0X", format!("{:X}", v)),
575            Style::Octal => ("0", format!("{:o}", v)),
576            _ => ("", format!("{}", v)),
577        };
578
579        if let Precision::Fixed(p) = spec.precision {
580            while value.len() < p as usize {
581                value.insert(0, '0');
582            }
583        }
584
585        let mut prefix = sign.to_string();
586        if spec.flags.contains(&Flag::AlternateSyntax) || spec.style == Style::Pointer {
587            // For octal, it's possible that the value string already starts with the prefix.
588            if !value.starts_with(base_prefix) {
589                prefix.push_str(base_prefix);
590            }
591        }
592
593        let s = self.apply_width_and_alignment(&prefix, &value, spec)?;
594        output.push_str(&s);
595        Ok(())
596    }
597
598    fn sign_prefix(&self, is_negative: bool, spec: &ConversionSpec) -> &'static str {
599        if is_negative {
600            "-"
601        } else if spec.flags.contains(&Flag::ForceSign) {
602            "+"
603        } else if spec.flags.contains(&Flag::SpaceSign) {
604            " "
605        } else {
606            ""
607        }
608    }
609
610    fn format_int(
611        &self,
612        v: i64,
613        spec: &ConversionSpec,
614        _style: FormatStyle,
615        output: &mut String,
616    ) -> Result<(), String> {
617        let sign = self.sign_prefix(v < 0, spec);
618        self.format_int_common(v.unsigned_abs(), sign, spec, output)
619    }
620
621    fn format_uint(
622        &self,
623        v: u64,
624        spec: &ConversionSpec,
625        _style: FormatStyle,
626        output: &mut String,
627    ) -> Result<(), String> {
628        self.format_int_common(v, "", spec, output)
629    }
630
631    fn format_float(
632        &self,
633        v: f64,
634        spec: &ConversionSpec,
635        style: FormatStyle,
636        output: &mut String,
637    ) -> Result<(), String> {
638        let abs_v = v.abs();
639        let value = match spec.precision {
640            Precision::Fixed(p) => format!("{:.1$}", abs_v, p as usize),
641            _ => match style {
642                FormatStyle::Printf => format!("{:.6}", abs_v),
643                FormatStyle::CoreFmt => format!("{}", abs_v),
644            },
645        };
646        let prefix = self.sign_prefix(v < 0.0 || v.is_sign_negative(), spec);
647
648        let s = self.apply_width_and_alignment(prefix, &value, spec)?;
649        output.push_str(&s);
650        Ok(())
651    }
652
653    fn format_str(
654        &self,
655        v: &str,
656        spec: &ConversionSpec,
657        _style: FormatStyle,
658        output: &mut String,
659    ) -> Result<(), String> {
660        let mut value = v.to_string();
661        if let Precision::Fixed(p) = spec.precision {
662            value.truncate(p as usize);
663        }
664        let s = self.apply_width_and_alignment("", &value, spec)?;
665        output.push_str(&s);
666        Ok(())
667    }
668
669    fn format_char(
670        &self,
671        v: char,
672        spec: &ConversionSpec,
673        _style: FormatStyle,
674        output: &mut String,
675    ) -> Result<(), String> {
676        let value = v.to_string();
677        let s = self.apply_width_and_alignment("", &value, spec)?;
678        output.push_str(&s);
679        Ok(())
680    }
681
682    fn format_ptr(
683        &self,
684        v: usize,
685        spec: &ConversionSpec,
686        _style: FormatStyle,
687        output: &mut String,
688    ) -> Result<(), String> {
689        self.format_int_common(v as u64, "", spec, output)
690    }
691
692    fn apply_width_and_alignment(
693        &self,
694        prefix: &str,
695        value: &str,
696        spec: &ConversionSpec,
697    ) -> Result<String, String> {
698        // If there is no fixed field width, format w/o padding.
699        // Variable field width is unsupported for now.
700        let MinFieldWidth::Fixed(w) = spec.min_field_width else {
701            return Ok(format!("{}{}", prefix, value));
702        };
703
704        let w = w as usize;
705        let total_len = prefix.len() + value.len();
706
707        // If the value overflows the minimum field width, format w/o padding.
708        if total_len >= w {
709            return Ok(format!("{}{}", prefix, value));
710        }
711
712        let pad_len = w - total_len;
713        let ignore_zero = spec.flags.contains(&Flag::LeftJustify)
714            || (matches!(spec.precision, Precision::Fixed(_))
715                && matches!(spec.primitive, Primitive::Integer | Primitive::Unsigned));
716        let do_zero_fill = spec.flags.contains(&Flag::LeadingZeros) && !ignore_zero;
717        let is_left_aligned =
718            spec.alignment == Alignment::Left || spec.flags.contains(&Flag::LeftJustify);
719
720        let mut s = String::with_capacity(w);
721        if is_left_aligned {
722            // Left justified values are never zero filled.
723            s.push_str(prefix);
724            s.push_str(value);
725            for _ in 0..pad_len {
726                s.push(spec.fill);
727            }
728        } else {
729            if do_zero_fill {
730                // Zero fill happens after the prefix like '0x001' or '-0001'.
731                s.push_str(prefix);
732                for _ in 0..pad_len {
733                    s.push('0');
734                }
735            } else {
736                // Normal fill happens after the prefix like '  0x1' or '  -01'.
737                for _ in 0..pad_len {
738                    s.push(spec.fill);
739                }
740                s.push_str(prefix);
741            }
742            s.push_str(value);
743        }
744        Ok(s)
745    }
746}