C/C++ API Reference
Loading...
Searching...
No Matches
tokenize.h
1// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14#pragma once
15
16#ifdef __cplusplus
17
18#include <cstddef>
19#include <cstdint>
20#include <string_view>
21
22#else
23
24#include <stddef.h>
25#include <stdint.h>
26
27#endif // __cplusplus
28
29#include "pw_polyfill/static_assert.h"
30#include "pw_preprocessor/arguments.h"
31#include "pw_preprocessor/compiler.h"
32#include "pw_preprocessor/concat.h"
33#include "pw_preprocessor/util.h"
34#include "pw_tokenizer/internal/argument_types.h"
35#include "pw_tokenizer/internal/tokenize_string.h"
36
38
// Type used for tokens throughout this header: an unsigned 32-bit integer.
41typedef uint32_t pw_tokenizer_Token;
42
// Domain that the macros in this header use when the caller does not name
// one: the empty string.
49#define PW_TOKENIZER_DEFAULT_DOMAIN ""
50
// Tokenizes a string literal. Two call forms are defined in this header:
//   PW_TOKENIZE_STRING(string_literal)
//   PW_TOKENIZE_STRING(domain, string_literal)
// The call is forwarded to _PW_TOKENIZE_STRING_1 or _PW_TOKENIZE_STRING_2.
// NOTE(review): PW_DELEGATE_BY_ARG_COUNT is defined in another header;
// presumably it appends the argument count to the given prefix. Confirm there.
67#define PW_TOKENIZE_STRING(...) \
68 PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZE_STRING_, __VA_ARGS__)
69
71
// One-argument form of PW_TOKENIZE_STRING: tokenizes string_literal in
// PW_TOKENIZER_DEFAULT_DOMAIN.
72#define _PW_TOKENIZE_STRING_1(string_literal) \
73 PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
74
// Two-argument form of PW_TOKENIZE_STRING: tokenizes string_literal in the
// caller-supplied domain.
75#define _PW_TOKENIZE_STRING_2(domain, string_literal) \
76 PW_TOKENIZE_STRING_DOMAIN(domain, string_literal)
77
79
// Expression form of PW_TOKENIZE_STRING. Accepts (string_literal) or
// (domain, string_literal) and forwards to _PW_TOKENIZE_STRING_EXPR_1 or
// _PW_TOKENIZE_STRING_EXPR_2, which wrap the tokenization in a lambda so the
// result can be used where an expression is required.
// NOTE(review): the dispatch relies on PW_DELEGATE_BY_ARG_COUNT, which is
// defined in another header; confirm its behavior there.
92#define PW_TOKENIZE_STRING_EXPR(...) \
93 PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZE_STRING_EXPR_, __VA_ARGS__)
94
96
// One-argument form: same as the two-argument form with
// PW_TOKENIZER_DEFAULT_DOMAIN as the domain.
97#define _PW_TOKENIZE_STRING_EXPR_1(string_literal) \
98 _PW_TOKENIZE_STRING_EXPR_2(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
99
// Two-argument form: expands to an immediately invoked lambda. Inside the
// lambda the multi-statement PW_TOKENIZE_STRING_DOMAIN expansion initializes a
// constexpr local, which the lambda then returns. The lambda and constexpr
// make this expansion C++ only.
100#define _PW_TOKENIZE_STRING_EXPR_2(domain, string_literal) \
101 [&] { \
102 constexpr uint32_t lambda_ret_token = \
103 PW_TOKENIZE_STRING_DOMAIN(domain, string_literal); \
104 return lambda_ret_token; \
105 }()
106
108
109// clang-format off
113// clang-format on
// Tokenizes string_literal in the given domain without masking: forwards to
// PW_TOKENIZE_STRING_MASK with a mask of UINT32_MAX, so every bit of the
// 32-bit token is kept.
114#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
115 PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX, string_literal)
116
117// clang-format off
121// clang-format on
// Expression form of PW_TOKENIZE_STRING_DOMAIN. Expands to an immediately
// invoked lambda that stores the token in a constexpr local and returns it,
// so the multi-statement PW_TOKENIZE_STRING_DOMAIN expansion can be used where
// an expression is required. The lambda makes this expansion C++ only.
122#define PW_TOKENIZE_STRING_DOMAIN_EXPR(domain, string_literal) \
123 [&] { \
124 constexpr uint32_t lambda_ret_token = \
125 PW_TOKENIZE_STRING_DOMAIN(domain, string_literal); \
126 return lambda_ret_token; \
127 }()
128
129// clang-format off
134// clang-format on
// Tokenizes string_literal in the given domain and applies mask to the token.
//
// The expansion is a sequence of three pieces, not a single expression:
//   1. the masked token expression followed by `;`, which is meant to complete
//      an initializer written by the caller (e.g. `type name = MACRO(...)`);
//   2. a static_assert that mask is greater than 0 and at most UINT32_MAX;
//   3. a PW_TOKENIZER_DEFINE_TOKEN invocation recording the masked token,
//      domain and string. It has no trailing semicolon; the caller's own
//      semicolon terminates it.
// Because of this shape the macro cannot appear inside a larger expression;
// PW_TOKENIZE_STRING_MASK_EXPR exists for that case.
135#define PW_TOKENIZE_STRING_MASK(domain, mask, string_literal) \
136 /* assign to a variable */ _PW_TOKENIZER_MASK_TOKEN(mask, string_literal); \
137 \
138 static_assert(0 < (mask) && (mask) <= UINT32_MAX, \
139 "Tokenizer masks must be non-zero uint32_t values."); \
140 \
141 PW_TOKENIZER_DEFINE_TOKEN( \
142 _PW_TOKENIZER_MASK_TOKEN(mask, string_literal), domain, string_literal)
143
144// clang-format off
149// clang-format on
// Expression form of PW_TOKENIZE_STRING_MASK. Expands to an immediately
// invoked lambda whose body uses PW_TOKENIZE_STRING_MASK to initialize a
// constexpr local and then returns that local. The lambda makes this
// expansion C++ only.
150#define PW_TOKENIZE_STRING_MASK_EXPR(domain, mask, string_literal) \
151 [&] { \
152 constexpr uint32_t lambda_ret_token = \
153 PW_TOKENIZE_STRING_MASK(domain, mask, string_literal); \
154 return lambda_ret_token; \
155 }()
156
158
// Yields the token for string_literal with mask applied: mask is cast to
// pw_tokenizer_Token and bitwise-ANDed with
// PW_TOKENIZER_STRING_TOKEN(string_literal).
// NOTE(review): PW_TOKENIZER_STRING_TOKEN is not defined in this file;
// presumably it computes the unmasked token of the literal. Confirm in the
// internal headers included above.
159#define _PW_TOKENIZER_MASK_TOKEN(mask, string_literal) \
160 ((pw_tokenizer_Token)(mask) & PW_TOKENIZER_STRING_TOKEN(string_literal))
161
163
// Tokenizes a format string and its arguments into buffer, using
// PW_TOKENIZER_DEFAULT_DOMAIN. All other parameters are passed unchanged to
// PW_TOKENIZE_TO_BUFFER_DOMAIN.
//
//   buffer               destination passed to _pw_tokenizer_ToBuffer
//   buffer_size_pointer  size_t* used as both input and output by
//                        _pw_tokenizer_ToBuffer
//   format               format string to tokenize
//   ...                  arguments for the format string
194#define PW_TOKENIZE_TO_BUFFER(buffer, buffer_size_pointer, format, ...) \
195 PW_TOKENIZE_TO_BUFFER_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, \
196 buffer, \
197 buffer_size_pointer, \
198 format, \
199 __VA_ARGS__)
200
201// clang-format off
204// clang-format on
// Same as PW_TOKENIZE_TO_BUFFER, but with an explicit domain. Forwards to
// PW_TOKENIZE_TO_BUFFER_MASK with a mask of UINT32_MAX, i.e. no token bits are
// masked off.
205#define PW_TOKENIZE_TO_BUFFER_DOMAIN( \
206 domain, buffer, buffer_size_pointer, format, ...) \
207 PW_TOKENIZE_TO_BUFFER_MASK( \
208 domain, UINT32_MAX, buffer, buffer_size_pointer, format, __VA_ARGS__)
209
// Same as PW_TOKENIZE_TO_BUFFER_DOMAIN, but applies mask to the token.
//
// Inside a do/while(0) statement the expansion:
//   1. invokes PW_TOKENIZE_FORMAT_STRING, which checks the format string and
//      declares the local _pw_tokenizer_token;
//   2. calls _pw_tokenizer_ToBuffer with buffer, buffer_size_pointer and the
//      argument list produced by PW_TOKENIZER_REPLACE_FORMAT_STRING (the token,
//      the argument types, then the caller's arguments).
213#define PW_TOKENIZE_TO_BUFFER_MASK( \
214 domain, mask, buffer, buffer_size_pointer, format, ...) \
215 do { \
216 PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
217 _pw_tokenizer_ToBuffer(buffer, \
218 buffer_size_pointer, \
219 PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
220 } while (0)
221
234
// Produces the argument list that takes the place of a format string in a
// call: _pw_tokenizer_token, the encoded argument types, and then the
// caller's arguments (if any). The expansion names _pw_tokenizer_token
// directly, so that variable must be in scope where the macro is used.
// NOTE(review): selection between the _1 and _0 helpers depends on
// PW_EMPTY_ARGS, defined in another header; presumably it yields 1 when no
// arguments are given and 0 otherwise. Confirm there.
244#define PW_TOKENIZER_REPLACE_FORMAT_STRING(...) \
245 _PW_TOKENIZER_REPLACE_FORMAT_STRING(PW_EMPTY_ARGS(__VA_ARGS__), __VA_ARGS__)
246
248
// Pastes empty_args onto _PW_TOKENIZER_REPLACE_FORMAT_STRING_ to select one of
// the two helpers below, then invokes it with the remaining arguments.
249#define _PW_TOKENIZER_REPLACE_FORMAT_STRING(empty_args, ...) \
250 _PW_CONCAT_2(_PW_TOKENIZER_REPLACE_FORMAT_STRING_, empty_args)(__VA_ARGS__)
251
// Helper selected by the suffix 1 (takes no arguments): the token followed by
// 0u in the argument-types position.
252#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_1() _pw_tokenizer_token, 0u
// Helper selected by the suffix 0: the token, the value of
// PW_TOKENIZER_ARG_TYPES for the arguments, then the arguments themselves.
253#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_0(...) \
254 _pw_tokenizer_token, PW_TOKENIZER_ARG_TYPES(__VA_ARGS__), __VA_ARGS__
255
257
// Expands to the value passed as the `types` argument of
// _pw_tokenizer_ToBuffer for the given arguments. Delegates to macros with the
// _PW_TOKENIZER_TYPES_ prefix, which are not defined in this file.
// NOTE(review): those macros presumably live in
// pw_tokenizer/internal/argument_types.h (included above); confirm there.
267#define PW_TOKENIZER_ARG_TYPES(...) \
268 PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZER_TYPES_, __VA_ARGS__)
269
271
// NOTE(review): PW_EXTERN_C_START / PW_EXTERN_C_END come from another header;
// presumably they open and close an extern "C" block when compiled as C++.
272PW_EXTERN_C_START
273
274// These functions encode the tokenized strings. These should not be called
275// directly. Instead, use the corresponding PW_TOKENIZE_TO_* macros above.
//
// Parameters, in the order PW_TOKENIZE_TO_BUFFER_MASK supplies them:
//   buffer             destination for the encoded data
//   buffer_size_bytes  input and output argument (see inline note)
//   token              token of the format string
//   types              value produced by PW_TOKENIZER_ARG_TYPES, or 0u when
//                      there are no arguments
//   ...                the format string's arguments
276void _pw_tokenizer_ToBuffer(void* buffer,
277 size_t* buffer_size_bytes, // input and output arg
278 pw_tokenizer_Token token,
279 pw_tokenizer_ArgTypes types,
280 ...);
281
282// This empty function allows the compiler to check the format string.
// It is first declared with PW_PRINTF_FORMAT(1, 2) (format string is
// parameter 1, variadic arguments start at parameter 2) and then defined
// separately below.
// NOTE(review): the split is presumably needed because the attribute macro
// cannot follow the declarator of a function definition; confirm against
// pw_preprocessor/compiler.h.
283static inline void pw_tokenizer_CheckFormatString(const char* format, ...)
284 PW_PRINTF_FORMAT(1, 2);
285
// The body only discards `format`; the function does nothing when called.
286static inline void pw_tokenizer_CheckFormatString(const char* format, ...) {
287 (void)format;
288}
289
290PW_EXTERN_C_END
291
293
294// clang-format off
308// clang-format on
// Tokenizes a format string after checking its argument count.
//
// First emits a static_assert that the number of variadic arguments does not
// exceed PW_TOKENIZER_MAX_SUPPORTED_ARGS; the failure message includes the
// limit, the count used, the format string and the arguments. Then expands
// PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT with the same parameters, which
// declares the local _pw_tokenizer_token.
309#define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...) \
310 static_assert( \
311 PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS, \
312 "Tokenized strings cannot have more than " \
313 PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; " \
314 PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__)) \
315 " arguments were used for " #format " (" #__VA_ARGS__ ")"); \
316 PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, __VA_ARGS__)
317
// Tokenizes a format string without enforcing an argument-count limit.
//
// The expansion, in order:
//   1. an `if (0)` block that calls pw_tokenizer_CheckFormatString with the
//      format and arguments, so the compiler can check them against each
//      other while the call itself never runs;
//   2. _PW_TOKENIZE_VALIDATE_FORMAT_STRING(format);
//   3. a static _PW_TOKENIZER_CONST variable named _pw_tokenizer_token holding
//      the masked token of the format string;
//   4. PW_TOKENIZER_DEFINE_TOKEN for that token, domain and format string,
//      with no trailing semicolon (the caller supplies it).
// Because it begins with an `if` statement and declares a fixed-name local,
// it has to be expanded inside a block scope, once per scope.
333#define PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, ...) \
334 if (0) { /* Do not execute to prevent double evaluation of the arguments. */ \
335 pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__)); \
336 } \
337 \
338 _PW_TOKENIZE_VALIDATE_FORMAT_STRING(format); \
339 \
340 /* Tokenize the string to a pw_tokenizer_Token at compile time. */ \
341 static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token = \
342 _PW_TOKENIZER_MASK_TOKEN(mask, format); \
343 \
344 PW_TOKENIZER_DEFINE_TOKEN(_pw_tokenizer_token, domain, format)
345
347
348// Creates unique names to use for tokenized string entries and linker sections.
// The name is built by concatenating prefix, the current __LINE__, an
// underscore, and the current __COUNTER__ value.
349#define _PW_TOKENIZER_UNIQUE(prefix) PW_CONCAT(prefix, __LINE__, _, __COUNTER__)
350
351#ifdef __cplusplus
352
// Qualifier applied to the _pw_tokenizer_token variable. In C++ it is
// constexpr.
353#define _PW_TOKENIZER_CONST constexpr
354
356
// C++ version. Records a (token, domain, string) entry.
//
// First static_asserts that the domain passes
// ::pw::tokenizer::internal::ValidDomain. Then defines a static constexpr
// variable with a unique name (see _PW_TOKENIZER_UNIQUE), alignas(1), carrying
// the _PW_TOKENIZER_SECTION attribute, and initialized from
// ::pw::tokenizer::internal::MakeEntry(token, domain, string). The definition
// has no trailing semicolon; the caller supplies it.
// NOTE(review): ValidDomain and MakeEntry are not defined in this file;
// presumably they come from pw_tokenizer/internal/tokenize_string.h.
363#define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
364 static_assert(::pw::tokenizer::internal::ValidDomain(domain), \
365 "pw_tokenizer domains may only contain alphanumeric " \
366 "characters, underscore, or colon, and cannot start with a " \
367 "number; space characters are ignored"); \
368 alignas(1) static constexpr auto _PW_TOKENIZER_SECTION _PW_TOKENIZER_UNIQUE( \
369 _pw_tokenizer_string_entry_) = \
370 ::pw::tokenizer::internal::MakeEntry(token, domain, string)
371
373
374// Validates the format string provided to PW_TOKENIZE_FORMAT_STRING and
375// friends.
// C++ version: fails compilation, via static_assert, when the format string
// contains the substring "%.*s". The check is wrapped in do/while(0) so the
// expansion is a single statement.
376#define _PW_TOKENIZE_VALIDATE_FORMAT_STRING(format) \
377 do { \
378 static_assert(!::pw::tokenizer::internal::Contains(format, "%.*s"), \
379 "The %.*s specifier is not supported." \
380 " See https://pwbug.dev/408040194"); \
381 } while (0)
382
383namespace pw::tokenizer {
384
385using Token = ::pw_tokenizer_Token;
386inline constexpr const char* kDefaultDomain = PW_TOKENIZER_DEFAULT_DOMAIN;
387
388namespace internal {
389
390constexpr bool Contains(const char* haystack, const char* needle) {
391 std::string_view haystack_view(haystack);
392 return haystack_view.find(needle) != std::string_view::npos;
393}
394
395} // namespace internal
396} // namespace pw::tokenizer
397
398#else
399
// C versions of the helpers used by the tokenization macros.
// In C the _pw_tokenizer_token variable is qualified with const.
400#define _PW_TOKENIZER_CONST const
// Alignment specifier for C, spelled with the aligned attribute.
401#define _PW_ALIGNAS(alignment) __attribute__((aligned(alignment)))
402
// C version. Records a (token, domain, string) entry as a static const object
// with 1-byte alignment. The declarator and initializer come from
// _PW_TOKENIZER_STRING_ENTRY, which is not defined in this file.
// NOTE(review): presumably _PW_TOKENIZER_STRING_ENTRY is provided by
// pw_tokenizer/internal/tokenize_string.h; confirm there.
403#define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
404 _PW_ALIGNAS(1) static const _PW_TOKENIZER_STRING_ENTRY(token, domain, string)
405
// C version of the format string validation: an empty do/while(0) statement,
// so no check is performed when compiling as C.
406// There is no way to do this in C.
407#define _PW_TOKENIZE_VALIDATE_FORMAT_STRING(format) \
408 do { \
409 } while (0)
410
411#endif // __cplusplus
412
413// _PW_TOKENIZER_SECTION places the tokenized strings in a special .pw_tokenizer
414// linker section. Host-side decoding tools read the strings and tokens from
415// this section to build a database of tokenized strings.
416//
417// This section should be declared as type INFO so that it is excluded from the
418// final binary. To declare the section, as well as the .pw_tokenizer.info
419// metadata section, add the following to the linker script's SECTIONS command:
420//
421// .pw_tokenizer.info 0x0 (INFO) :
422// {
423// KEEP(*(.pw_tokenizer.info))
424// }
425//
426// .pw_tokenizer.entries 0x0 (INFO) :
427// {
428// KEEP(*(.pw_tokenizer.entries.*))
429// }
430//
431// A linker script snippet that declares these sections is available in the file
432// pw_tokenizer_linker_sections.ld. This file may be included directly in
433// existing linker scripts.
434//
435// The tokenized string sections can also be managed without linker script
436// modifications, though this is not recommended. The section can be extracted
437// and removed from the ELF with objcopy:
438//
439// objcopy --only-section .pw_tokenizer.* <ORIGINAL_ELF> <OUTPUT_ELF>
440// objcopy --remove-section .pw_tokenizer.* <ORIGINAL_ELF>
441//
442// OUTPUT_ELF will be an ELF with only the tokenized strings, and the original
443// ELF file will have the sections removed.
444//
445// Without the above linker script modifications, the section garbage collection
446// option (--gc-sections) removes the tokenized string sections. To avoid
447// editing the target linker script, a separate metadata ELF can be linked
448// without --gc-sections to preserve the tokenized data.
449//
450// pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
451// executables) do not support section names longer than 16 characters, so a
452// short, unused section name is used on macOS.
// Attribute that places a tokenized string entry in its own uniquely named
// section. The section name is the stringified result of _PW_TOKENIZER_UNIQUE
// applied to a platform-specific prefix.
453#ifdef __APPLE__
// On Apple platforms the short prefix ".pw." is used.
454#define _PW_TOKENIZER_SECTION \
455 PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw.)))
456#else
// Elsewhere the prefix is ".pw_tokenizer.entries.".
457#define _PW_TOKENIZER_SECTION \
458 PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw_tokenizer.entries.)))
459#endif // __APPLE__
#define PW_PRINTF_FORMAT(format_index, parameter_index)
Definition: compiler.h:89
uint32_t pw_tokenizer_Token
Definition: tokenize.h:41
#define PW_TOKENIZER_DEFAULT_DOMAIN
Definition: tokenize.h:49