Pigweed
 
Loading...
Searching...
No Matches
tokenize.h
1// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14#pragma once
15
16#ifdef __cplusplus
17
18#include <cstddef>
19#include <cstdint>
20
21#else
22
23#include <stddef.h>
24#include <stdint.h>
25
26#endif // __cplusplus
27
28#include "pw_polyfill/static_assert.h"
29#include "pw_preprocessor/arguments.h"
30#include "pw_preprocessor/compiler.h"
31#include "pw_preprocessor/concat.h"
32#include "pw_preprocessor/util.h"
33#include "pw_tokenizer/internal/argument_types.h"
34#include "pw_tokenizer/internal/tokenize_string.h"
35
// The type of a token: the 32-bit integer that stands in for a tokenized
// string.
typedef uint32_t pw_tokenizer_Token;
39
// Strings may optionally be tokenized to a domain. Strings in different
// domains can be processed separately by the token database tools. Each domain
// in use must have a corresponding section declared in the linker script. See
// `pw_tokenizer_linker_sections.ld` for more details.
//
// The default domain is an empty string.
#define PW_TOKENIZER_DEFAULT_DOMAIN ""
47
// Tokenizes a string literal, optionally within a domain. The macro dispatches
// on the number of arguments it is given:
//
//   PW_TOKENIZE_STRING_OPTIONAL_DOMAIN(string_literal)
//   PW_TOKENIZE_STRING_OPTIONAL_DOMAIN(domain, string_literal)
//
// NOTE(review): this relies on PW_DELEGATE_BY_ARG_COUNT (from
// pw_preprocessor/arguments.h) appending the argument count to the given
// prefix, selecting one of the two helpers below -- confirm against that header.
#define PW_TOKENIZE_STRING_OPTIONAL_DOMAIN(...) \
  PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_, __VA_ARGS__)

// One-argument form: forwards to PW_TOKENIZE_STRING.
#define _PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_1(string_literal) \
  PW_TOKENIZE_STRING(string_literal)

// Two-argument form: forwards to PW_TOKENIZE_STRING_DOMAIN with the given
// domain.
#define _PW_TOKENIZE_STRING_OPTIONAL_DOMAIN_2(domain, string_literal) \
  PW_TOKENIZE_STRING_DOMAIN(domain, string_literal)
59
// Tokenizes a string literal in the default domain
// (PW_TOKENIZER_DEFAULT_DOMAIN). Equivalent to calling
// PW_TOKENIZE_STRING_DOMAIN with that domain.
#define PW_TOKENIZE_STRING(string_literal) \
  PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
74
// Expression form of PW_TOKENIZE_STRING. The tokenization is wrapped in an
// immediately-invoked lambda that stores the token in a constexpr local and
// returns it, so the macro can be used where a single expression is required.
// The lambda syntax makes this form usable from C++ only.
#define PW_TOKENIZE_STRING_EXPR(string_literal) \
  [&] { \
    constexpr uint32_t lambda_ret_token = PW_TOKENIZE_STRING(string_literal); \
    return lambda_ret_token; \
  }()
88
// Tokenizes a string literal in the given domain without masking the token:
// the mask passed to PW_TOKENIZE_STRING_MASK is UINT32_MAX, which keeps every
// bit of the 32-bit token.
#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
  PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX, string_literal)
93
// Expression form of PW_TOKENIZE_STRING_DOMAIN. The tokenization is wrapped in
// an immediately-invoked lambda that stores the token in a constexpr local and
// returns it. The lambda syntax makes this form usable from C++ only.
#define PW_TOKENIZE_STRING_DOMAIN_EXPR(domain, string_literal) \
  [&] { \
    constexpr uint32_t lambda_ret_token = \
        PW_TOKENIZE_STRING_DOMAIN(domain, string_literal); \
    return lambda_ret_token; \
  }()
102
// Tokenizes a string literal in the given domain and applies a bit mask to the
// resulting token.
//
// The expansion is several statements, in this order:
//   1. the masked token expression, which completes the caller's assignment
//      (hence the "assign to a variable" marker);
//   2. a static_assert that the mask is a non-zero value no larger than
//      UINT32_MAX;
//   3. the PW_TOKENIZER_DEFINE_TOKEN entry for the masked token.
//
// Because it expands to more than one statement, the macro must be used as the
// right-hand side of a variable initialization or assignment statement, not
// inside a larger expression.
#define PW_TOKENIZE_STRING_MASK(domain, mask, string_literal) \
  /* assign to a variable */ _PW_TOKENIZER_MASK_TOKEN(mask, string_literal); \
  \
  static_assert(0 < (mask) && (mask) <= UINT32_MAX, \
                "Tokenizer masks must be non-zero uint32_t values."); \
  \
  PW_TOKENIZER_DEFINE_TOKEN( \
      _PW_TOKENIZER_MASK_TOKEN(mask, string_literal), domain, string_literal)
114
// Expression form of PW_TOKENIZE_STRING_MASK. The multi-statement tokenization
// is wrapped in an immediately-invoked lambda that stores the masked token in a
// constexpr local and returns it. The lambda syntax makes this form usable from
// C++ only.
#define PW_TOKENIZE_STRING_MASK_EXPR(domain, mask, string_literal) \
  [&] { \
    constexpr uint32_t lambda_ret_token = \
        PW_TOKENIZE_STRING_MASK(domain, mask, string_literal); \
    return lambda_ret_token; \
  }()
124
// Computes the token for a string literal and ANDs it with the mask. The mask
// is cast to pw_tokenizer_Token before the bitwise AND is applied.
#define _PW_TOKENIZER_MASK_TOKEN(mask, string_literal) \
  ((pw_tokenizer_Token)(mask) & PW_TOKENIZER_STRING_TOKEN(string_literal))
127
// Tokenizes a format string in the default domain
// (PW_TOKENIZER_DEFAULT_DOMAIN) and encodes it, together with its arguments,
// into the provided buffer.
//
//   buffer               - destination for the encoded message
//   buffer_size_pointer  - pointer to the buffer size; forwarded to
//                          _pw_tokenizer_ToBuffer, where it is both an input
//                          and an output
//   format               - the format string literal to tokenize
//   ...                  - the arguments for the format string
#define PW_TOKENIZE_TO_BUFFER(buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, \
                               buffer, \
                               buffer_size_pointer, \
                               format, \
                               __VA_ARGS__)
164
// Same as PW_TOKENIZE_TO_BUFFER, but tokenizes the format string to the given
// domain. The token is not masked: UINT32_MAX is passed as the mask to
// PW_TOKENIZE_TO_BUFFER_MASK.
#define PW_TOKENIZE_TO_BUFFER_DOMAIN( \
    domain, buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_MASK( \
      domain, UINT32_MAX, buffer, buffer_size_pointer, format, __VA_ARGS__)
171
// Same as PW_TOKENIZE_TO_BUFFER_DOMAIN, but applies a mask to the token.
//
// Inside a do { } while (0) block, so the expansion behaves as one statement:
//   1. PW_TOKENIZE_FORMAT_STRING tokenizes the format string and defines the
//      local `_pw_tokenizer_token`;
//   2. _pw_tokenizer_ToBuffer is called with the buffer, the size pointer, and
//      the argument list produced by PW_TOKENIZER_REPLACE_FORMAT_STRING (the
//      token in place of the format string, then the argument types and the
//      arguments themselves).
#define PW_TOKENIZE_TO_BUFFER_MASK( \
    domain, mask, buffer, buffer_size_pointer, format, ...) \
  do { \
    PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
    _pw_tokenizer_ToBuffer(buffer, \
                           buffer_size_pointer, \
                           PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
  } while (0)
182
195
// Produces the argument list that takes the place of a format string and its
// arguments in a call to an encoding function. The list always starts with the
// local `_pw_tokenizer_token` (defined by PW_TOKENIZE_FORMAT_STRING):
//
//   no arguments:    _pw_tokenizer_token, 0u
//   with arguments:  _pw_tokenizer_token, <packed argument types>, <arguments>
//
// NOTE(review): PW_EMPTY_ARGS (from pw_preprocessor) is expected to expand to
// 1 when __VA_ARGS__ is empty and 0 otherwise; that value selects one of the
// two helpers below -- confirm against pw_preprocessor/arguments.h.
#define PW_TOKENIZER_REPLACE_FORMAT_STRING(...) \
  _PW_TOKENIZER_REPLACE_FORMAT_STRING(PW_EMPTY_ARGS(__VA_ARGS__), __VA_ARGS__)

// Pastes the empty_args value onto the helper name and invokes the result.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING(empty_args, ...) \
  _PW_CONCAT_2(_PW_TOKENIZER_REPLACE_FORMAT_STRING_, empty_args)(__VA_ARGS__)

// No arguments: the token followed by 0u in the argument-types position.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_1() _pw_tokenizer_token, 0u

// One or more arguments: the token, the argument types, then the arguments.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_0(...) \
  _pw_tokenizer_token, PW_TOKENIZER_ARG_TYPES(__VA_ARGS__), __VA_ARGS__
214
// Expands to the argument-types value for the given arguments by delegating,
// based on the argument count, to a _PW_TOKENIZER_TYPES_<count> macro.
// NOTE(review): those macros are not defined in this header; they presumably
// come from pw_tokenizer/internal/argument_types.h -- confirm there.
#define PW_TOKENIZER_ARG_TYPES(...) \
  PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZER_TYPES_, __VA_ARGS__)
226
227PW_EXTERN_C_START
228
229// These functions encode the tokenized strings. These should not be called
230// directly. Instead, use the corresponding PW_TOKENIZE_TO_* macros above.
231void _pw_tokenizer_ToBuffer(void* buffer,
232 size_t* buffer_size_bytes, // input and output arg
233 pw_tokenizer_Token token,
234 pw_tokenizer_ArgTypes types,
235 ...);
236
237// This empty function allows the compiler to check the format string.
238static inline void pw_tokenizer_CheckFormatString(const char* format, ...)
239 PW_PRINTF_FORMAT(1, 2);
240
241static inline void pw_tokenizer_CheckFormatString(const char* format, ...) {
242 (void)format;
243}
244
245PW_EXTERN_C_END
246
// Tokenizes a format string with an optional mask and declares the local
// `_pw_tokenizer_token` that holds the result.
//
// Before tokenizing, a static_assert rejects calls with more than
// PW_TOKENIZER_MAX_SUPPORTED_ARGS arguments; its message reports the limit,
// the number of arguments used, the format string and the arguments. The
// tokenization itself is done by PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT.
//
// This expands to several statements, so it must be used at statement level
// inside a block.
// clang-format off
#define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...) \
  static_assert( \
      PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS, \
      "Tokenized strings cannot have more than " \
      PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; " \
      PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__)) \
      " arguments were used for " #format " (" #__VA_ARGS__ ")"); \
  PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, __VA_ARGS__)
// clang-format on
269
// Same as PW_TOKENIZE_FORMAT_STRING, but without the check on the number of
// arguments.
//
// The expansion does three things:
//   1. passes the format string and arguments to
//      pw_tokenizer_CheckFormatString inside `if (0)`, so the compiler can
//      check them without the arguments being evaluated a second time;
//   2. defines the static `_pw_tokenizer_token` as the masked token of the
//      format string;
//   3. emits the PW_TOKENIZER_DEFINE_TOKEN entry for that token.
#define PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, ...) \
  if (0) { /* Do not execute to prevent double evaluation of the arguments. */ \
    pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__)); \
  } \
  \
  /* Tokenize the string to a pw_tokenizer_Token at compile time. */ \
  static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token = \
      _PW_TOKENIZER_MASK_TOKEN(mask, format); \
  \
  PW_TOKENIZER_DEFINE_TOKEN(_pw_tokenizer_token, domain, format)
292
// Creates unique names to use for tokenized string entries and linker sections.
// The name is the prefix followed by the current __LINE__, an underscore, and
// the current __COUNTER__ value.
#define _PW_TOKENIZER_UNIQUE(prefix) PW_CONCAT(prefix, __LINE__, _, __COUNTER__)
295
#ifdef __cplusplus

// In C++, the token variable is declared constexpr.
#define _PW_TOKENIZER_CONST constexpr

// Records a token's entry (token, domain and string) for the token database.
//
// The C++ version first static_asserts that the domain is valid, then defines
// a uniquely named, 1-byte-aligned static constexpr object built by
// ::pw::tokenizer::internal::MakeEntry and placed via _PW_TOKENIZER_SECTION.
#define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
  static_assert(::pw::tokenizer::internal::ValidDomain(domain), \
                "pw_tokenizer domains may only contain alphanumeric " \
                "characters, underscore, or colon, and cannot start with a " \
                "number; space characters are ignored"); \
  alignas(1) static constexpr auto _PW_TOKENIZER_SECTION _PW_TOKENIZER_UNIQUE( \
      _pw_tokenizer_string_entry_) = \
      ::pw::tokenizer::internal::MakeEntry(token, domain, string)

namespace pw::tokenizer {

// C++ aliases for the C token type and the default domain.
using Token = ::pw_tokenizer_Token;
inline constexpr const char* kDefaultDomain = PW_TOKENIZER_DEFAULT_DOMAIN;

}  // namespace pw::tokenizer

#else

// In C, the token variable is declared const.
#define _PW_TOKENIZER_CONST const

// Alignment specifier for C, spelled with the compiler's aligned attribute.
#define _PW_ALIGNAS(alignment) __attribute__((aligned(alignment)))

// C version of PW_TOKENIZER_DEFINE_TOKEN: a 1-byte-aligned static const entry
// declared through _PW_TOKENIZER_STRING_ENTRY.
#define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
  _PW_ALIGNAS(1) static const _PW_TOKENIZER_STRING_ENTRY(token, domain, string)

#endif  // __cplusplus
331
// _PW_TOKENIZER_SECTION places the tokenized strings in a special .pw_tokenizer
// linker section. Host-side decoding tools read the strings and tokens from
// this section to build a database of tokenized strings.
//
// This section should be declared as type INFO so that it is excluded from the
// final binary. To declare the section, as well as the .pw_tokenizer.info
// metadata section, add the following to the linker script's SECTIONS command:
//
//   .pw_tokenizer.info 0x0 (INFO) :
//   {
//     KEEP(*(.pw_tokenizer.info))
//   }
//
//   .pw_tokenizer.entries 0x0 (INFO) :
//   {
//     KEEP(*(.pw_tokenizer.entries.*))
//   }
//
// A linker script snippet that provides these sections is provided in the file
// pw_tokenizer_linker_sections.ld. This file may be directly included into
// existing linker scripts.
//
// The tokenized string sections can also be managed without linker script
// modifications, though this is not recommended. The section can be extracted
// and removed from the ELF with objcopy:
//
//   objcopy --only-section .pw_tokenizer.* <ORIGINAL_ELF> <OUTPUT_ELF>
//   objcopy --remove-section .pw_tokenizer.* <ORIGINAL_ELF>
//
// OUTPUT_ELF will be an ELF with only the tokenized strings, and the original
// ELF file will have the sections removed.
//
// Without the above linker script modifications, the section garbage collection
// option (--gc-sections) removes the tokenized string sections. To avoid
// editing the target linker script, a separate metadata ELF can be linked
// without --gc-sections to preserve the tokenized data.
//
// pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
// executables) do not support section names longer than 16 characters, so a
// short, unused section name is used on macOS.
#ifdef __APPLE__
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw.)))
#else
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw_tokenizer.entries.)))
#endif  // __APPLE__
// Cross-reference from the documentation listing (not part of this header):
// PW_PRINTF_FORMAT(format_index, parameter_index) is defined at compiler.h:86.