30#include <unordered_map>
34#include "pw_result/result.h"
35#include "pw_span/span.h"
36#include "pw_stream/stream.h"
37#include "pw_tokenizer/internal/decode.h"
38#include "pw_tokenizer/token_database.h"
39#include "pw_tokenizer/tokenize.h"
41namespace pw::tokenizer {
49using DomainTokenEntriesMap = std::unordered_map<
51 std::unordered_map<uint32_t, std::vector<TokenizedStringEntry>>>;
67 bool successful_decode =
false;
68 for (
const auto& match : matches_) {
70 if (successful_decode) {
73 successful_decode =
true;
77 return successful_decode;
81 const std::vector<DecodedFormatString>&
matches()
const {
return matches_; }
83 const uint32_t& token()
const {
return token_; }
88 const std::string&
BestString()
const {
return best_string_; }
96 std::string best_string_;
98 std::vector<DecodedFormatString> matches_;
112 : database_(std::move(database)) {}
133 std::string_view domain = kDefaultDomain)
const {
139 std::string_view domain = kDefaultDomain)
const {
145 std::string_view domain = kDefaultDomain)
const {
146 return Detokenize(encoded.data(), encoded.size(), domain);
152 std::string_view domain = kDefaultDomain)
const {
153 return Detokenize(
span(
static_cast<const std::byte*
>(encoded), size_bytes),
161 std::string_view domain = kDefaultDomain)
const {
168 std::string_view domain = kDefaultDomain)
const {
174 std::string_view encoded,
175 std::string_view domain = kDefaultDomain)
const {
183 std::string_view domain = kDefaultDomain)
const {
185 span(
static_cast<const std::byte*
>(encoded), size_bytes), domain);
202 return DetokenizeTextRecursive(text, kMaxDecodePasses);
221 const DomainTokenEntriesMap& database()
const {
return database_; }
224 uint32_t token, std::string_view domain)
const;
230 static constexpr unsigned kMaxDecodePasses = 4;
232 std::string DetokenizeTextRecursive(std::string_view text,
233 unsigned max_passes)
const;
238 std::string_view domain,
239 bool recursion)
const;
241 DomainTokenEntriesMap database_;
Definition: span_impl.h:235
Definition: detokenize.h:55
const std::vector< DecodedFormatString > & matches() const
Returns the strings that matched the token, with the best matches first.
Definition: detokenize.h:81
bool ok() const
True if there was only one match that decoded successfully.
Definition: detokenize.h:66
std::string BestStringWithErrors() const
const std::string & BestString() const
Definition: detokenize.h:88
Definition: detokenize.h:103
DetokenizedString Detokenize(const span< const std::byte > &encoded, std::string_view domain=kDefaultDomain) const
Definition: detokenize.h:132
DetokenizedString Detokenize(const span< const uint8_t > &encoded, std::string_view domain=kDefaultDomain) const
Overload of Detokenize for span<const uint8_t>.
Definition: detokenize.h:138
std::string DetokenizeText(std::string_view text) const
Definition: detokenize.h:201
static Result< Detokenizer > FromElfFile(stream::SeekableReader &stream)
static Result< Detokenizer > FromCsv(std::string_view csv)
Constructs a detokenizer from a CSV database.
DetokenizedString RecursiveDetokenize(const span< const std::byte > &encoded, std::string_view domain=kDefaultDomain) const
Definition: detokenize.h:159
DetokenizedString DetokenizeBase64Message(std::string_view text) const
static Result< Detokenizer > FromElfSection(span< const uint8_t > elf_section)
Overload of FromElfSection for a uint8_t span.
Definition: detokenize.h:119
Detokenizer(const TokenDatabase &database)
DetokenizedString Detokenize(const void *encoded, size_t size_bytes, std::string_view domain=kDefaultDomain) const
Overload of Detokenize for a pointer and length.
Definition: detokenize.h:150
Detokenizer(DomainTokenEntriesMap &&database)
Constructs a detokenizer by directly passing the parsed database.
Definition: detokenize.h:111
DetokenizedString Detokenize(std::string_view encoded, std::string_view domain=kDefaultDomain) const
Overload of Detokenize for std::string_view.
Definition: detokenize.h:144
DetokenizedString RecursiveDetokenize(const span< const uint8_t > &encoded, std::string_view domain=kDefaultDomain) const
Overload of RecursiveDetokenize for span<const uint8_t>.
Definition: detokenize.h:166
static Result< Detokenizer > FromElfSection(span< const std::byte > elf_section)
DetokenizedString RecursiveDetokenize(const void *encoded, size_t size_bytes, std::string_view domain=kDefaultDomain) const
Overload of RecursiveDetokenize for a pointer and length.
Definition: detokenize.h:180
DetokenizedString RecursiveDetokenize(std::string_view encoded, std::string_view domain=kDefaultDomain) const
Overload of RecursiveDetokenize for std::string_view.
Definition: detokenize.h:173
std::string DecodeOptionallyTokenizedData(const span< const std::byte > &optionally_tokenized_data)
Definition: token_database.h:77
std::pair< FormatString, uint32_t > TokenizedStringEntry
Token database entry.
Definition: detokenize.h:48