21namespace pw::tokenizer {
84 uint32_t date_removed;
87 static_assert(
sizeof(RawEntry) == 8u);
// Decodes a little-endian uint32_t from the four bytes starting at `bytes`.
//
// `bytes` must point to at least four readable elements; bytes[0] is the
// least-significant byte of the result. Each element is first converted to
// uint8_t so that a negative (signed) char value does not sign-extend, and is
// then widened to uint32_t *before* shifting. Shifting the int-promoted
// uint8_t directly would push bits into the sign bit of int whenever
// bytes[3] >= 0x80, which is only implementation-defined prior to C++20.
template <typename T>
static constexpr uint32_t ReadUint32(const T* bytes) {
  return static_cast<uint32_t>(static_cast<uint8_t>(bytes[0])) |
         static_cast<uint32_t>(static_cast<uint8_t>(bytes[1])) << 8 |
         static_cast<uint32_t>(static_cast<uint8_t>(bytes[2])) << 16 |
         static_cast<uint32_t>(static_cast<uint8_t>(bytes[3])) << 24;
}
121 using difference_type = std::ptrdiff_t;
125 using iterator_category = std::forward_iterator_tag;
// Default constructor: value-initializes entry_ and sets raw_ to nullptr.
127 constexpr iterator() : entry_{}, raw_(
nullptr) {}
133 raw_ +=
sizeof(RawEntry);
136 while (*entry_.
string++ !=
'\0') {
140 constexpr iterator operator++(
int) {
// Equality: two iterators are equal when their raw_ pointers are equal. The
// cached entry_ is not part of the comparison.
145 constexpr bool operator==(
const iterator& rhs)
const {
146 return raw_ == rhs.raw_;
// Inequality: true when the two iterators' raw_ pointers differ. The cached
// entry_ is not part of the comparison.
148 constexpr bool operator!=(
const iterator& rhs)
const {
149 return raw_ != rhs.raw_;
// Dereference: returns a const reference to the Entry cached inside this
// iterator (entry_), so the reference refers to iterator-owned state.
152 constexpr const Entry& operator*()
const {
return entry_; }
// Member access: returns the address of the Entry cached inside this
// iterator (entry_).
154 constexpr const Entry* operator->()
const {
return &entry_; }
// Distance between two iterators, measured in entries: the difference of the
// raw_ pointers divided by sizeof(RawEntry). The divisor is cast to
// difference_type so the division is performed in a signed type.
156 constexpr difference_type operator-(
const iterator& rhs)
const {
157 return (raw_ - rhs.raw_) /
static_cast<difference_type
>(
sizeof(RawEntry));
// Constructs an iterator from a pointer to a raw entry and a pointer to a
// string. entry_ starts with its first two fields zeroed and its third set to
// `string`; raw_ is set to `raw_entry`.
// NOTE(review): the body of the `raw_entry != string` branch is not visible in
// this listing; presumably it loads the first entry — confirm against the
// full header.
164 constexpr iterator(
const char* raw_entry,
const char*
string)
165 : entry_{0, 0,
string}, raw_{raw_entry} {
166 if (raw_entry !=
string) {
// Constructs an iterator from a single pointer: raw_ is set to `end` and
// entry_ is value-initialized (no entry data is read).
171 explicit constexpr iterator(
const char*
end) : entry_{}, raw_(
end) {}
// Refreshes the cached entry_ from the bytes at raw_. The visible statement
// decodes entry_.token from raw_ via ReadUint32.
// NOTE(review): the remainder of this function is not visible in this
// listing — confirm what else it populates.
173 constexpr void ReadRawEntry() {
174 entry_.
token = ReadUint32(raw_);
183 using size_type = std::size_t;
184 using difference_type = std::ptrdiff_t;
190 using reverse_iterator = std::reverse_iterator<iterator>;
191 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
198 : begin_(begin), end_(end) {}
// Number of entries in this range: the iterator difference end_ - begin_,
// cast to size_type.
201 constexpr size_type size()
const {
202 return static_cast<size_type
>(end_ - begin_);
// True when begin_ and end_ compare equal, i.e. the range holds no entries.
206 constexpr bool empty()
const {
return begin_ == end_; }
210 Entry operator[](size_type index)
const;
// Returns a const reference to the stored begin_ iterator.
212 constexpr const iterator& begin()
const {
return begin_; }
// Returns a const reference to the stored end_ iterator.
213 constexpr const iterator& end()
const {
return end_; }
// Returns true only if `bytes` passes both checks: HasValidHeader and then
// EachEntryHasAString. The `&&` short-circuits, so the per-entry check runs
// only after the header check has succeeded.
224 template <
typename ByteArray>
225 static constexpr bool IsValid(
const ByteArray& bytes) {
226 return HasValidHeader(bytes) && EachEntryHasAString(bytes);
239 template <const auto& kDatabaseBytes>
242 HasValidHeader<decltype(kDatabaseBytes)>(kDatabaseBytes),
243 "Databases must start with a 16-byte header that begins with TOKENS.");
245 static_assert(EachEntryHasAString<decltype(kDatabaseBytes)>(kDatabaseBytes),
246 "The database must have at least one string for each entry.");
258 template <
typename ByteArray>
260 return IsValid<ByteArray>(database_bytes)
// Default constructor: both begin_.data and end_.data are null, so ok()
// (which tests begin_.data against nullptr) returns false.
265 constexpr TokenDatabase() : begin_{.data = nullptr}, end_{.data = nullptr} {}
// Number of entries: the byte distance between end_.data and begin_.data,
// cast to size_type, divided by sizeof(RawEntry).
271 constexpr size_type
size()
const {
272 return static_cast<size_type
>(end_.data - begin_.data) /
sizeof(RawEntry);
// True when begin_.data is non-null.
277 constexpr bool ok()
const {
return begin_.data !=
nullptr; }
287 std::array<char, 6> magic;
289 uint32_t entry_count;
293 static_assert(
sizeof(Header) == 2 *
sizeof(RawEntry));
// Header check for a candidate database byte array.
//  - Requires single-byte elements (enforced by the static_assert).
//  - Tests whether the array is shorter than sizeof(Header).
//  - Compares the leading kMagicAndVersion.size() bytes against
//    kMagicAndVersion, one element at a time.
// NOTE(review): the statements inside each `if` and the final return are not
// visible in this listing; presumably each failed test returns false and the
// function returns true otherwise — confirm against the full header.
295 template <
typename ByteArray>
296 static constexpr bool HasValidHeader(
const ByteArray& bytes) {
297 static_assert(
sizeof(*std::data(bytes)) == 1u);
299 if (std::size(bytes) <
sizeof(Header)) {
304 for (size_type i = 0; i < kMagicAndVersion.size(); ++i) {
305 if (bytes[i] != kMagicAndVersion[i]) {
// Checks that the array contains at least as many '\0' terminators as the
// header's entry count.
//  - Reads the entry count from the header bytes via ReadEntryCount.
//  - Tests whether the array is smaller than StringTable(entries), the offset
//    just past the header and the raw entries.
//  - Starting at that offset, adds one to string_count for every '\0' byte.
//  - Returns string_count >= entries.
// NOTE(review): the loop header and the body of the size check are not
// visible in this listing; presumably the loop runs to the end of `bytes` and
// the size check returns false — confirm against the full header.
313 template <
typename ByteArray>
314 static constexpr bool EachEntryHasAString(
const ByteArray& bytes) {
315 const size_type entries = ReadEntryCount(std::data(bytes));
318 if (std::size(bytes) < StringTable(entries)) {
323 size_type string_count = 0;
325 std::begin(bytes) +
static_cast<ptrdiff_t
>(StringTable(entries));
328 string_count += (*i ==
'\0') ? 1 : 0;
332 return string_count >= entries;
// Reads the entry count out of a header: advances `header_bytes` by
// offsetof(Header, entry_count) and decodes the four bytes found there with
// ReadUint32.
337 template <
typename T>
338 static constexpr uint32_t ReadEntryCount(
const T* header_bytes) {
339 const T* bytes = header_bytes + offsetof(Header, entry_count);
340 return ReadUint32(bytes);
// Byte offset just past the header and `entries` raw entries:
// sizeof(Header) + entries * sizeof(RawEntry).
// NOTE(review): by its name and its use in EachEntryHasAString, this is where
// the string data begins — confirm against the format documentation.
344 static constexpr size_type StringTable(size_type entries) {
345 return sizeof(Header) + entries *
sizeof(RawEntry);
// The eight bytes HasValidHeader compares against the start of a database:
// the characters 'T','O','K','E','N','S' followed by two '\0' bytes.
// NOTE(review): the two trailing zero bytes presumably encode the version
// field that follows the 6-byte magic — confirm against the Header layout.
350 static constexpr std::array<char, 8> kMagicAndVersion = {
351 'T',
'O',
'K',
'E',
'N',
'S',
'\0',
'\0'};
353 template <
typename Byte>
356 bytes + StringTable(ReadEntryCount(bytes))) {
357 static_assert(
sizeof(Byte) == 1u);
365 : begin_{.data =
begin}, end_{.data =
end} {}
368 : begin_{.unsigned_data =
begin}, end_{.unsigned_data =
end} {}
371 : begin_{.signed_data =
begin}, end_{.signed_data =
end} {}
377 const unsigned char* unsigned_data;
378 const signed char* signed_data;
Definition: token_database.h:195
Iterator for TokenDatabase values.
Definition: token_database.h:119
Definition: token_database.h:77
static constexpr uint32_t kDateRemovedNever
Definition: token_database.h:100
constexpr TokenDatabase()
Creates a database with no data. ok() returns false.
Definition: token_database.h:265
static constexpr TokenDatabase Create(const ByteArray &database_bytes)
Definition: token_database.h:259
constexpr iterator begin() const
Returns an iterator for the first token entry.
Definition: token_database.h:280
constexpr size_type size() const
Returns the total number of entries (unique token-string pairs).
Definition: token_database.h:271
static constexpr bool IsValid(const ByteArray &bytes)
Definition: token_database.h:225
constexpr bool ok() const
Definition: token_database.h:277
Entries Find(uint32_t token) const
Returns all entries associated with this token. This is O(n).
static constexpr TokenDatabase Create()
Definition: token_database.h:240
constexpr iterator end() const
Returns an iterator for one past the last token entry.
Definition: token_database.h:283
An entry in the token database.
Definition: token_database.h:103
const char * string
The null-terminated string represented by this token.
Definition: token_database.h:115
uint32_t date_removed
Definition: token_database.h:112
uint32_t token
The token that represents this string.
Definition: token_database.h:105