21namespace pw::tokenizer {
82 uint32_t date_removed;
85 static_assert(
sizeof(RawEntry) == 8u);
88 static constexpr uint32_t ReadUint32(
const T* bytes) {
89 return static_cast<uint8_t
>(bytes[0]) |
90 static_cast<uint8_t
>(bytes[1]) << 8 |
91 static_cast<uint8_t
>(bytes[2]) << 16 |
92 static_cast<uint8_t
>(bytes[3]) << 24;
119 using difference_type = std::ptrdiff_t;
123 using iterator_category = std::forward_iterator_tag;
125 constexpr iterator() : entry_{}, raw_(
nullptr) {}
131 raw_ +=
sizeof(RawEntry);
134 while (*entry_.
string++ !=
'\0') {
138 constexpr iterator operator++(
int) {
143 constexpr bool operator==(
const iterator& rhs)
const {
144 return raw_ == rhs.raw_;
146 constexpr bool operator!=(
const iterator& rhs)
const {
147 return raw_ != rhs.raw_;
150 constexpr const Entry& operator*()
const {
return entry_; }
152 constexpr const Entry* operator->()
const {
return &entry_; }
154 constexpr difference_type operator-(
const iterator& rhs)
const {
155 return (raw_ - rhs.raw_) /
sizeof(RawEntry);
162 constexpr iterator(
const char* raw_entry,
const char*
string)
163 : entry_{0, 0,
string}, raw_{raw_entry} {
164 if (raw_entry !=
string) {
169 explicit constexpr iterator(
const char*
end) : entry_{}, raw_(
end) {}
171 constexpr void ReadRawEntry() {
172 entry_.
token = ReadUint32(raw_);
181 using size_type = std::size_t;
182 using difference_type = std::ptrdiff_t;
188 using reverse_iterator = std::reverse_iterator<iterator>;
189 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
196 : begin_(begin), end_(end) {}
199 constexpr size_type size()
const {
return end_ - begin_; }
202 constexpr bool empty()
const {
return begin_ == end_; }
206 Entry operator[](size_type index)
const;
208 constexpr const iterator& begin()
const {
return begin_; }
209 constexpr const iterator& end()
const {
return end_; }
220 template <
typename ByteArray>
221 static constexpr bool IsValid(
const ByteArray& bytes) {
222 return HasValidHeader(bytes) && EachEntryHasAString(bytes);
235 template <const auto& kDatabaseBytes>
238 HasValidHeader<decltype(kDatabaseBytes)>(kDatabaseBytes),
239 "Databases must start with a 16-byte header that begins with TOKENS.");
241 static_assert(EachEntryHasAString<decltype(kDatabaseBytes)>(kDatabaseBytes),
242 "The database must have at least one string for each entry.");
254 template <
typename ByteArray>
256 return IsValid<ByteArray>(database_bytes)
261 constexpr TokenDatabase() : begin_{.data = nullptr}, end_{.data = nullptr} {}
267 constexpr size_type
size()
const {
268 return (end_.data - begin_.data) /
sizeof(RawEntry);
273 constexpr bool ok()
const {
return begin_.data !=
nullptr; }
283 std::array<char, 6> magic;
285 uint32_t entry_count;
289 static_assert(
sizeof(Header) == 2 *
sizeof(RawEntry));
291 template <
typename ByteArray>
292 static constexpr bool HasValidHeader(
const ByteArray& bytes) {
293 static_assert(
sizeof(*std::data(bytes)) == 1u);
295 if (std::size(bytes) <
sizeof(Header)) {
300 for (size_type i = 0; i < kMagicAndVersion.size(); ++i) {
301 if (bytes[i] != kMagicAndVersion[i]) {
309 template <
typename ByteArray>
310 static constexpr bool EachEntryHasAString(
const ByteArray& bytes) {
311 const size_type entries = ReadEntryCount(std::data(bytes));
314 if (std::size(bytes) < StringTable(entries)) {
319 size_type string_count = 0;
320 for (
auto i = std::begin(bytes) + StringTable(entries); i < std::end(bytes);
322 string_count += (*i ==
'\0') ? 1 : 0;
326 return string_count >= entries;
331 template <
typename T>
332 static constexpr uint32_t ReadEntryCount(
const T* header_bytes) {
333 const T* bytes = header_bytes + offsetof(Header, entry_count);
334 return ReadUint32(bytes);
338 static constexpr size_type StringTable(size_type entries) {
339 return sizeof(Header) + entries *
sizeof(RawEntry);
344 static constexpr std::array<char, 8> kMagicAndVersion = {
345 'T',
'O',
'K',
'E',
'N',
'S',
'\0',
'\0'};
347 template <
typename Byte>
350 bytes + StringTable(ReadEntryCount(bytes))) {
351 static_assert(
sizeof(Byte) == 1u);
359 : begin_{.data =
begin}, end_{.data =
end} {}
362 : begin_{.unsigned_data =
begin}, end_{.unsigned_data =
end} {}
365 : begin_{.signed_data =
begin}, end_{.signed_data =
end} {}
371 const unsigned char* unsigned_data;
372 const signed char* signed_data;
Definition: token_database.h:193
Iterator for TokenDatabase values.
Definition: token_database.h:117
Definition: token_database.h:75
static constexpr uint32_t kDateRemovedNever
Definition: token_database.h:98
constexpr TokenDatabase()
Creates a database with no data. ok() returns false.
Definition: token_database.h:261
static constexpr TokenDatabase Create(const ByteArray &database_bytes)
Definition: token_database.h:255
constexpr iterator begin() const
Returns an iterator for the first token entry.
Definition: token_database.h:276
constexpr size_type size() const
Returns the total number of entries (unique token-string pairs).
Definition: token_database.h:267
static constexpr bool IsValid(const ByteArray &bytes)
Definition: token_database.h:221
constexpr bool ok() const
Definition: token_database.h:273
Entries Find(uint32_t token) const
Returns all entries associated with this token. This is O(n).
static constexpr TokenDatabase Create()
Definition: token_database.h:236
constexpr iterator end() const
Returns an iterator for one past the last token entry.
Definition: token_database.h:279
An entry in the token database.
Definition: token_database.h:101
const char * string
The null-terminated string represented by this token.
Definition: token_database.h:113
uint32_t date_removed
Definition: token_database.h:110
uint32_t token
The token that represents this string.
Definition: token_database.h:103