21namespace pw::tokenizer {
82 uint32_t date_removed;
// Compile-time layout check: a RawEntry must occupy exactly 8 bytes.
// Other code in this listing depends on that size (the iterator advances by
// sizeof(RawEntry), and the Header is asserted to be two RawEntry in size).
85 static_assert(
sizeof(RawEntry) == 8u);
88 static constexpr uint32_t ReadUint32(
const T* bytes) {
89 return static_cast<uint32_t
>(
static_cast<uint8_t
>(bytes[0]) |
90 static_cast<uint8_t
>(bytes[1]) << 8 |
91 static_cast<uint8_t
>(bytes[2]) << 16 |
92 static_cast<uint8_t
>(bytes[3]) << 24);
// Iterator trait aliases. Distances between iterators are std::ptrdiff_t.
119 using difference_type = std::ptrdiff_t;
// The iterator is tagged as a forward iterator.
123 using iterator_category = std::forward_iterator_tag;
// Default constructor: value-initializes entry_ and sets raw_ to nullptr.
// The resulting iterator does not refer to any entry.
125 constexpr iterator() : entry_{}, raw_(
nullptr) {}
131 raw_ +=
sizeof(RawEntry);
134 while (*entry_.
string++ !=
'\0') {
138 constexpr iterator operator++(
int) {
143 constexpr bool operator==(
const iterator& rhs)
const {
144 return raw_ == rhs.raw_;
146 constexpr bool operator!=(
const iterator& rhs)
const {
147 return raw_ != rhs.raw_;
// Dereference: returns a const reference to the Entry held inside this
// iterator (entry_). The reference points into the iterator object itself.
150 constexpr const Entry& operator*()
const {
return entry_; }
// Member access: returns the address of the Entry held inside this iterator
// (entry_).
152 constexpr const Entry* operator->()
const {
return &entry_; }
154 constexpr difference_type operator-(
const iterator& rhs)
const {
155 return (raw_ - rhs.raw_) /
static_cast<difference_type
>(
sizeof(RawEntry));
162 constexpr iterator(
const char* raw_entry,
const char*
string)
163 : entry_{0, 0,
string}, raw_{raw_entry} {
164 if (raw_entry !=
string) {
// Constructs an iterator from a single raw pointer: entry_ is
// value-initialized and raw_ is set to `end`. No entry is read.
// NOTE(review): presumably used to build the past-the-end iterator, going by
// the parameter name -- confirm against the callers.
169 explicit constexpr iterator(
const char*
end) : entry_{}, raw_(
end) {}
171 constexpr void ReadRawEntry() {
172 entry_.
token = ReadUint32(raw_);
// Container-style type aliases: sizes are std::size_t and iterator distances
// are std::ptrdiff_t.
181 using size_type = std::size_t;
182 using difference_type = std::ptrdiff_t;
// Reverse-iterator aliases built with std::reverse_iterator.
// NOTE(review): std::reverse_iterator needs a bidirectional iterator to be
// usable, while the iterator in this listing is tagged forward_iterator_tag --
// confirm these aliases are ever instantiated.
188 using reverse_iterator = std::reverse_iterator<iterator>;
189 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
196 : begin_(begin), end_(end) {}
199 constexpr size_type size()
const {
200 return static_cast<size_type
>(end_ - begin_);
// Returns true when the stored begin_ and end_ iterators compare equal,
// i.e. the range contains no entries.
204 constexpr bool empty()
const {
return begin_ == end_; }
// Indexed access: returns the Entry at `index` by value.
// Declaration only; the definition is not part of this listing.
// NOTE(review): bounds handling and cost are not visible here -- confirm in
// the definition before relying on either.
208 Entry operator[](size_type index)
const;
// Returns a const reference to the stored begin_ iterator (start of the range).
210 constexpr const iterator& begin()
const {
return begin_; }
// Returns a const reference to the stored end_ iterator (end of the range).
211 constexpr const iterator& end()
const {
return end_; }
222 template <
typename ByteArray>
223 static constexpr bool IsValid(
const ByteArray& bytes) {
224 return HasValidHeader(bytes) && EachEntryHasAString(bytes);
237 template <const auto& kDatabaseBytes>
240 HasValidHeader<decltype(kDatabaseBytes)>(kDatabaseBytes),
241 "Databases must start with a 16-byte header that begins with TOKENS.");
243 static_assert(EachEntryHasAString<decltype(kDatabaseBytes)>(kDatabaseBytes),
244 "The database must have at least one string for each entry.");
256 template <
typename ByteArray>
258 return IsValid<ByteArray>(database_bytes)
// Creates a database with no data: both begin_ and end_ are initialized
// through their `data` member to nullptr, so ok() returns false.
263 constexpr TokenDatabase() : begin_{.data = nullptr}, end_{.data = nullptr} {}
269 constexpr size_type
size()
const {
270 return static_cast<size_type
>(end_.data - begin_.data) /
sizeof(RawEntry);
// Reports whether this database holds data: true when begin_.data is
// non-null. A default-constructed TokenDatabase sets begin_.data to nullptr,
// so ok() is false for it.
275 constexpr bool ok()
const {
return begin_.data !=
nullptr; }
285 std::array<char, 6> magic;
287 uint32_t entry_count;
// Compile-time layout check: the Header must be exactly the size of two
// RawEntry records. RawEntry is asserted to be 8 bytes, so the header is
// 16 bytes, matching the "16-byte header" wording of the Create() assertion.
291 static_assert(
sizeof(Header) == 2 *
sizeof(RawEntry));
293 template <
typename ByteArray>
294 static constexpr bool HasValidHeader(
const ByteArray& bytes) {
295 static_assert(
sizeof(*std::data(bytes)) == 1u);
297 if (std::size(bytes) <
sizeof(Header)) {
302 for (size_type i = 0; i < kMagicAndVersion.size(); ++i) {
303 if (bytes[i] != kMagicAndVersion[i]) {
311 template <
typename ByteArray>
312 static constexpr bool EachEntryHasAString(
const ByteArray& bytes) {
313 const size_type entries = ReadEntryCount(std::data(bytes));
316 if (std::size(bytes) < StringTable(entries)) {
321 size_type string_count = 0;
323 std::begin(bytes) +
static_cast<ptrdiff_t
>(StringTable(entries));
326 string_count += (*i ==
'\0') ? 1 : 0;
330 return string_count >= entries;
335 template <
typename T>
336 static constexpr uint32_t ReadEntryCount(
const T* header_bytes) {
337 const T* bytes = header_bytes + offsetof(Header, entry_count);
338 return ReadUint32(bytes);
342 static constexpr size_type StringTable(size_type entries) {
343 return sizeof(Header) + entries *
sizeof(RawEntry);
// The 8 bytes expected at the very start of a database: the characters
// "TOKENS" followed by two zero bytes. HasValidHeader compares the leading
// input bytes against this array element by element.
// NOTE(review): going by the name, the two trailing zero bytes are presumably
// the version field -- confirm against the Header definition.
348 static constexpr std::array<char, 8> kMagicAndVersion = {
349 'T',
'O',
'K',
'E',
'N',
'S',
'\0',
'\0'};
351 template <
typename Byte>
354 bytes + StringTable(ReadEntryCount(bytes))) {
355 static_assert(
sizeof(Byte) == 1u);
363 : begin_{.data =
begin}, end_{.data =
end} {}
366 : begin_{.unsigned_data =
begin}, end_{.unsigned_data =
end} {}
369 : begin_{.signed_data =
begin}, end_{.signed_data =
end} {}
375 const unsigned char* unsigned_data;
376 const signed char* signed_data;
Definition: token_database.h:193
Iterator for TokenDatabase values.
Definition: token_database.h:117
Definition: token_database.h:75
static constexpr uint32_t kDateRemovedNever
Definition: token_database.h:98
constexpr TokenDatabase()
Creates a database with no data. ok() returns false.
Definition: token_database.h:263
static constexpr TokenDatabase Create(const ByteArray &database_bytes)
Definition: token_database.h:257
constexpr iterator begin() const
Returns an iterator for the first token entry.
Definition: token_database.h:278
constexpr size_type size() const
Returns the total number of entries (unique token-string pairs).
Definition: token_database.h:269
static constexpr bool IsValid(const ByteArray &bytes)
Definition: token_database.h:223
constexpr bool ok() const
Definition: token_database.h:275
Entries Find(uint32_t token) const
Returns all entries associated with this token. This is O(n).
static constexpr TokenDatabase Create()
Definition: token_database.h:238
constexpr iterator end() const
Returns an iterator for one past the last token entry.
Definition: token_database.h:281
An entry in the token database.
Definition: token_database.h:101
const char * string
The null-terminated string represented by this token.
Definition: token_database.h:113
uint32_t date_removed
Definition: token_database.h:110
uint32_t token
The token that represents this string.
Definition: token_database.h:103