21#include "pw_result/result.h"
22#include "pw_status/status.h"
// Accepts every value up to 0x10FFFF except 0xD800-0xDFFF (the UTF-16
// surrogate range), which falls in the gap between the two accepted intervals.
38 return code_point < 0xD800u ||
39 (code_point >= 0xE000u && code_point <= 0x10FFFFu);
// Rejects 0xD800-0xDFFF (surrogates), 0xFDD0-0xFDEF, anything above 0x10FFFF,
// and any value whose low 16 bits are 0xFFFE or 0xFFFF (masking with 0xFFFE
// folds both of those endings onto 0xFFFE). Everything else is accepted.
47 return code_point < 0xD800u ||
48 (code_point >= 0xE000u && code_point < 0xFDD0u) ||
49 (code_point > 0xFDEFu && code_point <= 0x10FFFFu &&
50 (code_point & 0xFFFEu) != 0xFFFEu);
64 : code_point_((static_cast<uint32_t>(
size) << kSizeShift) |
code_point) {}
72 constexpr uint32_t
code_point()
const {
return code_point_ & kCodePointMask; }
/// Returns the number of bytes required to encode this code point. The value
/// is read from the bits selected by kSizeMask and shifted down by kSizeShift.
75 constexpr size_t size()
const {
76 return (code_point_ & kSizeMask) >> kSizeShift;
// Bit layout of the packed `code_point_` word: the top kSizeBits bits hold the
// encoded byte count and the remaining low bits hold the code point itself.
//
// The masks are built from a 32-bit literal rather than `~0U`, whose type is
// `unsigned int` and would therefore yield a truncated mask wherever
// `unsigned int` is narrower than 32 bits. Likewise the shift is derived from
// the fixed 32-bit width of `uint32_t` instead of `sizeof(uint32_t) * 8`,
// which silently assumes 8-bit bytes.
static constexpr size_t kSizeBits = 4;
static constexpr uint32_t kCodePointMask = 0xFFFFFFFFu >> kSizeBits;
static constexpr uint32_t kSizeMask = 0xFFFFFFFFu & ~kCodePointMask;
static constexpr size_t kSizeShift = 32 - kSizeBits;
114 std::string_view str) {
119 const uint8_t leading_byte =
static_cast<uint8_t
>(str.front());
120 size_t byte_count = 0;
121 uint32_t code_point = 0xFFFFFFFFu;
123 if (leading_byte <= 0x7F) {
126 code_point = leading_byte;
127 }
else if (leading_byte <= 0xDF) {
129 if (str.size() < byte_count) {
133 if ((str[1] & 0xC0) != 0x80) {
137 code_point = (
static_cast<uint32_t
>(str[0] & 0x1F) << 6) +
138 static_cast<uint32_t
>(str[1] & 0x3F);
139 }
else if (leading_byte <= 0xEF) {
141 if (str.size() < byte_count) {
144 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80) {
149 code_point = (
static_cast<uint32_t
>(str[0] & 0x0F) << 12) +
150 (
static_cast<uint32_t
>(str[1] & 0x3F) << 6) +
151 static_cast<uint32_t
>(str[2] & 0x3F);
152 }
else if (leading_byte <= 0xF7) {
154 if (str.size() < byte_count) {
157 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80 ||
158 (str[3] & 0xC0) != 0x80) {
163 code_point = (
static_cast<uint32_t
>(str[0] & 0x07) << 18) +
164 (
static_cast<uint32_t
>(str[1] & 0x3F) << 12) +
165 (
static_cast<uint32_t
>(str[2] & 0x3F) << 6) +
166 static_cast<uint32_t
>(str[3] & 0x3F);
181 while (!str.empty()) {
186 str = str.substr(rslt->size());
195 : size_(size), data_(std::move(data)) {}
197 constexpr std::string_view as_view()
const {
return {data_.data(), size_}; }
201 std::array<char, 4> data_;
231 if (code_point <= 0x7F) {
234 if (code_point <= 0x7FF) {
236 {
static_cast<char>(0xC0 | (code_point >> 6)),
237 static_cast<char>(0x80 | (code_point & 0x3F))}};
239 if (code_point <= 0xFFFF) {
242 {
static_cast<char>(0xE0 | (code_point >> 12)),
243 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
244 static_cast<char>(0x80 | (code_point & 0x3F))}};
246 if (code_point <= 0x10FFFF) {
249 {
static_cast<char>(0xF0 | (code_point >> 18)),
250 static_cast<char>(0x80 | ((code_point >> 12) & 0x3F)),
251 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
252 static_cast<char>(0x80 | (code_point & 0x3F))}};
static constexpr Status InvalidArgument()
Argument was malformed; e.g. invalid characters when parsing integer.
Definition: status.h:131
static constexpr Status OutOfRange()
Operation attempted out of range; e.g. seeking past end of file.
Definition: status.h:172
Definition: string_builder.h:89
Encapsulates the result of encoding a single code point as UTF-8.
Definition: utf_codecs.h:192
Definition: utf_codecs.h:60
constexpr uint32_t code_point() const
Returns the code point this represents.
Definition: utf_codecs.h:72
constexpr size_t size() const
Returns the number of bytes required to encode this code point.
Definition: utf_codecs.h:75
constexpr CodePointAndSize(uint32_t code_point, size_t size)
Creates a combined view of a code point (`code_point`) and its encoded size (`size`).
Definition: utf_codecs.h:63
constexpr Result< EncodedCodePoint > EncodeCodePoint(uint32_t code_point)
Encodes a single code point as UTF-8.
Definition: utf_codecs.h:230
constexpr bool IsStringValid(std::string_view str)
Determines if str is a valid UTF-8 string.
Definition: utf_codecs.h:180
constexpr bool IsValidCodepoint(uint32_t code_point)
Definition: utf_codecs.h:37
constexpr pw::Result< utf::CodePointAndSize > ReadCodePoint(std::string_view str)
Reads the first code point from a UTF-8 encoded str.
Definition: utf_codecs.h:113
constexpr bool IsValidCharacter(uint32_t code_point)
Definition: utf_codecs.h:46
Status WriteCodePoint(uint32_t code_point, pw::StringBuilder &output)
Helper that writes a code point to the provided pw::StringBuilder.
The Pigweed namespace.
Definition: alignment.h:27
pw::StringBuilder facilitates creating formatted strings in a fixed-sized buffer or in a pw::InlineString.