21#include "pw_result/result.h"
22#include "pw_status/status.h"
38 return code_point < 0xD800u ||
39 (code_point >= 0xE000u && code_point <= 0x10FFFFu);
47 return code_point < 0xD800u ||
48 (code_point >= 0xE000u && code_point < 0xFDD0u) ||
49 (code_point > 0xFDEFu && code_point <= 0x10FFFFu &&
50 (code_point & 0xFFFEu) != 0xFFFEu);
64 : code_point_((static_cast<uint32_t>(
size) << kSizeShift) |
code_point) {}
72 constexpr uint32_t
code_point()
const {
return code_point_ & kCodePointMask; }
75 constexpr size_t size()
const {
76 return (code_point_ & kSizeMask) >> kSizeShift;
80 static constexpr size_t kSizeBits = 4;
81 static constexpr uint32_t kCodePointMask = ~0U >> kSizeBits;
82 static constexpr uint32_t kSizeMask = ~kCodePointMask;
83 static constexpr size_t kSizeShift =
sizeof(uint32_t) * 8 - kSizeBits;
107 std::string_view str) {
112 const uint8_t leading_byte =
static_cast<uint8_t
>(str.front());
113 size_t byte_count = 0;
114 uint32_t code_point = 0xFFFFFFFFu;
116 if (leading_byte <= 0x7F) {
119 code_point = leading_byte;
120 }
else if (leading_byte <= 0xDF) {
122 if (str.size() < byte_count) {
126 if ((str[1] & 0xC0) != 0x80) {
130 code_point = (
static_cast<uint32_t
>(str[0] & 0x1F) << 6) +
131 static_cast<uint32_t
>(str[1] & 0x3F);
132 }
else if (leading_byte <= 0xEF) {
134 if (str.size() < byte_count) {
137 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80) {
142 code_point = (
static_cast<uint32_t
>(str[0] & 0x0F) << 12) +
143 (
static_cast<uint32_t
>(str[1] & 0x3F) << 6) +
144 static_cast<uint32_t
>(str[2] & 0x3F);
145 }
else if (leading_byte <= 0xF7) {
147 if (str.size() < byte_count) {
150 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80 ||
151 (str[3] & 0xC0) != 0x80) {
156 code_point = (
static_cast<uint32_t
>(str[0] & 0x07) << 18) +
157 (
static_cast<uint32_t
>(str[1] & 0x3F) << 12) +
158 (
static_cast<uint32_t
>(str[2] & 0x3F) << 6) +
159 static_cast<uint32_t
>(str[3] & 0x3F);
174 while (!str.empty()) {
179 str = str.substr(rslt->size());
188 : size_(size), data_(std::move(data)) {}
190 constexpr std::string_view as_view()
const {
return {data_.data(), size_}; }
194 std::array<char, 4> data_;
217 if (code_point <= 0x7F) {
220 if (code_point <= 0x7FF) {
222 {
static_cast<char>(0xC0 | (code_point >> 6)),
223 static_cast<char>(0x80 | (code_point & 0x3F))}};
225 if (code_point <= 0xFFFF) {
228 {
static_cast<char>(0xE0 | (code_point >> 12)),
229 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
230 static_cast<char>(0x80 | (code_point & 0x3F))}};
232 if (code_point <= 0x10FFFF) {
235 {
static_cast<char>(0xF0 | (code_point >> 18)),
236 static_cast<char>(0x80 | ((code_point >> 12) & 0x3F)),
237 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
238 static_cast<char>(0x80 | (code_point & 0x3F))}};
static constexpr Status InvalidArgument()
Definition: status.h:164
static constexpr Status OutOfRange()
Definition: status.h:267
Definition: string_builder.h:89
Encapsulates the result of encoding a single code point as UTF-8.
Definition: utf_codecs.h:185
Definition: utf_codecs.h:60
constexpr uint32_t code_point() const
Returns the code point this represents.
Definition: utf_codecs.h:72
constexpr size_t size() const
Returns the number of bytes required to encode this code point.
Definition: utf_codecs.h:75
constexpr CodePointAndSize(uint32_t code_point, size_t size)
Creates a combined view of a code point (`code_point`) and its encoded size in bytes (`size`).
Definition: utf_codecs.h:63
constexpr Result< EncodedCodePoint > EncodeCodePoint(uint32_t code_point)
Encodes a single code point as UTF-8.
Definition: utf_codecs.h:216
constexpr bool IsStringValid(std::string_view str)
Determines if str is a valid UTF-8 string.
Definition: utf_codecs.h:173
constexpr bool IsValidCodepoint(uint32_t code_point)
Definition: utf_codecs.h:37
constexpr pw::Result< utf::CodePointAndSize > ReadCodePoint(std::string_view str)
Reads the first code point from a UTF-8 encoded str.
Definition: utf_codecs.h:106
constexpr bool IsValidCharacter(uint32_t code_point)
Definition: utf_codecs.h:46
Status WriteCodePoint(uint32_t code_point, pw::StringBuilder &output)
Helper that writes a code point to the provided pw::StringBuilder.
The Pigweed namespace.
Definition: alignment.h:27
pw::StringBuilder facilitates creating formatted strings in a fixed-sized buffer or in a pw::InlineString.