23#include "pw_result/result.h"
24#include "pw_status/status.h"
// Returns true when `code_point` is below 0xD800 or lies in 0xE000..0x10FFFF
// (inclusive); i.e. it is at most 0x10FFFF and outside 0xD800..0xDFFF, the
// UTF-16 surrogate range.
34constexpr inline bool IsValidCodepoint(uint32_t code_point) {
35 return code_point < 0xD800u ||
36 (code_point >= 0xE000u && code_point <= 0x10FFFFu);
// Returns true when `code_point` passes the same range test as above
// (<= 0x10FFFF, not in 0xD800..0xDFFF) and is additionally neither in
// 0xFDD0..0xFDEF nor a value whose low 16 bits are 0xFFFE or 0xFFFF.
43constexpr inline bool IsValidCharacter(uint32_t code_point) {
44 return code_point < 0xD800u ||
45 (code_point >= 0xE000u && code_point < 0xFDD0u) ||
46 (code_point > 0xFDEFu && code_point <= 0x10FFFFu &&
// Masking with 0xFFFE ignores bit 0, so this one comparison rejects both
// ...FFFE and ...FFFF regardless of the bits above the low 16.
47 (code_point & 0xFFFEu) != 0xFFFEu);
// Stores both values in one 32-bit word: `size` is shifted up by kSizeShift
// and OR'd with `code_point`.
// NOTE(review): `code_point` is not masked with kCodePointMask here, so any
// bits it has at or above kSizeShift would merge into the stored size --
// confirm callers only pass values that fit below the size field.
61 : code_point_((static_cast<uint32_t>(
size) << kSizeShift) |
code_point) {}
// Returns the stored word with the size bits masked off (kCodePointMask keeps
// only the bits below the size field).
69 constexpr uint32_t
code_point()
const {
return code_point_ & kCodePointMask; }
// Returns the size field: isolates the bits selected by kSizeMask and shifts
// them down by kSizeShift.
72 constexpr size_t size()
const {
73 return (code_point_ & kSizeMask) >> kSizeShift;
// Layout of the packed word: the top kSizeBits bits hold the size, the
// remaining low bits hold the code point.
77 static constexpr size_t kSizeBits = 4;
// All-ones shifted right by kSizeBits: selects the low (code point) bits.
// With a 32-bit `unsigned` this is 0x0FFFFFFF.
78 static constexpr uint32_t kCodePointMask = ~0U >> kSizeBits;
// Complement of the code point mask: selects the top kSizeBits bits.
79 static constexpr uint32_t kSizeMask = ~kCodePointMask;
// Bit position of the size field: width of uint32_t in bits minus kSizeBits.
80 static constexpr size_t kSizeShift =
sizeof(uint32_t) * 8 - kSizeBits;
// Decodes the UTF-8 sequence at the front of `str`, choosing the sequence
// length from the first byte.
//
// Visible outcomes:
//  - utf::CodePointAndSize(code_point, byte_count) when the decoded value
//    passes utf::IsValidCodepoint.
//  - pw::Status::OutOfRange() when the decoded value fails that check.
//  - pw::Status::InvalidArgument() when `str` is shorter than the sequence
//    length or a continuation byte does not match 10xxxxxx (plus two returns
//    whose guards are not visible in this listing; see notes below).
//
// NOTE(review): several original lines (guards, `byte_count` assignments,
// closing braces) are absent from this listing; comments below hedge wherever
// they depend on those lines.
104constexpr pw::Result<utf::CodePointAndSize> ReadCodePoint(
105 std::string_view str) {
// NOTE(review): the condition guarding this return is not visible here --
// presumably an empty-input check, since str.front() is read next; confirm.
107 return pw::Status::InvalidArgument();
110 const uint8_t leading_byte =
static_cast<uint8_t
>(str.front());
111 size_t byte_count = 0;
// Initial value is above 0x10FFFF, so it would fail utf::IsValidCodepoint if
// no branch overwrote it.
112 uint32_t code_point = 0xFFFFFFFFu;
// Single byte (0x00..0x7F): the byte is the code point.
// NOTE(review): the byte_count assignment for this branch is not visible in
// this listing -- presumably 1; confirm.
114 if (leading_byte <= 0x7F) {
117 code_point = leading_byte;
118 }
else if (leading_byte <= 0xDF) {
// Two-byte form.
// NOTE(review): byte_count is compared below but its assignment for this
// branch is not visible in this listing -- presumably 2; confirm.
// NOTE(review): because only `<= 0xDF` is tested, lead bytes 0x80..0xBF
// (continuation pattern) and 0xC0/0xC1 also reach this branch, and no visible
// check rejects them or the overlong values they produce -- confirm intent.
120 if (str.size() < byte_count) {
121 return pw::Status::InvalidArgument();
// A continuation byte must have the top two bits equal to 10.
124 if ((str[1] & 0xC0) != 0x80) {
126 return pw::Status::InvalidArgument();
// Low 5 bits of the lead byte followed by the low 6 bits of the continuation.
128 code_point = (
static_cast<uint32_t
>(str[0] & 0x1F) << 6) +
129 static_cast<uint32_t
>(str[1] & 0x3F);
130 }
else if (leading_byte <= 0xEF) {
// Three-byte form.
// NOTE(review): byte_count assignment not visible -- presumably 3; confirm.
// NOTE(review): no minimum-value (overlong) check is visible for this form.
132 if (str.size() < byte_count) {
133 return pw::Status::InvalidArgument();
// Both trailing bytes must match the 10xxxxxx continuation pattern.
135 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80) {
137 return pw::Status::InvalidArgument();
// Low 4 bits of the lead byte, then 6 bits from each continuation byte.
140 code_point = (
static_cast<uint32_t
>(str[0] & 0x0F) << 12) +
141 (
static_cast<uint32_t
>(str[1] & 0x3F) << 6) +
142 static_cast<uint32_t
>(str[2] & 0x3F);
143 }
else if (leading_byte <= 0xF7) {
// Four-byte form.
// NOTE(review): byte_count assignment not visible -- presumably 4; confirm.
// NOTE(review): no minimum-value (overlong) check is visible for this form;
// values above 0x10FFFF are caught later by utf::IsValidCodepoint.
145 if (str.size() < byte_count) {
146 return pw::Status::InvalidArgument();
// All three trailing bytes must match the 10xxxxxx continuation pattern.
148 if ((str[1] & 0xC0) != 0x80 || (str[2] & 0xC0) != 0x80 ||
149 (str[3] & 0xC0) != 0x80) {
151 return pw::Status::InvalidArgument();
// Low 3 bits of the lead byte, then 6 bits from each continuation byte.
154 code_point = (
static_cast<uint32_t
>(str[0] & 0x07) << 18) +
155 (
static_cast<uint32_t
>(str[1] & 0x3F) << 12) +
156 (
static_cast<uint32_t
>(str[2] & 0x3F) << 6) +
157 static_cast<uint32_t
>(str[3] & 0x3F);
// NOTE(review): the branch header for this return is not visible -- presumably
// the final `else` for lead bytes above 0xF7; confirm.
159 return pw::Status::InvalidArgument();
// Range check on the decoded value: surrogates and values above 0x10FFFF
// yield OutOfRange instead of a result.
163 if (utf::IsValidCodepoint(code_point)) {
164 return utf::CodePointAndSize(code_point, byte_count);
167 return pw::Status::OutOfRange();
// Walks `str` one decoded code point at a time until it is empty, checking
// each with utf8::ReadCodePoint and utf::IsValidCharacter.
// NOTE(review): the body of the failure branch and the statement after the
// loop are not visible in this listing -- presumably `return false` and
// `return true` respectively; confirm.
171constexpr bool IsStringValid(std::string_view str) {
172 while (!str.empty()) {
173 auto rslt = utf8::ReadCodePoint(str);
// The short-circuit `||` ensures rslt is only dereferenced when it is ok().
174 if (!rslt.ok() || !utf::IsValidCharacter(rslt->code_point())) {
// Drop the bytes consumed by the code point just decoded.
177 str = str.substr(rslt->size());
// Member-initializer list: stores the size and moves the provided array into
// data_. (The constructor's signature line is not visible in this listing.)
186 : size_(size), data_(std::move(data)) {}
// Returns a view over the first size_ chars of data_. The view points into
// this object's own storage.
188 constexpr std::string_view as_view()
const {
return {data_.data(), size_}; }
// Fixed four-char storage; only the first size_ entries are exposed through
// as_view().
192 std::array<char, 4> data_;
// Encodes `code_point` as UTF-8, picking the sequence length from its
// magnitude: up to 0x7F, 0x7FF, 0xFFFF and 0x10FFFF for successive branches.
// Returns pw::Status::OutOfRange() for values above 0x10FFFF.
//
// NOTE(review): no check for the 0xD800..0xDFFF surrogate range is visible
// here, so such values appear to be encoded by the `<= 0xFFFF` branch --
// confirm whether that is intended.
// NOTE(review): some original lines (the one-byte branch body, the size
// arguments of the 3- and 4-byte results, closing braces) are absent from
// this listing.
221constexpr Result<EncodedCodePoint> EncodeCodePoint(uint32_t code_point) {
222 if (code_point <= 0x7F) {
// Two bytes: 110xxxxx 10xxxxxx.
225 if (code_point <= 0x7FF) {
226 return EncodedCodePoint{2,
227 {
static_cast<char>(0xC0 | (code_point >> 6)),
228 static_cast<char>(0x80 | (code_point & 0x3F))}};
// Lead byte 1110xxxx followed by two 10xxxxxx continuation bytes.
230 if (code_point <= 0xFFFF) {
231 return EncodedCodePoint{
233 {
static_cast<char>(0xE0 | (code_point >> 12)),
234 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
235 static_cast<char>(0x80 | (code_point & 0x3F))}};
// Lead byte 11110xxx followed by three 10xxxxxx continuation bytes.
237 if (code_point <= 0x10FFFF) {
238 return EncodedCodePoint{
240 {
static_cast<char>(0xF0 | (code_point >> 18)),
241 static_cast<char>(0x80 | ((code_point >> 12) & 0x3F)),
242 static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)),
243 static_cast<char>(0x80 | (code_point & 0x3F))}};
// Anything larger than 0x10FFFF cannot be encoded by the branches above.
246 return pw::Status::OutOfRange();
Definition: string_builder.h:87
Encapsulates the result of encoding a single code point as UTF-8.
Definition: utf_codecs.h:183
Definition: utf_codecs.h:57
constexpr uint32_t code_point() const
Returns the code point this represents.
Definition: utf_codecs.h:69
constexpr size_t size() const
Returns the number of bytes required to encode this codepoint.
Definition: utf_codecs.h:72
constexpr CodePointAndSize(uint32_t code_point, size_t size)
Creates a combined view of a code point and its encoded size.
Definition: utf_codecs.h:60
Provides basic helpers for reading and writing UTF-8 encoded strings.
Definition: alignment.h:27
pw::StringBuilder facilitates creating formatted strings in a fixed-sized buffer or in a pw::InlineString.