7#ifndef PXR_BASE_TF_UNICODE_UTILS_H
8#define PXR_BASE_TF_UNICODE_UTILS_H
15#include "pxr/base/tf/api.h"
22PXR_NAMESPACE_OPEN_SCOPE
48 static constexpr std::pair<uint32_t, uint32_t>
62 constexpr uint32_t AsUInt32()
const {
return _value; }
66 return left._value == right._value;
70 return left._value != right._value;
87 return static_cast<unsigned char>(value) < 128 ?
101 using iterator_category = std::forward_iterator_tag;
103 using difference_type = std::ptrdiff_t;
104 using pointer = void;
119 const std::string_view::const_iterator& it,
120 const std::string_view::const_iterator& end) : _it(it), _end(end) {
136 std::string_view::const_iterator
GetBase()
const
147 return (this->_it == rhs._it);
156 return (this->_it != rhs._it);
170 _EncodingLength increment = _GetEncodingLength();
179 auto isContinuation = [](
const char c) {
180 const auto uc =
static_cast<unsigned char>(c);
181 return (uc >=
static_cast<unsigned char>(
'\x80')) &&
182 (uc <
static_cast<unsigned char>(
'\xc0'));
184 while ((increment > 1) && !_IsPastTheEnd() && isContinuation(*_it)) {
208 return lhs._IsPastTheEnd();
211 friend bool operator==(PastTheEndSentinel lhs,
218 PastTheEndSentinel rhs)
220 return !(lhs == rhs);
222 friend bool operator!=(PastTheEndSentinel lhs,
225 return !(lhs == rhs);
229 using _EncodingLength =
unsigned char;
235 _EncodingLength _GetEncodingLength()
const
247 unsigned char x =
static_cast<unsigned char>(*_it);
252 else if ((x >= 0xc0) && (x < 0xe0))
256 else if ((x >= 0xe0) && (x < 0xf0))
260 else if ((x >= 0xf0) && (x < 0xf8))
276 TF_API uint32_t _GetCodePoint()
const;
280 bool _IsPastTheEnd()
const
285 std::string_view::const_iterator _it;
286 std::string_view::const_iterator _end;
340 inline const_iterator cbegin()
const
355 return _view.empty();
374 std::string_view _view;
419PXR_NAMESPACE_CLOSE_SCOPE
Low-level utilities for informing users of various internal and external diagnostic conditions.
Wrapper for a 32-bit code point value that can be encoded as UTF-8.
static constexpr std::pair< uint32_t, uint32_t > SurrogateRange
Values in this range (inclusive) cannot be constructed and will be replaced by the replacement code point.
constexpr TfUtf8CodePoint(uint32_t value)
Construct a UTF-8 valued code point, constrained by the maximum value and surrogate range.
static constexpr uint32_t MaximumValue
Values higher than this will be replaced with the replacement code point.
constexpr TfUtf8CodePoint()=default
Construct a code point initialized to the replacement value.
static constexpr uint32_t ReplacementValue
Code points that cannot be decoded or are outside of the valid range will be replaced with this value.
Defines an iterator over a UTF-8 encoded string that extracts unicode code point values.
bool operator!=(const TfUtf8CodePointIterator &rhs) const
Determines if two iterators are unequal.
friend bool operator==(const TfUtf8CodePointIterator &lhs, PastTheEndSentinel)
Checks if the lhs iterator is at or past the end of the underlying string_view.
value_type operator*() const
Retrieves the current UTF-8 character in the sequence as its Unicode code point value.
TfUtf8CodePointIterator & operator++()
Advances the iterator logically one UTF-8 character sequence in the string.
TfUtf8CodePointIterator(const std::string_view::const_iterator &it, const std::string_view::const_iterator &end)
Constructs an iterator that can read UTF-8 character sequences from the given starting string_view iterator it.
std::string_view::const_iterator GetBase() const
Retrieves the wrapped string iterator.
Model iteration ending when the underlying iterator's end condition has been met.
Wrapper for a UTF-8 encoded std::string_view that can be iterated over as code points instead of bytes.
bool empty() const
Returns true if the underlying view is empty.
TfUtf8CodePointIterator::PastTheEndSentinel end() const
The sentinel will compare as equal to any iterator at the end of the underlying string_view
TfUtf8CodePointIterator::PastTheEndSentinel cend() const
The sentinel will compare as equal to any iterator at the end of the underlying string_view
const_iterator EndAsIterator() const
Returns an iterator of the same type as begin that identifies the end of the string.
GF_API std::ostream & operator<<(std::ostream &, const GfBBox3d &)
Output a GfBBox3d using the format [(range) matrix zeroArea].
#define TF_DEV_AXIOM(cond)
The same as TF_AXIOM, but compiled only in dev builds.
constexpr TfUtf8CodePoint TfUtf8InvalidCodePoint
The replacement code point can be used to signal that a code point could not be decoded and needed to be replaced.
TF_API bool TfIsUtf8CodePointXidContinue(uint32_t codePoint)
Determines whether the given Unicode codePoint is in the XID_Continue character class.
constexpr TfUtf8CodePoint TfUtf8CodePointFromAscii(const char value)
Constructs a TfUtf8CodePoint from an ASCII character (0-127).
TF_API bool TfIsUtf8CodePointXidStart(uint32_t codePoint)
Determines whether the given Unicode codePoint is in the XID_Start character class.