Skip to content

Commit

Permalink
ICU-22843 Enable constructing UnicodeString from literal in fixed time.
Browse files Browse the repository at this point in the history
When a string literal is passed to any of the legacy constructors that
take just a plain pointer to a UTF-16 string, the constructor has to
iterate through the string to find its length, even though this length
was known to the compiler (which just has no way of passing it on to the
constructor).

But when the new templated string view constructor is called instead,
the compiler can use the known length of a string literal to directly
create a string view of the correct size and pass this on to the
constructor.

By replacing the legacy constructors with the new constructor this is
made the default behaviour.
  • Loading branch information
roubert committed Sep 3, 2024
1 parent 045350e commit a7678ec
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 67 deletions.
46 changes: 43 additions & 3 deletions icu4c/source/common/unicode/char16ptr.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,13 +309,19 @@ inline OldUChar *toOldUCharPtr(char16_t *p) {

#ifndef U_FORCE_HIDE_INTERNAL_API
/**
* Is T convertible to a std::u16string_view or to a 16-bit std::wstring_view?
* Is T convertible to a std::u16string_view or some other 16-bit string view?
* @internal
*/
template<typename T>
constexpr bool ConvertibleToU16StringView =
std::is_convertible_v<T, std::u16string_view> ||
(U_SIZEOF_WCHAR_T==2 && std::is_convertible_v<T, std::wstring_view>);
std::is_convertible_v<T, std::u16string_view>
#if !U_CHAR16_IS_TYPEDEF
|| std::is_convertible_v<T, std::basic_string_view<uint16_t>>
#endif
#if U_SIZEOF_WCHAR_T==2
|| std::is_convertible_v<T, std::wstring_view>
#endif
;

namespace internal {
/**
Expand All @@ -324,6 +330,17 @@ namespace internal {
*/
inline std::u16string_view toU16StringView(std::u16string_view sv) {
    // The argument already has the target type, so it is returned unchanged.
    return sv;
}

#if !U_CHAR16_IS_TYPEDEF
/**
 * Converts a std::basic_string_view<uint16_t> into a std::u16string_view
 * that refers to the same data pointer and has the same length.
 * This is basically undefined behavior, but the conversion is
 * sometimes necessary.
 * @internal
 */
inline std::u16string_view toU16StringView(std::basic_string_view<uint16_t> sv) {
    // ConstChar16Ptr wraps the uint16_t pointer so it can be used as a const char16_t pointer.
    const char16_t *units = ConstChar16Ptr(sv.data());
    return std::u16string_view(units, sv.length());
}
#endif

#if U_SIZEOF_WCHAR_T==2
/**
* Basically undefined behavior but sometimes necessary conversion
Expand All @@ -334,6 +351,29 @@ inline std::u16string_view toU16StringView(std::wstring_view sv) {
return { ConstChar16Ptr(sv.data()), sv.length() };
}
#endif

/**
 * Overload for arguments that are not pointers: simply forwards
 * the argument to toU16StringView() and returns its result.
 * @internal
 */
template <typename T,
          typename = std::enable_if_t<!std::is_pointer_v<std::remove_reference_t<T>>>>
inline std::u16string_view toU16StringViewNullable(const T& text) {
    const std::u16string_view view = toU16StringView(text);
    return view;
}

/**
 * Overload for pointer arguments: a null pointer yields an empty view,
 * any other pointer is forwarded to toU16StringView().
 * The extra defaulted template parameter keeps this template's signature
 * distinct from an overload that differs only in its enable_if condition.
 * @internal
 */
template <typename T,
          typename = std::enable_if_t<std::is_pointer_v<std::remove_reference_t<T>>>,
          typename = void>
inline std::u16string_view toU16StringViewNullable(const T& text) {
    if (text != nullptr) {
        return toU16StringView(text);
    }
    // For backward compatibility, a null pointer is treated as an empty string.
    return {};
}

} // internal
#endif // U_FORCE_HIDE_INTERNAL_API

Expand Down
99 changes: 40 additions & 59 deletions icu4c/source/common/unicode/unistr.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,11 @@ class UnicodeStringAppendable; // unicode/appendable.h
* this macro was provided for portability and efficiency when
* initializing UnicodeStrings from literals.
*
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* The string parameter must be a C string literal.
Expand Down Expand Up @@ -335,13 +333,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* which is, or which is implicitly convertible to,
* a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
*
* For performance, you can use std::u16string_view literals with compile-time
* For performance, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str = ...;
* if (str == u"literal"sv) { ... }
* if (str == u"literal") { ... }
* \endcode
* @param text The string view to compare to this string.
* @return true if `text` contains the same characters as this one, false otherwise.
Expand Down Expand Up @@ -3080,6 +3076,7 @@ class U_COMMON_API UnicodeString : public Replaceable
*/
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);

#ifdef U_HIDE_DRAFT_API
/**
* char16_t* constructor.
*
Expand All @@ -3088,20 +3085,19 @@ class U_COMMON_API UnicodeString : public Replaceable
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString. `text`
* must be NUL (U+0000) terminated.
* @stable ICU 2.0
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
UnicodeString(text, -1) {}

#if !U_CHAR16_IS_TYPEDEF
/**
Expand All @@ -3113,20 +3109,18 @@ class U_COMMON_API UnicodeString : public Replaceable
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
UnicodeString(ConstChar16Ptr(text), -1) {}
#endif

#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
Expand All @@ -3140,21 +3134,20 @@ class U_COMMON_API UnicodeString : public Replaceable
* on the compiler command line or similar.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text NUL-terminated UTF-16 string
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
UnicodeString(ConstChar16Ptr(text), -1) {}
#endif
#endif // U_HIDE_DRAFT_API

/**
* nullptr_t constructor.
Expand All @@ -3172,13 +3165,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* char16_t* constructor.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text The characters to place in the UnicodeString.
Expand All @@ -3195,13 +3186,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* Delegates to UnicodeString(const char16_t *, int32_t).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text UTF-16 string
Expand All @@ -3219,13 +3208,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* Delegates to UnicodeString(const char16_t *, int32_t).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param text UTF-16 string
Expand Down Expand Up @@ -3259,9 +3246,9 @@ class U_COMMON_API UnicodeString : public Replaceable
* @draft ICU 76
*/
template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
explicit inline UnicodeString(const S &text) {
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
fUnion.fFields.fLengthAndFlags = kShortString;
doAppend(internal::toU16StringView(text));
doAppend(internal::toU16StringViewNullable(text));
}
#endif // U_HIDE_DRAFT_API

Expand All @@ -3280,13 +3267,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* so that both strings then alias the same readonly-text.
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param isTerminated specifies if `text` is `NUL`-terminated.
Expand Down Expand Up @@ -3369,13 +3354,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* the constructor that takes a US_INV (for its enum EInvariant).
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* It is recommended to mark this constructor "explicit" by
Expand Down Expand Up @@ -3485,13 +3468,11 @@ class U_COMMON_API UnicodeString : public Replaceable
* \endcode
*
* Note, for string literals:
* Since C++17 and ICU 76, you can use std::u16string_view literals with compile-time
* Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time
* length determination:
* \code
* #include &lt;string_view&gt;
* using namespace std::string_view_literals;
* UnicodeString str(u"literal"sv);
* if (str == u"other literal"sv) { ... }
* UnicodeString str(u"literal");
* if (str == u"other literal") { ... }
* \endcode
*
* @param src String using only invariant characters.
Expand Down
5 changes: 0 additions & 5 deletions icu4c/source/common/unistr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,6 @@ UnicodeString::UnicodeString(UChar32 ch) {
}
}

// Out-of-line constructor taking a plain pointer to char16_t text.
// It first marks the new object with the kShortString flag value and
// then appends `text` through doAppend().
UnicodeString::UnicodeString(const char16_t *text) {
// Initialize the length/flags field before appending anything.
fUnion.fFields.fLengthAndFlags = kShortString;
// Start offset 0, length -1.
// NOTE(review): -1 presumably asks doAppend() to determine the length of
// the NUL-terminated text itself; confirm against doAppend().
doAppend(text, 0, -1);
}

UnicodeString::UnicodeString(const char16_t *text,
int32_t textLength) {
fUnion.fFields.fLengthAndFlags = kShortString;
Expand Down

0 comments on commit a7678ec

Please sign in to comment.