QChar::isSpace: optimize by lowering the upper limit check

Of all the Category groups, separators are the only one that currently
has assigned codepoints exclusively in the BMP. This allows us to lower
the upper-bound check from LastValidCodePoint to a category-specific
one. This will also cause the compiler to dead-code eliminate the check
inside of qGetProperty and emit only the BMP check of the property
tables:

    if (ucs4 < 0x11000)
        return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> 5] + (ucs4 & 0x1f)];

Pick-to: 6.10
Change-Id: I31eda5d79cc2c3560d90fffd74a546d1e7cda7bb
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
Thiago Macieira 2025-08-15 10:49:56 -07:00
parent 5b1e49a7e2
commit 038d127fe5
3 changed files with 14 additions and 3 deletions

View File

@ -830,8 +830,9 @@ bool QChar::isPrint(char32_t ucs4) noexcept
*/ */
bool QT_FASTCALL QChar::isSpace_helper(char32_t ucs4) noexcept bool QT_FASTCALL QChar::isSpace_helper(char32_t ucs4) noexcept
{ {
if (ucs4 > LastValidCodePoint) if (ucs4 > MaxSeparatorCodepoint)
return false; return false;
const int test = FLAG(Separator_Space) | const int test = FLAG(Separator_Space) |
FLAG(Separator_Line) | FLAG(Separator_Line) |
FLAG(Separator_Paragraph); FLAG(Separator_Paragraph);

View File

@ -8,6 +8,7 @@
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
namespace QUnicodeTables { namespace QUnicodeTables {
static constexpr char32_t MaxSeparatorCodepoint = 0x3000;
static constexpr unsigned short uc_property_trie[] = { static constexpr unsigned short uc_property_trie[] = {
// [0x0..0x11000) // [0x0..0x11000)

View File

@ -1306,6 +1306,7 @@ static QHash<int, int> combiningClassUsage;
static int maxLowerCaseDiff = 0; static int maxLowerCaseDiff = 0;
static int maxUpperCaseDiff = 0; static int maxUpperCaseDiff = 0;
static int maxTitleCaseDiff = 0; static int maxTitleCaseDiff = 0;
static int maxSeparatorCodepoint = 0;
static void readUnicodeData() static void readUnicodeData()
{ {
@ -1366,6 +1367,9 @@ static void readUnicodeData()
UnicodeData &data = UnicodeData::valueRef(codepoint); UnicodeData &data = UnicodeData::valueRef(codepoint);
data.p.category = categoryMap.value(properties[UD_Category], QChar::Other_NotAssigned); data.p.category = categoryMap.value(properties[UD_Category], QChar::Other_NotAssigned);
if (data.p.category == QChar::Separator_Space || data.p.category == QChar::Separator_Line
|| data.p.category == QChar::Separator_Paragraph)
maxSeparatorCodepoint = codepoint;
data.p.combiningClass = properties[UD_CombiningClass].toInt(); data.p.combiningClass = properties[UD_CombiningClass].toInt();
if (!combiningClassUsage.contains(data.p.combiningClass)) if (!combiningClassUsage.contains(data.p.combiningClass))
combiningClassUsage[data.p.combiningClass] = 1; combiningClassUsage[data.p.combiningClass] = 1;
@ -2986,7 +2990,12 @@ static QByteArray createPropertyInfo()
Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870 Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870
Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8))); Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
QByteArray out = "static constexpr unsigned short uc_property_trie[] = {\n"; QByteArray out;
out += "static constexpr char32_t MaxSeparatorCodepoint = 0x";
out += QByteArray::number(maxSeparatorCodepoint, 16);
out += ";\n";
out += "\nstatic constexpr unsigned short uc_property_trie[] = {\n";
// First write the map from blockId to indices of unique blocks: // First write the map from blockId to indices of unique blocks:
out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")"; out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) { for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
@ -3702,7 +3711,7 @@ int main(int, char **)
f.write(note); f.write(note);
f.write("#include \"qunicodetables_p.h\"\n\n"); f.write("#include \"qunicodetables_p.h\"\n\n");
f.write("QT_BEGIN_NAMESPACE\n\n"); f.write("QT_BEGIN_NAMESPACE\n\n");
f.write("namespace QUnicodeTables {\n\n"); f.write("namespace QUnicodeTables {\n");
f.write(properties); f.write(properties);
f.write(specialCases); f.write(specialCases);
f.write(compositions); f.write(compositions);