From 038d127fe573128b987c700d6173990839ee33ad Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 15 Aug 2025 10:49:56 -0700 Subject: [PATCH] QChar::isSpace: optimize by lowering the upper limit check Of all the Category categories, separators are the only ones that currently have assigned codepoints exclusively in the BMP. This allows us to lower the maximum check from the LastValidCodePoint to a category-specific one. This will also cause the compiler to dead-code eliminate the check inside of qGetProperty and emit only the BMP check of the property tables: if (ucs4 < 0x11000) return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> 5] + (ucs4 & 0x1f)]; Pick-to: 6.10 Change-Id: I31eda5d79cc2c3560d90fffd74a546d1e7cda7bb Reviewed-by: Edward Welbourne --- src/corelib/text/qchar.cpp | 3 ++- src/corelib/text/qunicodetables.cpp | 1 + util/unicode/main.cpp | 13 +++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp index 052dba39e9c..d7006281614 100644 --- a/src/corelib/text/qchar.cpp +++ b/src/corelib/text/qchar.cpp @@ -830,8 +830,9 @@ bool QChar::isPrint(char32_t ucs4) noexcept */ bool QT_FASTCALL QChar::isSpace_helper(char32_t ucs4) noexcept { - if (ucs4 > LastValidCodePoint) + if (ucs4 > MaxSeparatorCodepoint) return false; + const int test = FLAG(Separator_Space) | FLAG(Separator_Line) | FLAG(Separator_Paragraph); diff --git a/src/corelib/text/qunicodetables.cpp b/src/corelib/text/qunicodetables.cpp index cf4fdb199ea..fd491532f1a 100644 --- a/src/corelib/text/qunicodetables.cpp +++ b/src/corelib/text/qunicodetables.cpp @@ -8,6 +8,7 @@ QT_BEGIN_NAMESPACE namespace QUnicodeTables { +static constexpr char32_t MaxSeparatorCodepoint = 0x3000; static constexpr unsigned short uc_property_trie[] = { // [0x0..0x11000) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index e5d1ad47e08..46a39d2ed96 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -1306,6 +1306,7 @@ 
static QHash combiningClassUsage; static int maxLowerCaseDiff = 0; static int maxUpperCaseDiff = 0; static int maxTitleCaseDiff = 0; +static int maxSeparatorCodepoint = 0; static void readUnicodeData() { @@ -1366,6 +1367,9 @@ static void readUnicodeData() UnicodeData &data = UnicodeData::valueRef(codepoint); data.p.category = categoryMap.value(properties[UD_Category], QChar::Other_NotAssigned); + if (data.p.category == QChar::Separator_Space || data.p.category == QChar::Separator_Line + || data.p.category == QChar::Separator_Paragraph) + maxSeparatorCodepoint = codepoint; data.p.combiningClass = properties[UD_CombiningClass].toInt(); if (!combiningClassUsage.contains(data.p.combiningClass)) combiningClassUsage[data.p.combiningClass] = 1; @@ -2986,7 +2990,12 @@ static QByteArray createPropertyInfo() Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870 Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8))); - QByteArray out = "static constexpr unsigned short uc_property_trie[] = {\n"; + QByteArray out; + out += "static constexpr char32_t MaxSeparatorCodepoint = 0x"; + out += QByteArray::number(maxSeparatorCodepoint, 16); + out += ";\n"; + + out += "\nstatic constexpr unsigned short uc_property_trie[] = {\n"; // First write the map from blockId to indices of unique blocks: out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")"; for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) { @@ -3702,7 +3711,7 @@ int main(int, char **) f.write(note); f.write("#include \"qunicodetables_p.h\"\n\n"); f.write("QT_BEGIN_NAMESPACE\n\n"); - f.write("namespace QUnicodeTables {\n\n"); + f.write("namespace QUnicodeTables {\n"); f.write(properties); f.write(specialCases); f.write(compositions);