From d03ad0e3da977c5f68b9e567ee6fc0870712a5d8 Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Wed, 27 Aug 2025 16:56:40 +0200 Subject: [PATCH] util/unicode: remove replace() calls from remaining read*() functions Some users of the split()ed value handled intervening whitespace already: - the first field is piped through parseHexRange(), which does - the second field was missing the trimmed() call before lookup. Added. All looked-up values are space-free (cf. resp. init*() functions), so that's enough, too. As a consequence, we can accept the lines by reference to const QByteArray now and, now that all lambdas have the same signature, change readUnicodeFile() from a template to a regular function taking qxp::function_ref callbacks. Amends a794c5e287381bd056008b20ae55f9b1e0acf138 (readEmojiData()) and the start of the public history (rest). Pick-to: 6.10 6.9 6.8 6.5 Change-Id: I442855a183552aa90d24810023793e6464b18162 Reviewed-by: Edward Welbourne --- util/unicode/main.cpp | 45 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index ee6a593a353..853c8593ad9 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -16,6 +16,8 @@ #include #endif +#include + #if QT_VERSION < QT_VERSION_CHECK(6, 9, 0) // QSpan, QIODevice::readLineInto() # error This tool needs Qt >= 6.9, even if you are building tables for Qt 6.5 or 6.8. @@ -1316,8 +1318,7 @@ static int maxUpperCaseDiff = 0; static int maxTitleCaseDiff = 0; static int maxSeparatorCodepoint = 0; -template -void readUnicodeFile(const char *fileName, LineConsumer yield) +void readUnicodeFile(const char *fileName, qxp::function_ref yield) { qDebug("Reading %s", fileName); @@ -1782,15 +1783,13 @@ static QByteArray createNormalizationCorrections() static void readLineBreak() { readUnicodeFile("LineBreak.txt", - [] (QByteArray &line, int lineNo) { - line.replace(" ", ""); - + [] (const QByteArray &line, int lineNo) { QList l = line.split(';'); Q_ASSERT(l.size() == 2); const auto [from, to] = parseHexRange(l[0], lineNo); - LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned); + LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned); if (lb == LineBreak_Unassigned) qFatal("unassigned line break class: %s", l[1].constData()); @@ -1893,16 +1892,13 @@ static void readCaseFolding() static void readGraphemeBreak() { readUnicodeFile("GraphemeBreakProperty.txt", - [] (QByteArray &line, int lineNo) { - - line.replace(" ", ""); - + [] (const QByteArray &line, int lineNo) { QList l = line.split(';'); Q_ASSERT(l.size() == 2); const auto [from, to] = parseHexRange(l[0], lineNo); - GraphemeBreakClass brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned); + GraphemeBreakClass brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned); if (brk == GraphemeBreak_Unassigned) qFatal("unassigned grapheme break class: %s", l[1].constData()); @@ -1916,13 +1912,11 @@ static void readGraphemeBreak() static void readEmojiData() { readUnicodeFile("emoji-data.txt", - [] (QByteArray &line, int lineNo) { - line.replace(" ", ""); - + [] (const QByteArray &line, int lineNo) { QList l = line.split(';'); Q_ASSERT(l.size() == 2); - EmojiFlags emojiFlags = emojiFlagsMap.value(l[1], EmojiFlags::NoEmoji); + EmojiFlags emojiFlags = emojiFlagsMap.value(l[1].trimmed(), EmojiFlags::NoEmoji); if (emojiFlags == EmojiFlags::NoEmoji) return; @@ -1945,15 +1939,13 @@ static void readEmojiData() static void readWordBreak() { readUnicodeFile("WordBreakProperty.txt", - [] (QByteArray &line, int lineNo) { - line.replace(" ", ""); - + [] (const QByteArray &line, int lineNo) { QList l = line.split(';'); Q_ASSERT(l.size() == 2); const auto [from, to] = parseHexRange(l[0], lineNo); - WordBreakClass brk = word_break_map.value(l[1], WordBreak_Unassigned); + WordBreakClass brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned); if (brk == WordBreak_Unassigned) qFatal("unassigned word break class: %s", l[1].constData()); @@ -1976,15 +1968,13 @@ static void readWordBreak() static void readSentenceBreak() { readUnicodeFile("SentenceBreakProperty.txt", - [] (QByteArray &line, int lineNo) { - line.replace(" ", ""); - + [] (const QByteArray &line, int lineNo) { QList l = line.split(';'); Q_ASSERT(l.size() == 2); const auto [from, to] = parseHexRange(l[0], lineNo); - SentenceBreakClass brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned); + SentenceBreakClass brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned); if (brk == SentenceBreak_Unassigned) qFatal("unassigned sentence break class: %s", l[1].constData()); @@ -2188,16 +2178,11 @@ static void readBlocks() static void readScripts() { readUnicodeFile("Scripts.txt", - [] (QByteArray &line, int lineNo) { - line.replace(" ", ""); - - if (line.isEmpty()) - return; - + [] (const QByteArray &line, int lineNo) { int semicolon = line.indexOf(';'); Q_ASSERT(semicolon >= 0); QByteArray codePoints = line.left(semicolon); - QByteArray scriptName = line.mid(semicolon + 1); + QByteArray scriptName = line.mid(semicolon + 1).trimmed(); const auto [first, last] = parseHexRange(codePoints, lineNo);