From 0738a0dd5eb91c3e22c98f76f3aecdd1a30c149f Mon Sep 17 00:00:00 2001 From: Marc Mutz Date: Wed, 27 Aug 2025 18:10:37 +0200 Subject: [PATCH] util/unicode: remove replace('_', "") from readScripts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason, the code stored the official Unicode script tags without their intervening underscores, removing underscores from the input before attempting to match, which works, as long as Unicode stays consistent in spelling properties "Like_This". Relying on that is brittle, though, seeing as a tag without intervening underscore (SignWriting) already slipped into the database, potentially matching a sought Sign_Writing. It's highly unlikely that Unicode will start to use property names that differ only by their use of underscore, but why risk it, and why confuse readers of code by using a different sought string, compared to what's in the files? Fix by storing the tags unaltered and leaving the underscores in the input alone, too. Amends the start of the public history. 
Pick-to: 6.10 6.9 6.8 6.5 Change-Id: I5870a35812cb3fc0b28888cb09e9f42661684a26 Reviewed-by: Mårten Nordheim --- util/unicode/main.cpp | 99 +++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 3cd62e6b94d..0e127b73736 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -697,7 +697,7 @@ static void initScriptMap() { QChar::Script_Hangul, "Hangul" }, { QChar::Script_Ethiopic, "Ethiopic" }, { QChar::Script_Cherokee, "Cherokee" }, - { QChar::Script_CanadianAboriginal, "CanadianAboriginal" }, + { QChar::Script_CanadianAboriginal, "Canadian_Aboriginal" }, { QChar::Script_Ogham, "Ogham" }, { QChar::Script_Runic, "Runic" }, { QChar::Script_Khmer, "Khmer" }, @@ -707,7 +707,7 @@ static void initScriptMap() { QChar::Script_Bopomofo, "Bopomofo" }, { QChar::Script_Han, "Han" }, { QChar::Script_Yi, "Yi" }, - { QChar::Script_OldItalic, "OldItalic" }, + { QChar::Script_OldItalic, "Old_Italic" }, { QChar::Script_Gothic, "Gothic" }, { QChar::Script_Deseret, "Deseret" }, { QChar::Script_Tagalog, "Tagalog" }, @@ -717,8 +717,8 @@ static void initScriptMap() { QChar::Script_Coptic, "Coptic" }, // 4.0 { QChar::Script_Limbu, "Limbu" }, - { QChar::Script_TaiLe, "TaiLe" }, - { QChar::Script_LinearB, "LinearB" }, + { QChar::Script_TaiLe, "Tai_Le" }, + { QChar::Script_LinearB, "Linear_B" }, { QChar::Script_Ugaritic, "Ugaritic" }, { QChar::Script_Shavian, "Shavian" }, { QChar::Script_Osmanya, "Osmanya" }, @@ -726,45 +726,45 @@ static void initScriptMap() { QChar::Script_Braille, "Braille" }, // 4.1 { QChar::Script_Buginese, "Buginese" }, - { QChar::Script_NewTaiLue, "NewTaiLue" }, + { QChar::Script_NewTaiLue, "New_Tai_Lue" }, { QChar::Script_Glagolitic, "Glagolitic" }, { QChar::Script_Tifinagh, "Tifinagh" }, - { QChar::Script_SylotiNagri, "SylotiNagri" }, - { QChar::Script_OldPersian, "OldPersian" }, + { QChar::Script_SylotiNagri, "Syloti_Nagri" }, + { 
QChar::Script_OldPersian, "Old_Persian" }, { QChar::Script_Kharoshthi, "Kharoshthi" }, // 5.0 { QChar::Script_Balinese, "Balinese" }, { QChar::Script_Cuneiform, "Cuneiform" }, { QChar::Script_Phoenician, "Phoenician" }, - { QChar::Script_PhagsPa, "PhagsPa" }, + { QChar::Script_PhagsPa, "Phags_Pa" }, { QChar::Script_Nko, "Nko" }, // 5.1 { QChar::Script_Sundanese, "Sundanese" }, { QChar::Script_Lepcha, "Lepcha" }, - { QChar::Script_OlChiki, "OlChiki" }, + { QChar::Script_OlChiki, "Ol_Chiki" }, { QChar::Script_Vai, "Vai" }, { QChar::Script_Saurashtra, "Saurashtra" }, - { QChar::Script_KayahLi, "KayahLi" }, + { QChar::Script_KayahLi, "Kayah_Li" }, { QChar::Script_Rejang, "Rejang" }, { QChar::Script_Lycian, "Lycian" }, { QChar::Script_Carian, "Carian" }, { QChar::Script_Lydian, "Lydian" }, { QChar::Script_Cham, "Cham" }, // 5.2 - { QChar::Script_TaiTham, "TaiTham" }, - { QChar::Script_TaiViet, "TaiViet" }, + { QChar::Script_TaiTham, "Tai_Tham" }, + { QChar::Script_TaiViet, "Tai_Viet" }, { QChar::Script_Avestan, "Avestan" }, - { QChar::Script_EgyptianHieroglyphs, "EgyptianHieroglyphs" }, + { QChar::Script_EgyptianHieroglyphs, "Egyptian_Hieroglyphs" }, { QChar::Script_Samaritan, "Samaritan" }, { QChar::Script_Lisu, "Lisu" }, { QChar::Script_Bamum, "Bamum" }, { QChar::Script_Javanese, "Javanese" }, - { QChar::Script_MeeteiMayek, "MeeteiMayek" }, - { QChar::Script_ImperialAramaic, "ImperialAramaic" }, - { QChar::Script_OldSouthArabian, "OldSouthArabian" }, - { QChar::Script_InscriptionalParthian, "InscriptionalParthian" }, - { QChar::Script_InscriptionalPahlavi, "InscriptionalPahlavi" }, - { QChar::Script_OldTurkic, "OldTurkic" }, + { QChar::Script_MeeteiMayek, "Meetei_Mayek" }, + { QChar::Script_ImperialAramaic, "Imperial_Aramaic" }, + { QChar::Script_OldSouthArabian, "Old_South_Arabian" }, + { QChar::Script_InscriptionalParthian, "Inscriptional_Parthian" }, + { QChar::Script_InscriptionalPahlavi, "Inscriptional_Pahlavi" }, + { QChar::Script_OldTurkic, "Old_Turkic" }, { 
QChar::Script_Kaithi, "Kaithi" }, // 6.0 { QChar::Script_Batak, "Batak" }, @@ -772,42 +772,42 @@ static void initScriptMap() { QChar::Script_Mandaic, "Mandaic" }, // 6.1 { QChar::Script_Chakma, "Chakma" }, - { QChar::Script_MeroiticCursive, "MeroiticCursive" }, - { QChar::Script_MeroiticHieroglyphs, "MeroiticHieroglyphs" }, + { QChar::Script_MeroiticCursive, "Meroitic_Cursive" }, + { QChar::Script_MeroiticHieroglyphs, "Meroitic_Hieroglyphs" }, { QChar::Script_Miao, "Miao" }, { QChar::Script_Sharada, "Sharada" }, - { QChar::Script_SoraSompeng, "SoraSompeng" }, + { QChar::Script_SoraSompeng, "Sora_Sompeng" }, { QChar::Script_Takri, "Takri" }, // 7.0 - { QChar::Script_CaucasianAlbanian, "CaucasianAlbanian" }, - { QChar::Script_BassaVah, "BassaVah" }, + { QChar::Script_CaucasianAlbanian, "Caucasian_Albanian" }, + { QChar::Script_BassaVah, "Bassa_Vah" }, { QChar::Script_Duployan, "Duployan" }, { QChar::Script_Elbasan, "Elbasan" }, { QChar::Script_Grantha, "Grantha" }, - { QChar::Script_PahawhHmong, "PahawhHmong" }, + { QChar::Script_PahawhHmong, "Pahawh_Hmong" }, { QChar::Script_Khojki, "Khojki" }, - { QChar::Script_LinearA, "LinearA" }, + { QChar::Script_LinearA, "Linear_A" }, { QChar::Script_Mahajani, "Mahajani" }, { QChar::Script_Manichaean, "Manichaean" }, - { QChar::Script_MendeKikakui, "MendeKikakui" }, + { QChar::Script_MendeKikakui, "Mende_Kikakui" }, { QChar::Script_Modi, "Modi" }, { QChar::Script_Mro, "Mro" }, - { QChar::Script_OldNorthArabian, "OldNorthArabian" }, + { QChar::Script_OldNorthArabian, "Old_North_Arabian" }, { QChar::Script_Nabataean, "Nabataean" }, { QChar::Script_Palmyrene, "Palmyrene" }, - { QChar::Script_PauCinHau, "PauCinHau" }, - { QChar::Script_OldPermic, "OldPermic" }, - { QChar::Script_PsalterPahlavi, "PsalterPahlavi" }, + { QChar::Script_PauCinHau, "Pau_Cin_Hau" }, + { QChar::Script_OldPermic, "Old_Permic" }, + { QChar::Script_PsalterPahlavi, "Psalter_Pahlavi" }, { QChar::Script_Siddham, "Siddham" }, { QChar::Script_Khudawadi, 
"Khudawadi" }, { QChar::Script_Tirhuta, "Tirhuta" }, - { QChar::Script_WarangCiti, "WarangCiti" }, + { QChar::Script_WarangCiti, "Warang_Citi" }, // 8.0 { QChar::Script_Ahom, "Ahom" }, - { QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" }, + { QChar::Script_AnatolianHieroglyphs, "Anatolian_Hieroglyphs" }, { QChar::Script_Hatran, "Hatran" }, { QChar::Script_Multani, "Multani" }, - { QChar::Script_OldHungarian, "OldHungarian" }, + { QChar::Script_OldHungarian, "Old_Hungarian" }, { QChar::Script_SignWriting, "SignWriting" }, // 9.0 { QChar::Script_Adlam, "Adlam" }, @@ -817,47 +817,47 @@ static void initScriptMap() { QChar::Script_Osage, "Osage" }, { QChar::Script_Tangut, "Tangut" }, // 10.0 - { QChar::Script_MasaramGondi, "MasaramGondi" }, + { QChar::Script_MasaramGondi, "Masaram_Gondi" }, { QChar::Script_Nushu, "Nushu" }, { QChar::Script_Soyombo, "Soyombo" }, - { QChar::Script_ZanabazarSquare, "ZanabazarSquare" }, + { QChar::Script_ZanabazarSquare, "Zanabazar_Square" }, // 12.1 { QChar::Script_Dogra, "Dogra" }, - { QChar::Script_GunjalaGondi, "GunjalaGondi" }, - { QChar::Script_HanifiRohingya, "HanifiRohingya" }, + { QChar::Script_GunjalaGondi, "Gunjala_Gondi" }, + { QChar::Script_HanifiRohingya, "Hanifi_Rohingya" }, { QChar::Script_Makasar, "Makasar" }, { QChar::Script_Medefaidrin, "Medefaidrin" }, - { QChar::Script_OldSogdian, "OldSogdian" }, + { QChar::Script_OldSogdian, "Old_Sogdian" }, { QChar::Script_Sogdian, "Sogdian" }, { QChar::Script_Elymaic, "Elymaic" }, { QChar::Script_Nandinagari, "Nandinagari" }, - { QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" }, + { QChar::Script_NyiakengPuachueHmong, "Nyiakeng_Puachue_Hmong" }, { QChar::Script_Wancho, "Wancho" }, // 13.0 { QChar::Script_Chorasmian, "Chorasmian" }, - { QChar::Script_DivesAkuru, "DivesAkuru" }, - { QChar::Script_KhitanSmallScript, "KhitanSmallScript" }, + { QChar::Script_DivesAkuru, "Dives_Akuru" }, + { QChar::Script_KhitanSmallScript, "Khitan_Small_Script" }, { 
QChar::Script_Yezidi, "Yezidi" }, // 14.0 - { QChar::Script_CyproMinoan, "CyproMinoan"}, - { QChar::Script_OldUyghur, "OldUyghur"}, + { QChar::Script_CyproMinoan, "Cypro_Minoan"}, + { QChar::Script_OldUyghur, "Old_Uyghur"}, { QChar::Script_Tangsa, "Tangsa"}, { QChar::Script_Toto, "Toto"}, { QChar::Script_Vithkuqi, "Vithkuqi"}, // 15.0 { QChar::Script_Kawi, "Kawi"}, - { QChar::Script_NagMundari, "NagMundari"}, + { QChar::Script_NagMundari, "Nag_Mundari"}, // 16.0 { QChar::Script_Garay, "Garay"}, - { QChar::Script_GurungKhema, "GurungKhema"}, - { QChar::Script_KiratRai, "KiratRai"}, - { QChar::Script_OlOnal, "OlOnal"}, + { QChar::Script_GurungKhema, "Gurung_Khema"}, + { QChar::Script_KiratRai, "Kirat_Rai"}, + { QChar::Script_OlOnal, "Ol_Onal"}, { QChar::Script_Sunuwar, "Sunuwar"}, { QChar::Script_Todhri, "Todhri"}, - { QChar::Script_TuluTigalari, "TuluTigalari"}, + { QChar::Script_TuluTigalari, "Tulu_Tigalari"}, // unhandled { QChar::Script_Unknown, 0 } @@ -2195,7 +2195,6 @@ static void readScripts() readUnicodeFile("Scripts.txt", [] (QByteArray &line, int lineNo) { line.replace(" ", ""); - line.replace("_", ""); if (line.isEmpty()) return;