util/unicode: remove replace('_', "") from readScripts()

For some reason, the code stored the official Unicode script tags
without their intervening underscores, removing underscores from the
input before attempting to match, which works as long as Unicode
stays consistent in spelling properties "Like_This".

Relying on that is brittle, though, seeing as a tag without an intervening
underscore (SignWriting) already slipped into the database, potentially
matching a sought Sign_Writing. It's highly unlikely that Unicode will
start to use property names that differ only by their use of underscore,
but why risk it, and why confuse readers of code by using a different
sought string, compared to what's in the files?

Fix by storing the tags unaltered and leaving the underscores in the
input alone, too.

Amends the start of the public history.

Pick-to: 6.10 6.9 6.8 6.5
Change-Id: I5870a35812cb3fc0b28888cb09e9f42661684a26
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Marc Mutz 2025-08-27 18:10:37 +02:00
parent d183bbe184
commit 0738a0dd5e
1 changed files with 49 additions and 50 deletions

View File

@ -697,7 +697,7 @@ static void initScriptMap()
{ QChar::Script_Hangul, "Hangul" }, { QChar::Script_Hangul, "Hangul" },
{ QChar::Script_Ethiopic, "Ethiopic" }, { QChar::Script_Ethiopic, "Ethiopic" },
{ QChar::Script_Cherokee, "Cherokee" }, { QChar::Script_Cherokee, "Cherokee" },
{ QChar::Script_CanadianAboriginal, "CanadianAboriginal" }, { QChar::Script_CanadianAboriginal, "Canadian_Aboriginal" },
{ QChar::Script_Ogham, "Ogham" }, { QChar::Script_Ogham, "Ogham" },
{ QChar::Script_Runic, "Runic" }, { QChar::Script_Runic, "Runic" },
{ QChar::Script_Khmer, "Khmer" }, { QChar::Script_Khmer, "Khmer" },
@ -707,7 +707,7 @@ static void initScriptMap()
{ QChar::Script_Bopomofo, "Bopomofo" }, { QChar::Script_Bopomofo, "Bopomofo" },
{ QChar::Script_Han, "Han" }, { QChar::Script_Han, "Han" },
{ QChar::Script_Yi, "Yi" }, { QChar::Script_Yi, "Yi" },
{ QChar::Script_OldItalic, "OldItalic" }, { QChar::Script_OldItalic, "Old_Italic" },
{ QChar::Script_Gothic, "Gothic" }, { QChar::Script_Gothic, "Gothic" },
{ QChar::Script_Deseret, "Deseret" }, { QChar::Script_Deseret, "Deseret" },
{ QChar::Script_Tagalog, "Tagalog" }, { QChar::Script_Tagalog, "Tagalog" },
@ -717,8 +717,8 @@ static void initScriptMap()
{ QChar::Script_Coptic, "Coptic" }, { QChar::Script_Coptic, "Coptic" },
// 4.0 // 4.0
{ QChar::Script_Limbu, "Limbu" }, { QChar::Script_Limbu, "Limbu" },
{ QChar::Script_TaiLe, "TaiLe" }, { QChar::Script_TaiLe, "Tai_Le" },
{ QChar::Script_LinearB, "LinearB" }, { QChar::Script_LinearB, "Linear_B" },
{ QChar::Script_Ugaritic, "Ugaritic" }, { QChar::Script_Ugaritic, "Ugaritic" },
{ QChar::Script_Shavian, "Shavian" }, { QChar::Script_Shavian, "Shavian" },
{ QChar::Script_Osmanya, "Osmanya" }, { QChar::Script_Osmanya, "Osmanya" },
@ -726,45 +726,45 @@ static void initScriptMap()
{ QChar::Script_Braille, "Braille" }, { QChar::Script_Braille, "Braille" },
// 4.1 // 4.1
{ QChar::Script_Buginese, "Buginese" }, { QChar::Script_Buginese, "Buginese" },
{ QChar::Script_NewTaiLue, "NewTaiLue" }, { QChar::Script_NewTaiLue, "New_Tai_Lue" },
{ QChar::Script_Glagolitic, "Glagolitic" }, { QChar::Script_Glagolitic, "Glagolitic" },
{ QChar::Script_Tifinagh, "Tifinagh" }, { QChar::Script_Tifinagh, "Tifinagh" },
{ QChar::Script_SylotiNagri, "SylotiNagri" }, { QChar::Script_SylotiNagri, "Syloti_Nagri" },
{ QChar::Script_OldPersian, "OldPersian" }, { QChar::Script_OldPersian, "Old_Persian" },
{ QChar::Script_Kharoshthi, "Kharoshthi" }, { QChar::Script_Kharoshthi, "Kharoshthi" },
// 5.0 // 5.0
{ QChar::Script_Balinese, "Balinese" }, { QChar::Script_Balinese, "Balinese" },
{ QChar::Script_Cuneiform, "Cuneiform" }, { QChar::Script_Cuneiform, "Cuneiform" },
{ QChar::Script_Phoenician, "Phoenician" }, { QChar::Script_Phoenician, "Phoenician" },
{ QChar::Script_PhagsPa, "PhagsPa" }, { QChar::Script_PhagsPa, "Phags_Pa" },
{ QChar::Script_Nko, "Nko" }, { QChar::Script_Nko, "Nko" },
// 5.1 // 5.1
{ QChar::Script_Sundanese, "Sundanese" }, { QChar::Script_Sundanese, "Sundanese" },
{ QChar::Script_Lepcha, "Lepcha" }, { QChar::Script_Lepcha, "Lepcha" },
{ QChar::Script_OlChiki, "OlChiki" }, { QChar::Script_OlChiki, "Ol_Chiki" },
{ QChar::Script_Vai, "Vai" }, { QChar::Script_Vai, "Vai" },
{ QChar::Script_Saurashtra, "Saurashtra" }, { QChar::Script_Saurashtra, "Saurashtra" },
{ QChar::Script_KayahLi, "KayahLi" }, { QChar::Script_KayahLi, "Kayah_Li" },
{ QChar::Script_Rejang, "Rejang" }, { QChar::Script_Rejang, "Rejang" },
{ QChar::Script_Lycian, "Lycian" }, { QChar::Script_Lycian, "Lycian" },
{ QChar::Script_Carian, "Carian" }, { QChar::Script_Carian, "Carian" },
{ QChar::Script_Lydian, "Lydian" }, { QChar::Script_Lydian, "Lydian" },
{ QChar::Script_Cham, "Cham" }, { QChar::Script_Cham, "Cham" },
// 5.2 // 5.2
{ QChar::Script_TaiTham, "TaiTham" }, { QChar::Script_TaiTham, "Tai_Tham" },
{ QChar::Script_TaiViet, "TaiViet" }, { QChar::Script_TaiViet, "Tai_Viet" },
{ QChar::Script_Avestan, "Avestan" }, { QChar::Script_Avestan, "Avestan" },
{ QChar::Script_EgyptianHieroglyphs, "EgyptianHieroglyphs" }, { QChar::Script_EgyptianHieroglyphs, "Egyptian_Hieroglyphs" },
{ QChar::Script_Samaritan, "Samaritan" }, { QChar::Script_Samaritan, "Samaritan" },
{ QChar::Script_Lisu, "Lisu" }, { QChar::Script_Lisu, "Lisu" },
{ QChar::Script_Bamum, "Bamum" }, { QChar::Script_Bamum, "Bamum" },
{ QChar::Script_Javanese, "Javanese" }, { QChar::Script_Javanese, "Javanese" },
{ QChar::Script_MeeteiMayek, "MeeteiMayek" }, { QChar::Script_MeeteiMayek, "Meetei_Mayek" },
{ QChar::Script_ImperialAramaic, "ImperialAramaic" }, { QChar::Script_ImperialAramaic, "Imperial_Aramaic" },
{ QChar::Script_OldSouthArabian, "OldSouthArabian" }, { QChar::Script_OldSouthArabian, "Old_South_Arabian" },
{ QChar::Script_InscriptionalParthian, "InscriptionalParthian" }, { QChar::Script_InscriptionalParthian, "Inscriptional_Parthian" },
{ QChar::Script_InscriptionalPahlavi, "InscriptionalPahlavi" }, { QChar::Script_InscriptionalPahlavi, "Inscriptional_Pahlavi" },
{ QChar::Script_OldTurkic, "OldTurkic" }, { QChar::Script_OldTurkic, "Old_Turkic" },
{ QChar::Script_Kaithi, "Kaithi" }, { QChar::Script_Kaithi, "Kaithi" },
// 6.0 // 6.0
{ QChar::Script_Batak, "Batak" }, { QChar::Script_Batak, "Batak" },
@ -772,42 +772,42 @@ static void initScriptMap()
{ QChar::Script_Mandaic, "Mandaic" }, { QChar::Script_Mandaic, "Mandaic" },
// 6.1 // 6.1
{ QChar::Script_Chakma, "Chakma" }, { QChar::Script_Chakma, "Chakma" },
{ QChar::Script_MeroiticCursive, "MeroiticCursive" }, { QChar::Script_MeroiticCursive, "Meroitic_Cursive" },
{ QChar::Script_MeroiticHieroglyphs, "MeroiticHieroglyphs" }, { QChar::Script_MeroiticHieroglyphs, "Meroitic_Hieroglyphs" },
{ QChar::Script_Miao, "Miao" }, { QChar::Script_Miao, "Miao" },
{ QChar::Script_Sharada, "Sharada" }, { QChar::Script_Sharada, "Sharada" },
{ QChar::Script_SoraSompeng, "SoraSompeng" }, { QChar::Script_SoraSompeng, "Sora_Sompeng" },
{ QChar::Script_Takri, "Takri" }, { QChar::Script_Takri, "Takri" },
// 7.0 // 7.0
{ QChar::Script_CaucasianAlbanian, "CaucasianAlbanian" }, { QChar::Script_CaucasianAlbanian, "Caucasian_Albanian" },
{ QChar::Script_BassaVah, "BassaVah" }, { QChar::Script_BassaVah, "Bassa_Vah" },
{ QChar::Script_Duployan, "Duployan" }, { QChar::Script_Duployan, "Duployan" },
{ QChar::Script_Elbasan, "Elbasan" }, { QChar::Script_Elbasan, "Elbasan" },
{ QChar::Script_Grantha, "Grantha" }, { QChar::Script_Grantha, "Grantha" },
{ QChar::Script_PahawhHmong, "PahawhHmong" }, { QChar::Script_PahawhHmong, "Pahawh_Hmong" },
{ QChar::Script_Khojki, "Khojki" }, { QChar::Script_Khojki, "Khojki" },
{ QChar::Script_LinearA, "LinearA" }, { QChar::Script_LinearA, "Linear_A" },
{ QChar::Script_Mahajani, "Mahajani" }, { QChar::Script_Mahajani, "Mahajani" },
{ QChar::Script_Manichaean, "Manichaean" }, { QChar::Script_Manichaean, "Manichaean" },
{ QChar::Script_MendeKikakui, "MendeKikakui" }, { QChar::Script_MendeKikakui, "Mende_Kikakui" },
{ QChar::Script_Modi, "Modi" }, { QChar::Script_Modi, "Modi" },
{ QChar::Script_Mro, "Mro" }, { QChar::Script_Mro, "Mro" },
{ QChar::Script_OldNorthArabian, "OldNorthArabian" }, { QChar::Script_OldNorthArabian, "Old_North_Arabian" },
{ QChar::Script_Nabataean, "Nabataean" }, { QChar::Script_Nabataean, "Nabataean" },
{ QChar::Script_Palmyrene, "Palmyrene" }, { QChar::Script_Palmyrene, "Palmyrene" },
{ QChar::Script_PauCinHau, "PauCinHau" }, { QChar::Script_PauCinHau, "Pau_Cin_Hau" },
{ QChar::Script_OldPermic, "OldPermic" }, { QChar::Script_OldPermic, "Old_Permic" },
{ QChar::Script_PsalterPahlavi, "PsalterPahlavi" }, { QChar::Script_PsalterPahlavi, "Psalter_Pahlavi" },
{ QChar::Script_Siddham, "Siddham" }, { QChar::Script_Siddham, "Siddham" },
{ QChar::Script_Khudawadi, "Khudawadi" }, { QChar::Script_Khudawadi, "Khudawadi" },
{ QChar::Script_Tirhuta, "Tirhuta" }, { QChar::Script_Tirhuta, "Tirhuta" },
{ QChar::Script_WarangCiti, "WarangCiti" }, { QChar::Script_WarangCiti, "Warang_Citi" },
// 8.0 // 8.0
{ QChar::Script_Ahom, "Ahom" }, { QChar::Script_Ahom, "Ahom" },
{ QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" }, { QChar::Script_AnatolianHieroglyphs, "Anatolian_Hieroglyphs" },
{ QChar::Script_Hatran, "Hatran" }, { QChar::Script_Hatran, "Hatran" },
{ QChar::Script_Multani, "Multani" }, { QChar::Script_Multani, "Multani" },
{ QChar::Script_OldHungarian, "OldHungarian" }, { QChar::Script_OldHungarian, "Old_Hungarian" },
{ QChar::Script_SignWriting, "SignWriting" }, { QChar::Script_SignWriting, "SignWriting" },
// 9.0 // 9.0
{ QChar::Script_Adlam, "Adlam" }, { QChar::Script_Adlam, "Adlam" },
@ -817,47 +817,47 @@ static void initScriptMap()
{ QChar::Script_Osage, "Osage" }, { QChar::Script_Osage, "Osage" },
{ QChar::Script_Tangut, "Tangut" }, { QChar::Script_Tangut, "Tangut" },
// 10.0 // 10.0
{ QChar::Script_MasaramGondi, "MasaramGondi" }, { QChar::Script_MasaramGondi, "Masaram_Gondi" },
{ QChar::Script_Nushu, "Nushu" }, { QChar::Script_Nushu, "Nushu" },
{ QChar::Script_Soyombo, "Soyombo" }, { QChar::Script_Soyombo, "Soyombo" },
{ QChar::Script_ZanabazarSquare, "ZanabazarSquare" }, { QChar::Script_ZanabazarSquare, "Zanabazar_Square" },
// 12.1 // 12.1
{ QChar::Script_Dogra, "Dogra" }, { QChar::Script_Dogra, "Dogra" },
{ QChar::Script_GunjalaGondi, "GunjalaGondi" }, { QChar::Script_GunjalaGondi, "Gunjala_Gondi" },
{ QChar::Script_HanifiRohingya, "HanifiRohingya" }, { QChar::Script_HanifiRohingya, "Hanifi_Rohingya" },
{ QChar::Script_Makasar, "Makasar" }, { QChar::Script_Makasar, "Makasar" },
{ QChar::Script_Medefaidrin, "Medefaidrin" }, { QChar::Script_Medefaidrin, "Medefaidrin" },
{ QChar::Script_OldSogdian, "OldSogdian" }, { QChar::Script_OldSogdian, "Old_Sogdian" },
{ QChar::Script_Sogdian, "Sogdian" }, { QChar::Script_Sogdian, "Sogdian" },
{ QChar::Script_Elymaic, "Elymaic" }, { QChar::Script_Elymaic, "Elymaic" },
{ QChar::Script_Nandinagari, "Nandinagari" }, { QChar::Script_Nandinagari, "Nandinagari" },
{ QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" }, { QChar::Script_NyiakengPuachueHmong, "Nyiakeng_Puachue_Hmong" },
{ QChar::Script_Wancho, "Wancho" }, { QChar::Script_Wancho, "Wancho" },
// 13.0 // 13.0
{ QChar::Script_Chorasmian, "Chorasmian" }, { QChar::Script_Chorasmian, "Chorasmian" },
{ QChar::Script_DivesAkuru, "DivesAkuru" }, { QChar::Script_DivesAkuru, "Dives_Akuru" },
{ QChar::Script_KhitanSmallScript, "KhitanSmallScript" }, { QChar::Script_KhitanSmallScript, "Khitan_Small_Script" },
{ QChar::Script_Yezidi, "Yezidi" }, { QChar::Script_Yezidi, "Yezidi" },
// 14.0 // 14.0
{ QChar::Script_CyproMinoan, "CyproMinoan"}, { QChar::Script_CyproMinoan, "Cypro_Minoan"},
{ QChar::Script_OldUyghur, "OldUyghur"}, { QChar::Script_OldUyghur, "Old_Uyghur"},
{ QChar::Script_Tangsa, "Tangsa"}, { QChar::Script_Tangsa, "Tangsa"},
{ QChar::Script_Toto, "Toto"}, { QChar::Script_Toto, "Toto"},
{ QChar::Script_Vithkuqi, "Vithkuqi"}, { QChar::Script_Vithkuqi, "Vithkuqi"},
// 15.0 // 15.0
{ QChar::Script_Kawi, "Kawi"}, { QChar::Script_Kawi, "Kawi"},
{ QChar::Script_NagMundari, "NagMundari"}, { QChar::Script_NagMundari, "Nag_Mundari"},
// 16.0 // 16.0
{ QChar::Script_Garay, "Garay"}, { QChar::Script_Garay, "Garay"},
{ QChar::Script_GurungKhema, "GurungKhema"}, { QChar::Script_GurungKhema, "Gurung_Khema"},
{ QChar::Script_KiratRai, "KiratRai"}, { QChar::Script_KiratRai, "Kirat_Rai"},
{ QChar::Script_OlOnal, "OlOnal"}, { QChar::Script_OlOnal, "Ol_Onal"},
{ QChar::Script_Sunuwar, "Sunuwar"}, { QChar::Script_Sunuwar, "Sunuwar"},
{ QChar::Script_Todhri, "Todhri"}, { QChar::Script_Todhri, "Todhri"},
{ QChar::Script_TuluTigalari, "TuluTigalari"}, { QChar::Script_TuluTigalari, "Tulu_Tigalari"},
// unhandled // unhandled
{ QChar::Script_Unknown, 0 } { QChar::Script_Unknown, 0 }
@ -2195,7 +2195,6 @@ static void readScripts()
readUnicodeFile("Scripts.txt", readUnicodeFile("Scripts.txt",
[] (QByteArray &line, int lineNo) { [] (QByteArray &line, int lineNo) {
line.replace(" ", ""); line.replace(" ", "");
line.replace("_", "");
if (line.isEmpty()) if (line.isEmpty())
return; return;