util/unicode: remove replace('_', "") from readScripts()

For some reason, the code stored the official Unicode script tags
without their intervening underscores and removed underscores from the
input before attempting to match. That works only as long as Unicode
stays consistent in spelling properties "Like_This".

Relying on that is brittle, though, seeing as a tag without an
intervening underscore (SignWriting) already slipped into the database,
potentially matching a sought Sign_Writing. It's highly unlikely that
Unicode will start to use property names that differ only by their use
of underscores, but why risk it, and why confuse readers of the code by
using a different sought string than what's in the files?

Fix by storing the tags unaltered and leaving the underscores in the
input alone, too.

Amends the start of the public history.

Pick-to: 6.10 6.9 6.8 6.5
Change-Id: I5870a35812cb3fc0b28888cb09e9f42661684a26
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
Marc Mutz 2025-08-27 18:10:37 +02:00
parent d183bbe184
commit 0738a0dd5e
1 changed file with 49 additions and 50 deletions

View File

@ -697,7 +697,7 @@ static void initScriptMap()
{ QChar::Script_Hangul, "Hangul" },
{ QChar::Script_Ethiopic, "Ethiopic" },
{ QChar::Script_Cherokee, "Cherokee" },
{ QChar::Script_CanadianAboriginal, "CanadianAboriginal" },
{ QChar::Script_CanadianAboriginal, "Canadian_Aboriginal" },
{ QChar::Script_Ogham, "Ogham" },
{ QChar::Script_Runic, "Runic" },
{ QChar::Script_Khmer, "Khmer" },
@ -707,7 +707,7 @@ static void initScriptMap()
{ QChar::Script_Bopomofo, "Bopomofo" },
{ QChar::Script_Han, "Han" },
{ QChar::Script_Yi, "Yi" },
{ QChar::Script_OldItalic, "OldItalic" },
{ QChar::Script_OldItalic, "Old_Italic" },
{ QChar::Script_Gothic, "Gothic" },
{ QChar::Script_Deseret, "Deseret" },
{ QChar::Script_Tagalog, "Tagalog" },
@ -717,8 +717,8 @@ static void initScriptMap()
{ QChar::Script_Coptic, "Coptic" },
// 4.0
{ QChar::Script_Limbu, "Limbu" },
{ QChar::Script_TaiLe, "TaiLe" },
{ QChar::Script_LinearB, "LinearB" },
{ QChar::Script_TaiLe, "Tai_Le" },
{ QChar::Script_LinearB, "Linear_B" },
{ QChar::Script_Ugaritic, "Ugaritic" },
{ QChar::Script_Shavian, "Shavian" },
{ QChar::Script_Osmanya, "Osmanya" },
@ -726,45 +726,45 @@ static void initScriptMap()
{ QChar::Script_Braille, "Braille" },
// 4.1
{ QChar::Script_Buginese, "Buginese" },
{ QChar::Script_NewTaiLue, "NewTaiLue" },
{ QChar::Script_NewTaiLue, "New_Tai_Lue" },
{ QChar::Script_Glagolitic, "Glagolitic" },
{ QChar::Script_Tifinagh, "Tifinagh" },
{ QChar::Script_SylotiNagri, "SylotiNagri" },
{ QChar::Script_OldPersian, "OldPersian" },
{ QChar::Script_SylotiNagri, "Syloti_Nagri" },
{ QChar::Script_OldPersian, "Old_Persian" },
{ QChar::Script_Kharoshthi, "Kharoshthi" },
// 5.0
{ QChar::Script_Balinese, "Balinese" },
{ QChar::Script_Cuneiform, "Cuneiform" },
{ QChar::Script_Phoenician, "Phoenician" },
{ QChar::Script_PhagsPa, "PhagsPa" },
{ QChar::Script_PhagsPa, "Phags_Pa" },
{ QChar::Script_Nko, "Nko" },
// 5.1
{ QChar::Script_Sundanese, "Sundanese" },
{ QChar::Script_Lepcha, "Lepcha" },
{ QChar::Script_OlChiki, "OlChiki" },
{ QChar::Script_OlChiki, "Ol_Chiki" },
{ QChar::Script_Vai, "Vai" },
{ QChar::Script_Saurashtra, "Saurashtra" },
{ QChar::Script_KayahLi, "KayahLi" },
{ QChar::Script_KayahLi, "Kayah_Li" },
{ QChar::Script_Rejang, "Rejang" },
{ QChar::Script_Lycian, "Lycian" },
{ QChar::Script_Carian, "Carian" },
{ QChar::Script_Lydian, "Lydian" },
{ QChar::Script_Cham, "Cham" },
// 5.2
{ QChar::Script_TaiTham, "TaiTham" },
{ QChar::Script_TaiViet, "TaiViet" },
{ QChar::Script_TaiTham, "Tai_Tham" },
{ QChar::Script_TaiViet, "Tai_Viet" },
{ QChar::Script_Avestan, "Avestan" },
{ QChar::Script_EgyptianHieroglyphs, "EgyptianHieroglyphs" },
{ QChar::Script_EgyptianHieroglyphs, "Egyptian_Hieroglyphs" },
{ QChar::Script_Samaritan, "Samaritan" },
{ QChar::Script_Lisu, "Lisu" },
{ QChar::Script_Bamum, "Bamum" },
{ QChar::Script_Javanese, "Javanese" },
{ QChar::Script_MeeteiMayek, "MeeteiMayek" },
{ QChar::Script_ImperialAramaic, "ImperialAramaic" },
{ QChar::Script_OldSouthArabian, "OldSouthArabian" },
{ QChar::Script_InscriptionalParthian, "InscriptionalParthian" },
{ QChar::Script_InscriptionalPahlavi, "InscriptionalPahlavi" },
{ QChar::Script_OldTurkic, "OldTurkic" },
{ QChar::Script_MeeteiMayek, "Meetei_Mayek" },
{ QChar::Script_ImperialAramaic, "Imperial_Aramaic" },
{ QChar::Script_OldSouthArabian, "Old_South_Arabian" },
{ QChar::Script_InscriptionalParthian, "Inscriptional_Parthian" },
{ QChar::Script_InscriptionalPahlavi, "Inscriptional_Pahlavi" },
{ QChar::Script_OldTurkic, "Old_Turkic" },
{ QChar::Script_Kaithi, "Kaithi" },
// 6.0
{ QChar::Script_Batak, "Batak" },
@ -772,42 +772,42 @@ static void initScriptMap()
{ QChar::Script_Mandaic, "Mandaic" },
// 6.1
{ QChar::Script_Chakma, "Chakma" },
{ QChar::Script_MeroiticCursive, "MeroiticCursive" },
{ QChar::Script_MeroiticHieroglyphs, "MeroiticHieroglyphs" },
{ QChar::Script_MeroiticCursive, "Meroitic_Cursive" },
{ QChar::Script_MeroiticHieroglyphs, "Meroitic_Hieroglyphs" },
{ QChar::Script_Miao, "Miao" },
{ QChar::Script_Sharada, "Sharada" },
{ QChar::Script_SoraSompeng, "SoraSompeng" },
{ QChar::Script_SoraSompeng, "Sora_Sompeng" },
{ QChar::Script_Takri, "Takri" },
// 7.0
{ QChar::Script_CaucasianAlbanian, "CaucasianAlbanian" },
{ QChar::Script_BassaVah, "BassaVah" },
{ QChar::Script_CaucasianAlbanian, "Caucasian_Albanian" },
{ QChar::Script_BassaVah, "Bassa_Vah" },
{ QChar::Script_Duployan, "Duployan" },
{ QChar::Script_Elbasan, "Elbasan" },
{ QChar::Script_Grantha, "Grantha" },
{ QChar::Script_PahawhHmong, "PahawhHmong" },
{ QChar::Script_PahawhHmong, "Pahawh_Hmong" },
{ QChar::Script_Khojki, "Khojki" },
{ QChar::Script_LinearA, "LinearA" },
{ QChar::Script_LinearA, "Linear_A" },
{ QChar::Script_Mahajani, "Mahajani" },
{ QChar::Script_Manichaean, "Manichaean" },
{ QChar::Script_MendeKikakui, "MendeKikakui" },
{ QChar::Script_MendeKikakui, "Mende_Kikakui" },
{ QChar::Script_Modi, "Modi" },
{ QChar::Script_Mro, "Mro" },
{ QChar::Script_OldNorthArabian, "OldNorthArabian" },
{ QChar::Script_OldNorthArabian, "Old_North_Arabian" },
{ QChar::Script_Nabataean, "Nabataean" },
{ QChar::Script_Palmyrene, "Palmyrene" },
{ QChar::Script_PauCinHau, "PauCinHau" },
{ QChar::Script_OldPermic, "OldPermic" },
{ QChar::Script_PsalterPahlavi, "PsalterPahlavi" },
{ QChar::Script_PauCinHau, "Pau_Cin_Hau" },
{ QChar::Script_OldPermic, "Old_Permic" },
{ QChar::Script_PsalterPahlavi, "Psalter_Pahlavi" },
{ QChar::Script_Siddham, "Siddham" },
{ QChar::Script_Khudawadi, "Khudawadi" },
{ QChar::Script_Tirhuta, "Tirhuta" },
{ QChar::Script_WarangCiti, "WarangCiti" },
{ QChar::Script_WarangCiti, "Warang_Citi" },
// 8.0
{ QChar::Script_Ahom, "Ahom" },
{ QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" },
{ QChar::Script_AnatolianHieroglyphs, "Anatolian_Hieroglyphs" },
{ QChar::Script_Hatran, "Hatran" },
{ QChar::Script_Multani, "Multani" },
{ QChar::Script_OldHungarian, "OldHungarian" },
{ QChar::Script_OldHungarian, "Old_Hungarian" },
{ QChar::Script_SignWriting, "SignWriting" },
// 9.0
{ QChar::Script_Adlam, "Adlam" },
@ -817,47 +817,47 @@ static void initScriptMap()
{ QChar::Script_Osage, "Osage" },
{ QChar::Script_Tangut, "Tangut" },
// 10.0
{ QChar::Script_MasaramGondi, "MasaramGondi" },
{ QChar::Script_MasaramGondi, "Masaram_Gondi" },
{ QChar::Script_Nushu, "Nushu" },
{ QChar::Script_Soyombo, "Soyombo" },
{ QChar::Script_ZanabazarSquare, "ZanabazarSquare" },
{ QChar::Script_ZanabazarSquare, "Zanabazar_Square" },
// 12.1
{ QChar::Script_Dogra, "Dogra" },
{ QChar::Script_GunjalaGondi, "GunjalaGondi" },
{ QChar::Script_HanifiRohingya, "HanifiRohingya" },
{ QChar::Script_GunjalaGondi, "Gunjala_Gondi" },
{ QChar::Script_HanifiRohingya, "Hanifi_Rohingya" },
{ QChar::Script_Makasar, "Makasar" },
{ QChar::Script_Medefaidrin, "Medefaidrin" },
{ QChar::Script_OldSogdian, "OldSogdian" },
{ QChar::Script_OldSogdian, "Old_Sogdian" },
{ QChar::Script_Sogdian, "Sogdian" },
{ QChar::Script_Elymaic, "Elymaic" },
{ QChar::Script_Nandinagari, "Nandinagari" },
{ QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" },
{ QChar::Script_NyiakengPuachueHmong, "Nyiakeng_Puachue_Hmong" },
{ QChar::Script_Wancho, "Wancho" },
// 13.0
{ QChar::Script_Chorasmian, "Chorasmian" },
{ QChar::Script_DivesAkuru, "DivesAkuru" },
{ QChar::Script_KhitanSmallScript, "KhitanSmallScript" },
{ QChar::Script_DivesAkuru, "Dives_Akuru" },
{ QChar::Script_KhitanSmallScript, "Khitan_Small_Script" },
{ QChar::Script_Yezidi, "Yezidi" },
// 14.0
{ QChar::Script_CyproMinoan, "CyproMinoan"},
{ QChar::Script_OldUyghur, "OldUyghur"},
{ QChar::Script_CyproMinoan, "Cypro_Minoan"},
{ QChar::Script_OldUyghur, "Old_Uyghur"},
{ QChar::Script_Tangsa, "Tangsa"},
{ QChar::Script_Toto, "Toto"},
{ QChar::Script_Vithkuqi, "Vithkuqi"},
// 15.0
{ QChar::Script_Kawi, "Kawi"},
{ QChar::Script_NagMundari, "NagMundari"},
{ QChar::Script_NagMundari, "Nag_Mundari"},
// 16.0
{ QChar::Script_Garay, "Garay"},
{ QChar::Script_GurungKhema, "GurungKhema"},
{ QChar::Script_KiratRai, "KiratRai"},
{ QChar::Script_OlOnal, "OlOnal"},
{ QChar::Script_GurungKhema, "Gurung_Khema"},
{ QChar::Script_KiratRai, "Kirat_Rai"},
{ QChar::Script_OlOnal, "Ol_Onal"},
{ QChar::Script_Sunuwar, "Sunuwar"},
{ QChar::Script_Todhri, "Todhri"},
{ QChar::Script_TuluTigalari, "TuluTigalari"},
{ QChar::Script_TuluTigalari, "Tulu_Tigalari"},
// unhandled
{ QChar::Script_Unknown, 0 }
@ -2195,7 +2195,6 @@ static void readScripts()
readUnicodeFile("Scripts.txt",
[] (QByteArray &line, int lineNo) {
line.replace(" ", "");
line.replace("_", "");
if (line.isEmpty())
return;