mirror of https://github.com/qt/qtbase.git
util/unicode: Extract Method parseHexRange()
Wrapping parseHexList(), which gets extended to support QLatin1StringView separators, add parseHexRange() and use it around the code to parse HHHHH[..HHHHH] hex ranges. Amends the start of the public history. Pick-to: 6.10 6.9 6.8 6.5 Change-Id: I0372e5c239642988f0e920d95108657e276b19dd Reviewed-by: Ahmad Samir <a.samirh78@gmail.com>
This commit is contained in:
parent
714969e8a1
commit
6e526bf92c
|
@ -1348,16 +1348,32 @@ static int parseHex(QByteArrayView input, int lineNo)
|
|||
return result;
|
||||
}
|
||||
|
||||
QVarLengthArray<int, 4> parseHexList(QByteArrayView input, int lineNo)
|
||||
template <typename Sep = char16_t>
|
||||
QVarLengthArray<int, 4> parseHexList(QByteArrayView input, int lineNo, Sep sep = u' ')
|
||||
{
|
||||
QVarLengthArray<int, 4> result;
|
||||
constexpr char16_t sep = u' ';
|
||||
constexpr auto sb = Qt::SkipEmptyParts;
|
||||
const auto sb = sep == u' ' ? Qt::SkipEmptyParts : Qt::KeepEmptyParts;
|
||||
for (auto e : qTokenize(QLatin1StringView{input}, sep, sb))
|
||||
result.push_back(parseHex(e, lineNo));
|
||||
return result;
|
||||
}
|
||||
|
||||
static auto parseHexRange(QByteArrayView input, int lineNo)
|
||||
{
|
||||
struct R { int from, to; };
|
||||
|
||||
const auto pair = parseHexList(input, lineNo, ".."_L1);
|
||||
Q_ASSERT(pair.size() <= 2);
|
||||
int from = pair[0];
|
||||
int to = from;
|
||||
if (pair.size() == 2) {
|
||||
to = pair[1];
|
||||
if (from > to)
|
||||
qFatal("invalid range in line %d: %05x > %05x", lineNo, from, to);
|
||||
}
|
||||
return R{from, to};
|
||||
}
|
||||
|
||||
static void readUnicodeData()
|
||||
{
|
||||
qDebug("Reading UnicodeData.txt");
|
||||
|
@ -1582,25 +1598,12 @@ static void readDerivedAge()
|
|||
{
|
||||
readUnicodeFile("DerivedAge.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() == 2);
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
QChar::UnicodeVersion age = age_map.value(l[1].trimmed(), QChar::Unicode_Unassigned);
|
||||
//qDebug() << Qt::hex << from << ".." << to << ba << age;
|
||||
|
@ -1618,17 +1621,12 @@ static void readEastAsianWidth()
|
|||
{
|
||||
readUnicodeFile("EastAsianWidth.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
|
||||
line = std::move(line).simplified();
|
||||
|
||||
QList<QByteArray> fields = line.split(';');
|
||||
Q_ASSERT(fields.size() == 2);
|
||||
|
||||
// That would be split(".."), but that API does not exist.
|
||||
const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
|
||||
QList<QByteArray> cl = codePoints.split('.');
|
||||
Q_ASSERT(cl.size() >= 1 && cl.size() <= 2);
|
||||
const auto [first, last] = parseHexRange(fields[0], lineNo);
|
||||
|
||||
const QByteArray widthString = fields[1].trimmed();
|
||||
if (!eastAsianWidthMap.contains(widthString)) {
|
||||
|
@ -1637,11 +1635,6 @@ static void readEastAsianWidth()
|
|||
}
|
||||
auto width = eastAsianWidthMap.value(widthString);
|
||||
|
||||
bool ok;
|
||||
const int first = cl[0].toInt(&ok, 16);
|
||||
const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
|
||||
Q_ASSERT(ok);
|
||||
|
||||
for (int codepoint = first; codepoint <= last; ++codepoint) {
|
||||
UnicodeData &ud = UnicodeData::valueRef(codepoint);
|
||||
// Ensure that ranges don't overlap.
|
||||
|
@ -1655,8 +1648,6 @@ static void readDerivedNormalizationProps()
|
|||
{
|
||||
readUnicodeFile("DerivedNormalizationProps.txt",
|
||||
[] (const QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() >= 2);
|
||||
|
||||
|
@ -1668,18 +1659,7 @@ static void readDerivedNormalizationProps()
|
|||
return;
|
||||
}
|
||||
|
||||
QByteArray codes = l[0].trimmed();
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
for (int codepoint = from; codepoint <= to; ++codepoint) {
|
||||
UnicodeData &d = UnicodeData::valueRef(codepoint);
|
||||
|
@ -1797,24 +1777,12 @@ static void readLineBreak()
|
|||
{
|
||||
readUnicodeFile("LineBreak.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() == 2);
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
|
||||
if (lb == LineBreak_Unassigned)
|
||||
|
@ -1920,25 +1888,13 @@ static void readGraphemeBreak()
|
|||
{
|
||||
readUnicodeFile("GraphemeBreakProperty.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() == 2);
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
GraphemeBreakClass brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
|
||||
if (brk == GraphemeBreak_Unassigned)
|
||||
|
@ -1955,7 +1911,6 @@ static void readEmojiData()
|
|||
{
|
||||
readUnicodeFile("emoji-data.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
|
@ -1965,18 +1920,7 @@ static void readEmojiData()
|
|||
if (emojiFlags == EmojiFlags::NoEmoji)
|
||||
return;
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
for (int codepoint = from; codepoint <= to; ++codepoint) {
|
||||
UnicodeData &ud = UnicodeData::valueRef(codepoint);
|
||||
|
@ -1996,24 +1940,12 @@ static void readWordBreak()
|
|||
{
|
||||
readUnicodeFile("WordBreakProperty.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() == 2);
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
WordBreakClass brk = word_break_map.value(l[1], WordBreak_Unassigned);
|
||||
if (brk == WordBreak_Unassigned)
|
||||
|
@ -2039,24 +1971,12 @@ static void readSentenceBreak()
|
|||
{
|
||||
readUnicodeFile("SentenceBreakProperty.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
line.replace(" ", "");
|
||||
|
||||
QList<QByteArray> l = line.split(';');
|
||||
Q_ASSERT(l.size() == 2);
|
||||
|
||||
QByteArray codes = l[0];
|
||||
codes.replace("..", ".");
|
||||
QList<QByteArray> cl = codes.split('.');
|
||||
|
||||
bool ok;
|
||||
int from = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int to = from;
|
||||
if (cl.size() == 2) {
|
||||
to = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [from, to] = parseHexRange(l[0], lineNo);
|
||||
|
||||
SentenceBreakClass brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
|
||||
if (brk == SentenceBreak_Unassigned)
|
||||
|
@ -2263,7 +2183,6 @@ static void readScripts()
|
|||
{
|
||||
readUnicodeFile("Scripts.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
line.replace(" ", "");
|
||||
line.replace("_", "");
|
||||
|
||||
|
@ -2275,17 +2194,7 @@ static void readScripts()
|
|||
QByteArray codePoints = line.left(semicolon);
|
||||
QByteArray scriptName = line.mid(semicolon + 1);
|
||||
|
||||
codePoints.replace("..", ".");
|
||||
QList<QByteArray> cl = codePoints.split('.');
|
||||
|
||||
bool ok;
|
||||
int first = cl[0].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
int last = first;
|
||||
if (cl.size() == 2) {
|
||||
last = cl[1].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
}
|
||||
const auto [first, last] = parseHexRange(codePoints, lineNo);
|
||||
|
||||
if (!scriptMap.contains(scriptName))
|
||||
qFatal("Unhandled script property value: %s", scriptName.constData());
|
||||
|
@ -2304,17 +2213,12 @@ static void readIdnaMappingTable()
|
|||
{
|
||||
readUnicodeFile("IdnaMappingTable.txt",
|
||||
[] (QByteArray &line, int lineNo) {
|
||||
Q_UNUSED(lineNo);
|
||||
|
||||
line = std::move(line).simplified();
|
||||
|
||||
QList<QByteArray> fields = line.split(';');
|
||||
Q_ASSERT(fields.size() >= 2);
|
||||
|
||||
// That would be split(".."), but that API does not exist.
|
||||
const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
|
||||
QList<QByteArray> cl = codePoints.split('.');
|
||||
Q_ASSERT(cl.size() >= 1 && cl.size() <= 2);
|
||||
const auto [first, last] = parseHexRange(fields[0], lineNo);
|
||||
|
||||
const QByteArray statusString = fields[1].trimmed();
|
||||
if (!idnaStatusMap.contains(statusString))
|
||||
|
@ -2322,11 +2226,6 @@ static void readIdnaMappingTable()
|
|||
fields[0].constData(), statusString.data());
|
||||
IdnaRawStatus rawStatus = idnaStatusMap.value(statusString);
|
||||
|
||||
bool ok;
|
||||
const int first = cl[0].toInt(&ok, 16);
|
||||
const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
|
||||
Q_ASSERT(ok);
|
||||
|
||||
QString mapping;
|
||||
|
||||
switch (rawStatus) {
|
||||
|
|
Loading…
Reference in New Issue