mirror of https://github.com/qt/qtbase.git
Update the Unicode BiDi algorithm to be compliant with Unicode 10
The UBA (Unicode Bidirectional Algorithm) implementation in Qt was out of date: it implemented the specification as it stood before Unicode 6.3 and therefore did not handle directional isolates or paired brackets. This adds a completely new implementation of the UBA that is compliant with Unicode 10. Added the test data from Unicode 10 to the qcomplextext auto test and ensured that we pass the test suite. Task-number: QTBUG-57743 Change-Id: Ie2d957bc9775d82f0a51d1c78dc6bd154f22847c Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
parent
4804d42ab8
commit
7f504283ef
File diff suppressed because it is too large
Load Diff
|
@ -146,9 +146,17 @@ struct Q_AUTOTEST_EXPORT QScriptAnalysis
|
|||
TabOrObject = Tab,
|
||||
Object = 7
|
||||
};
|
||||
unsigned short script : 7;
|
||||
unsigned short bidiLevel : 6; // Unicode Bidi algorithm embedding level (0-61)
|
||||
unsigned short flags : 3;
|
||||
enum BidiFlags {
|
||||
BidiBN = 1,
|
||||
BidiMaybeResetToParagraphLevel = 2,
|
||||
BidiResetToParagraphLevel = 4,
|
||||
BidiMirrored = 8
|
||||
};
|
||||
unsigned short script : 8;
|
||||
unsigned short flags : 4;
|
||||
unsigned short bidiFlags : 4;
|
||||
unsigned short bidiLevel : 8; // Unicode Bidi algorithm embedding level (0-125)
|
||||
QChar::Direction bidiDirection : 8; // used when running the bidi algorithm
|
||||
inline bool operator == (const QScriptAnalysis &other) const {
|
||||
return script == other.script && bidiLevel == other.bidiLevel && flags == other.flags;
|
||||
}
|
||||
|
|
|
@ -106,33 +106,62 @@ const LV logical_visual[] = {
|
|||
// LRO: \342\200\255
|
||||
// RLO: \342\200\256
|
||||
|
||||
{ "override1", "\342\200\256hello\342\200\254", "\342\200\254olleh\342\200\256", QChar::DirL },
|
||||
{ "override1", "\342\200\256hello\342\200\254", "\342\200\256olleh\342\200\254", QChar::DirL },
|
||||
{ "override2", "\342\200\255hello\342\200\254", "\342\200\255hello\342\200\254", QChar::DirL },
|
||||
{ "override3", "\342\200\255\327\251\327\234\327\225\327\235\342\200\254", "\342\200\255\327\251\327\234\327\225\327\235\342\200\254", QChar::DirL },
|
||||
{ "override4", "\342\200\256\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\235\327\225\327\234\327\251\342\200\256", QChar::DirL },
|
||||
{ "override4", "\342\200\256\327\251\327\234\327\225\327\235\342\200\254", "\342\200\256\327\235\327\225\327\234\327\251\342\200\254", QChar::DirL },
|
||||
{ "override5", "\342\200\256hello\342\200\254", "\342\200\254olleh\342\200\256", QChar::DirR },
|
||||
{ "override6", "\342\200\255hello\342\200\254", "\342\200\255hello\342\200\254", QChar::DirR },
|
||||
{ "override7", "\342\200\255\327\251\327\234\327\225\327\235\342\200\254", "\342\200\255\327\251\327\234\327\225\327\235\342\200\254", QChar::DirR },
|
||||
{ "override6", "\342\200\255hello\342\200\254", "\342\200\254hello\342\200\255", QChar::DirR },
|
||||
{ "override7", "\342\200\255\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\251\327\234\327\225\327\235\342\200\255", QChar::DirR },
|
||||
{ "override8", "\342\200\256\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\235\327\225\327\234\327\251\342\200\256", QChar::DirR },
|
||||
|
||||
{ "override9", "\327\224\342\200\255\327\251\327\234\342\200\256hello\342\200\254\327\225\327\235\342\200\254", "\342\200\255\327\251\327\234\342\200\254olleh\342\200\256\327\225\327\235\342\200\254\327\224", QChar::DirL },
|
||||
{ "override10", "\327\224\342\200\255\327\251\327\234\342\200\256hello\342\200\254\327\225\327\235\342\200\254", "\342\200\255\327\251\327\234\342\200\254olleh\342\200\256\327\225\327\235\342\200\254\327\224", QChar::DirR },
|
||||
{ "override9", "\327\224\342\200\255\327\251\327\234\342\200\256hello\342\200\254\327\225\327\235\342\200\254",
|
||||
"\327\251\327\234\342\200\256olleh\342\200\254\327\225\327\235\342\200\255\327\224\342\200\254", QChar::DirL },
|
||||
{ "override10", "\327\224\342\200\255\327\251\327\234\342\200\256hello\342\200\254\327\225\327\235\342\200\254",
|
||||
"\342\200\254\327\251\327\234\342\200\256olleh\342\200\254\327\225\327\235\342\200\255\327\224", QChar::DirR },
|
||||
|
||||
|
||||
{ "embed1", "\342\200\252hello\342\200\254", "\342\200\252hello\342\200\254", QChar::DirL },
|
||||
{ "embed2", "\342\200\253hello\342\200\254", "\342\200\254hello\342\200\253", QChar::DirL },
|
||||
{ "embed3", "\342\200\252hello\342\200\254", "\342\200\252hello\342\200\254", QChar::DirR },
|
||||
{ "embed2", "\342\200\253hello\342\200\254", "\342\200\253hello\342\200\254", QChar::DirL },
|
||||
{ "embed3", "\342\200\252hello\342\200\254", "\342\200\254hello\342\200\252", QChar::DirR },
|
||||
{ "embed4", "\342\200\253hello\342\200\254", "\342\200\254hello\342\200\253", QChar::DirR },
|
||||
{ "embed5", "\342\200\252\327\251\327\234\327\225\327\235\342\200\254", "\342\200\252\327\235\327\225\327\234\327\251\342\200\254", QChar::DirL },
|
||||
{ "embed6", "\342\200\253\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\235\327\225\327\234\327\251\342\200\253", QChar::DirL },
|
||||
{ "embed7", "\342\200\252\327\251\327\234\327\225\327\235\342\200\254", "\342\200\252\327\235\327\225\327\234\327\251\342\200\254", QChar::DirR },
|
||||
{ "embed6", "\342\200\253\327\251\327\234\327\225\327\235\342\200\254", "\342\200\253\327\235\327\225\327\234\327\251\342\200\254", QChar::DirL },
|
||||
{ "embed7", "\342\200\252\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\235\327\225\327\234\327\251\342\200\252", QChar::DirR },
|
||||
{ "embed8", "\342\200\253\327\251\327\234\327\225\327\235\342\200\254", "\342\200\254\327\235\327\225\327\234\327\251\342\200\253", QChar::DirR },
|
||||
|
||||
{ "embed9", "\342\200\252x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\252x \327\235\327\225\327\234\327\251 y\342\200\254", QChar::DirL },
|
||||
{ "embed10", "\342\200\253x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\254y \327\235\327\225\327\234\327\251 x\342\200\253", QChar::DirL },
|
||||
{ "embed11", "\342\200\252x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\252x \327\235\327\225\327\234\327\251 y\342\200\254", QChar::DirR },
|
||||
{ "embed10", "\342\200\253x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\253y \327\235\327\225\327\234\327\251 x\342\200\254", QChar::DirL },
|
||||
{ "embed11", "\342\200\252x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\254x \327\235\327\225\327\234\327\251 y\342\200\252", QChar::DirR },
|
||||
{ "embed12", "\342\200\253x \327\251\327\234\327\225\327\235 y\342\200\254", "\342\200\254y \327\235\327\225\327\234\327\251 x\342\200\253", QChar::DirR },
|
||||
{ "zwsp", "+0\342\200\213f-1", "+0\342\200\213f-1", QChar::DirL },
|
||||
|
||||
// Alef: \xD7\x90
|
||||
|
||||
{ "bracketpair_1_ltr", "\xD7\x90(\xD7\x90[&a]!)a", "\xD7\x90(\xD7\x90[&a]!)a", QChar::DirL },
|
||||
{ "bracketpair_1_rtl", "\xD7\x90(\xD7\x90[&a]!)a", "a(![a&]\xD7\x90)\xD7\x90", QChar::DirR },
|
||||
|
||||
{ "bracketpair_2_ltr", "a(\xD7\x90[&a]!)\xD7\x90", "a(\xD7\x90[&a]!)\xD7\x90", QChar::DirL },
|
||||
{ "bracketpair_2_rtl", "a(\xD7\x90[&a]!)\xD7\x90", "\xD7\x90(![a&]\xD7\x90)a", QChar::DirR },
|
||||
|
||||
{ "bracketpair_3_ltr", "\xD7\x90(a[&\xD7\x90]!)a", "\xD7\x90(a[&\xD7\x90]!)a", QChar::DirL },
|
||||
{ "bracketpair_3_rtl", "\xD7\x90(a[&\xD7\x90]!)a", "a(![\xD7\x90&]a)\xD7\x90", QChar::DirR },
|
||||
|
||||
{ "bracketpair_4_ltr", "a (a \xD7\x90) \xD7\x90", "a (a \xD7\x90) \xD7\x90", QChar::DirL },
|
||||
{ "bracketpair_4_rtl", "a (a \xD7\x90) \xD7\x90", "\xD7\x90 (\xD7\x90 a) a", QChar::DirR },
|
||||
|
||||
{ "bracketpair_5_ltr", "\xD7\x90 (a \xD7\x90) a", "\xD7\x90 (a \xD7\x90) a", QChar::DirL },
|
||||
{ "bracketpair_5_rtl", "\xD7\x90 (a \xD7\x90) a", "a (\xD7\x90 a) \xD7\x90", QChar::DirR },
|
||||
|
||||
{ "bracketpair_6_ltr", "a (\xD7\x90 a) \xD7\x90", "a (\xD7\x90 a) \xD7\x90", QChar::DirL },
|
||||
{ "bracketpair_6_rtl", "a (\xD7\x90 a) \xD7\x90", "\xD7\x90 (a \xD7\x90) a", QChar::DirR },
|
||||
|
||||
{ "bracketpair_7_ltr", "\xD7\x90\xD7\x90 book(s)", "\xD7\x90\xD7\x90 book(s)", QChar::DirL },
|
||||
{ "bracketpair_7_rtl", "\xD7\x90\xD7\x90 book(s)", "book(s) \xD7\x90\xD7\x90", QChar::DirR },
|
||||
|
||||
{ "bracketpair_8_ltr", "a \xD7\x90\xD7\x90(\xD7\x90)", "a (\xD7\x90)\xD7\x90\xD7\x90", QChar::DirL },
|
||||
{ "bracketpair_8_rtl", "a \xD7\x90\xD7\x90(\xD7\x90)", "(\xD7\x90)\xD7\x90\xD7\x90 a", QChar::DirR },
|
||||
|
||||
|
||||
{ 0, 0, 0, QChar::DirON }
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -3,3 +3,10 @@ TARGET = tst_qcomplextext
|
|||
QT += testlib
|
||||
QT += core-private gui-private
|
||||
SOURCES += tst_qcomplextext.cpp
|
||||
|
||||
TESTDATA += data
|
||||
|
||||
android {
|
||||
RESOURCES += \
|
||||
testdata.qrc
|
||||
}
|
||||
|
|
|
@ -47,6 +47,12 @@ private slots:
|
|||
void bidiCursorLogicalMovement();
|
||||
void bidiInvalidCursorNoMovement_data();
|
||||
void bidiInvalidCursorNoMovement();
|
||||
|
||||
void bidiCharacterTest_data();
|
||||
void bidiCharacterTest();
|
||||
void bidiTest_data();
|
||||
void bidiTest();
|
||||
|
||||
};
|
||||
|
||||
void tst_QComplexText::bidiReorderString_data()
|
||||
|
@ -93,7 +99,7 @@ void tst_QComplexText::bidiReorderString()
|
|||
QString sub = logical.mid(si.position, e.length(visualOrder[i]));
|
||||
if (si.analysis.bidiLevel % 2) {
|
||||
// reverse sub
|
||||
QChar *a = (QChar *)sub.unicode();
|
||||
QChar *a = sub.data();
|
||||
QChar *b = a + sub.length() - 1;
|
||||
while (a < b) {
|
||||
QChar tmp = *a;
|
||||
|
@ -273,5 +279,282 @@ void tst_QComplexText::bidiCursor_PDF()
|
|||
QVERIFY(line.cursorToX(size) == line.cursorToX(size - 1));
|
||||
}
|
||||
|
||||
void tst_QComplexText::bidiCharacterTest_data()
|
||||
{
|
||||
QTest::addColumn<QString>("data");
|
||||
QTest::addColumn<int>("paragraphDirection");
|
||||
QTest::addColumn<QVector<int>>("resolvedLevels");
|
||||
QTest::addColumn<QVector<int>>("visualOrder");
|
||||
|
||||
QString testFile = QFINDTESTDATA("data/BidiCharacterTest.txt");
|
||||
QFile f(testFile);
|
||||
QVERIFY(f.exists());
|
||||
|
||||
f.open(QIODevice::ReadOnly);
|
||||
|
||||
int linenum = 0;
|
||||
while (!f.atEnd()) {
|
||||
linenum++;
|
||||
|
||||
QByteArray line = f.readLine().simplified();
|
||||
if (line.startsWith('#') || line.isEmpty())
|
||||
continue;
|
||||
QVERIFY(!line.contains('#'));
|
||||
|
||||
QList<QByteArray> parts = line.split(';');
|
||||
QVERIFY(parts.size() == 5);
|
||||
|
||||
QString data;
|
||||
QList<QByteArray> dataParts = parts.at(0).split(' ');
|
||||
for (const auto &p : dataParts) {
|
||||
bool ok;
|
||||
data += QChar((ushort)p.toInt(&ok, 16));
|
||||
QVERIFY(ok);
|
||||
}
|
||||
|
||||
int paragraphDirection = parts.at(1).toInt();
|
||||
// int resolvedParagraphLevel = parts.at(2).toInt();
|
||||
|
||||
QVector<int> resolvedLevels;
|
||||
QList<QByteArray> levelParts = parts.at(3).split(' ');
|
||||
for (const auto &p : levelParts) {
|
||||
if (p == "x") {
|
||||
resolvedLevels += -1;
|
||||
} else {
|
||||
bool ok;
|
||||
resolvedLevels += p.toInt(&ok);
|
||||
QVERIFY(ok);
|
||||
}
|
||||
}
|
||||
|
||||
QVector<int> visualOrder;
|
||||
QList<QByteArray> orderParts = parts.at(4).split(' ');
|
||||
for (const auto &p : orderParts) {
|
||||
bool ok;
|
||||
visualOrder += p.toInt(&ok);
|
||||
QVERIFY(ok);
|
||||
}
|
||||
|
||||
const QByteArray nm = "line #" + QByteArray::number(linenum);
|
||||
QTest::newRow(nm.constData()) << data << paragraphDirection << resolvedLevels << visualOrder;
|
||||
}
|
||||
}
|
||||
|
||||
static void testBidiString(const QString &data, int paragraphDirection, const QVector<int> &resolvedLevels, const QVector<int> &visualOrder)
|
||||
{
|
||||
Q_UNUSED(resolvedLevels);
|
||||
|
||||
QTextEngine e(data, QFont());
|
||||
Qt::LayoutDirection pDir = Qt::LeftToRight;
|
||||
if (paragraphDirection == 1)
|
||||
pDir = Qt::RightToLeft;
|
||||
else if (paragraphDirection == 2)
|
||||
pDir = Qt::LayoutDirectionAuto;
|
||||
|
||||
e.option.setTextDirection(pDir);
|
||||
e.itemize();
|
||||
quint8 levels[1024];
|
||||
int visual[1024];
|
||||
int nitems = e.layoutData->items.size();
|
||||
int i;
|
||||
for (i = 0; i < nitems; ++i) {
|
||||
//qDebug("item %d bidiLevel=%d", i, e.items[i].analysis.bidiLevel);
|
||||
levels[i] = e.layoutData->items[i].analysis.bidiLevel;
|
||||
}
|
||||
e.bidiReorder(nitems, levels, visual);
|
||||
|
||||
QString visualString;
|
||||
for (i = 0; i < nitems; ++i) {
|
||||
QScriptItem &si = e.layoutData->items[visual[i]];
|
||||
QString sub;
|
||||
for (int j = si.position; j < si.position + e.length(visual[i]); ++j) {
|
||||
switch (data.at(j).direction()) {
|
||||
case QChar::DirLRE:
|
||||
case QChar::DirRLE:
|
||||
case QChar::DirLRO:
|
||||
case QChar::DirRLO:
|
||||
case QChar::DirPDF:
|
||||
case QChar::DirBN:
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
sub += data.at(j);
|
||||
}
|
||||
|
||||
// remove explicit embedding characters, as the test data has them removed as well
|
||||
sub.remove(QChar(0x202a));
|
||||
sub.remove(QChar(0x202b));
|
||||
sub.remove(QChar(0x202c));
|
||||
sub.remove(QChar(0x202d));
|
||||
sub.remove(QChar(0x202e));
|
||||
if (si.analysis.bidiLevel % 2) {
|
||||
// reverse sub
|
||||
QChar *a = sub.data();
|
||||
QChar *b = a + sub.length() - 1;
|
||||
while (a < b) {
|
||||
QChar tmp = *a;
|
||||
*a = *b;
|
||||
*b = tmp;
|
||||
++a;
|
||||
--b;
|
||||
}
|
||||
a = (QChar *)sub.unicode();
|
||||
b = a + sub.length();
|
||||
// while (a<b) {
|
||||
// *a = a->mirroredChar();
|
||||
// ++a;
|
||||
// }
|
||||
}
|
||||
visualString += sub;
|
||||
}
|
||||
QString expected;
|
||||
// qDebug() << "expected visual order";
|
||||
for (int i : visualOrder) {
|
||||
// qDebug() << " " << i << hex << data[i].unicode();
|
||||
expected.append(data[i]);
|
||||
}
|
||||
|
||||
QCOMPARE(visualString, expected);
|
||||
|
||||
}
|
||||
|
||||
void tst_QComplexText::bidiCharacterTest()
|
||||
{
|
||||
QFETCH(QString, data);
|
||||
QFETCH(int, paragraphDirection);
|
||||
QFETCH(QVector<int>, resolvedLevels);
|
||||
QFETCH(QVector<int>, visualOrder);
|
||||
|
||||
testBidiString(data, paragraphDirection, resolvedLevels, visualOrder);
|
||||
}
|
||||
|
||||
/*
    Maps a bidi class name as used in the test data (for example "L", "AN" or
    "PDI") to the UTF-16 code unit this test uses to represent that class.

    An unknown \a direction indicates broken or unsupported test data; it is
    reported through qFatal() together with the offending name rather than
    falling off the end of the function.
*/
ushort unicodeForDirection(const QByteArray &direction)
{
    // Built once; the table is immutable.
    static const struct {
        const char *string;
        ushort unicode;
    } dirToUnicode[] = {
        { "L", 0x41 },
        { "R", 0x5d0 },
        { "EN", 0x30 },
        { "ES", 0x2b },
        { "ET", 0x24 },
        { "AN", 0x660 },
        { "CS", 0x2c },
        { "B", QChar::ParagraphSeparator },
        { "S", 0x9 },
        { "WS", 0x20 },
        { "ON", 0x2a },
        { "LRE", 0x202a },
        { "LRO", 0x202d },
        { "AL", 0x627 },
        { "RLE", 0x202b },
        { "RLO", 0x202e },
        { "PDF", 0x202c },
        { "NSM", 0x300 },
        { "BN", 0xad },
        { "LRI", 0x2066 },
        { "RLI", 0x2067 },
        { "FSI", 0x2068 },
        { "PDI", 0x2069 }
    };

    for (const auto &e : dirToUnicode) {
        if (e.string == direction)
            return e.unicode;
    }

    qFatal("unicodeForDirection: unknown bidi class '%s'", direction.constData());
    return 0; // not reached; keeps compilers that do not treat qFatal() as noreturn quiet
}
|
||||
|
||||
void tst_QComplexText::bidiTest_data()
|
||||
{
|
||||
QTest::addColumn<QString>("data");
|
||||
QTest::addColumn<int>("paragraphDirection");
|
||||
QTest::addColumn<QVector<int>>("resolvedLevels");
|
||||
QTest::addColumn<QVector<int>>("visualOrder");
|
||||
|
||||
QString testFile = QFINDTESTDATA("data/BidiTest.txt");
|
||||
QFile f(testFile);
|
||||
QVERIFY(f.exists());
|
||||
|
||||
f.open(QIODevice::ReadOnly);
|
||||
|
||||
int linenum = 0;
|
||||
QVector<int> resolvedLevels;
|
||||
QVector<int> visualOrder;
|
||||
while (!f.atEnd()) {
|
||||
linenum++;
|
||||
|
||||
QByteArray line = f.readLine().simplified();
|
||||
if (line.startsWith('#') || line.isEmpty())
|
||||
continue;
|
||||
QVERIFY(!line.contains('#'));
|
||||
|
||||
if (line.startsWith("@Levels:")) {
|
||||
line = line.mid(strlen("@Levels:")).simplified();
|
||||
|
||||
resolvedLevels.clear();
|
||||
QList<QByteArray> levelParts = line.split(' ');
|
||||
for (const auto &p : levelParts) {
|
||||
if (p == "x") {
|
||||
resolvedLevels += -1;
|
||||
} else {
|
||||
bool ok;
|
||||
resolvedLevels += p.toInt(&ok);
|
||||
QVERIFY(ok);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
} else if (line.startsWith("@Reorder:")) {
|
||||
line = line.mid(strlen("@Reorder:")).simplified();
|
||||
|
||||
visualOrder.clear();
|
||||
QList<QByteArray> orderParts = line.split(' ');
|
||||
for (const auto &p : orderParts) {
|
||||
if (p.isEmpty())
|
||||
continue;
|
||||
bool ok;
|
||||
visualOrder += p.toInt(&ok);
|
||||
QVERIFY(ok);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
QList<QByteArray> parts = line.split(';');
|
||||
Q_ASSERT(parts.size() == 2);
|
||||
|
||||
QString data;
|
||||
QList<QByteArray> dataParts = parts.at(0).split(' ');
|
||||
for (const auto &p : dataParts) {
|
||||
ushort uc = unicodeForDirection(p);
|
||||
data += QChar(uc);
|
||||
}
|
||||
|
||||
int paragraphDirections = parts.at(1).toInt();
|
||||
|
||||
const QByteArray nm = "line #" + QByteArray::number(linenum);
|
||||
if (paragraphDirections & 1)
|
||||
QTest::newRow((nm + " (Auto)").constData()) << data << 2 << resolvedLevels << visualOrder;
|
||||
if (paragraphDirections & 2)
|
||||
QTest::newRow((nm + " (LTR)").constData()) << data << 0 << resolvedLevels << visualOrder;
|
||||
if (paragraphDirections & 4)
|
||||
QTest::newRow((nm + " (RTL)").constData()) << data << 1 << resolvedLevels << visualOrder;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void tst_QComplexText::bidiTest()
|
||||
{
|
||||
QFETCH(QString, data);
|
||||
QFETCH(int, paragraphDirection);
|
||||
QFETCH(QVector<int>, resolvedLevels);
|
||||
QFETCH(QVector<int>, visualOrder);
|
||||
|
||||
testBidiString(data, paragraphDirection, resolvedLevels, visualOrder);
|
||||
}
|
||||
|
||||
|
||||
|
||||
QTEST_MAIN(tst_QComplexText)
|
||||
#include "tst_qcomplextext.moc"
|
||||
|
|
Loading…
Reference in New Issue