Protobuf: store and re-emit unknown fields

Use the unknownEntries variable we so cleverly added in
QProtobufMessagePrivate to store the data for the unknown fields.

By using a bytes-to-count mapping we can easily store multiple repeated
entries of a message, which could happen in a repeated list with
multiple identical entries. Since we don't know if a field is actually
marked as _repeated_ or not (since it's an unknown field) we just have
to assume it is.

Fixes: QTBUG-101966
Change-Id: I1ebde5fdaebe2987a68de1632f136bb6606e718a
Reviewed-by: Alexey Edelev <alexey.edelev@qt.io>
This commit is contained in:
Mårten Nordheim 2022-10-05 19:11:23 +02:00
parent 660608d6b0
commit 81d3e8eb8c
8 changed files with 259 additions and 5 deletions

View File

@ -55,6 +55,11 @@ int QProtobufMessagePrivate::getPropertyIndex(QAnyStringView propertyName) const
});
}
void QProtobufMessagePrivate::storeUnknownEntry(QByteArrayView entry)
{
++unknownEntries[entry.toByteArray()];
}
/*!
Set the property \a propertyName to the value stored in \a value.

View File

@ -34,6 +34,7 @@ public:
const QMetaObject *metaObject = nullptr;
int getPropertyIndex(QAnyStringView propertyName) const;
void storeUnknownEntry(QByteArrayView entry);
static QProtobufMessagePrivate *get(QProtobufMessage *message) { return message->d_func(); }
static const QProtobufMessagePrivate *get(const QProtobufMessage *message)

View File

@ -131,6 +131,13 @@ private:
return lhs.m_it != other;
}
friend qint64 operator-(const QProtobufSelfcheckIterator &lhs,
const QProtobufSelfcheckIterator &rhs) noexcept
{
Q_ASSERT(lhs.m_containerBegin == rhs.m_containerBegin);
return lhs.m_it - rhs.m_it;
}
const QByteArrayView::const_iterator m_containerBegin;
const QByteArrayView::const_iterator m_containerEnd;
QByteArrayView::const_iterator m_it;

View File

@ -342,6 +342,11 @@ QByteArray QProtobufSerializer::serializeMessage(
QProtobufPropertyOrderingInfo(ordering, index)));
}
// Restore any unknown fields we have stored away:
const QProtobufMessagePrivate *messagePrivate = QProtobufMessagePrivate::get(message);
for (const auto &[bytes, occurrences] : messagePrivate->unknownEntries.asKeyValueRange())
result += bytes.repeated(occurrences);
return result;
}
@ -638,6 +643,7 @@ bool QProtobufSerializerPrivate::deserializeProperty(
//Each iteration we expect iterator is setup to beginning of next chunk
int fieldNumber = QtProtobufPrivate::NotUsedFieldIndex;
QtProtobuf::WireTypes wireType = QtProtobuf::WireTypes::Unknown;
const QProtobufSelfcheckIterator itBeforeHeader = it; // copy this, we may need it later
if (!QProtobufSerializerPrivate::decodeHeader(it, fieldNumber, wireType)) {
setDeserializationError(
QAbstractProtobufSerializer::InvalidHeaderError,
@ -648,11 +654,20 @@ bool QProtobufSerializerPrivate::deserializeProperty(
int index = ordering.indexOfFieldNumber(fieldNumber);
if (index == -1) {
// This is an unknown field.
// Currently we just skip it, but starting with protobuf 3.5 we should
// preserve the unknown field and include it if the message is
// serialized again. TBD.
QProtobufSerializerPrivate::skipSerializedFieldBytes(it, wireType);
// This is an unknown field, it may have been added in a later revision
// of the Message we are currently deserializing. We must store the
// bytes for this field and re-emit them later if this message is
// serialized again.
qsizetype length = std::distance(itBeforeHeader, it); // size of header
length += QProtobufSerializerPrivate::skipSerializedFieldBytes(it, wireType);
if (!it.isValid()) {
setUnexpectedEndOfStreamError();
return false;
}
QProtobufMessagePrivate *messagePrivate = QProtobufMessagePrivate::get(message);
messagePrivate->storeUnknownEntry(QByteArrayView(itBeforeHeader.data(), length));
return true;
}

View File

@ -161,6 +161,18 @@ qt6_add_protobuf(tst_protobuf_non_packed_repeatedtypes
)
qt_autogen_tools_initial_setup(tst_protobuf_non_packed_repeatedtypes)
qt_internal_add_test(tst_protobuf_unknown_field
SOURCES
tst_protobuf_unknown_field.cpp
LIBRARIES
Qt::Test
Qt::Protobuf)
qt6_add_protobuf(tst_protobuf_unknown_field
PROTO_FILES
proto/unknownfield.proto
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/qt_protobuf_generated"
)
qt_internal_add_test(tst_protobuf_raw_serializers
SOURCES

View File

@ -0,0 +1,60 @@
# Copyright (C) 2022 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
# This script is run to generate test data for the unknown field tests,
# see tst_protobuf_unknown_field.cpp
# NOTE: "protoc" must be installed and in path, this script takes care of
# invoking the tool to generate the generated python output.
# ALSO NOTE: paths assume it is run from the directory in which it is stored.
import os, os.path
import shutil
assert os.path.exists('unknownfield.proto'), \
"I need to be run from the directory that unknownfield.proto lives in"
# Generates unknownfield_pb2...
os.system('protoc --python_out=. --proto_path=. ./unknownfield.proto')
# ... which we subsequently include:
import unknownfield_pb2
msg = unknownfield_pb2.StringMessage()
msg.aaa = 2
msg.timestamp = 42
msg.stringField = "Hello World"
print(msg.SerializeToString().hex())
msg = unknownfield_pb2.LargeIndexStringMessage()
msg.aaa = 2
msg.timestamp = 42
msg.stringField = "Hello World"
print(msg.SerializeToString().hex())
msg = unknownfield_pb2.IntMessage()
msg.aaa = 2
msg.timestamp = 42
msg.intField = 242
print(msg.SerializeToString().hex())
msg = unknownfield_pb2.MapMessage()
msg.aaa = 2
msg.timestamp = 42
msg.mapField[1] = 2
msg.mapField[2] = 4
msg.mapField[3] = 6
print(msg.SerializeToString().hex())
msg = unknownfield_pb2.RepeatedMessage()
msg.aaa = 2
msg.timestamp = 42
msg.repeatedField.extend([1, 1, 2, 3, 5, -5, -3, -2, -1, -1])
print(msg.SerializeToString().hex())
os.remove("unknownfield_pb2.py")
shutil.rmtree("__pycache__")

View File

@ -0,0 +1,42 @@
syntax = "proto3";
package qtproto.tests;
// Generate the message using one of the messages with all three fields defined
// in e.g. Python and then deserialize it with PartialMessage.
message StringMessage {
int32 aaa = 1;
int64 timestamp = 2;
string stringField = 3;
}
message LargeIndexStringMessage {
int32 aaa = 1;
int64 timestamp = 2;
string stringField = 536870900;
}
message IntMessage {
int32 aaa = 1;
int64 timestamp = 2;
int32 intField = 3;
}
message MapMessage {
int32 aaa = 1;
int64 timestamp = 2;
map<int32, int32> mapField = 3;
}
message RepeatedMessage {
int32 aaa = 1;
int64 timestamp = 2;
repeated int32 repeatedField = 3;
}
message PartialMessage {
int32 aaa = 1;
int64 timestamp = 2;
// No third field
}

View File

@ -0,0 +1,112 @@
// Copyright (C) 2022 The Qt Company Ltd.
// Copyright (C) 2022 Alexey Edelev <semlanik@gmail.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QtTest/qtest.h>
#include <QtCore/qhash.h>
#include <QtProtobuf/qprotobufserializer.h>
#include "unknownfield.qpb.h"
// NOTE: all test data in this file is generated by generate_unknownfield_data.py
// located in the ./proto folder!
class tst_protobuf_unknown_field : public QObject
{
Q_OBJECT
private slots:
void unknownStringField();
void unknownIntField();
void unknownMapField();
void unknownRepeatedField();
};
// Deserialize a full known message into a type for which one of the fields is
// unknown, then serialize the message back to bytes.
void partialMessageRoundtrip_common(QByteArrayView data, QByteArray *out)
{
Q_ASSERT(out);
qtproto::tests::PartialMessage pm;
QProtobufSerializer serializer;
pm.deserialize(&serializer, data);
QCOMPARE(pm.aaa(), 2);
QCOMPARE(pm.timestamp(), 42);
pm.setTimestamp(64); // update one field, will not affect other fields
*out = pm.serialize(&serializer);
}
template<typename TargetType>
void partialMessageRoundtrip(QByteArrayView data, TargetType *targetMessage)
{
Q_ASSERT(targetMessage);
QByteArray roundtrippedProto;
partialMessageRoundtrip_common(data, &roundtrippedProto);
QProtobufSerializer serializer;
targetMessage->deserialize(&serializer, roundtrippedProto);
QCOMPARE(targetMessage->aaa(), 2);
QCOMPARE(targetMessage->timestamp(), 64);
}
void tst_protobuf_unknown_field::unknownStringField()
{
// "2, 42, Hello World"
QByteArray payload = QByteArray::fromHex("0802102a1a0b48656c6c6f20576f726c64");
qtproto::tests::StringMessage sm;
partialMessageRoundtrip<>(payload, &sm);
QCOMPARE(sm.stringField(), u"Hello World");
// "2, 42, Hello World"
// Same as above but the field number of the string is huge
payload = QByteArray::fromHex("0802102aa2ffffff0f0b48656c6c6f20576f726c64");
qtproto::tests::LargeIndexStringMessage lsm;
partialMessageRoundtrip<>(payload, &lsm);
QCOMPARE(lsm.stringField(), u"Hello World");
}
void tst_protobuf_unknown_field::unknownIntField()
{
// "2, 42, 242"
QByteArray payload = QByteArray::fromHex("0802102a18f201");
qtproto::tests::IntMessage im;
partialMessageRoundtrip<>(payload, &im);
QCOMPARE(im.intField(), 242);
}
void tst_protobuf_unknown_field::unknownMapField()
{
// "2, 42, {1: 2, 2: 4, 3: 6}"
QByteArray payload = QByteArray::fromHex("0802102a1a04080110021a04080310061a0408021004");
qtproto::tests::MapMessage mm;
partialMessageRoundtrip<>(payload, &mm);
QHash<QtProtobuf::int32, QtProtobuf::int32> expected{
{ 1, 2 },
{ 2, 4 },
{ 3, 6 },
};
QCOMPARE(mm.mapField(), expected);
}
void tst_protobuf_unknown_field::unknownRepeatedField()
{
// "2, 42, [1, 1, 2, 3, 5, -5, -3, -2, -1, -1]"
QByteArray payload = QByteArray::fromHex("0802102a1a370101020305fbffffffffffffffff01fdfffffffff"
"fffffff01feffffffffffffffff01ffffffffffffffffff01ffff"
"ffffffffffffff01");
qtproto::tests::RepeatedMessage mm;
partialMessageRoundtrip<>(payload, &mm);
QList<QtProtobuf::int32> expected{ 1, 1, 2, 3, 5, -5, -3, -2, -1, -1 };
QCOMPARE(mm.repeatedField(), expected);
}
QTEST_MAIN(tst_protobuf_unknown_field)
#include "tst_protobuf_unknown_field.moc"