Add support for H.264/AVC

This commit is contained in:
Aaro Altonen 2020-09-25 10:23:12 +03:00
parent d60b240341
commit 5374a79a28
10 changed files with 449 additions and 2 deletions

View File

@ -1,6 +1,6 @@
# uvgRTP
uvgRTP is an RTP library written in C++ with a focus on usability and efficiency. It features a very intuitive and easy-to-use API, built-in support for HEVC and Opus, SRTP and ZRTP. In ideal conditions it is able to receive a goodput of 600 MB/s for HEVC stream.
uvgRTP is an RTP library written in C++ with a focus on usability and efficiency. It features a very intuitive and easy-to-use API, built-in support for HEVC, AVC, Opus, SRTP and ZRTP. In ideal conditions it is able to receive a goodput of 600 MB/s for HEVC stream.
uvgRTP is licensed under the permissive BSD 2-Clause License
@ -9,6 +9,7 @@ For SRTP/ZRTP support, uvgRTP uses [Crypto++](https://www.cryptopp.com/)
Supported specifications:
* [RFC 3550: RTP: A Transport Protocol for Real-Time Applications](https://tools.ietf.org/html/rfc3550)
* [RFC 7798: RTP Payload Format for High Efficiency Video Coding (HEVC)](https://tools.ietf.org/html/rfc7798)
* [RFC 6184: RTP Payload Format for H.264 Video](https://tools.ietf.org/html/rfc6184)
* [RFC 7587: RTP Payload Format for the Opus Speech and Audio Codec](https://tools.ietf.org/html/rfc7587)
* [RFC 3711: The Secure Real-time Transport Protocol (SRTP)](https://tools.ietf.org/html/rfc3711)
* [RFC 6189: ZRTP: Media Path Key Agreement for Unicast Secure RTP](https://tools.ietf.org/html/rfc6189)
@ -18,6 +19,7 @@ Based on Marko Viitanen's [fRTPlib](https://github.com/fador/fRTPlib)
## Notable features
* Builtin support for:
* AVC
* HEVC
* Opus
* SRTP/ZRTP
@ -39,5 +41,5 @@ See [examples](examples/) directory for different uvgRTP examples
Please cite the following paper for uvgRTP:
```
A. Altonen, J. Räsänen, J. Laitinen, M. Viitanen, and J. Vanne, “Open-source RTP library for high-speed 4K HEVC video streaming,” Accepted to IEEE Int. Workshop on Multimedia Signal Processing, Tampere, Finland, Sept. 2020.
A. Altonen, J. Räsänen, J. Laitinen, M. Viitanen, and J. Vanne, “Open-source RTP library for high-speed 4K HEVC video streaming,” in Proc. IEEE Int. Workshop on Multimedia Signal Processing, Tampere, Finland, Sept. 2020.
```

49
include/formats/h264.hh Normal file
View File

@ -0,0 +1,49 @@
#pragma once
#include "frame.hh"
#include "queue.hh"
#include "formats/h26x.hh"
namespace uvg_rtp {
namespace formats {
/* Media-specific headers of one frame queue transaction for H.264.
 *
 * The frame queue allocates one of these per transaction when the payload
 * format is RTP_FORMAT_H264, and h264::push_nal_unit() fills it in when a
 * NAL unit has to be split into Fragmentation Units (FU). Keeping the bytes
 * here lets the queued buffers point at storage that outlives the stack
 * frame of push_nal_unit(). */
struct h264_headers {
/* first byte of every fragment; written once per NAL unit */
uint8_t fu_indicator[uvg_rtp::frame::HEADER_SIZE_H264_FU];
/* there are three types of Fragmentation Unit headers:
* - header for the first fragment
* - header for all middle fragments
* - header for the last fragment
*
* they are stored in that order at indices 0, 1 and 2 */
uint8_t fu_headers[3 * uvg_rtp::frame::HEADER_SIZE_H264_FU];
};
/* H.264/AVC media format.
 *
 * Sending: push_nal_unit() queues one NAL unit, splitting it into
 * Fragmentation Units when it does not fit into a single RTP payload.
 * Receiving: packet_handler() is installed as an auxiliary handler of the
 * packet dispatcher and reassembles fragmented NAL units. */
class h264 : public h26x {
public:
/* All three arguments are forwarded unchanged to the h26x base class */
h264(uvg_rtp::socket *socket, uvg_rtp::rtp *rtp, int flags);
~h264();
/* Packet handler for RTP frames that transport H.264 bitstream
*
* If "frame" is not a fragmentation unit, the packet is returned
* to user immediately.
*
* If "frame" is a fragmentation unit, packet handler checks if
* it has received all fragments of a complete H.264 NAL unit and if
* so, it merges all fragments into a complete NAL unit and returns
* the NAL unit to user. If the NAL unit is not complete, packet
* handler holds onto the frame and waits for other fragments to arrive.
*
* "arg" is not used by this handler.
*
* Return RTP_OK if the packet was successfully handled
* Return RTP_PKT_READY if "frame" contains an RTP that can be returned to user
* Return RTP_PKT_NOT_HANDLED if the packet is not handled by this handler
* Return RTP_PKT_MODIFIED if the packet was modified but should be forwarded to other handlers
* Return RTP_GENERIC_ERROR if the packet was corrupted in some way */
static rtp_error_t packet_handler(void *arg, int flags, frame::rtp_frame **frame);
protected:
/* Queue one NAL unit of "data_len" bytes starting at "data" (NAL header first).
*
* When "more" is true the NAL unit is only queued and RTP_NOT_READY is
* returned; when it is false the frame queue is flushed and the result
* of the flush is returned.
*
* Return RTP_INVALID_VALUE if "data_len" is 3 bytes or less */
rtp_error_t push_nal_unit(uint8_t *data, size_t data_len, bool more);
};
};
};

View File

@ -21,6 +21,8 @@ namespace uvg_rtp {
/* Header sizes in bytes; the H.264/H.265 values are used as uint8_t array
 * dimensions and as byte offsets into payloads by the format code */
enum HEADER_SIZES {
HEADER_SIZE_RTP = 12,
HEADER_SIZE_OPUS = 1,
/* H.264: NAL unit header and Fragmentation Unit (FU) header */
HEADER_SIZE_H264_NAL = 1,
HEADER_SIZE_H264_FU = 1,
/* H.265: NAL unit header and Fragmentation Unit (FU) header */
HEADER_SIZE_H265_NAL = 2,
HEADER_SIZE_H265_FU = 1,
};

View File

@ -61,6 +61,7 @@ typedef enum RTP_ERROR {
/* Media formats known to the library.
 *
 * The value selects the format-specific media object, packet handler and
 * frame queue headers, and the RTP clock rate.
 *
 * NOTE(review): the numeric values look like RTP payload type numbers; if
 * they end up in the RTP header, 95 lies below the dynamic range 96-127 of
 * RFC 3551 -- confirm that this is intended. */
typedef enum RTP_FORMAT {
RTP_FORMAT_GENERIC = 0,
RTP_FORMAT_H264 = 95,
RTP_FORMAT_H265 = 96,
RTP_FORMAT_OPUS = 97,
} rtp_format_t;

104
src/formats/h264.cc Normal file
View File

@ -0,0 +1,104 @@
#ifdef _WIN32
#else
#include <sys/socket.h>
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <unordered_map>
#include <queue>
#include "debug.hh"
#include "queue.hh"
#include "formats/h264.hh"
/* Queue one H.264 NAL unit for sending.
 *
 * Small NAL units are queued as a single message. Larger ones are split into
 * FU-A fragments (RFC 6184, section 5.8): every fragment starts with the
 * FU indicator and an FU header, followed by a slice of the NAL unit payload
 * (the original one-byte NAL header is not repeated, it is carried by the
 * two FU bytes).
 *
 * data      pointer to the NAL unit, NAL header first
 * data_len  size of the NAL unit in bytes
 * more      true if more NAL units of the same frame follow; the queue is
 *           then not flushed and RTP_NOT_READY is returned
 *
 * Return RTP_INVALID_VALUE if "data" is nullptr or "data_len" is 3 or less
 * Return RTP_NOT_READY if the NAL unit was queued and "more" is true
 * Return the status of enqueue_message() if queueing failed
 * Return the status of flush_queue() otherwise */
rtp_error_t uvg_rtp::formats::h264::push_nal_unit(uint8_t *data, size_t data_len, bool more)
{
    if (data == nullptr || data_len <= 3)
        return RTP_INVALID_VALUE;

    /* NAL unit type that marks an FU-A packet (RFC 6184) */
    const uint8_t H264_PKT_FU_A = 28;

    /* H.264 NAL header is one byte: F (1 bit) | NRI (2 bits) | type (5 bits) */
    const uint8_t nal_type = data[0] & 0x1f;

    rtp_error_t ret             = RTP_OK;
    const size_t   payload_size = rtp_ctx_->get_payload_size();

    /* NOTE(review): the "3" in both size checks is inherited from the HEVC
     * implementation; it is kept as is because the meaning of
     * get_payload_size() is not visible here -- confirm for H.264 */
    if (data_len - 3 <= payload_size) {
        if ((ret = fqueue_->enqueue_message(data, data_len)) != RTP_OK) {
            LOG_ERROR("enqeueu failed for small packet");
            return ret;
        }

        if (more)
            return RTP_NOT_READY;
        return fqueue_->flush_queue();
    }

    /* The NAL unit does not fit into one RTP payload so it must be split into smaller RTP frames.
     * Because we don't know if the SCD is enabled and thus cannot make any assumptions about the
     * life time of current stack, we need to store the FU indicator and FU headers to the frame
     * queue transaction.
     *
     * This can be done by asking a handle to current transaction's buffer vectors.
     *
     * During Connection initialization, the frame queue was given AVC as the payload format so the
     * transaction also contains our media-specific headers */
    auto buffers = fqueue_->get_buffer_vector();
    auto headers = (uvg_rtp::formats::h264_headers *)fqueue_->get_media_headers();

    /* FU indicator: F and NRI come from the original NAL header, type is FU-A */
    headers->fu_indicator[0] = (uint8_t)((data[0] & 0xe0) | H264_PKT_FU_A);

    /* FU header: S | E | R | original NAL type */
    headers->fu_headers[0] = (uint8_t)((1 << 7) | nal_type); /* first fragment, S bit */
    headers->fu_headers[1] = nal_type;                       /* middle fragments */
    headers->fu_headers[2] = (uint8_t)((1 << 6) | nal_type); /* last fragment, E bit */

    buffers.push_back(std::make_pair(sizeof(headers->fu_indicator), headers->fu_indicator));
    buffers.push_back(std::make_pair(sizeof(uint8_t),               &headers->fu_headers[0]));
    buffers.push_back(std::make_pair(payload_size,                  nullptr));

    /* the NAL header is not sent as such, skip it */
    size_t data_pos  = uvg_rtp::frame::HEADER_SIZE_H264_NAL;
    size_t data_left = data_len - uvg_rtp::frame::HEADER_SIZE_H264_NAL;

    while (data_left > payload_size) {
        buffers.at(2).first  = payload_size;
        buffers.at(2).second = &data[data_pos];

        if ((ret = fqueue_->enqueue_message(buffers)) != RTP_OK) {
            LOG_ERROR("Queueing the message failed!");
            fqueue_->deinit_transaction();
            return ret;
        }

        data_pos  += payload_size;
        data_left -= payload_size;

        /* from now on, use the FU header meant for middle fragments */
        buffers.at(1).second = &headers->fu_headers[1];
    }

    /* use the FU header meant for the last fragment */
    buffers.at(1).second = &headers->fu_headers[2];
    buffers.at(2).first  = data_left;
    buffers.at(2).second = &data[data_pos];

    if ((ret = fqueue_->enqueue_message(buffers)) != RTP_OK) {
        LOG_ERROR("Failed to send AVC frame!");
        fqueue_->deinit_transaction();
        return ret;
    }

    if (more)
        return RTP_NOT_READY;
    return fqueue_->flush_queue();
}
/* Construct the H.264 media object.
 *
 * No H.264-specific state is set up here: socket, RTP context and flags
 * are passed on to the h26x base class as they are. */
uvg_rtp::formats::h264::h264(uvg_rtp::socket *socket, uvg_rtp::rtp *rtp, int flags)
    : h26x(socket, rtp, flags)
{}
/* The class adds no members of its own, so there is nothing to release here */
uvg_rtp::formats::h264::~h264() = default;

View File

@ -0,0 +1,262 @@
#include <cstdint>
#include <cstring>
#include <iostream>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include "debug.hh"
#include "queue.hh"
#include "formats/h264.hh"
#include "formats/h265.hh"
#define RTP_FRAME_MAX_DELAY 100
#define INVALID_SEQ 0x13371338
#define INVALID_TS 0xffffffff
#define RTP_HDR_SIZE 12
#define NAL_HDR_SIZE 1
/* Classification of a received RTP payload, as returned by __get_frag() */
enum FRAG_TYPES {
FT_INVALID = -2, /* invalid combination of S and E bits */
FT_NOT_FRAG = -1, /* frame doesn't contain H.264 fragment */
FT_START = 1, /* frame contains a fragment with S bit set */
FT_MIDDLE = 2, /* frame is fragment but not S or E fragment */
FT_END = 3, /* frame contains a fragment with E bit set */
FT_STAP_A = 4 /* Single-Time Aggregation Packet, Type A */
};
/* Coarse classification of a NAL unit, as returned by __get_nal();
 * the packet handler only needs to tell intra and inter frames apart */
enum NAL_TYPES {
NT_INTRA = 0x00,
NT_INTER = 0x01,
NT_OTHER = 0xff /* everything that is neither intra nor inter */
};
/* Frames that are being reassembled, keyed by RTP timestamp */
typedef std::unordered_map<uint32_t, struct hevc_info> frame_info_t;
/* Reassembly state of one fragmented frame.
 * NOTE(review): the name is a leftover from the HEVC handler; in this file
 * the struct holds H.264 fragments. */
struct hevc_info {
/* clock reading when the first fragment is received */
uvg_rtp::clock::hrc::hrc_t sframe_time;
/* sequence number of the frame with s-bit (INVALID_SEQ until it is seen) */
uint32_t s_seq;
/* sequence number of the frame with e-bit (INVALID_SEQ until it is seen) */
uint32_t e_seq;
/* how many fragments have been received */
size_t pkts_received;
/* total size of all fragments, FU indicator and FU header excluded */
size_t total_size;
/* map of frame's fragments,
* allows out-of-order insertion and loop-through in order */
std::map<uint16_t, uvg_rtp::frame::rtp_frame *> fragments;
};
/* Classify the payload of "frame" according to RFC 6184.
 *
 * The low five bits of the first payload byte hold the packet type:
 * 24 is a STAP-A aggregation packet and 28 is an FU-A fragment. For an
 * FU-A the second byte is the FU header whose two top bits are the
 * S (start) and E (end) flags.
 *
 * Return FT_STAP_A for an aggregation packet
 * Return FT_NOT_FRAG if the payload is not an FU-A fragment
 * Return FT_INVALID if the fragment is too short to contain the FU header
 *        or if both S and E bits are set
 * Return FT_START, FT_END or FT_MIDDLE otherwise */
static int __get_frag(uvg_rtp::frame::rtp_frame *frame)
{
    const uint8_t PKT_TYPE_STAP_A = 24;
    const uint8_t PKT_TYPE_FU_A   = 28;

    const uint8_t pkt_type = frame->payload[0] & 0x1f;

    if (pkt_type == PKT_TYPE_STAP_A)
        return FT_STAP_A;

    if (pkt_type != PKT_TYPE_FU_A)
        return FT_NOT_FRAG;

    /* the FU header must be present before its bits can be inspected */
    if (frame->payload_len < 2)
        return FT_INVALID;

    const bool first_frag = (frame->payload[1] & 0x80) != 0;
    const bool last_frag  = (frame->payload[1] & 0x40) != 0;

    if (first_frag && last_frag)
        return FT_INVALID;

    if (first_frag)
        return FT_START;

    if (last_frag)
        return FT_END;

    return FT_MIDDLE;
}
/* Handle a Single-Time Aggregation Packet (STAP-A).
 *
 * The packet is not unpacked, it is handed to the user as it was received.
 *
 * TODO: This requires additional support from packet dispatcher.
 * Auxiliary handlers must be able to return more than one packet
 * or auxiliary handlers must provide additional hooking function
 * for the pkt dispatcher so it can query all received packets */
static rtp_error_t __handle_stap_a(uvg_rtp::frame::rtp_frame **frame)
{
    /* the frame is intentionally left untouched */
    (void)frame;

    return RTP_PKT_READY;
}
/* Classify the NAL unit carried by an FU-A fragment.
 *
 * The type of the fragmented NAL unit is stored in the low five bits of the
 * FU header, i.e. the second payload byte. In H.264, type 5 is a slice of an
 * IDR picture and type 1 is a slice of a non-IDR picture.
 *
 * The result is only meaningful for FU-A fragments; for any other payload
 * the second byte is not an FU header.
 *
 * Return NT_INTRA, NT_INTER or NT_OTHER (also when the payload is too short
 * to contain an FU header) */
static inline uint8_t __get_nal(uvg_rtp::frame::rtp_frame *frame)
{
    if (frame->payload_len < 2)
        return NT_OTHER;

    switch (frame->payload[1] & 0x1f) {
        case 5:  return NT_INTRA;
        case 1:  return NT_INTER;
        default: break;
    }

    return NT_OTHER;
}
/* Tell whether the frame described by "hinfo" has been incomplete for too long:
 * true once at least RTP_FRAME_MAX_DELAY has elapsed since "sframe_time" */
static inline bool __frame_late(hevc_info& hinfo)
{
    const auto elapsed = uvg_rtp::clock::hrc::diff_now(hinfo.sframe_time);

    return elapsed >= RTP_FRAME_MAX_DELAY;
}
/* Discard the partially received frame with timestamp "ts": every stored
 * fragment is deallocated and the frame's entry is removed from "finfo".
 *
 * "ts" must be present in "finfo", otherwise at() throws std::out_of_range */
static void __drop_frame(frame_info_t& finfo, uint32_t ts)
{
    auto& doomed = finfo.at(ts);

    /* sequence numbers are logged as 16-bit values */
    const uint16_t first_seq = doomed.s_seq;
    const uint16_t last_seq  = doomed.e_seq;

    LOG_INFO("Dropping frame %u, %u - %u", ts, first_seq, last_seq);

    for (auto it = doomed.fragments.begin(); it != doomed.fragments.end(); ++it)
        (void)uvg_rtp::frame::dealloc_frame(it->second);

    finfo.erase(ts);
}
/* Packet handler for RTP frames that transport H.264 bitstream.
 *
 * Payloads that are not FU-A fragments are returned to the user right away.
 * Fragments are stored per RTP timestamp; once the start fragment, the end
 * fragment and everything between them has arrived, the fragments are merged
 * into one frame whose payload is the original NAL unit (one-byte NAL header
 * followed by the fragment data in sequence number order).
 *
 * arg    unused
 * flags  unused
 * out    in: the received frame, out: the merged frame when RTP_PKT_READY is
 *        returned for a fragmented NAL unit, nullptr when the frame was
 *        discarded because of an error
 *
 * Return RTP_OK if the fragment was stored (or its frame was dropped as late)
 * Return RTP_PKT_READY if "*out" can be returned to user
 * Return RTP_GENERIC_ERROR if the packet is invalid or belongs to a frame
 *        that has already been dropped
 *
 * NOTE(review): the reassembly state is function-static and therefore shared
 * by every stream that uses this handler, and "dropped" is never pruned.
 * NOTE(review): fragments are ordered by their 16-bit sequence number, so a
 * frame whose fragments span a sequence number wrap-around is merged in the
 * wrong order. */
rtp_error_t uvg_rtp::formats::h264::packet_handler(void *arg, int flags, uvg_rtp::frame::rtp_frame **out)
{
    (void)arg;
    (void)flags;

    static frame_info_t finfo;
    static std::unordered_set<uint32_t> dropped;

    if (out == nullptr || *out == nullptr)
        return RTP_GENERIC_ERROR;

    const bool enable_idelay = false;

    /* Use "intra" to keep track of intra frames
     *
     * If uvgRTP is in the process of receiving fragments of an incomplete intra frame,
     * "intra" shall be the timestamp value of that intra frame.
     * This means that when we're receiving packets out of order and an inter frame is complete
     * while "intra" contains value other than INVALID_TS, we drop the inter frame and wait for
     * the intra frame to complete.
     *
     * If "intra" contains INVALID_TS and all packets of an inter frame have been received,
     * the inter frame is returned to user. If intra contains a value other than INVALID_TS
     * (meaning an intra frame is in progress) and a new intra frame is received, the old intra frame
     * pointed to by "intra" and new intra frame shall take the place of active intra frame
     *
     * NOTE(review): "intra" is a plain local, so it does not survive between calls; this is
     * harmless only as long as "enable_idelay" stays false */
    uint32_t intra = INVALID_TS;

    /* every fragment starts with FU indicator + FU header */
    const size_t H264_HEADER_SIZE = 2 * uvg_rtp::frame::HEADER_SIZE_H264_FU;

    uvg_rtp::frame::rtp_frame *frame = *out;

    const uint32_t c_ts     = frame->header.timestamp;
    const uint32_t c_seq    = frame->header.seq;
    const int      frag_type = __get_frag(frame);
    const uint8_t  nal_type  = __get_nal(frame);

    if (frag_type == FT_STAP_A)
        return __handle_stap_a(out);

    if (frag_type == FT_NOT_FRAG)
        return RTP_PKT_READY;

    /* a fragment shorter than its own headers would make the size arithmetic below wrap around */
    if (frag_type == FT_INVALID || frame->payload_len < H264_HEADER_SIZE) {
        LOG_WARN("invalid frame received!");
        (void)uvg_rtp::frame::dealloc_frame(*out);
        *out = nullptr;
        return RTP_GENERIC_ERROR;
    }

    const size_t frag_size = frame->payload_len - H264_HEADER_SIZE;

    auto entry = finfo.find(c_ts);

    /* initialize new frame */
    if (entry == finfo.end()) {
        /* make sure we haven't discarded the frame "c_ts" before */
        if (dropped.find(c_ts) != dropped.end()) {
            LOG_WARN("packet belonging to a dropped frame was received!");
            /* release the packet like the invalid-frame path does, nobody else owns it */
            (void)uvg_rtp::frame::dealloc_frame(*out);
            *out = nullptr;
            return RTP_GENERIC_ERROR;
        }

        /* drop old intra if a new one is received */
        if (nal_type == NT_INTRA) {
            if (intra != INVALID_TS && enable_idelay) {
                __drop_frame(finfo, intra);
                dropped.insert(intra);
            }
            intra = c_ts;
        }

        auto& fresh = finfo[c_ts];

        fresh.s_seq           = (frag_type == FT_START) ? c_seq : INVALID_SEQ;
        fresh.e_seq           = (frag_type == FT_END)   ? c_seq : INVALID_SEQ;
        fresh.sframe_time     = uvg_rtp::clock::hrc::now();
        fresh.total_size      = frag_size;
        fresh.pkts_received   = 1;
        fresh.fragments[c_seq] = frame;

        return RTP_OK;
    }

    auto& info = entry->second;

    info.fragments[c_seq] = frame;
    info.pkts_received   += 1;
    info.total_size      += frag_size;

    if (frag_type == FT_START)
        info.s_seq = c_seq;

    if (frag_type == FT_END)
        info.e_seq = c_seq;

    if (info.s_seq != INVALID_SEQ && info.e_seq != INVALID_SEQ) {
        size_t received = 0;
        const size_t s_seq = info.s_seq;
        const size_t e_seq = info.e_seq;

        /* number of sequence numbers from s_seq to e_seq, wrap-around included */
        if (s_seq > e_seq)
            received = 0xffff - s_seq + e_seq + 2;
        else
            received = e_seq - s_seq + 1;

        /* we've received every fragment and the frame can be reconstructed */
        if (received == info.pkts_received) {
            /* intra is still in progress, do not return the inter */
            if (nal_type == NT_INTER && intra != INVALID_TS && enable_idelay) {
                __drop_frame(finfo, c_ts);
                dropped.insert(c_ts);
                return RTP_OK;
            }

            uvg_rtp::frame::rtp_frame *complete = uvg_rtp::frame::alloc_rtp_frame();

            complete->payload_len = info.total_size + NAL_HDR_SIZE;
            complete->payload     = new uint8_t[complete->payload_len];

            std::memcpy(&complete->header, &frame->header, RTP_HDR_SIZE);

            /* rebuild the NAL header: F and NRI bits come from the FU indicator,
             * the NAL unit type from the FU header (RFC 6184, section 5.8) */
            complete->payload[0] = static_cast<uint8_t>(
                (frame->payload[0] & 0xe0) | (frame->payload[1] & 0x1f)
            );

            size_t fptr = NAL_HDR_SIZE;

            /* "frame" itself is one of the fragments and is released here too */
            for (auto& fragment : info.fragments) {
                const size_t chunk = fragment.second->payload_len - H264_HEADER_SIZE;

                std::memcpy(
                    &complete->payload[fptr],
                    &fragment.second->payload[H264_HEADER_SIZE],
                    chunk
                );
                fptr += chunk;
                (void)uvg_rtp::frame::dealloc_frame(fragment.second);
            }

            if (nal_type == NT_INTRA)
                intra = INVALID_TS;

            *out = complete;
            finfo.erase(entry);
            return RTP_PKT_READY;
        }
    }

    if (__frame_late(info)) {
        if (nal_type != NT_INTRA || !enable_idelay) {
            __drop_frame(finfo, c_ts);
            dropped.insert(c_ts);
        }
    }

    return RTP_OK;
}

View File

@ -1,6 +1,8 @@
SOURCES += \
src/formats/media.cc \
src/formats/h26x.cc \
src/formats/h264.cc \
src/formats/h264_pkt_handler.cc \
src/formats/h265.cc \
src/formats/h265_pkt_handler.cc \
src/formats/hevc_recv_optimistic.cc

View File

@ -5,6 +5,7 @@
#include "media_stream.hh"
#include "random.hh"
#include "formats/h264.hh"
#include "formats/h265.hh"
#define INVALID_TS UINT64_MAX
@ -140,6 +141,15 @@ rtp_error_t uvg_rtp::media_stream::init()
);
break;
case RTP_FORMAT_H264:
media_ = new uvg_rtp::formats::h264(socket_, rtp_, ctx_config_.flags);
pkt_dispatcher_->install_aux_handler(
rtp_handler_key_,
nullptr,
dynamic_cast<uvg_rtp::formats::h264 *>(media_)->packet_handler
);
break;
case RTP_FORMAT_OPUS:
case RTP_FORMAT_GENERIC:
media_ = new uvg_rtp::formats::media(socket_, rtp_, ctx_config_.flags);

View File

@ -12,6 +12,7 @@
#include "queue.hh"
#include "random.hh"
#include "formats/h264.hh"
#include "formats/h265.hh"
uvg_rtp::frame_queue::frame_queue(uvg_rtp::socket *socket, uvg_rtp::rtp *rtp, int flags):
@ -57,6 +58,10 @@ rtp_error_t uvg_rtp::frame_queue::init_transaction()
active_->rtp_headers = new uvg_rtp::frame::rtp_header[max_mcount_];
switch (rtp_->get_payload()) {
case RTP_FORMAT_H264:
active_->media_headers = new uvg_rtp::formats::h264_headers;
break;
case RTP_FORMAT_H265:
active_->media_headers = new uvg_rtp::formats::h265_headers;
break;
@ -136,6 +141,11 @@ rtp_error_t uvg_rtp::frame_queue::destroy_transaction(uvg_rtp::transaction_t *t)
t->rtp_headers = nullptr;
switch (rtp_->get_payload()) {
case RTP_FORMAT_H264:
delete (uvg_rtp::formats::h264_headers *)t->media_headers;
t->media_headers = nullptr;
break;
case RTP_FORMAT_H265:
delete (uvg_rtp::formats::h265_headers *)t->media_headers;
t->media_headers = nullptr;
@ -192,6 +202,10 @@ rtp_error_t uvg_rtp::frame_queue::deinit_transaction(uint32_t key)
if (free_.size() >= (size_t)max_queued_) {
switch (rtp_->get_payload()) {
case RTP_FORMAT_H264:
delete (uvg_rtp::formats::h264_headers *)transaction_it->second->media_headers;
break;
case RTP_FORMAT_H265:
delete (uvg_rtp::formats::h265_headers *)transaction_it->second->media_headers;
break;

View File

@ -44,6 +44,7 @@ void uvg_rtp::rtp::set_payload(rtp_format_t fmt)
payload_ = fmt_ = fmt;
switch (fmt_) {
case RTP_FORMAT_H264:
case RTP_FORMAT_H265:
clock_rate_ = 90000;
break;