install go2rtc on bob

This commit is contained in:
2026-04-04 19:36:14 +02:00
parent f0b56e63d1
commit ccf88187b8
537 changed files with 69213 additions and 0 deletions
@@ -0,0 +1,16 @@
# H264
Payloader code is taken from the [pion](https://github.com/pion/rtp) library and modified to support AVC packets.
## Useful Links
- [RTP Payload Format for H.264 Video](https://datatracker.ietf.org/doc/html/rfc6184)
- [The H264 Sequence parameter set](https://www.cardinalpeak.com/blog/the-h-264-sequence-parameter-set)
- [H.264 Video Types (Microsoft)](https://docs.microsoft.com/en-us/windows/win32/directshow/h-264-video-types)
- [Automatic Generation of H.264 Parameter Sets to Recover Video File Fragments](https://arxiv.org/pdf/2104.14522.pdf)
- [Chromium sources](https://chromium.googlesource.com/external/webrtc/+/HEAD/common_video/h264)
- [AVC levels](https://en.wikipedia.org/wiki/Advanced_Video_Coding#Levels)
- [AVC profiles table](https://developer.mozilla.org/ru/docs/Web/Media/Formats/codecs_parameter)
- [Supported Media for Google Cast](https://developers.google.com/cast/docs/media)
- [Two stream formats, Annex-B, AVCC (H.264) and HVCC (H.265)](https://www.programmersought.com/article/3901815022/)
- https://docs.aws.amazon.com/kinesisvideostreams/latest/dg/producer-reference-nal.html
@@ -0,0 +1,156 @@
// Package annexb - universal for H264 and H265
package annexb
import (
"bytes"
"encoding/binary"
)
// StartCode is the 4-byte Annex B start code placed in front of a NAL unit.
const StartCode = "\x00\x00\x00\x01"
// startAUD is a start code followed by the bytes 0x09 0xF0; 0x09 is a NAL
// header that isAUD recognises as an H264 access unit delimiter.
const startAUD = StartCode + "\x09\xF0"
// startAUDstart is startAUD followed by the start code of the next NAL unit.
const startAUDstart = startAUD + StartCode
// EncodeToAVCC converts an Annex B stream to AVCC: every NAL unit found after
// a 00 00 01 or 00 00 00 01 start code is appended to the result with a 4-byte
// big-endian length prefix. NAL units for which isAUD reports true are dropped.
//
// Start code layouts seen in practice:
//
// FFmpeg MPEG-TS: 00000001 AUD 00000001 SPS 00000001 PPS 000001 IFrame
// FFmpeg H264: 00000001 SPS 00000001 PPS 000001 IFrame 00000001 PFrame
// Reolink: 000001 AUD 000001 VPS 00000001 SPS 00000001 PPS 00000001 IDR 00000001 IDR
func EncodeToAVCC(annexb []byte) (avc []byte) {
// start is the offset of the NAL unit currently being collected.
// 0 doubles as "nothing to emit": bytes before the first start code and
// skipped AUD units are never written to the output.
var start int
avc = make([]byte, 0, len(annexb)+4) // init memory with little overhead
for i := 0; ; i++ {
// offset is the length of the start code found at i (0 at the end of data)
var offset int
if i+3 < len(annexb) {
// search next separator
if annexb[i] == 0 && annexb[i+1] == 0 {
if annexb[i+2] == 1 {
offset = 3 // 00 00 01
} else if annexb[i+2] == 0 && annexb[i+3] == 1 {
offset = 4 // 00 00 00 01
} else {
continue
}
} else {
continue
}
} else {
i = len(annexb) // move i to data end
}
// flush the NAL unit that ended at i
if start != 0 {
size := uint32(i - start)
avc = binary.BigEndian.AppendUint32(avc, size)
avc = append(avc, annexb[start:i]...)
}
// sometimes FFmpeg put separator at the end
if i += offset; i == len(annexb) {
break
}
// i now points at the header byte of the next NAL unit
if isAUD(annexb[i]) {
start = 0 // skip this NALU
} else {
start = i // save this position
}
}
return
}
// isAUD reports whether b, the first byte of a NAL unit, carries an access
// unit delimiter type: 9 in the low five bits (H264) or 35 in bits 1-6 (H265).
func isAUD(b byte) bool {
	const (
		h264AUD = 9
		h265AUD = 35
	)
	if b&0x1F == h264AUD {
		return true
	}
	return (b>>1)&0x3F == h265AUD
}
// DecodeAVCC converts an AVCC buffer to Annex B by overwriting every 4-byte
// length prefix with the start code 00 00 00 01.
//
// The conversion is done in place unless safeClone is true, in which case a
// copy of b is converted and returned and b itself is left untouched.
//
// Malformed input never panics: a trailing fragment shorter than a length
// prefix is left as is, and a length that runs past the end of the buffer
// ends the conversion after its prefix has been replaced.
func DecodeAVCC(b []byte, safeClone bool) []byte {
	if safeClone {
		b = bytes.Clone(b)
	}
	for i := 0; len(b)-i >= 4; {
		size := binary.BigEndian.Uint32(b[i:])
		b[i] = 0
		b[i+1] = 0
		b[i+2] = 0
		b[i+3] = 1
		i += 4
		// compare in uint64 so that a huge size cannot overflow int
		if uint64(size) > uint64(len(b)-i) {
			break
		}
		i += int(size)
	}
	return b
}
// DecodeAVCCWithAUD converts src from AVCC to Annex B and puts an access unit
// delimiter (startAUD) in front of it. The AUD is not important for FFmpeg,
// but it is important for Safari.
// src is not modified; the result is a newly allocated buffer.
func DecodeAVCCWithAUD(src []byte) []byte {
	const prefix = startAUD

	dst := make([]byte, len(prefix)+len(src))
	body := dst[copy(dst, prefix):]
	copy(body, src)
	DecodeAVCC(body, false) // convert the copied payload in place
	return dst
}
// NAL unit type values that IndexFrame compares against the type bits of a
// NAL header byte (low five bits for H264, bits 1-6 for H265).
const (
h264PFrame = 1
h264IFrame = 5
h264SPS = 7
h264PPS = 8
h265VPS = 32
h265PFrame = 1
)
// IndexFrame returns the position in the Annex B stream b of the start code
// that begins a new frame, or -1 if there is none.
//
// The first len(startAUDstart) bytes are skipped. After that, the first
// 4-byte start code followed by an H264 P-frame or SPS, or by an H265 P-frame
// or VPS, wins. H264 I-frame and PPS units are passed over without the H265
// check.
func IndexFrame(b []byte) int {
	if len(b) < len(startAUDstart) {
		return -1
	}

	sep := []byte(StartCode)
	pos := len(startAUDstart)
	for {
		off := bytes.Index(b[pos:], sep)
		if off < 0 {
			return -1
		}
		pos += off + 4 // move to NALU start
		if pos >= len(b) {
			return -1
		}

		hdr := b[pos]
		switch hdr & 0b1_1111 {
		case h264PFrame, h264SPS:
			return pos - 4 // move back to the start code
		case h264IFrame, h264PPS:
			continue
		}
		switch (hdr >> 1) & 0b11_1111 {
		case h265PFrame, h265VPS:
			return pos - 4 // move back to the start code
		}
	}
}
// FixAnnexBInAVCC repairs, in place, an AVCC buffer whose NAL units are
// separated by 00 00 00 01 start codes instead of valid length prefixes.
// Each 4-byte prefix is overwritten with the distance to the next start code,
// or to the end of the buffer when no further start code exists.
// The same slice b is returned.
func FixAnnexBInAVCC(b []byte) []byte {
	startCode := []byte{0, 0, 0, 1}

	pos := 0
	for pos+4 < len(b) {
		data := b[pos+4:]
		n := bytes.Index(data, startCode)
		if n < 0 {
			n = len(data) // last NAL unit: runs to the end
		}
		binary.BigEndian.PutUint32(b[pos:], uint32(n))
		pos += n + 4
	}
	return b
}
File diff suppressed because one or more lines are too long
+122
View File
@@ -0,0 +1,122 @@
package h264
import (
"bytes"
"encoding/binary"
)
// forbiddenZeroBit masks the top bit of a NAL header byte; DecodeStream and
// DecodeAnnexB ignore a candidate NAL unit when this bit is set.
const forbiddenZeroBit = 0x80
// nalUnitType masks the low five bits of a NAL header byte (the NAL unit type).
const nalUnitType = 0x1F
// DecodeStream finds the first AU in an Annex B stream and returns it in AVC
// format (converted by DecodeAnnexB) together with the offset in annexb at
// which the start code of the following unit begins.
// Useful for processing live streams with unknown separator size.
//
// The AU starts at the first SPS or P-frame NAL unit and ends right before the
// next AUD, SPS or P-frame NAL unit. If no such pair is found, (nil, 0) is
// returned.
//
// Deprecated: this function is marked as deprecated.
func DecodeStream(annexb []byte) ([]byte, int) {
// startPos is the offset of the start code that opens the AU, -1 until found
startPos := -1
i := 0
for {
// search next separator
if i = IndexFrom(annexb, []byte{0, 0, 1}, i); i < 0 {
break
}
// move i to next AU
if i += 3; i >= len(annexb) {
break
}
// check if AU type valid
octet := annexb[i]
if octet&forbiddenZeroBit != 0 {
continue
}
// 0 => AUD => SPS/IF/PF => AUD
// 0 => SPS/PF => SPS/PF
nalType := octet & nalUnitType
if startPos >= 0 {
switch nalType {
case NALUTypeAUD, NALUTypeSPS, NALUTypePFrame:
// a zero byte before 00 00 01 means the separator is 4 bytes long
if annexb[i-4] == 0 {
return DecodeAnnexB(annexb[startPos : i-4]), i - 4
} else {
return DecodeAnnexB(annexb[startPos : i-3]), i - 3
}
}
} else {
switch nalType {
case NALUTypeSPS, NALUTypePFrame:
// remember where the separator (3 or 4 bytes) of this unit begins
if i >= 4 && annexb[i-4] == 0 {
startPos = i - 4
} else {
startPos = i - 3
}
}
}
}
return nil, 0
}
// DecodeAnnexB converts Annex B to AVC format: the separator in front of each
// P-frame, I-frame, SPS or PPS NAL unit is replaced by a 4-byte big-endian
// length. Both 3-byte and 4-byte separators are supported.
//
// b must start with a separator and hold at least three bytes (b[2] is read
// unconditionally). The input may be modified in place, and the returned
// slice may be a reallocated copy when 3-byte separators had to be widened.
func DecodeAnnexB(b []byte) []byte {
if b[2] == 1 {
// convert: 0 0 1 => 0 0 0 1
b = append([]byte{0}, b...)
}
// startPos is the offset of the length field of the NAL unit being measured
startPos := 0
i := 4
for {
// search next separator
if i = IndexFrom(b, []byte{0, 0, 1}, i); i < 0 {
break
}
// move i to next AU
if i += 3; i >= len(b) {
break
}
// check if AU type valid
octet := b[i]
if octet&forbiddenZeroBit != 0 {
continue
}
switch octet & nalUnitType {
case NALUTypePFrame, NALUTypeIFrame, NALUTypeSPS, NALUTypePPS:
if b[i-4] != 0 {
// prefix: 0 0 1
binary.BigEndian.PutUint32(b[startPos:], uint32(i-startPos-7))
// insert one byte after the separator so that four bytes are
// available for the length field of the next NAL unit
tmp := make([]byte, 0, len(b)+1)
tmp = append(tmp, b[:i]...)
tmp = append(tmp, 0)
b = append(tmp, b[i:]...)
startPos = i - 3
} else {
// prefix: 0 0 0 1
binary.BigEndian.PutUint32(b[startPos:], uint32(i-startPos-8))
startPos = i - 4
}
}
}
// the last NAL unit runs to the end of the buffer
binary.BigEndian.PutUint32(b[startPos:], uint32(len(b)-startPos-4))
return b
}
// IndexFrom returns the index of the first occurrence of sep in b at or after
// position from, or -1 if there is none. A non-positive from searches the
// whole of b; a from at or past the end of b always yields -1.
func IndexFrom(b []byte, sep []byte, from int) int {
	switch {
	case from <= 0:
		return bytes.Index(b, sep)
	case from >= len(b):
		return -1
	}

	rel := bytes.Index(b[from:], sep)
	if rel < 0 {
		return -1
	}
	return from + rel
}
+120
View File
@@ -0,0 +1,120 @@
// Package h264 - AVCC format related functions
package h264
import (
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/hex"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/pion/rtp"
)
// RepairAVCC wraps handler with a fix-up step for AVCC payloads:
// a leading SEI NAL unit is stripped, and the SPS/PPS taken from the codec
// fmtp line are put in front of a payload that starts with an I-frame.
// The packet is modified in place before being passed to handler.
func RepairAVCC(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
	sps, pps := GetParameterSet(codec.FmtpLine)
	paramSets := JoinNALU(sps, pps)

	return func(packet *rtp.Packet) {
		// this can happen for FLV from FFmpeg
		if NALUType(packet.Payload) == NALUTypeSEI {
			skip := 4 + int(binary.BigEndian.Uint32(packet.Payload))
			packet.Payload = packet.Payload[skip:]
		}

		if NALUType(packet.Payload) == NALUTypeIFrame {
			packet.Payload = Join(paramSets, packet.Payload)
		}

		handler(packet)
	}
}
// JoinNALU concatenates the given NAL units into one AVCC buffer, writing a
// 4-byte big-endian length in front of each unit. Empty units are skipped.
// The result is always a freshly allocated, non-nil slice.
func JoinNALU(nalus ...[]byte) (avcc []byte) {
	total := 0
	for _, nalu := range nalus {
		if len(nalu) != 0 {
			total += 4 + len(nalu)
		}
	}

	avcc = make([]byte, total)
	out := avcc // unwritten tail of avcc
	for _, nalu := range nalus {
		if len(nalu) == 0 {
			continue
		}
		binary.BigEndian.PutUint32(out, uint32(len(nalu)))
		copy(out[4:], nalu)
		out = out[4+len(nalu):]
	}
	return avcc
}
// SplitNALU splits an AVCC buffer into its NAL units. Every returned item
// still carries its 4-byte length prefix and aliases the memory of avcc.
//
// If a length prefix points at or past the end of the buffer, the whole
// remainder is returned as the last item. Input that is empty, or that ends
// with fewer than four stray bytes, does not panic: the stray bytes cannot
// hold a length prefix and are dropped.
func SplitNALU(avcc []byte) [][]byte {
	var nals [][]byte
	for len(avcc) >= 4 {
		// get AVC length; uint64 keeps 4+size from overflowing
		size := 4 + uint64(binary.BigEndian.Uint32(avcc))
		if size >= uint64(len(avcc)) {
			// last (or truncated) item
			nals = append(nals, avcc)
			break
		}
		// multiple items in one packet
		nals = append(nals, avcc[:size])
		avcc = avcc[size:]
	}
	return nals
}
// NALUTypes returns the NAL unit type of every NAL unit in an AVCC buffer, in
// order. The walk stops at the first unit whose length reaches the end of the
// buffer. avcc must hold at least one length prefix and header byte.
func NALUTypes(avcc []byte) []byte {
	var types []byte
	rest := avcc
	for {
		types = append(types, NALUType(rest))

		next := 4 + int(binary.BigEndian.Uint32(rest))
		if next >= len(rest) {
			return types
		}
		rest = rest[next:]
	}
}
// AVCCToCodec builds an H264 codec description from an AVCC buffer.
// The fmtp line always starts with "packetization-mode=1"; an SPS adds
// profile-level-id and starts sprop-parameter-sets, a PPS is appended to it
// after a comma. Parsing stops at the first incomplete NAL unit.
func AVCCToCodec(avcc []byte) *core.Codec {
	fmtp := bytes.NewBufferString("packetization-mode=1")

	for len(avcc) >= 4 {
		end := 4 + int(binary.BigEndian.Uint32(avcc))
		if end > len(avcc) {
			break
		}

		switch NALUType(avcc) {
		case NALUTypeSPS:
			// the three bytes after the NAL header are profile, constraints, level
			fmtp.WriteString(";profile-level-id=")
			fmtp.WriteString(hex.EncodeToString(avcc[5:8]))
			fmtp.WriteString(";sprop-parameter-sets=")
			fmtp.WriteString(base64.StdEncoding.EncodeToString(avcc[4:end]))
		case NALUTypePPS:
			fmtp.WriteString(",")
			fmtp.WriteString(base64.StdEncoding.EncodeToString(avcc[4:end]))
		}

		avcc = avcc[end:]
	}

	return &core.Codec{
		Name:        core.CodecH264,
		ClockRate:   90000,
		FmtpLine:    fmtp.String(),
		PayloadType: core.PayloadTypeRAW,
	}
}
+145
View File
@@ -0,0 +1,145 @@
package h264
import (
"encoding/base64"
"encoding/binary"
"encoding/hex"
"fmt"
"strings"
"github.com/AlexxIT/go2rtc/pkg/core"
)
// H264 NAL unit types, as returned by NALUType (low five bits of the NAL
// header byte).
const (
NALUTypePFrame = 1 // Coded slice of a non-IDR picture
NALUTypeIFrame = 5 // Coded slice of an IDR picture
NALUTypeSEI = 6 // Supplemental enhancement information (SEI)
NALUTypeSPS = 7 // Sequence parameter set
NALUTypePPS = 8 // Picture parameter set
NALUTypeAUD = 9 // Access unit delimiter
)
// NALUType returns the NAL unit type of the first NAL unit in an AVCC buffer:
// the low five bits of the byte that follows the 4-byte length prefix.
// It panics if b holds fewer than five bytes.
func NALUType(b []byte) byte {
	const typeMask = 0x1F
	header := b[4]
	return header & typeMask
}
// IsKeyframe reports whether an access unit in AVCC format is a keyframe.
// NAL units are inspected in order: the first I-frame unit yields true, the
// first P-frame unit yields false, and false is returned when the buffer ends
// without either of them.
func IsKeyframe(b []byte) bool {
	for {
		switch NALUType(b) {
		case NALUTypeIFrame:
			return true
		case NALUTypePFrame:
			return false
		}

		next := 4 + int(binary.BigEndian.Uint32(b))
		if next >= len(b) {
			return false
		}
		b = b[next:]
	}
}
// Join returns a new buffer that holds ps followed by iframe.
// Neither input is modified or aliased by the result.
func Join(ps, iframe []byte) []byte {
	joined := make([]byte, 0, len(ps)+len(iframe))
	joined = append(joined, ps...)
	return append(joined, iframe...)
}
// Profile and capability byte values that GetProfileLevelID accepts as the
// first and second byte of a profile-level-id.
// https://developers.google.com/cast/docs/media
const (
ProfileBaseline = 0x42
ProfileMain = 0x4D
ProfileHigh = 0x64
CapabilityBaseline = 0xE0
CapabilityMain = 0x40
)
// GetProfileLevelID returns a six-digit upper-case hex profile-level-id
// derived from an fmtp line.
// Some devices won't play video with a high level, so the profile, capability
// and level bytes are limited to a small set of supported values; anything
// else falls back to the defaults. A value is returned even when the fmtp
// line is empty.
func GetProfileLevelID(fmtp string) string {
	// defaults: avc1.640029 - H.264 high 4.1 (Chromecast 1st and 2nd Gen)
	profile, capab, level := byte(ProfileHigh), byte(0), byte(41)

	var conf []byte
	if fmtp != "" {
		// some cameras has wrong profile-level-id, so the SPS is preferred
		// https://github.com/AlexxIT/go2rtc/issues/155
		s := core.Between(fmtp, "sprop-parameter-sets=", ",")
		if s != "" {
			sps, _ := base64.StdEncoding.DecodeString(s)
			if len(sps) >= 4 {
				conf = sps[1:4]
			}
		} else if s = core.Between(fmtp, "profile-level-id=", ";"); s != "" {
			conf, _ = hex.DecodeString(s)
		}
	}

	// sanitize profile, capab and level to supported values
	if len(conf) == 3 {
		if conf[0] == ProfileBaseline || conf[0] == ProfileMain {
			profile = conf[0]
		}
		if conf[1] == CapabilityBaseline || conf[1] == CapabilityMain {
			capab = conf[1]
		}
		if conf[2] == 30 || conf[2] == 31 || conf[2] == 40 {
			level = conf[2]
		}
	}

	return fmt.Sprintf("%02X%02X%02X", profile, capab, level)
}
// GetParameterSet extracts the SPS and PPS from the sprop-parameter-sets value
// of an fmtp line. The value must hold two base64 items separated by a comma;
// otherwise both results are nil. Base64 decoding errors are ignored.
func GetParameterSet(fmtp string) (sps, pps []byte) {
	if fmtp == "" {
		return nil, nil
	}

	sets := core.Between(fmtp, "sprop-parameter-sets=", ";")
	spsText, ppsText, found := strings.Cut(sets, ",")
	if !found {
		// empty value or no PPS part
		return nil, nil
	}

	sps, _ = base64.StdEncoding.DecodeString(spsText)
	pps, _ = base64.StdEncoding.DecodeString(ppsText)
	return sps, pps
}
// GetFmtpLine builds an fmtp line from SPS+PPS+IFrame in AVC format.
// The line starts with "packetization-mode=1"; an SPS adds profile-level-id
// and starts sprop-parameter-sets, a PPS is appended to it after a comma.
// avc must hold at least one complete length prefix and NAL header.
func GetFmtpLine(avc []byte) string {
	line := "packetization-mode=1"

	for {
		end := 4 + int(binary.BigEndian.Uint32(avc))

		if t := NALUType(avc); t == NALUTypeSPS {
			line += ";profile-level-id=" + hex.EncodeToString(avc[5:8])
			line += ";sprop-parameter-sets=" + base64.StdEncoding.EncodeToString(avc[4:end])
		} else if t == NALUTypePPS {
			line += "," + base64.StdEncoding.EncodeToString(avc[4:end])
		}

		if end >= len(avc) {
			return line
		}
		avc = avc[end:]
	}
}
@@ -0,0 +1,110 @@
package h264
import (
"encoding/base64"
"encoding/hex"
"testing"
"github.com/stretchr/testify/require"
)
// TestDecodeConfig checks that DecodeConfig extracts a profile, an SPS and a
// PPS from a sample config, and that EncodeConfig rebuilds the same bytes
// from that SPS and PPS.
func TestDecodeConfig(t *testing.T) {
s := "01640033ffe1000c67640033ac1514a02800f19001000468ee3cb0"
src, err := hex.DecodeString(s)
require.Nil(t, err)
profile, sps, pps := DecodeConfig(src)
require.NotNil(t, profile)
require.NotNil(t, sps)
require.NotNil(t, pps)
// round trip
dst := EncodeConfig(sps, pps)
require.Equal(t, src, dst)
}
// TestDecodeSPS checks the picture size decoded from base64 SPS samples of
// several cameras.
func TestDecodeSPS(t *testing.T) {
	cases := []struct {
		camera string
		sps    string // base64
		width  uint16
		height uint16
	}{
		{"Amcrest AD410", "Z0IAMukAUAHjQgAAB9IAAOqcCAA=", 2560, 1920},
		{"Sonoff", "R00AKZmgHgCJ+WEAAAMD6AAATiCE", 1920, 1080},
		{"Dahua", "Z01AMqaAKAC1kAA=", 2560, 1440},
		{"Reolink", "Z2QAM6wVFKAoAPGQ", 2560, 1920},
		{"TP-Link", "Z2QAKKwa0AoAt03AQEBQAAADABAAAAMB6PFCKg==", 1280, 720},
		{"TP-Link sub", "Z2QAFqwa0BQF/yzcBAQFAAADAAEAAAMAHo8UIqA=", 640, 360},
	}

	for _, tc := range cases {
		b, err := base64.StdEncoding.DecodeString(tc.sps)
		require.Nil(t, err)

		sps := DecodeSPS(b)
		require.Equal(t, tc.width, sps.Width())
		require.Equal(t, tc.height, sps.Height())
	}
}
// TestGetProfileLevelID checks that unsupported profile-level-id values from
// real devices are replaced by the default "640029".
func TestGetProfileLevelID(t *testing.T) {
// OpenIPC https://github.com/OpenIPC
s := "profile-level-id=0033e7; packetization-mode=1; "
profile := GetProfileLevelID(s)
require.Equal(t, "640029", profile)
// Eufy T8400 https://github.com/AlexxIT/go2rtc/issues/155
s = "packetization-mode=1;profile-level-id=276400"
profile = GetProfileLevelID(s)
require.Equal(t, "640029", profile)
}
// TestDecodeSPS2 checks the picture size decoded from two more SPS samples,
// one given as hex and one as base64.
func TestDecodeSPS2(t *testing.T) {
s := "6764001fad84010c20086100430802184010c200843b50740932"
b, err := hex.DecodeString(s)
require.Nil(t, err)
sps := DecodeSPS(b)
require.Equal(t, uint16(928), sps.Width())
require.Equal(t, uint16(576), sps.Height())
s = "Z2QAHq2EAQwgCGEAQwgCGEAQwgCEO1BQF/yzcBAQFAAAD6AAAXcCEA==" // unknown
b, err = base64.StdEncoding.DecodeString(s)
require.Nil(t, err)
sps = DecodeSPS(b)
require.Equal(t, uint16(640), sps.Width())
require.Equal(t, uint16(360), sps.Height())
}
// TestAVCCToCodec checks the fmtp line built by AVCCToCodec from an AVCC
// buffer that holds an SPS, a PPS and the beginning of a truncated third unit.
func TestAVCCToCodec(t *testing.T) {
s := "000000196764001fac2484014016ec0440000003004000000c23c60c920000000568ee32c8b0000000d365"
// the hex literal above is constant, so the decode error is not checked
b, _ := hex.DecodeString(s)
codec := AVCCToCodec(b)
require.Equal(t, "packetization-mode=1;profile-level-id=64001f;sprop-parameter-sets=Z2QAH6wkhAFAFuwEQAAAAwBAAAAMI8YMkg==,aO4yyLA=", codec.FmtpLine)
}
+101
View File
@@ -0,0 +1,101 @@
// Package h264 - MPEG4 format related functions
package h264
import (
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/hex"
"github.com/AlexxIT/go2rtc/pkg/core"
)
// DecodeConfig extracts the profile, the first SPS and the first PPS from an
// MPEG4 config (AVCDecoderConfigurationRecord).
//
// All results are nil when conf is shorter than six bytes or its version byte
// is not 1. profile is the three bytes that follow the version byte. Parsing
// stops at the first truncated field, so sps and/or pps may be nil for an
// incomplete config; truncated input never panics. The returned slices alias
// conf.
func DecodeConfig(conf []byte) (profile []byte, sps []byte, pps []byte) {
	if len(conf) < 6 || conf[0] != 1 {
		return nil, nil, nil
	}

	profile = conf[1:4]
	spsCount := conf[5] & 0x1F
	conf = conf[6:]

	// readSets walks over count parameter sets, each prefixed with a 16-bit
	// big-endian size, and returns the first one. ok is false if the data
	// ended before all sets were read.
	readSets := func(count byte) (first []byte, ok bool) {
		for i := byte(0); i < count; i++ {
			if len(conf) < 2 {
				return first, false
			}
			size := 2 + int(binary.BigEndian.Uint16(conf))
			if len(conf) < size {
				return first, false
			}
			if first == nil {
				first = conf[2:size]
			}
			conf = conf[size:]
		}
		return first, true
	}

	sps, ok := readSets(spsCount)
	if !ok || len(conf) == 0 {
		// truncated: there is no PPS count byte to read
		return profile, sps, nil
	}

	ppsCount := conf[0]
	conf = conf[1:]
	pps, _ = readSets(ppsCount)

	return profile, sps, pps
}
// EncodeConfig builds an MPEG4 config (AVCDecoderConfigurationRecord) that
// holds exactly one SPS and one PPS and declares 4-byte NAL unit lengths.
//
// The profile bytes are copied from sps[1:4]; they are left zero when sps is
// shorter than four bytes. An empty sps or pps is written as a zero-length
// entry instead of causing a panic. Each parameter set must fit the 16-bit
// size field, i.e. be shorter than 64 KiB.
func EncodeConfig(sps, pps []byte) []byte {
	// sizes are kept as int so that the total cannot wrap around in uint16
	spsSize, ppsSize := len(sps), len(pps)

	buf := make([]byte, 5+3+spsSize+3+ppsSize)
	buf[0] = 1 // configuration version
	if spsSize >= 4 {
		copy(buf[1:4], sps[1:4]) // profile, compatibility, level
	}
	buf[4] = 3 | 0xFC // reserved bits + LengthSizeMinusOne (4-byte lengths)

	b := buf[5:]
	b[0] = 1 | 0xE0 // reserved bits + sps count
	binary.BigEndian.PutUint16(b[1:], uint16(spsSize))
	copy(b[3:], sps)

	b = buf[5+3+spsSize:]
	b[0] = 1 // pps count
	binary.BigEndian.PutUint16(b[1:], uint16(ppsSize))
	copy(b[3:], pps)

	return buf
}
// ConfigToCodec builds an H264 codec description from an MPEG4 config.
// The fmtp line always starts with "packetization-mode=1"; profile-level-id
// is added when DecodeConfig returns a profile, and sprop-parameter-sets is
// added only when both an SPS and a PPS were found.
func ConfigToCodec(conf []byte) *core.Codec {
	profile, sps, pps := DecodeConfig(conf)

	fmtp := bytes.NewBufferString("packetization-mode=1")
	if profile != nil {
		fmtp.WriteString(";profile-level-id=" + hex.EncodeToString(profile))
	}
	if sps != nil && pps != nil {
		encode := base64.StdEncoding.EncodeToString
		fmtp.WriteString(";sprop-parameter-sets=" + encode(sps) + "," + encode(pps))
	}

	return &core.Codec{
		Name:        core.CodecH264,
		ClockRate:   90000,
		FmtpLine:    fmtp.String(),
		PayloadType: core.PayloadTypeRAW,
	}
}
@@ -0,0 +1,195 @@
package h264
import "encoding/binary"
// Payloader payloads H264 packets
type Payloader struct {
// IsAVC is passed to EmitNalus: true means the input is length-prefixed
// (AVC), false means it is separated by Annex B start codes.
IsAVC bool
// stapANalu collects SPS and PPS NAL units as one STAP-A payload until
// Payload sees the next NAL unit of another kind.
stapANalu []byte
}
// NAL unit types, header sizes and bit masks used by the payloader.
const (
stapaNALUType = 24
fuaNALUType = 28
fubNALUType = 29
spsNALUType = 7
ppsNALUType = 8
audNALUType = 9
fillerNALUType = 12
// size of the FU indicator plus FU header written in front of each fragment
fuaHeaderSize = 2
//stapaHeaderSize = 1
//stapaNALULengthSize = 2
// masks applied to the first byte of a NAL unit
naluTypeBitmask = 0x1F
naluRefIdcBitmask = 0x60
//fuStartBitmask = 0x80
//fuEndBitmask = 0x40
// first byte of the STAP-A payload that carries the buffered SPS/PPS
outputStapAHeader = 0x78
)
//func annexbNALUStartCode() []byte { return []byte{0x00, 0x00, 0x00, 0x01} }
// EmitNalus splits nals into NAL units and calls emit once per unit, in
// order. The emitted slices alias nals and do not include any framing.
//
// With isAVC set, nals is a sequence of units prefixed by a 4-byte big-endian
// length; emission stops at the first truncated unit. Lengths are compared in
// 64 bits, so a bogus length close to 2^32 ends the walk instead of wrapping
// around and panicking.
//
// Without isAVC, nals is an Annex B stream: units are separated by start
// codes made of two or more zero bytes followed by 0x01. If no start code is
// present, the whole input is emitted as a single unit.
func EmitNalus(nals []byte, isAVC bool, emit func([]byte)) {
	if isAVC {
		for len(nals) >= 4 {
			end := 4 + uint64(binary.BigEndian.Uint32(nals))
			if uint64(len(nals)) < end {
				break
			}
			emit(nals[4:end])
			nals = nals[end:]
		}
		return
	}

	// nextInd returns the position and the length of the next start code at
	// or after start, or (-1, -1) if there is none.
	nextInd := func(nalu []byte, start int) (indStart int, indLen int) {
		zeroCount := 0
		for i, b := range nalu[start:] {
			if b == 0 {
				zeroCount++
				continue
			}
			if b == 1 && zeroCount >= 2 {
				return start + i - zeroCount, zeroCount + 1
			}
			zeroCount = 0
		}
		return -1, -1
	}

	nextIndStart, nextIndLen := nextInd(nals, 0)
	if nextIndStart == -1 {
		emit(nals)
		return
	}

	for nextIndStart != -1 {
		prevStart := nextIndStart + nextIndLen
		nextIndStart, nextIndLen = nextInd(nals, prevStart)
		if nextIndStart != -1 {
			emit(nals[prevStart:nextIndStart])
		} else {
			// Emit until end of stream, no end indicator found
			emit(nals[prevStart:])
		}
	}
}
// Payload fragments a H264 packet across one or more byte arrays
//
// payload is split into NAL units by EmitNalus according to p.IsAVC, and each
// unit is handled as follows:
//   - AUD and filler units are dropped;
//   - SPS and PPS units are buffered in p.stapANalu and emitted as a single
//     STAP-A payload right before the next unit of another kind (the STAP-A
//     is dropped if it is larger than mtu);
//   - a unit that fits into mtu is emitted as is (copied);
//   - a larger unit is split into FU-A fragments of at most mtu bytes.
//
// The buffered SPS/PPS survive across calls until they are emitted.
func (p *Payloader) Payload(mtu uint16, payload []byte) [][]byte {
var payloads [][]byte
if len(payload) == 0 {
return payloads
}
EmitNalus(payload, p.IsAVC, func(nalu []byte) {
if len(nalu) == 0 {
return
}
naluType := nalu[0] & naluTypeBitmask
naluRefIdc := nalu[0] & naluRefIdcBitmask
switch naluType {
case audNALUType, fillerNALUType:
return
case spsNALUType, ppsNALUType:
if p.stapANalu == nil {
p.stapANalu = []byte{outputStapAHeader}
}
// each aggregated unit is prefixed with its 16-bit big-endian size
p.stapANalu = append(p.stapANalu, byte(len(nalu)>>8), byte(len(nalu)))
p.stapANalu = append(p.stapANalu, nalu...)
return
}
if p.stapANalu != nil {
// Pack current NALU with SPS and PPS as STAP-A
// Supports multiple PPS in a row
if len(p.stapANalu) <= int(mtu) {
payloads = append(payloads, p.stapANalu)
}
p.stapANalu = nil
}
// Single NALU
if len(nalu) <= int(mtu) {
out := make([]byte, len(nalu))
copy(out, nalu)
payloads = append(payloads, out)
return
}
// FU-A
maxFragmentSize := int(mtu) - fuaHeaderSize
// The FU payload consists of fragments of the payload of the fragmented
// NAL unit so that if the fragmentation unit payloads of consecutive
// FUs are sequentially concatenated, the payload of the fragmented NAL
// unit can be reconstructed. The NAL unit type octet of the fragmented
// NAL unit is not included as such in the fragmentation unit payload,
// but rather the information of the NAL unit type octet of the
// fragmented NAL unit is conveyed in the F and NRI fields of the FU
// indicator octet of the fragmentation unit and in the type field of
// the FU header. An FU payload MAY have any number of octets and MAY
// be empty.
naluData := nalu
// According to the RFC, the first octet is skipped due to redundant information
naluDataIndex := 1
naluDataLength := len(nalu) - naluDataIndex
naluDataRemaining := naluDataLength
// nothing can be sent if mtu leaves no room for fragment data
if min(maxFragmentSize, naluDataRemaining) <= 0 {
return
}
for naluDataRemaining > 0 {
currentFragmentSize := min(maxFragmentSize, naluDataRemaining)
out := make([]byte, fuaHeaderSize+currentFragmentSize)
// +---------------+
// |0|1|2|3|4|5|6|7|
// +-+-+-+-+-+-+-+-+
// |F|NRI| Type |
// +---------------+
out[0] = fuaNALUType
out[0] |= naluRefIdc
// +---------------+
// |0|1|2|3|4|5|6|7|
// +-+-+-+-+-+-+-+-+
// |S|E|R| Type |
// +---------------+
out[1] = naluType
if naluDataRemaining == naluDataLength {
// Set start bit
out[1] |= 1 << 7
} else if naluDataRemaining-currentFragmentSize == 0 {
// Set end bit
out[1] |= 1 << 6
}
copy(out[fuaHeaderSize:], naluData[naluDataIndex:naluDataIndex+currentFragmentSize])
payloads = append(payloads, out)
naluDataRemaining -= currentFragmentSize
naluDataIndex += currentFragmentSize
}
})
return payloads
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
+137
View File
@@ -0,0 +1,137 @@
package h264
import (
"encoding/binary"
"github.com/AlexxIT/go2rtc/pkg/core"
"github.com/AlexxIT/go2rtc/pkg/h264/annexb"
"github.com/pion/rtp"
"github.com/pion/rtp/codecs"
)
// RTPPacketVersionAVC is the value RTPDepay stores in the Version field of
// the packets it emits, whose payload is in AVC format. RTPPay repacketizes
// only packets that carry this value and forwards all others untouched.
const RTPPacketVersionAVC = 0
// PSMaxSize is the size threshold RTPDepay uses for parameter sets: only
// smaller marked payloads get the SPS/PPS/SEI special handling, and an SPS
// declaring at least this length is treated as broken.
const PSMaxSize = 128 // the biggest SPS I've seen is 48 (EZVIZ CS-CV210)
// RTPDepay returns a handler that depacketizes H264 RTP packets and passes
// complete access units in AVC format to handler.
//
// Payloads are collected in an internal buffer until a packet with the marker
// flag arrives. The access unit is then sent as a copy of that packet with
// Version set to RTPPacketVersionAVC. SPS and PPS from the codec fmtp line
// are put in front of an access unit that starts with an I-frame, and several
// camera-specific quirks are worked around (see the comments below).
// The returned handler keeps state between calls.
func RTPDepay(codec *core.Codec, handler core.HandlerFunc) core.HandlerFunc {
depack := &codecs.H264Packet{IsAVC: true}
sps, pps := GetParameterSet(codec.FmtpLine)
ps := JoinNALU(sps, pps)
// buf holds the NAL units of the access unit that is being collected
buf := make([]byte, 0, 512*1024) // 512K
return func(packet *rtp.Packet) {
//log.Printf("[RTP] codec: %s, nalu: %2d, size: %6d, ts: %10d, pt: %2d, ssrc: %d, seq: %d, %v", codec.Name, packet.Payload[0]&0x1F, len(packet.Payload), packet.Timestamp, packet.PayloadType, packet.SSRC, packet.SequenceNumber, packet.Marker)
payload, err := depack.Unmarshal(packet.Payload)
if len(payload) == 0 || err != nil {
return
}
// Memory overflow protection. Can happen if we miss a lot of packets with the marker.
// https://github.com/AlexxIT/go2rtc/issues/675
if len(buf) > 5*1024*1024 {
buf = buf[: 0 : 512*1024]
}
// Fix TP-Link Tapo TC70: sends SPS and PPS with packet.Marker = true
// Reolink Duo 2: sends SPS with Marker and PPS without
if packet.Marker && len(payload) < PSMaxSize {
switch NALUType(payload) {
case NALUTypeSPS, NALUTypePPS:
// keep collecting: parameter sets do not finish an access unit
buf = append(buf, payload...)
return
case NALUTypeSEI:
// RtspServer https://github.com/AlexxIT/go2rtc/issues/244
// sends, marked SPS, marked PPS, marked SEI, marked IFrame
return
}
}
// first payload of a new access unit
if len(buf) == 0 {
for {
// Amcrest IP4M-1051: 9, 7, 8, 6, 28...
// Amcrest IP4M-1051: 9, 6, 1
switch NALUType(payload) {
case NALUTypeIFrame:
// fix IFrame without SPS,PPS
buf = append(buf, ps...)
case NALUTypeSEI, NALUTypeAUD:
// fix ffmpeg with transcoding first frame
i := int(4 + binary.BigEndian.Uint32(payload))
// check if only one NAL (fix ffmpeg transcoding for Reolink RLC-510A)
if i == len(payload) {
return
}
// drop the leading SEI/AUD and look at the next NAL unit
payload = payload[i:]
continue
case NALUTypePFrame, NALUTypeSPS, NALUTypePPS: // pass
default:
return // skip any unknown NAL unit type
}
break
}
}
// collect all NALs for Access Unit
if !packet.Marker {
buf = append(buf, payload...)
return
}
if len(buf) > 0 {
payload = append(buf, payload...)
buf = buf[:0]
}
// should not be that huge SPS
if NALUType(payload) == NALUTypeSPS && binary.BigEndian.Uint32(payload) >= PSMaxSize {
// some Chinese buggy cameras have a single packet with SPS+PPS+IFrame separated by 00 00 00 01
// https://github.com/AlexxIT/WebRTC/issues/391
// https://github.com/AlexxIT/WebRTC/issues/392
payload = annexb.FixAnnexBInAVCC(payload)
}
//log.Printf("[AVC] %v, len: %d, ts: %10d, seq: %d", NALUTypes(payload), len(payload), packet.Timestamp, packet.SequenceNumber)
// the incoming packet is left untouched; a shallow copy is sent on
clone := *packet
clone.Version = RTPPacketVersionAVC
clone.Payload = payload
handler(&clone)
}
}
// RTPPay returns a handler that converts access units in AVC format (packets
// whose Version is RTPPacketVersionAVC) into H264 RTP packets and passes them
// to handler. Packets with any other Version are forwarded unchanged.
//
// mtu is the maximum packet size including the 12-byte RTP header; 0 selects
// the default of 1472. Produced packets get fresh sequence numbers from a
// random sequencer, keep the timestamp of the source packet, and only the
// last packet of an access unit has the marker flag set.
func RTPPay(mtu uint16, handler core.HandlerFunc) core.HandlerFunc {
	const rtpHeaderSize = 12

	if mtu == 0 {
		mtu = 1472
	}
	maxPayload := mtu - rtpHeaderSize

	payloader := &Payloader{IsAVC: true}
	sequencer := rtp.NewRandomSequencer()

	return func(packet *rtp.Packet) {
		if packet.Version != RTPPacketVersionAVC {
			handler(packet)
			return
		}

		payloads := payloader.Payload(maxPayload, packet.Payload)
		for i := range payloads {
			out := rtp.Packet{
				Header: rtp.Header{
					Version:        2,
					Marker:         i == len(payloads)-1,
					SequenceNumber: sequencer.NextSequenceNumber(),
					Timestamp:      packet.Timestamp,
				},
				Payload: payloads[i],
			}
			handler(&out)
		}
	}
}
+366
View File
@@ -0,0 +1,366 @@
package h264
import (
"fmt"
"github.com/AlexxIT/go2rtc/pkg/bits"
)
// SPS holds the fields of an H264 sequence parameter set as filled in by
// DecodeSPS. Field names follow the syntax element names of the standard.
//
// http://www.itu.int/rec/T-REC-H.264
// https://webrtc.googlesource.com/src/+/refs/heads/main/common_video/h264/sps_parser.cc
//
//goland:noinspection GoSnakeCaseUsage
type SPS struct {
	profile_idc          uint8
	profile_iop          uint8
	level_idc            uint8
	seq_parameter_set_id uint32

	// read only for the high profiles
	chroma_format_idc                    uint32
	separate_colour_plane_flag           byte
	bit_depth_luma_minus8                uint32
	bit_depth_chroma_minus8              uint32
	qpprime_y_zero_transform_bypass_flag byte
	seq_scaling_matrix_present_flag      byte

	// frame numbering and picture order count
	log2_max_frame_num_minus4             uint32
	pic_order_cnt_type                    uint32
	log2_max_pic_order_cnt_lsb_minus4     uint32
	delta_pic_order_always_zero_flag      byte
	offset_for_non_ref_pic                int32
	offset_for_top_to_bottom_field        int32
	num_ref_frames_in_pic_order_cnt_cycle uint32
	num_ref_frames                        uint32
	gaps_in_frame_num_value_allowed_flag  byte

	// picture size and cropping
	pic_width_in_mbs_minus_1        uint32
	pic_height_in_map_units_minus_1 uint32
	frame_mbs_only_flag             byte
	mb_adaptive_frame_field_flag    byte
	direct_8x8_inference_flag       byte
	frame_cropping_flag             byte
	frame_crop_left_offset          uint32
	frame_crop_right_offset         uint32
	frame_crop_top_offset           uint32
	frame_crop_bottom_offset        uint32

	// VUI parameters
	vui_parameters_present_flag         byte
	aspect_ratio_info_present_flag      byte
	aspect_ratio_idc                    byte
	sar_width                           uint16
	sar_height                          uint16
	overscan_info_present_flag          byte
	overscan_appropriate_flag           byte
	video_signal_type_present_flag      byte
	video_format                        uint8
	video_full_range_flag               byte
	colour_description_present_flag     byte
	colour_description                  uint32
	chroma_loc_info_present_flag        byte
	chroma_sample_loc_type_top_field    uint32
	chroma_sample_loc_type_bottom_field uint32
	timing_info_present_flag            byte
	num_units_in_tick                   uint32
	time_scale                          uint32
	fixed_frame_rate_flag               byte
}

// Width returns the visible picture width in pixels: the coded width (whole
// 16-pixel macroblocks) minus the left and right cropping.
// The crop unit of 2 pixels assumes 4:2:0 or 4:2:2 chroma.
func (s *SPS) Width() uint16 {
	width := 16 * (s.pic_width_in_mbs_minus_1 + 1)
	crop := 2 * (s.frame_crop_left_offset + s.frame_crop_right_offset)
	return uint16(width - crop)
}

// Height returns the visible picture height in pixels: the coded height minus
// the top and bottom cropping. The crop unit of 2 rows assumes 4:2:0 chroma.
//
// When frame_mbs_only_flag is 0 (field or MBAFF coding) a map unit covers two
// macroblock rows of the frame and the vertical crop unit doubles as well, so
// both the coded height and the cropping are multiplied by two.
func (s *SPS) Height() uint16 {
	height := 16 * (s.pic_height_in_map_units_minus_1 + 1)
	crop := 2 * (s.frame_crop_top_offset + s.frame_crop_bottom_offset)
	if s.frame_mbs_only_flag == 0 {
		height *= 2
		crop *= 2
	}
	return uint16(height - crop)
}
// DecodeSPS parses a sequence parameter set, including its one-byte NAL
// header, into an SPS. The fields are read in bitstream order, so the order
// of the statements below matters.
//
// It returns nil if the NAL header does not have the SPS type or if the
// reader reports EOF after parsing. Scaling lists are read but not stored,
// and only the leading part of the VUI parameters is decoded.
func DecodeSPS(sps []byte) *SPS {
// https://developer.ridgerun.com/wiki/index.php/H264_Analysis_Tools
// ffmpeg -i file.h264 -c copy -bsf:v trace_headers -f null -
r := bits.NewReader(sps)
hdr := r.ReadByte()
if hdr&0x1F != NALUTypeSPS {
return nil
}
s := &SPS{
profile_idc: r.ReadByte(),
profile_iop: r.ReadByte(),
level_idc: r.ReadByte(),
seq_parameter_set_id: r.ReadUEGolomb(),
}
// only these profiles carry the chroma format, bit depth and scaling matrix
// NOTE(review): for all other profiles chroma_format_idc stays 0 here, so
// PixFmt returns "" for them — confirm whether it should default to 1 (4:2:0)
switch s.profile_idc {
case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
// n is the number of scaling lists: 8, or 12 when chroma_format_idc is 3
n := byte(8)
s.chroma_format_idc = r.ReadUEGolomb()
if s.chroma_format_idc == 3 {
s.separate_colour_plane_flag = r.ReadBit()
n = 12
}
s.bit_depth_luma_minus8 = r.ReadUEGolomb()
s.bit_depth_chroma_minus8 = r.ReadUEGolomb()
s.qpprime_y_zero_transform_bypass_flag = r.ReadBit()
s.seq_scaling_matrix_present_flag = r.ReadBit()
if s.seq_scaling_matrix_present_flag != 0 {
for i := byte(0); i < n; i++ {
//goland:noinspection GoSnakeCaseUsage
seq_scaling_list_present_flag := r.ReadBit()
if seq_scaling_list_present_flag != 0 {
// the first six lists have 16 entries, the others 64
if i < 6 {
s.scaling_list(r, 16)
} else {
s.scaling_list(r, 64)
}
}
}
}
}
s.log2_max_frame_num_minus4 = r.ReadUEGolomb()
s.pic_order_cnt_type = r.ReadUEGolomb()
switch s.pic_order_cnt_type {
case 0:
s.log2_max_pic_order_cnt_lsb_minus4 = r.ReadUEGolomb()
case 1:
s.delta_pic_order_always_zero_flag = r.ReadBit()
s.offset_for_non_ref_pic = r.ReadSEGolomb()
s.offset_for_top_to_bottom_field = r.ReadSEGolomb()
s.num_ref_frames_in_pic_order_cnt_cycle = r.ReadUEGolomb()
for i := uint32(0); i < s.num_ref_frames_in_pic_order_cnt_cycle; i++ {
_ = r.ReadSEGolomb() // offset_for_ref_frame[i]
}
}
s.num_ref_frames = r.ReadUEGolomb()
s.gaps_in_frame_num_value_allowed_flag = r.ReadBit()
// picture size in macroblocks / map units
s.pic_width_in_mbs_minus_1 = r.ReadUEGolomb()
s.pic_height_in_map_units_minus_1 = r.ReadUEGolomb()
s.frame_mbs_only_flag = r.ReadBit()
if s.frame_mbs_only_flag == 0 {
s.mb_adaptive_frame_field_flag = r.ReadBit()
}
s.direct_8x8_inference_flag = r.ReadBit()
s.frame_cropping_flag = r.ReadBit()
if s.frame_cropping_flag != 0 {
s.frame_crop_left_offset = r.ReadUEGolomb()
s.frame_crop_right_offset = r.ReadUEGolomb()
s.frame_crop_top_offset = r.ReadUEGolomb()
s.frame_crop_bottom_offset = r.ReadUEGolomb()
}
// VUI parameters (decoded up to fixed_frame_rate_flag)
s.vui_parameters_present_flag = r.ReadBit()
if s.vui_parameters_present_flag != 0 {
s.aspect_ratio_info_present_flag = r.ReadBit()
if s.aspect_ratio_info_present_flag != 0 {
s.aspect_ratio_idc = r.ReadByte()
// 255 means an explicit sample aspect ratio follows
if s.aspect_ratio_idc == 255 {
s.sar_width = r.ReadUint16()
s.sar_height = r.ReadUint16()
}
}
s.overscan_info_present_flag = r.ReadBit()
if s.overscan_info_present_flag != 0 {
s.overscan_appropriate_flag = r.ReadBit()
}
s.video_signal_type_present_flag = r.ReadBit()
if s.video_signal_type_present_flag != 0 {
s.video_format = r.ReadBits8(3)
s.video_full_range_flag = r.ReadBit()
s.colour_description_present_flag = r.ReadBit()
if s.colour_description_present_flag != 0 {
s.colour_description = r.ReadUint24()
}
}
s.chroma_loc_info_present_flag = r.ReadBit()
if s.chroma_loc_info_present_flag != 0 {
s.chroma_sample_loc_type_top_field = r.ReadUEGolomb()
s.chroma_sample_loc_type_bottom_field = r.ReadUEGolomb()
}
s.timing_info_present_flag = r.ReadBit()
if s.timing_info_present_flag != 0 {
s.num_units_in_tick = r.ReadUint32()
s.time_scale = r.ReadUint32()
s.fixed_frame_rate_flag = r.ReadBit()
}
//...
}
// the reader flags EOF when the data ended before all fields were read
if r.EOF {
return nil
}
return s
}
// scaling_list reads one scaling list of sizeOfScalingList entries from r and
// discards it. Reading stops early once the computed next scale becomes zero.
//
//goland:noinspection GoSnakeCaseUsage
func (s *SPS) scaling_list(r *bits.Reader, sizeOfScalingList int) {
	last, next := int32(8), int32(8)
	for remaining := sizeOfScalingList; remaining > 0; remaining-- {
		if next == 0 {
			// no more deltas are coded for this list
			return
		}
		delta := r.ReadSEGolomb()
		next = (last + delta + 256) % 256
		if next != 0 {
			last = next
		}
	}
}
// Profile returns the name of the profile given by profile_idc: "Baseline",
// "Main", "Extended" or "High". Any other value is returned as a hex number
// such as "0x7A".
func (s *SPS) Profile() string {
	var name string
	switch idc := s.profile_idc; idc {
	case 0x42:
		name = "Baseline"
	case 0x4D:
		name = "Main"
	case 0x58:
		name = "Extended"
	case 0x64:
		name = "High"
	default:
		name = fmt.Sprintf("0x%02X", idc)
	}
	return name
}
// PixFmt returns the FFmpeg-style pixel format name for 8-bit video:
// "yuv420p" (or "yuvj420p" when video_full_range_flag is 1), "yuv422p" or
// "yuv444p", selected by chroma_format_idc. It returns an empty string for
// any other bit depth or chroma format.
func (s *SPS) PixFmt() string {
	if s.bit_depth_luma_minus8 != 0 {
		return ""
	}

	switch s.chroma_format_idc {
	case 1:
		if s.video_full_range_flag == 1 {
			return "yuvj420p"
		}
		return "yuv420p"
	case 2:
		return "yuv422p"
	case 3:
		return "yuv444p"
	default:
		return ""
	}
}
// String returns a one-line summary: profile name, level as major.minor,
// pixel format and picture size, e.g. "High 4.1, yuv420p, 1920x1080".
func (s *SPS) String() string {
	major, minor := s.level_idc/10, s.level_idc%10
	return fmt.Sprintf(
		"%s %d.%d, %s, %dx%d",
		s.Profile(), major, minor, s.PixFmt(), s.Width(), s.Height(),
	)
}
// FixPixFmt - change yuvj420p to yuv420p in SPS
// same as "-c:v copy -bsf:v h264_metadata=video_full_range_flag=0"
//
// sps is the sequence parameter set including its one-byte NAL header; it is
// modified in place. The fields are skipped in the same order in which
// DecodeSPS reads them, up to video_full_range_flag, which is cleared if it
// is set. Nothing is changed when the SPS contains a scaling list (scaling
// lists are not parsed here) or when the flag is absent or already 0.
func FixPixFmt(sps []byte) {
r := bits.NewReader(sps)
_ = r.ReadByte() // NAL header
profile := r.ReadByte()
_ = r.ReadByte() // profile_iop
_ = r.ReadByte() // level_idc
_ = r.ReadUEGolomb() // seq_parameter_set_id
switch profile {
case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
n := byte(8)
// chroma_format_idc
if r.ReadUEGolomb() == 3 {
_ = r.ReadBit()
n = 12
}
_ = r.ReadUEGolomb() // bit_depth_luma_minus8
_ = r.ReadUEGolomb() // bit_depth_chroma_minus8
_ = r.ReadBit() // qpprime_y_zero_transform_bypass_flag
// seq_scaling_matrix_present_flag
if r.ReadBit() != 0 {
for i := byte(0); i < n; i++ {
if r.ReadBit() != 0 {
return // skip
}
}
}
}
_ = r.ReadUEGolomb() // log2_max_frame_num_minus4
// pic_order_cnt_type
switch r.ReadUEGolomb() {
case 0:
_ = r.ReadUEGolomb()
case 1:
_ = r.ReadBit()
_ = r.ReadSEGolomb()
_ = r.ReadSEGolomb()
n := r.ReadUEGolomb()
for i := uint32(0); i < n; i++ {
_ = r.ReadSEGolomb()
}
}
_ = r.ReadUEGolomb() // num_ref_frames
_ = r.ReadBit() // gaps_in_frame_num_value_allowed_flag
_ = r.ReadUEGolomb() // pic_width_in_mbs_minus_1
_ = r.ReadUEGolomb() // pic_height_in_map_units_minus_1
// frame_mbs_only_flag; mb_adaptive_frame_field_flag follows only if it is 0
if r.ReadBit() == 0 {
_ = r.ReadBit()
}
_ = r.ReadBit() // direct_8x8_inference_flag
// frame_cropping_flag and the four crop offsets
if r.ReadBit() != 0 {
_ = r.ReadUEGolomb()
_ = r.ReadUEGolomb()
_ = r.ReadUEGolomb()
_ = r.ReadUEGolomb()
}
// vui_parameters_present_flag
if r.ReadBit() != 0 {
// aspect_ratio_info_present_flag
if r.ReadBit() != 0 {
if r.ReadByte() == 255 {
_ = r.ReadUint16()
_ = r.ReadUint16()
}
}
// overscan_info_present_flag
if r.ReadBit() != 0 {
_ = r.ReadBit()
}
// video_signal_type_present_flag
if r.ReadBit() != 0 {
_ = r.ReadBits8(3) // video_format
// video_full_range_flag
if r.ReadBit() == 1 {
// presumably Pos reports the byte index and bit position of
// the bit that was just read — confirm against bits.Reader
pos, bit := r.Pos()
sps[pos] &= ^byte(1 << bit)
}
}
}
}