package tutk

import (
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"sync"

	"github.com/AlexxIT/go2rtc/pkg/aac"
)

// Frame type values found in byte 1 of every packet. The frame type selects
// the header size (28 or 36 bytes) and tells whether a FrameInfo trailer may
// follow the payload.
const (
	FrameTypeStart     uint8 = 0x08 // Extended start (36-byte header)
	FrameTypeStartAlt  uint8 = 0x09 // StartAlt (36-byte header)
	FrameTypeCont      uint8 = 0x00 // Continuation (28-byte header)
	FrameTypeContAlt   uint8 = 0x04 // Continuation alt
	FrameTypeEndSingle uint8 = 0x01 // Single-packet frame (28-byte)
	FrameTypeEndMulti  uint8 = 0x05 // Multi-packet end (28-byte)
	FrameTypeEndExt    uint8 = 0x0d // Extended end (36-byte)
)

// Channel values found in byte 0 of every packet.
const (
	ChannelIVideo uint8 = 0x05
	ChannelAudio  uint8 = 0x03
	ChannelPVideo uint8 = 0x07
)

// frameInfoSize is the number of trailing bytes ParseFrameInfo reads.
const frameInfoSize = 40

// FrameInfo is the Wyze extended FRAMEINFO (40 bytes at end of packet).
// Video: 40 bytes, Audio: 16 bytes (uses same struct, fields 16+ are zero).
//
//	Offset  Size  Field
//	0-1     2     CodecID     - 0x4E=H264, 0x7B=H265, 0x90=AAC_WYZE
//	2       1     Flags       - Video: 1=Keyframe, 0=P-frame | Audio: sample rate/bits/channels
//	3       1     CamIndex    - Camera index
//	4       1     OnlineNum   - Online number
//	5       1     FPS         - Framerate (e.g. 20)
//	6       1     ResTier     - Video: 1=Low(360P), 4=High(HD/2K) | Audio: 0
//	7       1     Bitrate     - Video: 30=360P, 100=HD, 200=2K | Audio: 1
//	8-11    4     Timestamp   - Timestamp (increases ~50000/frame for 20fps video)
//	12-15   4     SessionID   - Session marker (constant per stream)
//	16-19   4     PayloadSize - Frame payload size in bytes
//	20-23   4     FrameNo     - Global frame number
//	24-35   12    DeviceID    - MAC address (ASCII) - video only
//	36-39   4     Padding     - Always 0 - video only
//
// DeviceID and Padding are not stored in the struct.
type FrameInfo struct {
	CodecID     byte   // 0-1: codec identifier (only the low byte is kept)
	Flags       uint8  // 2: keyframe flag (video) or audio format bits
	CamIndex    uint8  // 3: camera index
	OnlineNum   uint8  // 4: online number
	FPS         uint8  // 5: framerate
	ResTier     uint8  // 6: resolution tier (1=Low, 4=High)
	Bitrate     uint8  // 7: bitrate index (30=360P, 100=HD, 200=2K)
	Timestamp   uint32 // 8-11: timestamp
	SessionID   uint32 // 12-15: session marker (constant)
	PayloadSize uint32 // 16-19: payload size
	FrameNo     uint32 // 20-23: frame number
}

// IsKeyframe reports whether Flags is exactly 0x01.
func (fi *FrameInfo) IsKeyframe() bool {
	return fi.Flags == 0x01
}

// SampleRate looks up the audio sample rate using bits 2-5 of Flags as an
// index into sampleRates. It returns 16000 when the index is out of range.
func (fi *FrameInfo) SampleRate() uint32 {
	idx := (fi.Flags >> 2) & 0x0F
	if idx < uint8(len(sampleRates)) {
		return sampleRates[idx]
	}
	return 16000
}

// Channels returns 2 when bit 0 of Flags is set and 1 otherwise.
func (fi *FrameInfo) Channels() uint8 {
	if fi.Flags&0x01 == 1 {
		return 2
	}
	return 1
}

// ParseFrameInfo decodes the last frameInfoSize bytes of data as a FrameInfo.
// It returns nil when data is shorter than frameInfoSize.
func ParseFrameInfo(data []byte) *FrameInfo {
	if len(data) < frameInfoSize {
		return nil
	}

	fi := data[len(data)-frameInfoSize:]
	return &FrameInfo{
		CodecID:     fi[0],
		Flags:       fi[2],
		CamIndex:    fi[3],
		OnlineNum:   fi[4],
		FPS:         fi[5],
		ResTier:     fi[6],
		Bitrate:     fi[7],
		Timestamp:   binary.LittleEndian.Uint32(fi[8:]),
		SessionID:   binary.LittleEndian.Uint32(fi[12:]),
		PayloadSize: binary.LittleEndian.Uint32(fi[16:]),
		FrameNo:     binary.LittleEndian.Uint32(fi[20:]),
	}
}

// Packet is one complete media frame delivered on the FrameHandler output
// channel. Timestamp is expressed in 90 kHz units for video and in
// SampleRate units for audio. IsKeyframe is only set for video; SampleRate
// and Channels are only set for audio.
type Packet struct {
	Channel    uint8
	Codec      byte
	Timestamp  uint32
	Payload    []byte
	IsKeyframe bool
	FrameNo    uint32
	SampleRate uint32
	Channels   uint8
}

// PacketHeader holds the fields decoded from the fixed-size header at the
// start of every packet.
type PacketHeader struct {
	Channel      byte
	FrameType    byte
	HeaderSize   int
	FrameNo      uint32
	PktIdx       uint16
	PktTotal     uint16
	PayloadSize  uint16
	HasFrameInfo bool
}

// ParsePacketHeader decodes the packet header at the start of data.
// It returns nil when data is shorter than the header required by its frame
// type (36 bytes for Start, StartAlt and EndExt, 28 bytes otherwise).
func ParsePacketHeader(data []byte) *PacketHeader {
	if len(data) < 28 {
		return nil
	}

	frameType := data[1]
	hdr := &PacketHeader{
		Channel:    data[0],
		FrameType:  frameType,
		HeaderSize: 28,
	}
	switch frameType {
	case FrameTypeStart, FrameTypeStartAlt, FrameTypeEndExt:
		hdr.HeaderSize = 36
	}
	if len(data) < hdr.HeaderSize {
		return nil
	}

	// Both layouts place these fields at the same distance from the end of
	// the header: offsets 12/14/16/24 for the 28-byte header and 20/22/24/32
	// for the 36-byte header.
	base := hdr.HeaderSize - 16
	hdr.PktTotal = binary.LittleEndian.Uint16(data[base:])
	pktIdxOrMarker := binary.LittleEndian.Uint16(data[base+2:])
	hdr.PayloadSize = binary.LittleEndian.Uint16(data[base+4:])
	hdr.FrameNo = binary.LittleEndian.Uint32(data[base+12:])

	// On the closing packet of a frame the index slot holds the marker 0x0028
	// instead of an index (NOTE(review): 0x28 equals frameInfoSize, which is
	// presumably not a coincidence - confirm). The packet is then treated as
	// the last one of the frame.
	if pktIdxOrMarker == 0x0028 && (IsEndFrame(frameType) || hdr.PktTotal == 1) {
		hdr.HasFrameInfo = true
		if hdr.PktTotal > 0 {
			hdr.PktIdx = hdr.PktTotal - 1
		}
		return hdr
	}

	hdr.PktIdx = pktIdxOrMarker
	return hdr
}

// IsStartFrame reports whether frameType is FrameTypeStart or
// FrameTypeStartAlt.
func IsStartFrame(frameType uint8) bool {
	switch frameType {
	case FrameTypeStart, FrameTypeStartAlt:
		return true
	}
	return false
}

// IsEndFrame reports whether frameType is FrameTypeEndSingle,
// FrameTypeEndMulti or FrameTypeEndExt.
func IsEndFrame(frameType uint8) bool {
	switch frameType {
	case FrameTypeEndSingle, FrameTypeEndMulti, FrameTypeEndExt:
		return true
	}
	return false
}

// IsContinuationFrame reports whether frameType is FrameTypeCont or
// FrameTypeContAlt.
func IsContinuationFrame(frameType uint8) bool {
	switch frameType {
	case FrameTypeCont, FrameTypeContAlt:
		return true
	}
	return false
}

// channelState is the per-channel reassembly state for multi-packet video
// frames.
type channelState struct {
	frameNo    uint32     // current frame being assembled
	pktTotal   uint16     // expected total packets
	waitSeq    uint16     // next expected packet index (0, 1, 2, ...)
	waitData   []byte     // accumulated payload data
	frameInfo  *FrameInfo // frame info (from end packet)
	hasStarted bool       // received first packet of frame
	lastPktIdx uint16     // last received packet index (for OOO detection)
}

// reset clears all fields while keeping the capacity of waitData for reuse.
func (cs *channelState) reset() {
	cs.frameNo = 0
	cs.pktTotal = 0
	cs.waitSeq = 0
	cs.waitData = cs.waitData[:0]
	cs.frameInfo = nil
	cs.hasStarted = false
	cs.lastPktIdx = 0
}

// tsWrapPeriod is the value at which raw timestamps are assumed to wrap
// around (NOTE(review): implies a microsecond counter that wraps every
// second - confirm against the device).
const tsWrapPeriod uint32 = 1000000

// tsTracker turns wrapping raw timestamps into a monotonically increasing
// accumulated value.
type tsTracker struct {
	lastRawTS uint32
	accumUS   uint64
	firstTS   bool // true once the first timestamp has been seen
}

// update feeds the next raw timestamp and returns the accumulated time since
// the first one. The first call always returns 0.
func (t *tsTracker) update(rawTS uint32) uint64 {
	if !t.firstTS {
		t.firstTS = true
		t.lastRawTS = rawTS
		return 0
	}

	var delta uint32
	if rawTS >= t.lastRawTS {
		delta = rawTS - t.lastRawTS
	} else {
		// Wrapped: delta = (wrap - last) + new
		delta = (tsWrapPeriod - t.lastRawTS) + rawTS
	}

	t.accumUS += uint64(delta)
	t.lastRawTS = rawTS
	return t.accumUS
}

// FrameHandler reassembles incoming packets into complete frames and
// publishes them on a buffered channel.
type FrameHandler struct {
	channels map[byte]*channelState
	videoTS  tsTracker
	audioTS  tsTracker
	output   chan *Packet
	verbose  bool
	closed   bool
	closeMu  sync.Mutex // guards closed and sends on output
}

// NewFrameHandler returns a FrameHandler with a 128-packet output buffer.
// When verbose is true, per-packet diagnostics are printed to stdout.
func NewFrameHandler(verbose bool) *FrameHandler {
	return &FrameHandler{
		channels: make(map[byte]*channelState),
		output:   make(chan *Packet, 128),
		verbose:  verbose,
	}
}

// Recv returns the channel on which completed packets are delivered. The
// channel is closed by Close.
func (h *FrameHandler) Recv() <-chan *Packet {
	return h.output
}

// Close closes the output channel. Calling it more than once is a no-op.
func (h *FrameHandler) Close() {
	h.closeMu.Lock()
	defer h.closeMu.Unlock()

	if h.closed {
		return
	}
	h.closed = true
	close(h.output)
}

// Handle parses one raw packet and dispatches its payload to the audio or
// video path according to the channel byte. Packets that are too short, or
// that belong to any other channel, are ignored.
func (h *FrameHandler) Handle(data []byte) {
	hdr := ParsePacketHeader(data)
	if hdr == nil {
		return
	}

	payload, fi := h.extractPayload(data, hdr.Channel)
	if payload == nil {
		return
	}

	if h.verbose {
		fiStr := ""
		if hdr.HasFrameInfo {
			fiStr = " +FI"
		}
		fmt.Printf("[RX] ch=0x%02x type=0x%02x #%d pkt=%d/%d data=%dB%s\n",
			hdr.Channel, hdr.FrameType, hdr.FrameNo, hdr.PktIdx, hdr.PktTotal, len(payload), fiStr)
	}

	switch hdr.Channel {
	case ChannelAudio:
		h.handleAudio(payload, fi)
	case ChannelIVideo, ChannelPVideo:
		h.handleVideo(hdr.Channel, hdr, payload, fi)
	}
}

// extractPayload strips the header, and the trailing FrameInfo if one is
// present, from data. It returns (nil, nil) when data is shorter than its
// header. The FrameInfo is only returned, and only cut from the payload,
// when its codec is valid for the given channel; otherwise everything after
// the header is returned as payload.
func (h *FrameHandler) extractPayload(data []byte, channel byte) ([]byte, *FrameInfo) {
	if len(data) < 2 {
		return nil, nil
	}

	headerSize, fiSize := 28, 0
	switch data[1] {
	case FrameTypeStart:
		headerSize = 36
	case FrameTypeStartAlt:
		headerSize = 36
		// A StartAlt packet only carries a FrameInfo when it is the sole
		// packet of its frame (PktTotal == 1).
		if len(data) >= 22 && binary.LittleEndian.Uint16(data[20:]) == 1 {
			fiSize = frameInfoSize
		}
	case FrameTypeEndSingle, FrameTypeEndMulti:
		fiSize = frameInfoSize
	case FrameTypeEndExt:
		headerSize = 36
		fiSize = frameInfoSize
	}

	if len(data) < headerSize {
		return nil, nil
	}

	body := data[headerSize:]
	if fiSize == 0 || len(data) < headerSize+fiSize {
		return body, nil
	}

	fi := ParseFrameInfo(data)
	if fi == nil {
		return body, nil
	}

	validCodec := false
	switch channel {
	case ChannelIVideo, ChannelPVideo:
		validCodec = IsVideoCodec(fi.CodecID)
	case ChannelAudio:
		validCodec = IsAudioCodec(fi.CodecID)
	}
	if !validCodec {
		return body, nil
	}

	return data[headerSize : len(data)-fiSize], fi
}

// handleVideo appends payload to the frame being assembled on channel and
// queues the frame once every packet and its FrameInfo have arrived. An
// unexpected packet index discards the frame in progress.
func (h *FrameHandler) handleVideo(channel byte, hdr *PacketHeader, payload []byte, fi *FrameInfo) {
	cs := h.channels[channel]
	if cs == nil {
		cs = &channelState{}
		h.channels[channel] = cs
	}

	// Start a fresh frame when the frame number changes or when nothing is in
	// progress. The second condition matters for frame number 0: a reset
	// state also has frameNo == 0, so comparing numbers alone would leave
	// pktTotal at 0 and the frame could never complete.
	if hdr.FrameNo != cs.frameNo || !cs.hasStarted {
		// Check if previous frame was incomplete
		if cs.hasStarted && cs.waitSeq < cs.pktTotal {
			fmt.Printf("[DROP] ch=0x%02x #%d INCOMPLETE: got %d/%d pkts\n",
				channel, cs.frameNo, cs.waitSeq, cs.pktTotal)
		}
		cs.reset()
		cs.frameNo = hdr.FrameNo
		cs.pktTotal = hdr.PktTotal
	}

	// If packet index doesn't match expected, reset (data loss)
	if hdr.PktIdx != cs.waitSeq {
		fmt.Printf("[OOO] ch=0x%02x #%d frameType=0x%02x pktTotal=%d expected pkt %d, got %d - reset\n",
			channel, hdr.FrameNo, hdr.FrameType, hdr.PktTotal, cs.waitSeq, hdr.PktIdx)
		cs.reset()
		return
	}

	// First packet - mark as started
	if cs.waitSeq == 0 {
		cs.hasStarted = true
	}

	cs.waitData = append(cs.waitData, payload...)
	cs.waitSeq++

	// Store frame info if present
	if fi != nil {
		cs.frameInfo = fi
	}

	// Check if frame is complete
	if cs.waitSeq != cs.pktTotal || cs.frameInfo == nil {
		return
	}

	fi = cs.frameInfo
	defer cs.reset()

	if fi.PayloadSize > 0 && uint32(len(cs.waitData)) != fi.PayloadSize {
		fmt.Printf("[SIZE] ch=0x%02x #%d mismatch: expected %d, got %d\n",
			channel, cs.frameNo, fi.PayloadSize, len(cs.waitData))
		return
	}

	if len(cs.waitData) == 0 {
		return
	}

	accumUS := h.videoTS.update(fi.Timestamp)
	rtpTS := uint32(accumUS * 90000 / 1000000)

	// The payload is copied because waitData is reused for the next frame.
	pkt := &Packet{
		Channel:    channel,
		Payload:    append([]byte{}, cs.waitData...),
		Codec:      fi.CodecID,
		Timestamp:  rtpTS,
		IsKeyframe: fi.IsKeyframe(),
		FrameNo:    fi.FrameNo,
	}

	if h.verbose {
		frameType := "P"
		if fi.IsKeyframe() {
			frameType = "KEY"
		}
		fmt.Printf("[OK] ch=0x%02x #%d codec=0x%02x %s size=%d\n",
			channel, fi.FrameNo, fi.CodecID, frameType, len(pkt.Payload))
		fmt.Printf(" [0-1]codec=0x%02x [2]flags=0x%x [3]=%d [4]=%d\n",
			fi.CodecID, fi.Flags, fi.CamIndex, fi.OnlineNum)
		fmt.Printf(" [5]=%d [6]=%d [7]=%d [8-11]ts=%d\n",
			fi.FPS, fi.ResTier, fi.Bitrate, fi.Timestamp)
		fmt.Printf(" [12-15]=0x%x [16-19]payload=%d [20-23]frameNo=%d\n",
			fi.SessionID, fi.PayloadSize, fi.FrameNo)
		fmt.Printf(" rtp_ts=%d accum_us=%d\n", rtpTS, accumUS)
		fmt.Printf(" hex: %s\n", dumpHex(fi))
	}

	h.queue(pkt)
}

// handleAudio queues one audio packet. Audio is not reassembled: a packet
// with an empty payload or without a FrameInfo is dropped.
func (h *FrameHandler) handleAudio(payload []byte, fi *FrameInfo) {
	if len(payload) == 0 || fi == nil {
		return
	}

	var sampleRate uint32
	var channels uint8

	switch fi.CodecID {
	case CodecAACRaw, CodecAACADTS, CodecAACLATM, CodecAACAlt:
		sampleRate, channels = parseAudioParams(payload, fi)
	default:
		sampleRate = fi.SampleRate()
		channels = fi.Channels()
	}

	accumUS := h.audioTS.update(fi.Timestamp)
	rtpTS := uint32(accumUS * uint64(sampleRate) / 1000000)

	// Copy the payload so the packet does not alias the caller's buffer.
	payloadCopy := make([]byte, len(payload))
	copy(payloadCopy, payload)

	pkt := &Packet{
		Channel:    ChannelAudio,
		Payload:    payloadCopy,
		Codec:      fi.CodecID,
		Timestamp:  rtpTS,
		SampleRate: sampleRate,
		Channels:   channels,
		FrameNo:    fi.FrameNo,
	}

	if h.verbose {
		bits := 8
		if fi.Flags&0x02 != 0 {
			bits = 16
		}
		fmt.Printf("[OK] Audio #%d codec=0x%02x size=%d\n", fi.FrameNo, fi.CodecID, len(payload))
		fmt.Printf(" [0-1]codec=0x%02x [2]flags=0x%x(%dHz/%dbit/%dch)\n",
			fi.CodecID, fi.Flags, sampleRate, bits, channels)
		fmt.Printf(" [8-11]ts=%d [12-15]=0x%x rtp_ts=%d\n", fi.Timestamp, fi.SessionID, rtpTS)
		fmt.Printf(" hex: %s\n", dumpHex(fi))
	}

	h.queue(pkt)
}

// queue publishes pkt on the output channel without blocking. When the
// buffer is full the oldest queued packet is discarded to make room; if the
// send still fails, pkt itself is dropped. Nothing is sent after Close.
func (h *FrameHandler) queue(pkt *Packet) {
	h.closeMu.Lock()
	defer h.closeMu.Unlock()

	if h.closed {
		return
	}

	select {
	case h.output <- pkt:
		return
	default:
	}

	// Queue full - drop oldest
	select {
	case <-h.output:
	default:
	}

	select {
	case h.output <- pkt:
	default:
		// Queue still full, drop this packet
	}
}

// parseAudioParams returns the sample rate and channel count for an AAC
// payload. An ADTS header in the payload takes precedence; otherwise the
// values come from fi, and finally default to 16000 Hz mono.
func parseAudioParams(payload []byte, fi *FrameInfo) (sampleRate uint32, channels uint8) {
	if aac.IsADTS(payload) {
		if codec := aac.ADTSToCodec(payload); codec != nil {
			return codec.ClockRate, codec.Channels
		}
	}

	if fi != nil {
		return fi.SampleRate(), fi.Channels()
	}

	return 16000, 1
}

// dumpHex re-encodes fi into its 40-byte wire layout and returns it as
// lowercase hex byte pairs separated by single spaces. Bytes 24-39 (DeviceID
// and Padding) are not stored in the struct and are rendered as zeros.
func dumpHex(fi *FrameInfo) string {
	raw := make([]byte, frameInfoSize)
	raw[0] = fi.CodecID
	raw[1] = 0 // High byte (unused)
	raw[2] = fi.Flags
	raw[3] = fi.CamIndex
	raw[4] = fi.OnlineNum
	raw[5] = fi.FPS
	raw[6] = fi.ResTier
	raw[7] = fi.Bitrate
	binary.LittleEndian.PutUint32(raw[8:], fi.Timestamp)
	binary.LittleEndian.PutUint32(raw[12:], fi.SessionID)
	binary.LittleEndian.PutUint32(raw[16:], fi.PayloadSize)
	binary.LittleEndian.PutUint32(raw[20:], fi.FrameNo)

	encoded := make([]byte, hex.EncodedLen(len(raw)))
	hex.Encode(encoded, raw)

	// Build the result in one pre-sized buffer rather than by repeated string
	// concatenation.
	out := make([]byte, 0, len(raw)*3)
	for i := 0; i < len(encoded); i += 2 {
		if i > 0 {
			out = append(out, ' ')
		}
		out = append(out, encoded[i], encoded[i+1])
	}
	return string(out)
}