Real-time AVAssetWriter: synchronising audio and video when pausing/resuming
I am trying to record a video with sound using the iPhone's front camera. Since I also need to support pause/resume functionality, I have to use AVAssetWriter. I found an example online, written in Objective-C, which almost achieves the desired functionality (http://www.gdcl.co.uk/2013/02/20/iPhone-Pause.html).

Unfortunately, after converting this example to Swift, I noticed that if I pause and resume, then at the end of each "section" there is a small but noticeable period during which the video is just a still frame while the audio keeps playing. So it seems that when isPaused is triggered, the recorded audio track ends up longer than the recorded video track.

Sorry if this seems like a noob question, but I am not a great expert in AVFoundation and some help would be appreciated!

Below is my implementation of captureOutput(_:didOutput:from:).

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    let isVideo = connection == videoConnection
    if !isCapturing || isPaused {
        return
    }

    // Create the encoder lazily, once the first audio buffer arrives,
    // because the audio format (channel count and sample rate) is only
    // known at that point.
    if encoder == nil {
        if isVideo {
            return
        }
        if let fmt = CMSampleBufferGetFormatDescription(sampleBuffer),
           let desc = CMAudioFormatDescriptionGetStreamBasicDescription(fmt) {
            let channels = desc.pointee.mChannelsPerFrame
            let rate = desc.pointee.mSampleRate
            let path = tempPath()!
            encoder = VideoEncoder(path: path, height: Int(cameraSize.height), width: Int(cameraSize.width), channels: channels, rate: rate)
        }
    }

    // After a pause, wait for the first audio buffer and grow the running
    // offset by the length of the gap it reveals.
    if discont {
        if isVideo {
            return
        }
        discont = false
        var pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
        let last = lastAudio
        if last.flags.contains(.valid) {
            if cmOffset.flags.contains(.valid) {
                pts = CMTimeSubtract(pts, cmOffset)
            }
            let off = CMTimeSubtract(pts, last)
            print("setting offset from \(isVideo ? "video" : "audio")")
            print("adding \(CMTimeGetSeconds(off)) to \(CMTimeGetSeconds(cmOffset)) (pts \(CMTimeGetSeconds(pts)))")
            if cmOffset.value == 0 {
                cmOffset = off
            } else {
                cmOffset = CMTimeAdd(cmOffset, off)
            }
        }
        lastVideo.flags = []
        lastAudio.flags = []
        return
    }

    // Shift this buffer's timestamps back by the accumulated offset so the
    // written file contains no gap where the pause was.
    var out: CMSampleBuffer?
    if cmOffset.value > 0 {
        var count: CMItemCount = CMSampleBufferGetNumSamples(sampleBuffer)
        let pInfo = UnsafeMutablePointer<CMSampleTimingInfo>.allocate(capacity: count)
        defer { pInfo.deallocate() } // don't leak the timing array
        CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: pInfo, entriesNeededOut: &count)
        for i in 0..<count {
            pInfo[i].decodeTimeStamp = CMTimeSubtract(pInfo[i].decodeTimeStamp, cmOffset)
            pInfo[i].presentationTimeStamp = CMTimeSubtract(pInfo[i].presentationTimeStamp, cmOffset)
        }
        CMSampleBufferCreateCopyWithNewTiming(allocator: nil, sampleBuffer: sampleBuffer, sampleTimingEntryCount: count, sampleTimingArray: pInfo, sampleBufferOut: &out)
    } else {
        out = sampleBuffer
    }

    // Remember where each track currently ends, so the next pause can be
    // measured against it.
    var pts = CMSampleBufferGetPresentationTimeStamp(out!)
    let dur = CMSampleBufferGetDuration(out!)
    if dur.value > 0 {
        pts = CMTimeAdd(pts, dur)
    }
    if isVideo {
        lastVideo = pts
    } else {
        lastAudio = pts
    }
    encoder?.encodeFrame(sampleBuffer: out!, isVideo: isVideo)
}
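For context, the technique borrowed from the linked example keeps a running offset (cmOffset) equal to the total time spent paused, and rewrites every subsequent buffer's timestamps by subtracting that offset. For example, if the last audio buffer before pausing ended at 10 s and the first audio buffer after resuming starts at 13 s, cmOffset grows by 3 s, so in the written file the two sections butt up against each other with no gap.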

And this is my VideoEncoder class:

final class VideoEncoder {
    var writer: AVAssetWriter
    var videoInput: AVAssetWriterInput
    var audioInput: AVAssetWriterInput
    var path: String

    init(path: String, height: Int, width: Int, channels: UInt32, rate: Float64) {
        self.path = path
        if FileManager.default.fileExists(atPath: path) {
            try? FileManager.default.removeItem(atPath: path)
        }
        let url = URL(fileURLWithPath: path)
        writer = try! AVAssetWriter(outputURL: url, fileType: .mp4)

        videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey: height,
            AVVideoHeightKey: width
        ])
        videoInput.expectsMediaDataInRealTime = true
        writer.add(videoInput)

        audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: [
            AVFormatIDKey: kAudioFormatMPEG4AAC,
            AVNumberOfChannelsKey: channels,
            AVSampleRateKey: rate
        ])
        audioInput.expectsMediaDataInRealTime = true
        writer.add(audioInput)
    }

    func finish(with completionHandler: @escaping () -> Void) {
        writer.finishWriting(completionHandler: completionHandler)
    }

    @discardableResult
    func encodeFrame(sampleBuffer: CMSampleBuffer, isVideo: Bool) -> Bool {
        if CMSampleBufferDataIsReady(sampleBuffer) {
            // Start the session at the timestamp of the very first buffer.
            if writer.status == .unknown {
                writer.startWriting()
                writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
            }
            if writer.status == .failed {
                QFLogger.shared.addLog(format: "[ERROR initiating AVAssetWriter]", args: [], error: writer.error)
                return false
            }
            if isVideo {
                if videoInput.isReadyForMoreMediaData {
                    videoInput.append(sampleBuffer)
                    return true
                }
            } else {
                if audioInput.isReadyForMoreMediaData {
                    audioInput.append(sampleBuffer)
                    return true
                }
            }
        }
        return false
    }
}
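One detail worth noting: both inputs set expectsMediaDataInRealTime to true, which tells the writer to tune itself for a live source so that isReadyForMoreMediaData stays true as much as possible and camera frames are not dropped while writing.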

The rest of the code should be pretty obvious, but just to make it complete, here is what I have for pausing:

isPaused = true
discont = true

And here is resume:

isPaused = false
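
For completeness, stopping the recording would look roughly like this (just a sketch; stopCapture is an illustrative name, not part of the code above):

func stopCapture() {
    isCapturing = false
    encoder?.finish { [weak self] in
        // finishWriting has completed; the file at the encoder's path
        // is now ready to be played back or saved.
        DispatchQueue.main.async {
            self?.encoder = nil
        }
    }
}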

If anyone could help me understand how to align the video and audio tracks during such a live recording, that would be great!

Weiman answered 1/5/2020 at 17:27

OK, it turns out there was no mistake in the code I provided. The issue I experienced was caused by video smoothing, which was turned ON :) I guess it needs extra frames to smooth the video, which is why the video output freezes at the end for a short period of time.
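
For anyone who runs into the same thing: the setting in question appears to be video stabilization on the capture connection (see the comments below). A minimal sketch of turning it off, assuming videoDataOutput is the AVCaptureVideoDataOutput that feeds the delegate above:

if let connection = videoDataOutput.connection(with: .video),
   connection.isVideoStabilizationSupported {
    // Avoid the extra "smoothing" frames that freeze the tail of each clip.
    connection.preferredVideoStabilizationMode = .off
}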

Weiman answered 2/5/2020 at 17:19

Comments:
Hi, could you share the whole CameraEngine class converted to Swift? – Rackley
What is the video smoothing setting you are talking about? – Lashondalashonde
@Lashondalashonde I'm not sure if this is what Andriy was talking about with smoothing turned ON, but I had an issue when setting the AVCaptureVideoDataOutput connection's preferredVideoStabilizationMode to auto: the video would have the last two seconds of frames frozen, and upon removing preferredVideoStabilizationMode there were no more frozen frames! – Brenza
Interesting. I'll check it out. Thanks – Lashondalashonde
Just in case somebody needs a working Swift copy: github.com/ankits16/CVRecorderFinal/blob/main/README.md – Bedell
