Real-time AVAssetWriter synchronise audio and video when pausing/resuming

I am trying to record a video with sound using iPhone's front camera. As I need to also support pause/resume functionality, I need to use AVAssetWriter. I've found an example online, written in Objective-C, which almost achieves the desired functionality (http://www.gdcl.co.uk/2013/02/20/iPhone-Pause.html)

Unfortunately, after converting this example to Swift, I notice that if I pause/resume, at the end of each "section" there is a small but noticeable period during which the video is just a still frame and the audio is playing. So, it seems that when isPaused is triggered, the recorded audio track is longer than the recorded video track.

Sorry if it may seem like a noob question, but I am not a great expert in AVFoundation and some help would be appreciated!

Below I post my implementation of didOutput sampleBuffer.

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    var isVideo = true
    if videoConntection != connection {
        isVideo = false
    }
    if (!isCapturing || isPaused) {
        return
    }

    if (encoder == nil) {
        if isVideo {
            return
        }
        if let fmt = CMSampleBufferGetFormatDescription(sampleBuffer) {
            let desc = CMAudioFormatDescriptionGetStreamBasicDescription(fmt as CMAudioFormatDescription)
            if let chan = desc?.pointee.mChannelsPerFrame, let rate = desc?.pointee.mSampleRate {
                let path = tempPath()!
                encoder = VideoEncoder(path: path, height: Int(cameraSize.height), width: Int(cameraSize.width), channels: chan, rate: rate)
            }
        }
    }
    if discont {
        if isVideo {
            return
        }
        discont = false
        var pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
        let last = lastAudio
        if last.flags.contains(CMTimeFlags.valid) {
            if cmOffset.flags.contains(CMTimeFlags.valid) {
                pts = CMTimeSubtract(pts, cmOffset)
            }
            let off = CMTimeSubtract(pts, last)
            print("setting offset from \(isVideo ? "video":"audio")")
            print("adding \(CMTimeGetSeconds(off)) to \(CMTimeGetSeconds(cmOffset)) (pts \(CMTimeGetSeconds(cmOffset)))")
            if cmOffset.value == 0 {
                cmOffset = off
            }
            else {
                cmOffset = CMTimeAdd(cmOffset, off)
            }
        }
        lastVideo.flags = []
        lastAudio.flags = []
        return
    }
    var out:CMSampleBuffer?
    if cmOffset.value > 0 {
        var count:CMItemCount = CMSampleBufferGetNumSamples(sampleBuffer)
        let pInfo = UnsafeMutablePointer<CMSampleTimingInfo>.allocate(capacity: count)
        CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: pInfo, entriesNeededOut: &count)
        var i = 0
        while i<count {
            pInfo[i].decodeTimeStamp = CMTimeSubtract(pInfo[i].decodeTimeStamp, cmOffset)
            pInfo[i].presentationTimeStamp = CMTimeSubtract(pInfo[i].presentationTimeStamp, cmOffset)
            i+=1
        }
        CMSampleBufferCreateCopyWithNewTiming(allocator: nil, sampleBuffer: sampleBuffer, sampleTimingEntryCount: count, sampleTimingArray: pInfo, sampleBufferOut: &out)
    }
    else {
        out = sampleBuffer
    }
    var pts = CMSampleBufferGetPresentationTimeStamp(out!)
    let dur = CMSampleBufferGetDuration(out!)
    if (dur.value > 0)
    {
        pts = CMTimeAdd(pts, dur);
    }
    if (isVideo) {
        lastVideo = pts;
    }
    else {
        lastAudio = pts;
    }
    encoder?.encodeFrame(sampleBuffer: out!, isVideo: isVideo)
}

And this is my VideoEncoder class:

final class VideoEncoder {
    var writer:AVAssetWriter
    var videoInput:AVAssetWriterInput
    var audioInput:AVAssetWriterInput
    var path:String

    init(path:String, height:Int, width:Int, channels:UInt32, rate:Float64) {
        self.path = path
        if FileManager.default.fileExists(atPath:path) {
            try? FileManager.default.removeItem(atPath: path)
        }
        let url = URL(fileURLWithPath: path)
        writer = try! AVAssetWriter(outputURL: url, fileType: .mp4)
        videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey:height,
            AVVideoHeightKey:width
        ])
        videoInput.expectsMediaDataInRealTime = true
        writer.add(videoInput)

        audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: [
            AVFormatIDKey:kAudioFormatMPEG4AAC,
            AVNumberOfChannelsKey:channels,
            AVSampleRateKey:rate
        ])
        audioInput.expectsMediaDataInRealTime = true
        writer.add(audioInput)
    }

    func finish(with completionHandler:@escaping ()->Void) {
        writer.finishWriting(completionHandler: completionHandler)
    }

    func encodeFrame(sampleBuffer:CMSampleBuffer, isVideo:Bool) -> Bool {
        if CMSampleBufferDataIsReady(sampleBuffer) {
            if writer.status == .unknown {
                writer.startWriting()
                writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
            }
            if writer.status == .failed {
                QFLogger.shared.addLog(format: "[ERROR initiating AVAssetWriter]", args: [], error: writer.error)
                return false
            }
            if isVideo {
                if videoInput.isReadyForMoreMediaData {
                    videoInput.append(sampleBuffer)
                    return true
                }
            }
            else {
                if audioInput.isReadyForMoreMediaData {
                    audioInput.append(sampleBuffer)
                    return true
                }
            }
        }
        return false
    }
}

The rest of the code should be pretty obvious, but just to make it complete, here is what I have for pausing:

isPaused = true
discont = true

And here is resume:

isPaused = false

If anyone could help me to understand how to align video and audio tracks during such live recording that would be great!

Recommended topics

Hot tags