I am trying to record a video with sound using iPhone's front camera. As I need to also support pause/resume functionality, I need to use AVAssetWriter
. I've found an example online, written in Objective-C, which almost achieves the desired functionality (http://www.gdcl.co.uk/2013/02/20/iPhone-Pause.html)
Unfortunately, after converting this example to Swift, I notice that if I pause/resume, at the end of each "section" there is a small but noticeable period during which the video is just a still frame and the audio is playing. So, it seems that when isPaused
is triggered, the recorded audio track is longer than the recorded video track.
Sorry if it may seem like a noob question, but I am not a great expert in AVFoundation
and some help would be appreciated!
Below I post my implementation of didOutput sampleBuffer
.
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
var isVideo = true
if videoConntection != connection {
isVideo = false
}
if (!isCapturing || isPaused) {
return
}
if (encoder == nil) {
if isVideo {
return
}
if let fmt = CMSampleBufferGetFormatDescription(sampleBuffer) {
let desc = CMAudioFormatDescriptionGetStreamBasicDescription(fmt as CMAudioFormatDescription)
if let chan = desc?.pointee.mChannelsPerFrame, let rate = desc?.pointee.mSampleRate {
let path = tempPath()!
encoder = VideoEncoder(path: path, height: Int(cameraSize.height), width: Int(cameraSize.width), channels: chan, rate: rate)
}
}
}
if discont {
if isVideo {
return
}
discont = false
var pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let last = lastAudio
if last.flags.contains(CMTimeFlags.valid) {
if cmOffset.flags.contains(CMTimeFlags.valid) {
pts = CMTimeSubtract(pts, cmOffset)
}
let off = CMTimeSubtract(pts, last)
print("setting offset from \(isVideo ? "video":"audio")")
print("adding \(CMTimeGetSeconds(off)) to \(CMTimeGetSeconds(cmOffset)) (pts \(CMTimeGetSeconds(cmOffset)))")
if cmOffset.value == 0 {
cmOffset = off
}
else {
cmOffset = CMTimeAdd(cmOffset, off)
}
}
lastVideo.flags = []
lastAudio.flags = []
return
}
var out:CMSampleBuffer?
if cmOffset.value > 0 {
var count:CMItemCount = CMSampleBufferGetNumSamples(sampleBuffer)
let pInfo = UnsafeMutablePointer<CMSampleTimingInfo>.allocate(capacity: count)
CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: pInfo, entriesNeededOut: &count)
var i = 0
while i<count {
pInfo[i].decodeTimeStamp = CMTimeSubtract(pInfo[i].decodeTimeStamp, cmOffset)
pInfo[i].presentationTimeStamp = CMTimeSubtract(pInfo[i].presentationTimeStamp, cmOffset)
i+=1
}
CMSampleBufferCreateCopyWithNewTiming(allocator: nil, sampleBuffer: sampleBuffer, sampleTimingEntryCount: count, sampleTimingArray: pInfo, sampleBufferOut: &out)
}
else {
out = sampleBuffer
}
var pts = CMSampleBufferGetPresentationTimeStamp(out!)
let dur = CMSampleBufferGetDuration(out!)
if (dur.value > 0)
{
pts = CMTimeAdd(pts, dur);
}
if (isVideo) {
lastVideo = pts;
}
else {
lastAudio = pts;
}
encoder?.encodeFrame(sampleBuffer: out!, isVideo: isVideo)
}
And this is my VideoEncoder
class:
final class VideoEncoder {
var writer:AVAssetWriter
var videoInput:AVAssetWriterInput
var audioInput:AVAssetWriterInput
var path:String
init(path:String, height:Int, width:Int, channels:UInt32, rate:Float64) {
self.path = path
if FileManager.default.fileExists(atPath:path) {
try? FileManager.default.removeItem(atPath: path)
}
let url = URL(fileURLWithPath: path)
writer = try! AVAssetWriter(outputURL: url, fileType: .mp4)
videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey:height,
AVVideoHeightKey:width
])
videoInput.expectsMediaDataInRealTime = true
writer.add(videoInput)
audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: [
AVFormatIDKey:kAudioFormatMPEG4AAC,
AVNumberOfChannelsKey:channels,
AVSampleRateKey:rate
])
audioInput.expectsMediaDataInRealTime = true
writer.add(audioInput)
}
func finish(with completionHandler:@escaping ()->Void) {
writer.finishWriting(completionHandler: completionHandler)
}
func encodeFrame(sampleBuffer:CMSampleBuffer, isVideo:Bool) -> Bool {
if CMSampleBufferDataIsReady(sampleBuffer) {
if writer.status == .unknown {
writer.startWriting()
writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
}
if writer.status == .failed {
QFLogger.shared.addLog(format: "[ERROR initiating AVAssetWriter]", args: [], error: writer.error)
return false
}
if isVideo {
if videoInput.isReadyForMoreMediaData {
videoInput.append(sampleBuffer)
return true
}
}
else {
if audioInput.isReadyForMoreMediaData {
audioInput.append(sampleBuffer)
return true
}
}
}
return false
}
}
The rest of the code should be pretty obvious, but just to make it complete, here is what I have for pausing:
isPaused = true
discont = true
And here is resume:
isPaused = false
If anyone could help me to understand how to align video and audio tracks during such live recording that would be great!