For the past few weeks, I've been trying to cobble together a solution from a number of different examples which will allow me to:
- Capture the audio from the microphone
- Down-sample it to 8kHz
- Encode each buffer using AAC
- Send the result out via a socket
Almost all the examples I've seen deal with encoding the audio to a file, which I do for testing, but that isn't what I need. I need to take a packet of data, encode it, and transmit it.
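For context, the services below are wired together roughly like this (a simplified sketch; in the real app the services are retained as properties and errors are handled properly):

func startStreaming(with config: DeviceConfiguration) throws {
    let microphone = MicrophoneService()
    let encoder = try AudioEncoderService()
    let transport = TransportService(config: config)

    try transport.start()   // open the socket and start draining TransportQueue
    encoder.start()         // start draining AudioEncoderQueue
    try microphone.start()  // start capture; the mixer tap feeds AudioEncoderQueue
}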
MicrophoneService
This basically sets up an AVAudioEngine and attaches an AVAudioMixerNode to it, which downsamples the audio. The result is then placed into a blocking queue (AudioEncoderQueue) so that the encoding service can encode the buffer.
import Foundation
import AVFoundation

// Based on https://mcmap.net/q/666838/-avaudioengine-downsample-issue
// https://github.com/onmyway133/notes/issues/367
class MicrophoneService {

    let audioEngine = AVAudioEngine()

    init() {
        do {
            try AVAudioSession.sharedInstance().setPreferredSampleRate(16000)
        } catch let error {
            print(error)
        }

        let engineInputNode = audioEngine.inputNode
        let bus = 0
        let engineInputNodeFormat = engineInputNode.outputFormat(forBus: bus)

        //engineInputNode.installTap(onBus: bus, bufferSize: 1024, format: engineInputNodeFormat) { (buffer, time) in
        //    AudioEncoderQueue.shared.put(buffer)
        //}

        let mixer = AVAudioMixerNode()
        audioEngine.attach(mixer)

        let mixerOutputFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)

        audioEngine.connect(engineInputNode, to: mixer, format: engineInputNodeFormat)
        audioEngine.connect(mixer, to: audioEngine.outputNode, format: mixerOutputFormat)

        mixer.installTap(onBus: bus, bufferSize: 1024 * 4, format: mixerOutputFormat) { (buffer: AVAudioPCMBuffer, time: AVAudioTime) in
            AudioEncoderQueue.shared.put(buffer)
        }
    }

    func start() throws {
        stop()
        audioEngine.prepare()
        try audioEngine.start()
    }

    func stop() {
        audioEngine.stop()
    }
}
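AudioEncoderQueue and TransportQueue aren't shown; they're simple blocking queues exposing a shared singleton. A minimal sketch of the shape (assuming a semaphore-based implementation; the real ones differ only in detail):

import Foundation

// Sketch of the blocking queues used above (AudioEncoderQueue / TransportQueue)
class BlockingQueue<Element> {
    private let semaphore = DispatchSemaphore(value: 0)
    private let lock = NSLock()
    private var items: [Element] = []

    func put(_ item: Element) {
        lock.lock()
        items.append(item)
        lock.unlock()
        semaphore.signal()
    }

    // Blocks until an item is available; returns nil on timeout
    func take(timeout: DispatchTime = .distantFuture) -> Element? {
        guard semaphore.wait(timeout: timeout) == .success else { return nil }
        lock.lock()
        defer { lock.unlock() }
        return items.removeFirst()
    }
}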
AudioEncoderService
This basically pops the next audio packet off the encoding queue, encodes it, and places it onto the transmission queue (TransportQueue).
import Foundation
import AVFoundation
import Cadmus

class AudioEncoderService {

    fileprivate var stopped: Bool = false
    // This is for debugging
    fileprivate let audioFile: AVAudioFile

    init() throws {
        // The audio file is used to ensure that I'm getting audio from the microphone,
        // as well as to test that the hardware can receive the data through a separate C program
        var url = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!
        url.appendPathComponent("Test.aac", isDirectory: false)
        let settings: [String: Any] = [
            AVFormatIDKey: NSNumber(value: kAudioFormatMPEG4AAC),
            AVSampleRateKey: NSNumber(value: 8000),
            AVNumberOfChannelsKey: NSNumber(value: 1),
            AVEncoderBitRatePerChannelKey: NSNumber(value: 16),
            AVEncoderAudioQualityKey: NSNumber(value: AVAudioQuality.high.rawValue)
        ]
        audioFile = try AVAudioFile(forWriting: url, settings: settings)
    }

    func start() {
        DispatchQueue.global(qos: .userInitiated).async {
            self.encodeAudio()
        }
    }

    func stop() {
        stopped = true
    }

    func encodeAudio() {
        repeat {
            do {
                if let buffer = AudioEncoderQueue.shared.take() {
                    // I normally write the buffer to the AVAudioFile here, but I've removed it for brevity
                    if let encodedBuffer = try encode(buffer) {
                        let data = Data(bytes: encodedBuffer.data, count: Int(encodedBuffer.byteLength))
                        TransportQueue.shared.put(data)
                    }
                }
            } catch let error {
                print(error)
            }
        } while !stopped
    }

    func encode(_ buffer: AVAudioPCMBuffer) throws -> AVAudioCompressedBuffer? {
        return try AudioUtilities.convertToAAC(from: buffer)
    }

    func toData(buffer: AVAudioPCMBuffer) -> Data {
        let audioBuffer = buffer.audioBufferList.pointee.mBuffers
        return Data(bytes: audioBuffer.mData!, count: Int(audioBuffer.mDataByteSize))
    }

    // Previous downsampling attempt
    //    func resample(_ buffer: AVAudioPCMBuffer) throws -> AVAudioPCMBuffer? {
    //        guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: resampleFormat, frameCapacity: AVAudioFrameCount(resampleFormat.sampleRate * 2.0)) else { return nil }
    //
    //        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
    //            outStatus.pointee = AVAudioConverterInputStatus.haveData
    //            return buffer
    //        }
    //
    //        var error: NSError? = nil
    //        resampleConverter.convert(to: pcmBuffer, error: &error, withInputFrom: inputBlock)
    //
    //        guard let resampleError = error else { return pcmBuffer }
    //
    //        throw resampleError
    //    }
}
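The debug write removed from encodeAudio() above for brevity is just the standard AVAudioFile call, roughly:

// Debug only: append the raw PCM buffer to Test.aac; since the file was
// opened with AAC settings, AVAudioFile transcodes on write
func writeDebug(_ buffer: AVAudioPCMBuffer, to audioFile: AVAudioFile) throws {
    try audioFile.write(from: buffer)
}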
AudioUtilities
This is where the audio buffer is encoded using AAC and converted to Data. This is based on a number of SO posts and some tweaking.
import Foundation
import AVFoundation

// https://mcmap.net/q/645878/-decode-aac-to-pcm-format-using-avaudioconverter-swift
class AudioUtilities {

    static func AACFormat() -> AVAudioFormat? {
        var outDesc = AudioStreamBasicDescription(
            mSampleRate: 8000,
            mFormatID: kAudioFormatMPEG4AAC,
            mFormatFlags: 0,
            mBytesPerPacket: 0,
            mFramesPerPacket: 0,
            mBytesPerFrame: 0,
            mChannelsPerFrame: 1,
            mBitsPerChannel: 0,
            mReserved: 0)
        let outFormat = AVAudioFormat(streamDescription: &outDesc)
        return outFormat
    }

    static var lpcmToAACConverter: AVAudioConverter! = nil

    static func convertToAAC(from buffer: AVAudioBuffer) throws -> AVAudioCompressedBuffer? {
        let outputFormat = AACFormat()

        // init the converter once
        if lpcmToAACConverter == nil {
            let inputFormat = buffer.format
            lpcmToAACConverter = AVAudioConverter(from: inputFormat, to: outputFormat!)
            lpcmToAACConverter.bitRate = 8000
        }

        let outBuffer = AVAudioCompressedBuffer(format: outputFormat!,
                                                packetCapacity: 8,
                                                maximumPacketSize: lpcmToAACConverter.maximumOutputPacketSize)
                                                //maximumPacketSize: 768)

        try self.convert(withConverter: lpcmToAACConverter,
                         from: buffer,
                         to: outBuffer)

        return outBuffer
    }

    private static func convert(withConverter: AVAudioConverter, from sourceBuffer: AVAudioBuffer, to destinationBuffer: AVAudioBuffer) throws {
        // input each buffer only once
        var newBufferAvailable = true
        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
            if newBufferAvailable {
                outStatus.pointee = .haveData
                newBufferAvailable = false
                return sourceBuffer
            } else {
                outStatus.pointee = .noDataNow
                return nil
            }
        }

        var outError: NSError? = nil
        let status = withConverter.convert(to: destinationBuffer, error: &outError, withInputFrom: inputBlock)
        switch status {
        case .haveData: break
        case .inputRanDry: print("Input ran dry")
        case .endOfStream: print("End of stream")
        case .error: print("!! Error")
        @unknown default: break
        }

        guard let error = outError else {
            return
        }
        throw error
    }
}
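For what it's worth, convertToAAC can be exercised in isolation like this (a sketch; the input can be any 8kHz mono AVAudioPCMBuffer, e.g. one from the mixer tap):

import AVFoundation

// Sketch: run a single PCM buffer through the converter and inspect the result
func dumpEncodedInfo(for buffer: AVAudioPCMBuffer) {
    do {
        if let compressed = try AudioUtilities.convertToAAC(from: buffer) {
            // packetCount / byteLength show whether the converter produced any output
            print("packets: \(compressed.packetCount), bytes: \(compressed.byteLength)")
        }
    } catch {
        print("encode failed: \(error)")
    }
}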
TransportService
This basically takes the data from the transport queue and sends it out via a socket. Each data packet is preceded by a custom header, which isn't included here as it's irrelevant.
import Foundation
import SwiftSocket
import Cadmus

class TransportService {

    fileprivate var stopped: Bool = false

    var socket: TCPClient?

    let config: DeviceConfiguration

    init(config: DeviceConfiguration) {
        self.config = config
    }

    func start() throws {
        stop()
        stopped = false
        socket = TCPClient(address: config.ipAddress, port: Int32(config.port))
        print("Connect to \(config.ipAddress) : \(config.port)")
        switch socket!.connect(timeout: 30) {
        case .success:
            DispatchQueue.global(qos: .userInitiated).async {
                self.transportData()
            }
        case .failure(let error):
            socket = nil
            throw error
        }
    }

    func stop() {
        stopped = true
        socket?.close()
        socket = nil
    }

    func transportData() {
        guard let socket = socket else { return }
        let headerLength = MemoryLayout<SPECIAL_HEADER>.size
        var header = SPECIAL_HEADER()
        // Populate the header properties
        // ...
        header.header_length = Int32(headerLength)
        header.sample_rate = 8000
        var count = 0
        repeat {
            guard let data = TransportQueue.shared.take() else { return }
            guard data.count > 0 else { return }
            header.packet_sn = Int32(count)
            header.data_length = Int32(data.count)
            var headerData = Data(bytes: &header, count: headerLength)
            headerData.append(data)
            print("\(count); \(header.data_length); \(headerData.count)")
            let result = socket.send(data: headerData)
            switch result {
            case .success: break
            case .failure(let error):
                log(error: "\(error)")
                return
            }
            count += 1
            Thread.sleep(forTimeInterval: 0.1)
        } while !stopped
        print("!! Stopped")
    }
}
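SPECIAL_HEADER itself is a bridged C struct whose full definition is omitted as irrelevant; purely as a stand-in showing the fields referenced above (field types are assumed):

// Hypothetical Swift stand-in for the bridged C header struct;
// only the fields used in transportData() are shown
struct SPECIAL_HEADER {
    var header_length: Int32 = 0
    var sample_rate: Int32 = 0
    var packet_sn: Int32 = 0
    var data_length: Int32 = 0
}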
nb: I've tested the transmission service independently, using a test file and extracting each frame from it, so I know this part works. It's only included here to complete the basic overall picture.
The problem...
- I keep getting Input ran dry from my encoder, no matter how large I make the buffer in the MicrophoneService
- There is no audio heard at the hardware end (again, I've tested the transmission workflow using a static file and it does work). I don't control the hardware, but I have test source code which can send an AAC file to it, frame by frame, using the TransportService, and that works.
The requirement...
The audio MUST be 8kHz, mono and encoded using AAC.
The Question(s)...
Is this the right approach, or is there another approach I should take to capture the audio, downsample it and encode it?
Is there anything I can do to limit the number of occurrences of Input ran dry, or should I not care?