Yes. You can scan the incoming words on partial results by setting recognitionRequest.shouldReportPartialResults = YES;
the results callback is then called multiple times as recognition progresses.
You can then process the results as you go, scanning for a keyword / keyphrase before you get to the final result (i.e. without waiting for result.isFinal).
When you find the keyword / keyphrase you are looking for, just cancel the recognition.
I have successfully implemented voice commands using this approach in Speaking Email as a modified Cordova plugin (source here).
Example:
/**
 * Starts capturing microphone audio and feeds it into a speech recognition
 * task for the given language.
 *
 * Options are read from self.command:
 *   - argument 1: whether partial (non-final) results are reported.
 *   - argument 2: maximum number of alternative transcriptions per result.
 *
 * Each result is delivered through -sendResults: as an array of dictionaries
 * with the keys "transcript", "final" and "confidence" (the mean of the
 * per-segment confidences). Failures are reported through
 * -sendErrorWithMessage:andCode:.
 *
 * @param lang Locale identifier of the language to recognize (e.g. "en-US").
 */
- (void) recordAndRecognizeWithLang:(NSString *) lang
{
    NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:lang];
    self.sfSpeechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:locale];
    if (!self.sfSpeechRecognizer) {
        [self sendErrorWithMessage:@"The language is not supported" andCode:7];
        return;
    }

    // Cancel the previous task if it's running.
    if (self.recognitionTask) {
        [self.recognitionTask cancel];
        self.recognitionTask = nil;
    }

    [self initAudioSession];

    self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    self.recognitionRequest.shouldReportPartialResults = [[self.command argumentAtIndex:1] boolValue];

    // Both blocks below are held by objects reached through self (the
    // recognition task and the audio engine's input node), so they capture
    // self weakly instead of strongly.
    __weak typeof(self) weakSelf = self;

    self.recognitionTask = [self.sfSpeechRecognizer recognitionTaskWithRequest:self.recognitionRequest
                                                                 resultHandler:^(SFSpeechRecognitionResult *result, NSError *error) {
        __strong typeof(weakSelf) strongSelf = weakSelf;
        if (!strongSelf) {
            return;
        }

        if (error) {
            NSLog(@"error");
            [strongSelf stopAndRelease];
            // localizedFailureReason may be nil; localizedDescription never is.
            [strongSelf sendErrorWithMessage:(error.localizedFailureReason ?: error.localizedDescription)
                                     andCode:error.code];
            // The session has been torn down and the error reported; do not
            // also emit results for the same callback.
            return;
        }

        if (result) {
            NSMutableArray *alternatives = [[NSMutableArray alloc] init];
            int maxAlternatives = [[strongSelf.command argumentAtIndex:2] intValue];

            for (SFTranscription *transcription in result.transcriptions) {
                // Same unsigned comparison as before, made explicit; stop
                // iterating as soon as the limit is reached.
                if (alternatives.count >= (NSUInteger)maxAlternatives) {
                    break;
                }

                float confidenceSum = 0;
                for (SFTranscriptionSegment *transcriptionSegment in transcription.segments) {
                    NSLog(@"transcriptionSegment.confidence %f", transcriptionSegment.confidence);
                    confidenceSum += transcriptionSegment.confidence;
                }

                // Guard against an empty segment list: 0/0 would yield NaN,
                // which is not a usable confidence value.
                NSUInteger segmentCount = transcription.segments.count;
                float meanConfidence = (segmentCount > 0) ? (confidenceSum / segmentCount) : 0;

                NSMutableDictionary *resultDict = [[NSMutableDictionary alloc] init];
                [resultDict setValue:transcription.formattedString forKey:@"transcript"];
                [resultDict setValue:@(result.isFinal) forKey:@"final"];
                [resultDict setValue:@(meanConfidence) forKey:@"confidence"];
                [alternatives addObject:resultDict];
            }

            [strongSelf sendResults:@[alternatives]];

            if (result.isFinal) {
                [strongSelf stopAndRelease];
            }
        }
    }];

    AVAudioInputNode *inputNode = self.audioEngine.inputNode;
    AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0];

    // Only one tap may be installed per bus. Drop any tap left over from an
    // earlier session before installing a new one.
    [inputNode removeTapOnBus:0];
    [inputNode installTapOnBus:0
                    bufferSize:1024
                        format:recordingFormat
                         block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        [weakSelf.recognitionRequest appendAudioPCMBuffer:buffer];
    }];

    [self.audioEngine prepare];

    // Check the return value rather than ignoring the failure: without a
    // running engine no audio ever reaches the recognizer.
    NSError *startError = nil;
    if (![self.audioEngine startAndReturnError:&startError]) {
        // NOTE(review): assumes stopAndRelease is safe to call when the engine
        // never started, as it is already used for error cleanup above — confirm.
        [self stopAndRelease];
        [self sendErrorWithMessage:(startError.localizedFailureReason ?: startError.localizedDescription)
                           andCode:startError.code];
    }
}