I have a requirement to use the IBM Watson SDK to record audio from the microphone and send it to the IBM Watson Speech to Text service using C#. I am able to achieve this by saving the audio to a file locally with the NAudio library and then sending that file. However, my requirement is to use streaming mode to send live audio to the Speech to Text service without storing the audio file physically. I am not able to find a RecognizeUsingWebSocket method in the SDK; I can only find the Recognize method.
Below is the code I currently use, which saves the audio file locally before sending it. Can anyone please help me achieve the streaming approach?
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using IBM.Cloud.SDK.Core.Authentication.Iam;
using IBM.Watson.Assistant.v1.Model;
using IBM.Watson.Assistant.v1;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NAudio.Wave;
using System.Net.WebSockets;
using IBM.Cloud.SDK.Core.Http;
using IBM.Watson.SpeechToText.v1;
namespace watsonConsole
{
    class Program
    {
        // Service credentials and endpoints (values redacted).
        string apikey = "";
        string sttApiKey = "";
        string url = "";
        string stturl = "";
        string versionDate = "";
        string workspaceId = "";

        static public AssistantService service;
        static public SpeechToTextService sttservice;

        // Fields left over from my streaming attempt (currently unused).
        private WaveInEvent waveIn;
        private WaveFormat format = new WaveFormat(16000, 16, 1);
        private ClientWebSocket ws;

        static public WaveFileWriter waveFile;

        static void Main(string[] args)
        {
            Program pr = new Program();

            // Authenticate with IAM API keys. (BearerTokenAuthenticator expects an
            // already-issued token, not an API key, so IamAuthenticator is used here.)
            IamAuthenticator authenticator = new IamAuthenticator(apikey: pr.apikey);
            service = new AssistantService(pr.versionDate, authenticator);
            service.SetServiceUrl(pr.url);

            IamAuthenticator sttauthenticator = new IamAuthenticator(apikey: pr.sttApiKey);
            sttservice = new SpeechToTextService(sttauthenticator);
            sttservice.SetServiceUrl(pr.stturl);

            // Record from the default capture device into a local WAV file.
            WaveInCapabilities deviceInfo = WaveIn.GetCapabilities(0);
            Console.WriteLine("Now recording...");
            WaveInEvent waveSource = new WaveInEvent();
            waveSource.DeviceNumber = 0;
            waveSource.WaveFormat = new WaveFormat(44100, 1);
            waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(waveSource_DataAvailable);

            string tempFile = @"C:/watsonConsole/bin/Debug/test/testaudiotest.wav";
            waveFile = new WaveFileWriter(tempFile, waveSource.WaveFormat);
            waveSource.StartRecording();
            Console.WriteLine("Press enter to stop");
            Console.ReadLine();
            waveSource.StopRecording();
            waveFile.Dispose();

            // Send the finished file to Speech to Text.
            pr.Recognize();
            Console.WriteLine("done");
            Console.ReadKey();
        }

        static void waveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            // Append each captured buffer to the WAV file.
            waveFile.Write(e.Buffer, 0, e.BytesRecorded);
        }

        public void StartRecording()
        {
            waveIn = new WaveInEvent
            {
                BufferMilliseconds = 50,
                DeviceNumber = 0,
                WaveFormat = format
            };
            waveIn.StartRecording();
        }

        public void Recognize()
        {
            // File-based recognition: this is the call I want to replace with a
            // live stream. The relative path resolves against bin/Debug, matching
            // the absolute tempFile path above.
            var result = sttservice.Recognize(
                audio: File.ReadAllBytes("test/testaudiotest.wav"),
                contentType: "audio/wav",
                wordAlternativesThreshold: 0.9f,
                languageCustomizationId: "",
                acousticCustomizationId: "",
                customizationWeight: 0.7,
                smartFormatting: true
            );
            Console.WriteLine(result.Response);
        }
    }
}
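
For reference, below is a rough sketch of the streaming version I am trying to get working. Since I cannot find a WebSocket method in the .NET SDK, this goes directly at the service's documented WebSocket interface using ClientWebSocket. The /v1/recognize WebSocket endpoint, the IAM token exchange, and the start/stop JSON messages come from the Speech to Text API reference, not from the SDK, so please treat this as an untested sketch rather than working code:

using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.WebSockets;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using NAudio.Wave;
using Newtonsoft.Json.Linq;

class SttStreamingSketch
{
    static async Task Main()
    {
        string sttApiKey = "";  // Speech to Text API key (redacted)
        string stturl = "";     // e.g. https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/<id>

        // 1. Exchange the API key for an IAM access token (documented IAM REST call).
        string token;
        using (var http = new HttpClient())
        {
            var form = new FormUrlEncodedContent(new[]
            {
                new KeyValuePair<string, string>("grant_type", "urn:ibm:params:oauth:grant-type:apikey"),
                new KeyValuePair<string, string>("apikey", sttApiKey)
            });
            var resp = await http.PostAsync("https://iam.cloud.ibm.com/identity/token", form);
            token = JObject.Parse(await resp.Content.ReadAsStringAsync())["access_token"].ToString();
        }

        // 2. Connect to the WebSocket recognize endpoint; the docs show the token
        //    being passed as an access_token query parameter.
        var wsUri = new Uri(stturl.Replace("https://", "wss://") + "/v1/recognize?access_token=" + token);
        var ws = new ClientWebSocket();
        await ws.ConnectAsync(wsUri, CancellationToken.None);

        // 3. Send the "start" message describing the raw PCM that will follow.
        string start = "{\"action\":\"start\",\"content-type\":\"audio/l16;rate=16000\",\"interim_results\":true}";
        await ws.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(start)),
            WebSocketMessageType.Text, true, CancellationToken.None);

        // 4. Print transcription JSON as it arrives (assumes each message fits in one buffer).
        var receiveLoop = Task.Run(async () =>
        {
            var buffer = new byte[8192];
            while (ws.State == WebSocketState.Open)
            {
                var r = await ws.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                if (r.MessageType == WebSocketMessageType.Close) break;
                Console.WriteLine(Encoding.UTF8.GetString(buffer, 0, r.Count));
            }
        });

        // 5. Stream microphone buffers as binary frames; no file on disk.
        //    WaveInEvent raises DataAvailable sequentially on one thread, so the
        //    blocking send keeps frames ordered (simplified; a real implementation
        //    should also guard against a send racing the final "stop" below).
        var waveIn = new WaveInEvent { WaveFormat = new WaveFormat(16000, 16, 1), BufferMilliseconds = 50 };
        waveIn.DataAvailable += (s, e) =>
            ws.SendAsync(new ArraySegment<byte>(e.Buffer, 0, e.BytesRecorded),
                WebSocketMessageType.Binary, true, CancellationToken.None).Wait();
        waveIn.StartRecording();
        Console.WriteLine("Streaming... press Enter to stop");
        Console.ReadLine();
        waveIn.StopRecording();

        // 6. Tell the service the audio is finished so it returns the final result.
        string stop = "{\"action\":\"stop\"}";
        await ws.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(stop)),
            WebSocketMessageType.Text, true, CancellationToken.None);
        await receiveLoop;
    }
}

If the .NET SDK genuinely has no equivalent of RecognizeUsingWebSocket, is calling the WebSocket interface directly like this the right approach, or is there an SDK feature I am missing?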