I am using Microsoft.CognitiveServices.Speech (https://www.nuget.org/packages/Microsoft.CognitiveServices.Speech) in Unity.
In the editor with a Windows target everything works perfectly, but I get an error in a Dedicated Server build for Linux (running on Ubuntu 22.04 LTS).
The error:
ApplicationException: Runtime error: Failed to initialize platform (azure-c-shared). Error: 2153
at Microsoft.CognitiveServices.Speech.Internal.SpxExceptionThrower.ThrowIfFail (System.IntPtr hr) [0x0005d] in <439ae8e654bd4287a1d7ffd07bb64d43>:0
at Microsoft.CognitiveServices.Speech.SpeechSynthesizer.FromConfig (Microsoft.CognitiveServices.Speech.SpeechConfig speechConfig, Microsoft.CognitiveServices.Speech.Audio.AudioConfig audioConfig) [0x00030] in <439ae8e654bd4287a1d7ffd07bb64d43>:0
at Microsoft.CognitiveServices.Speech.SpeechSynthesizer..ctor (Microsoft.CognitiveServices.Speech.SpeechConfig speechConfig, Microsoft.CognitiveServices.Speech.Audio.AudioConfig audioConfig) [0x00000] in <439ae8e654bd4287a1d7ffd07bb64d43>:0
at Evo.TTS.TTSClientMicrosoft.ConvertTextToSpeechAsync (Evo.Gender gender, System.String text) [0x0004c] in <dbdd55022f014a4e90cc144f717d0703>:0
at Evo.TTS.TTSClient.ConvertTextToSpeechAsync (Evo.Gender gender, System.String text) [0x00073] in <dbdd55022f014a4e90cc144f717d0703>:0
at Evo.TTS.VoiceController.TextToAudioData (System.String text) [0x0007c] in <dbdd55022f014a4e90cc144f717d0703>:0
at Evo.TTS.VoiceController.CmdSpeak (System.String text) [0x00078] in <dbdd55022f014a4e90cc144f717d0703>:0
at System.Runtime.CompilerServices.AsyncMethodBuilderCore+<>c.<ThrowAsync>b__7_0 (System.Object state) [0x00000] in <7fb66c41b6e641fb91b7fd5e48b4c50d>:0
at UnityEngine.UnitySynchronizationContext+WorkRequest.Invoke () [0x00002] in <46e7a35cb7c643d69d5edabca2b1a316>:0
at UnityEngine.UnitySynchronizationContext.Exec () [0x00056] in <46e7a35cb7c643d69d5edabca2b1a316>:0
at UnityEngine.UnitySynchronizationContext.ExecuteTasks () [0x00014] in <46e7a35cb7c643d69d5edabca2b1a316>:0
/// <summary>
/// Synthesizes <paramref name="text"/> to speech with the Azure Cognitive Services
/// Speech SDK, choosing a neural voice based on <paramref name="gender"/>.
/// </summary>
/// <param name="gender">Selects the voice: Female → en-US-AshleyNeural, otherwise en-US-DavisNeural.</param>
/// <param name="text">Plain text to synthesize.</param>
/// <returns>The raw <see cref="SpeechSynthesisResult"/> from the SDK (also passed to CheckResultForErrors).</returns>
public async Task<SpeechSynthesisResult> ConvertTextToSpeechAsync(Gender gender, string text)
{
    // SECURITY: subscription key and region are hard-coded here; they should be
    // loaded from configuration/secrets storage instead of source.
    var speechConfig = SpeechConfig.FromSubscription("REMOVED_THE_KEY", "eastus");

    // If only the language is set, the default voice of that language is chosen.
    speechConfig.SpeechSynthesisLanguage = "en-US"; // For example, "de-DE"

    // The voice setting overrides the language setting above, but will not
    // override a voice element present in input SSML.
    speechConfig.SpeechSynthesisVoiceName = gender == Gender.Female
        ? "en-US-AshleyNeural"
        : "en-US-DavisNeural";

    // NOTE(review): the null AudioConfig presumably suppresses local speaker
    // output so the audio stays in the result — confirm against the SDK docs,
    // especially for headless/server builds.
    using var synthesizer = new SpeechSynthesizer(speechConfig, null);

    var result = await synthesizer.SpeakTextAsync(text);
    Debug.Log(result.Reason);
    CheckResultForErrors(result);
    return result;
}