I am trying to convert audio clips [.flac format] to text using the google cloud speech api.I am using the python client library for making requests and have a bucket on the GCP with a flat directory structure which is used to store the audio files and make asynchronous requests.Below is the python code for making requests
from google.cloud import storage
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import json
def AudioAnalyze(bucket_name):
storage_client = storage.Client()
speech_client = speech.SpeechClient()
bucket = storage_client.get_bucket(bucket_name)
bucket_contents = bucket.list_blobs()
urls = []
audio_content=[]
for i in bucket_contents:
urls.append("gs://"+i.bucket.name+"/"+i.public_url.split("/")[-1])
for gcs_uri in urls:
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=44100,
language_code='en-US')
operation = speech_client.long_running_recognize(config,audio)
print(gcs_uri)
print('Waiting for operation to complete...')
response = operation.result(timeout=450)
speech2text=[]
for result in response.results:
for alternative in result.alternatives:
speech2text.append(alternative.transcript)
ad_content=" ".join(speech2text)
audio_content.append(ad_content)
with open("path/to a/json file/file.json","w") as f:
json.dump(audio_content,f)
if __name__=="__main__":
AudioAnalyze("adsaudiocontent")
I am encountering an error: google.api_core.exceptions.NotFound: 404 Requested entity was not found on a particular file present in the bucket while all the other files are being processed properly.The audio files have gone through a similar processing pipeline.The audio files are available on my public bucket:adsaudiocontent so that you can replicate the error.For Additional details here is the traceback.
Traceback (most recent call last):
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/grpc_helpers.py", line 54, in error_remapped_callable
return callable_(*args, **kwargs)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/grpc/_channel.py", line 487, in __call__
return _end_unary_response_blocking(state, call, False, deadline)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/grpc/_channel.py", line 437, in _end_unary_response_blocking
raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with (StatusCode.NOT_FOUND, Requested entity was not found.)>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "audioanalyze.py", line 40, in <module>
AudioAnalyze("adsaudiocontent")
File "audioanalyze.py", line 25, in AudioAnalyze
operation = speech_client.long_running_recognize(config,audio)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/cloud/speech_v1/gapic/speech_client.py", line 264, in long_running_recognize
request, retry=retry, timeout=timeout, metadata=metadata)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/gapic_v1/method.py", line 139, in __call__
return wrapped_func(*args, **kwargs)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/retry.py", line 260, in retry_wrapped_func
on_error=on_error,
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/retry.py", line 177, in retry_target
return target()
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/timeout.py", line 206, in func_with_timeout
return func(*args, **kwargs)
File "/home/pythonuser1/.virtualenvs/virtual_env/lib/python3.5/site-packages/google/api_core/grpc_helpers.py", line 56, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.NotFound: 404 Requested entity was not found.
i.public_url.split("/")[-1]
seems like a bad idea. Any object names that contain a '/' character will have most of their name stripped out. Why not justi.name
? – Foreshore