Android audio capture silence detection
Asked Answered
W

4

12

I found this android code to record the user sound when he/she starts to speak and stop recording when he/she stops. But the problem is that the recording stops very quickly. If one wants to say two words, it just records the first.

How can the following code be changed to make the recording process less sensitive to momentary silence?

/**
 * Records from the microphone, starting when the signal level rises above a
 * threshold and stopping when it drops below it again, then stores the
 * captured audio as a 16-bit mono PCM WAV file in the "AudioRecorder" folder
 * on external storage.
 *
 * NOTE(review): the capture loop blocks the calling thread until the
 * recording ends. Running it from onCreate() keeps the UI thread busy; it
 * should be moved to a background thread.
 *
 * @param savedInstanceState state passed through to the superclass
 */
public void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.main);

    final int RECORDER_BPP = 16;
    final int RECORDER_SAMPLERATE = 8000;
    final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;
    final int channels = 1;
    // Sum of the last three window averages at or below this value counts
    // as silence. The averages are now computed without integer truncation,
    // so this value may need re-tuning for a given device.
    final float silenceThreshold = 350f;

    // Get the minimum buffer size required for the successful creation of
    // an AudioRecord object. A non-positive value is an error code.
    final int bufferSizeInBytes = AudioRecord.getMinBufferSize(
            RECORDER_SAMPLERATE, RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);
    if (bufferSizeInBytes <= 0) {
        Log.e("TAG", "Unsupported recording parameters: " + bufferSizeInBytes);
        return;
    }

    // Initialize Audio Recorder.
    AudioRecord audioRecorder = new AudioRecord(
            MediaRecorder.AudioSource.MIC, RECORDER_SAMPLERATE,
            RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING, bufferSizeInBytes);
    if (audioRecorder.getState() != AudioRecord.STATE_INITIALIZED) {
        Log.e("TAG", "AudioRecord could not be initialized.");
        audioRecorder.release();
        return;
    }

    byte audioBuffer[] = new byte[bufferSizeInBytes];
    boolean recording = false;
    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;
    byte totalByteBuffer[] = new byte[60 * 44100 * 2];

    // Start Recording.
    audioRecorder.startRecording();
    try {
        // While data come from microphone.
        while (true) {
            int numberOfReadBytes = audioRecorder.read(audioBuffer, 0,
                    bufferSizeInBytes);
            if (numberOfReadBytes < 0) {
                // Negative values are AudioRecord error codes.
                Log.e("TAG", "AudioRecord.read failed: " + numberOfReadBytes);
                break;
            }
            int sampleCount = numberOfReadBytes / 2;
            if (sampleCount == 0) {
                continue;
            }

            // Analyze Sound: mean absolute amplitude of the samples that
            // were actually read (little-endian 16-bit PCM).
            long absSum = 0;
            for (int i = 0; i + 1 < numberOfReadBytes; i += 2) {
                // Mask the low byte so its sign bit does not smear over the
                // high byte when the two are combined.
                short sample = (short) ((audioBuffer[i] & 0xff)
                        | (audioBuffer[i + 1] << 8));
                absSum += Math.abs(sample);
            }
            float totalAbsValue = (float) absSum / sampleCount;

            // Analyze temp buffer: sliding sum over the last three windows.
            tempFloatBuffer[tempIndex] = totalAbsValue;
            tempIndex = (tempIndex + 1) % tempFloatBuffer.length;
            float temp = 0.0f;
            for (int i = 0; i < tempFloatBuffer.length; ++i) {
                temp += tempFloatBuffer[i];
            }
            boolean quiet = temp <= silenceThreshold;

            if (!recording) {
                if (quiet) {
                    Log.i("TAG", "1");
                    continue;
                }
                Log.i("TAG", "2");
                recording = true;
            } else if (quiet) {
                // Speech ended: leave the loop and save what was captured.
                break;
            }

            if (totalReadBytes + numberOfReadBytes > totalByteBuffer.length) {
                Log.w("TAG", "Recording buffer full, stopping.");
                break;
            }

            // -> Recording sound here.
            Log.i("TAG", "Recording Sound.");
            System.arraycopy(audioBuffer, 0, totalByteBuffer, totalReadBytes,
                    numberOfReadBytes);
            totalReadBytes += numberOfReadBytes;
        }
    } finally {
        audioRecorder.stop();
        audioRecorder.release();
    }

    if (!recording || totalReadBytes == 0) {
        return;
    }

    Log.i("TAG", "Save audio to file.");

    // Save audio to file.
    File dir = new File(Environment.getExternalStorageDirectory().getPath(),
            "AudioRecorder");
    if (!dir.exists() && !dir.mkdirs()) {
        Log.e("TAG", "Could not create " + dir.getAbsolutePath());
        return;
    }
    String fn = dir.getAbsolutePath() + "/" + System.currentTimeMillis()
            + ".wav";

    byte[] header = buildWavHeader(totalReadBytes, RECORDER_SAMPLERATE,
            channels, RECORDER_BPP);
    FileOutputStream out = null;
    try {
        out = new FileOutputStream(fn);
        out.write(header);
        out.write(totalByteBuffer, 0, totalReadBytes);
    } catch (IOException e) {
        Log.e("TAG", "Could not write " + fn, e);
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                Log.e("TAG", "Could not close " + fn, e);
            }
        }
    }
}

/** Builds the 44-byte canonical RIFF/WAVE header for uncompressed PCM data. */
private static byte[] buildWavHeader(long totalAudioLen, long sampleRate,
        int channels, int bitsPerSample) {
    long totalDataLen = totalAudioLen + 36;
    long byteRate = sampleRate * channels * bitsPerSample / 8;
    int blockAlign = channels * bitsPerSample / 8;

    byte[] header = new byte[44];
    header[0] = 'R'; // RIFF/WAVE header
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    putLittleEndian(header, 4, totalDataLen, 4);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f'; // 'fmt ' chunk
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    putLittleEndian(header, 16, 16, 4); // size of 'fmt ' chunk
    putLittleEndian(header, 20, 1, 2); // format = 1 (PCM)
    putLittleEndian(header, 22, channels, 2);
    putLittleEndian(header, 24, sampleRate, 4);
    putLittleEndian(header, 28, byteRate, 4);
    putLittleEndian(header, 32, blockAlign, 2); // block align
    putLittleEndian(header, 34, bitsPerSample, 2); // bits per sample
    header[36] = 'd';
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    putLittleEndian(header, 40, totalAudioLen, 4);
    return header;
}

/** Stores the lowest {@code byteCount} bytes of {@code value} at {@code offset}, least significant first. */
private static void putLittleEndian(byte[] target, int offset, long value,
        int byteCount) {
    for (int i = 0; i < byteCount; i++) {
        target[offset + i] = (byte) ((value >> (8 * i)) & 0xff);
    }
}
Whitacre answered 2/10, 2013 at 19:26 Comment(0)
P
23

I don't like the way you wrote your code. Use mine. You can change the threshold variable according to your voice volume. In this sample app there is no button. When you launch the app, it starts to listen; when you press the back button, the acquisition is stopped and the file is stored in phone memory, in the folder AudioRecorder, with a new file each time you run it. Read the comments and the commented-out code as well. Note: the voice is appended to a temporary file each time the voice exceeds the threshold. The WAV header is added when you manually stop the recording by pressing back (the WAV file is created from the temporary file, with a new unique name). If you need to create a new file each time a voice is detected, you can easily modify the code according to your needs, but you should always go through the temporary file. If you need a sort of delay, in order to keep recording even when there is no voice (after the voice has stopped), just continue to save the data until your delay has elapsed. You can implement the delay by measuring the elapsed time (System.nanoTime) since the last found peak (which indicates voice presence).

Don't forget to mark the accepted solution (i tested it).

 package com.example.testaudiocapturewiththreshold;

 import android.os.Bundle;
 import android.app.Activity;
 import android.view.Menu;


 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;

 import android.media.AudioFormat;
 import android.media.AudioRecord;
 import android.media.MediaRecorder;
 import android.os.AsyncTask;
 import android.os.Environment;
 import android.os.Handler;
 import android.util.Log;

 /**
  * Sample activity that listens to the microphone as soon as it is created
  * and keeps only the audio buffers whose amplitude reaches {@link #threshold}.
  * The kept audio is appended to a temporary raw file; when the acquisition
  * stops (for example when the activity is destroyed) the raw data is wrapped
  * in a WAV header and stored under a new, time-stamped file name in the
  * "AudioRecorder" folder on external storage.
  */
 public class TestAudioCaptureWithThreshold extends Activity {

     private static final String TAG = TestAudioCaptureWithThreshold.class.getSimpleName();
     private static final int RECORDER_BPP = 16;
     private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
     private static final String AUDIO_RECORDER_FOLDER = "AudioRecorder";
     private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
     /** Delay between startAquisition() and the actual start of the recording task. */
     private static final int START_DELAY_MS = 500;
     /** Lower bound for the copy buffer used when building the WAV file. */
     private static final int MIN_COPY_BUFFER_BYTES = 4096;

     /** Stream of the temporary raw file currently being written, if any. */
     FileOutputStream os = null;

     int bufferSize;
     int frequency = 44100; //8000;
     int channelConfiguration = AudioFormat.CHANNEL_IN_MONO;
     int audioEncoding = AudioFormat.ENCODING_PCM_16BIT;
     /** Written on the UI thread and polled by the recording thread, hence volatile. */
     volatile boolean started = false;
     RecordAudio recordTask;

     /** Minimum absolute sample value that counts as "signal present". */
     short threshold = 15000;

     boolean debug = false;

     private final Handler startHandler = new Handler();

     /** Starts a fresh recording task; posted with a delay by startAquisition(). */
     private final Runnable startRunnable = new Runnable() {
         @Override
         public void run() {
             //elapsedTime=0;
             started = true;
             recordTask = new RecordAudio();
             recordTask.execute();
             //startButton.setText("RESET");
         }
     };

     @Override
     protected void onCreate(Bundle savedInstanceState) {
         Log.w(TAG, "onCreate");
         super.onCreate(savedInstanceState);
         setContentView(R.layout.activity_test_audio_capture_with_threshold);

         startAquisition();
     }

     @Override
     protected void onResume() {
         Log.w(TAG, "onResume");
         super.onResume();
     }

     @Override
     protected void onDestroy() {
         Log.w(TAG, "onDestroy");
         stopAquisition();
         super.onDestroy();
     }

     /**
      * Background task that reads from the microphone while {@link #started}
      * is true, stores the buffers that exceed the threshold in the temporary
      * file, and finally converts that file into a WAV file.
      */
     public class RecordAudio extends AsyncTask<Void, Double, Void> {

         @Override
         protected Void doInBackground(Void... arg0) {
             Log.w(TAG, "doInBackground");

             final String tempFilename = getTempFilename();
             AudioRecord audioRecord = null;
             FileOutputStream tempOut = null;
             boolean tempFileOpened = false;

             try {
                 bufferSize = AudioRecord.getMinBufferSize(frequency,
                         channelConfiguration, audioEncoding);
                 if (bufferSize <= 0) {
                     // Non-positive values are error codes, not sizes.
                     Log.e(TAG, "Unsupported recording parameters: " + bufferSize);
                     return null;
                 }

                 audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, frequency,
                         channelConfiguration, audioEncoding, bufferSize);
                 if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
                     Log.e(TAG, "AudioRecord could not be initialized");
                     return null;
                 }

                 tempOut = new FileOutputStream(tempFilename);
                 os = tempOut;
                 tempFileOpened = true;

                 short[] buffer = new short[bufferSize];
                 audioRecord.startRecording();

                 while (started && !isCancelled()) {
                     int bufferReadResult = audioRecord.read(buffer, 0, bufferSize);
                     if (bufferReadResult < 0) {
                         // Every negative value is an error code; none of them
                         // can be used as a sample count.
                         Log.e(TAG, "AudioRecord.read failed: " + bufferReadResult);
                         break;
                     }
                     if (bufferReadResult == 0) {
                         continue;
                     }

                     // Only look at the samples delivered by this read, not at
                     // leftovers from earlier reads further up the buffer.
                     int foundPeak = searchThreshold(buffer, bufferReadResult, threshold);
                     if (foundPeak > -1) { // found signal: record it
                         try {
                             tempOut.write(ShortToByte(buffer, bufferReadResult));
                         } catch (IOException e) {
                             Log.e(TAG, "Could not write to " + tempFilename, e);
                             break;
                         }
                     }
                     // else: below threshold, the buffer is not saved.

                     // Hint: with publishProgress(), if you count the total
                     // saved samples, you can optionally show the recorded
                     // length in seconds via onProgressUpdate().
                 }
             } catch (Throwable t) {
                 Log.e("AudioRecord", "Recording Failed", t);
             } finally {
                 releaseRecorder(audioRecord);
                 closeQuietly(tempOut);
             }

             if (tempFileOpened) {
                 copyWaveFile(tempFilename, getFilename());
                 deleteTempFile();
             }
             return null;
         } // end of doInBackground

         /**
          * Converts the first {@code elements} samples of {@code input} into
          * little-endian bytes.
          *
          * @param input    source samples
          * @param elements number of samples to convert; values outside
          *                 {@code 0..input.length} are clamped to that range
          * @return a new array of {@code 2 * elements} bytes
          */
         byte[] ShortToByte(short[] input, int elements) {
             int count = Math.max(0, Math.min(elements, input.length));
             byte[] buffer = new byte[count * 2];
             for (int i = 0; i < count; i++) {
                 buffer[2 * i] = (byte) (input[i] & 0x00FF);
                 buffer[2 * i + 1] = (byte) ((input[i] & 0xFF00) >> 8);
             }
             return buffer;
         }

         /**
          * Searches the whole array for a sample whose absolute value reaches
          * the threshold.
          *
          * @param arr samples to scan
          * @param thr threshold
          * @return index of the first such sample, or -1 if there is none
          */
         int searchThreshold(short[] arr, short thr) {
             return searchThreshold(arr, arr.length, thr);
         }

         /**
          * Searches the first {@code length} samples for one whose absolute
          * value reaches the threshold.
          *
          * @param arr    samples to scan
          * @param length number of valid samples at the start of {@code arr}
          * @param thr    threshold
          * @return index of the first such sample, or -1 if there is none
          */
         int searchThreshold(short[] arr, int length, short thr) {
             int limit = Math.min(length, arr.length);
             for (int peakIndex = 0; peakIndex < limit; peakIndex++) {
                 if ((arr[peakIndex] >= thr) || (arr[peakIndex] <= -thr)) {
                     return peakIndex;
                 }
             }
             return -1; //not found
         }

         /** Returns the recorder folder on external storage, creating it if needed. */
         private File getRecorderDir() {
             String filepath = Environment.getExternalStorageDirectory().getPath();
             File dir = new File(filepath, AUDIO_RECORDER_FOLDER);
             if (!dir.exists() && !dir.mkdirs()) {
                 Log.e(TAG, "Could not create " + dir.getAbsolutePath());
             }
             return dir;
         }

         /** Returns a new, time-stamped path for the final WAV file. */
         private String getFilename() {
             return (getRecorderDir().getAbsolutePath() + "/" + System.currentTimeMillis()
                     + AUDIO_RECORDER_FILE_EXT_WAV);
         }

         /**
          * Returns the path of the temporary raw file. This is a pure lookup:
          * it must not delete anything, because it is also used to locate the
          * recorded data after the recording has finished.
          */
         private String getTempFilename() {
             return (getRecorderDir().getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
         }

         /** Removes the temporary raw file. */
         private void deleteTempFile() {
             File file = new File(getTempFilename());
             if (file.exists() && !file.delete()) {
                 Log.w(TAG, "Could not delete " + file.getAbsolutePath());
             }
         }

         /**
          * Writes a WAV file consisting of a header followed by the raw PCM
          * data found in {@code inFilename}.
          *
          * @param inFilename  path of the raw PCM input file
          * @param outFilename path of the WAV file to create
          */
         private void copyWaveFile(String inFilename, String outFilename) {
             // File.length() is used instead of FileChannel.size(): channel
             // operations fail with ClosedByInterruptException when the
             // calling thread has been interrupted.
             long totalAudioLen = new File(inFilename).length();
             long totalDataLen = totalAudioLen + 36;
             long longSampleRate = frequency;
             int channels = 1;
             long byteRate = (long) RECORDER_BPP * frequency * channels / 8;

             byte[] data = new byte[Math.max(bufferSize, MIN_COPY_BUFFER_BYTES)];

             FileInputStream in = null;
             FileOutputStream out = null;
             try {
                 in = new FileInputStream(inFilename);
                 out = new FileOutputStream(outFilename);

                 WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                         longSampleRate, channels, byteRate);

                 int count;
                 while ((count = in.read(data)) != -1) {
                     // Write only what was read; the rest of the buffer holds
                     // stale bytes from the previous iteration.
                     out.write(data, 0, count);
                 }
             } catch (FileNotFoundException e) {
                 Log.e(TAG, "Could not open file for WAV conversion", e);
             } catch (IOException e) {
                 Log.e(TAG, "Could not write " + outFilename, e);
             } finally {
                 closeQuietly(in);
                 closeQuietly(out);
             }
         }

         /**
          * Writes the 44-byte RIFF/WAVE header for 16-bit PCM data.
          *
          * @param out            destination stream
          * @param totalAudioLen  size of the PCM data in bytes
          * @param totalDataLen   value of the RIFF chunk size field
          * @param longSampleRate sample rate in Hz
          * @param channels       number of channels
          * @param byteRate       bytes per second of audio
          * @throws IOException if the header cannot be written
          */
         private void WriteWaveFileHeader(
                 FileOutputStream out, long totalAudioLen,
                 long totalDataLen, long longSampleRate, int channels,
                 long byteRate) throws IOException {

             byte[] header = new byte[44];

             header[0] = 'R';  // RIFF/WAVE header
             header[1] = 'I';
             header[2] = 'F';
             header[3] = 'F';
             header[4] = (byte) (totalDataLen & 0xff);
             header[5] = (byte) ((totalDataLen >> 8) & 0xff);
             header[6] = (byte) ((totalDataLen >> 16) & 0xff);
             header[7] = (byte) ((totalDataLen >> 24) & 0xff);
             header[8] = 'W';
             header[9] = 'A';
             header[10] = 'V';
             header[11] = 'E';
             header[12] = 'f';  // 'fmt ' chunk
             header[13] = 'm';
             header[14] = 't';
             header[15] = ' ';
             header[16] = 16;  // 4 bytes: size of 'fmt ' chunk
             header[17] = 0;
             header[18] = 0;
             header[19] = 0;
             header[20] = 1;  // format = 1
             header[21] = 0;
             header[22] = (byte) channels;
             header[23] = 0;
             header[24] = (byte) (longSampleRate & 0xff);
             header[25] = (byte) ((longSampleRate >> 8) & 0xff);
             header[26] = (byte) ((longSampleRate >> 16) & 0xff);
             header[27] = (byte) ((longSampleRate >> 24) & 0xff);
             header[28] = (byte) (byteRate & 0xff);
             header[29] = (byte) ((byteRate >> 8) & 0xff);
             header[30] = (byte) ((byteRate >> 16) & 0xff);
             header[31] = (byte) ((byteRate >> 24) & 0xff);
             header[32] = (byte) (channels * RECORDER_BPP / 8);  // block align
             header[33] = 0;
             header[34] = RECORDER_BPP;  // bits per sample
             header[35] = 0;
             header[36] = 'd';
             header[37] = 'a';
             header[38] = 't';
             header[39] = 'a';
             header[40] = (byte) (totalAudioLen & 0xff);
             header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
             header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
             header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

             out.write(header, 0, 44);
         }

         /** Stops the recorder if it is recording and frees its native resources. */
         private void releaseRecorder(AudioRecord recorder) {
             if (recorder == null) {
                 return;
             }
             try {
                 if (recorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
                     recorder.stop();
                 }
             } catch (IllegalStateException e) {
                 Log.e(TAG, "Could not stop AudioRecord", e);
             }
             recorder.release();
         }

         /** Closes the stream, logging instead of propagating a failure. */
         private void closeQuietly(java.io.Closeable stream) {
             if (stream == null) {
                 return;
             }
             try {
                 stream.close();
             } catch (IOException e) {
                 Log.e(TAG, "Could not close stream", e);
             }
         }

     } // end of class RecordAudio (AsyncTask)

     @Override
     public boolean onCreateOptionsMenu(Menu menu) {
         getMenuInflater().inflate(R.menu.test_audio_capture_with_threshold,
                 menu);
         return true;
     }

     /** Stops the current acquisition, if any, and schedules a new one. */
     public void resetAquisition() {
         Log.w(TAG, "resetAquisition");
         stopAquisition();
         //startButton.setText("WAIT");
         startAquisition();
     }

     /**
      * Stops the acquisition. The recording task notices the cleared flag,
      * leaves its loop and then writes the WAV file.
      */
     public void stopAquisition() {
         Log.w(TAG, "stopAquisition");
         // Drop a start that has been scheduled but has not run yet.
         startHandler.removeCallbacks(startRunnable);
         if (started) {
             started = false;
             if (recordTask != null) {
                 // Do not interrupt the worker thread: it still has to copy
                 // the temporary file into the final WAV file.
                 recordTask.cancel(false);
             }
         }
     }

     /** Schedules the start of a new recording task after a short delay. */
     public void startAquisition() {
         Log.w(TAG, "startAquisition");
         startHandler.postDelayed(startRunnable, START_DELAY_MS);
     }

 }

Don't forget to add permissions to manifest file:

 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.example.testaudiocapturewiththreshold"
android:versionCode="1"
android:versionName="1.0" >
<!-- NOTE(review): nothing in the accompanying activity appears to change audio settings; confirm that MODIFY_AUDIO_SETTINGS is really needed. -->
<uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS"/>
<!-- The activity captures from the microphone through AudioRecord. -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<!-- The temporary raw file and the final WAV file are written to the external storage directory. -->
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />

<uses-sdk
    android:minSdkVersion="8"
    android:targetSdkVersion="17" />

<application
    android:allowBackup="true"
    android:icon="@drawable/ic_launcher"
    android:label="@string/app_name"
    android:theme="@style/AppTheme" >
    <!-- Launcher entry: the recording activity is the application's main screen. -->
    <activity
        android:name="com.example.testaudiocapturewiththreshold.TestAudioCaptureWithThreshold"
        android:label="@string/app_name" >
        <intent-filter>
            <action android:name="android.intent.action.MAIN" />

            <category android:name="android.intent.category.LAUNCHER" />
        </intent-filter>
    </activity>
</application>

 </manifest>
Phocine answered 3/11, 2013 at 10:52 Comment(19)
Thanks! I've changed the code a little to fit my use case. I detect silence and if it remains silent for X seconded i stop recording.Diazine
@Phocine I used your code .But can you please tell me how to stop recording and play sound if there is no sound?Kaneshakang
@Phocine : Thanks for awesome code..I have one requirement in my application..I need to detect a voice from a user and if there is no input from user for X no. of seconds than it should provide an error to user..What changes do I need to make that happen??Congregationalist
@Diazine : How do you detect silence ? Any help will be appreciated.I need to detect silence and show an error to user that there is no input from users end.Congregationalist
the silence is detected with a threshold. better read my answer.Phocine
@SiddharthVyas if the if (foundPeak>-1) returns false, i capture the current time, if (foundPeak>-1) returns true i set that time to 0 again, in the end i check if the time from first moment of silence is longer than my max wait time, if so ill do an action to inform the userDiazine
@Diazine : Hi, can you provide me the sample code snippet ? I am not able to get clarity on your solution of "in the end i check if the time from first moment of silence is longer than my max wait time, if so ill do an action to inform the user". Any help will be appreciated.Congregationalist
@Diazine : I am getting "start() status -38" error on calling "audioRecord.startRecording();".Congregationalist
did you first tried my solution as I wrote it in my post? fill required files, also the manifest file, and run it. if you find errors running it, upload your project and detail your environment.Phocine
@SiddharthVyas i've taken my working code and made a snipet for u at pastbin > pastebin.com/447qx83jDiazine
@Diazine Thanks bro..I will try it and let you know the output..Once again Thank you..Congregationalist
@Phocine : Your solution is working fine but when I implement silence detection logic I get "start() status -38" error.I will try the solution of SjoerdvGestelCongregationalist
@Diazine : Thanks bro..your code works for me..Appreciating your help.Congregationalist
@Gaucho: Thanks for your solution,This solution is working fine but not able to understand if I used threshold as 1000 then its working fine in nexus 4 and Asus and moto G and its not working for some samsung devices, but if I used 10000 then its work fine with samsung devices is these os specific issue or devices spedific issue? how we can resolve this issue?Freeforall
recording audio its too fast. if i say "hii" then after an second i said "hello", it will remove space between two words and speak without stops.how could i resolved this?its playing like i pressed fast forward button.Appendicular
Just fill the empty space with array with 0 value each time you don't find signal over required thresholdPhocine
@Phocine Hi,how do I stop the recording if there's no signal after the certain threshold ? can I just call stop recording when there's no signal ?Timothy
I get a java.nio.channels.ClosedByInterruptException when I read in.getChannel().size() into totalAudioLen. Did anyone else get it too? I used file.length() instead but a get faulty file saved. Any help?Remedial
@Phocine thank you for sharing this. I have converted the code in kotlin and it looks working too but file save in folder is not playing, it throws error, This format isn't supported or the file is corrupted. do have any idea what could be the issue? Also size of file is 0 bytes too.Disfranchise
G
2

For those who didn't find an answer yet.It is possible to detect the silence and stop recording when the user hasn't spoken for some seconds.

To determine whether the user has stopped speaking, we can take the data from the last second of the recording, map it to a number, and compare this number to the numbers obtained previously. We then return a confidence score (0 to infinity) that a longer pause has occurred in the speech input.

Follow this link everything is explained there. https://github.com/Kaljurand/speechutils/blob/master/app/src/main/java/ee/ioc/phon/android/speechutils/AbstractAudioRecorder.java thanks to Kaljurand and his wonderful code.

You can find the entire project in this link. https://github.com/Kaljurand/K6nele.

Gao answered 10/2, 2015 at 9:48 Comment(0)
G
2

The code from this post works, but it has an issue: the recording is cut off too abruptly, so the sound stutters on playback. To solve this, I added a counter, silenceDegree.

See my Kotlin code:

// Minimum absolute sample value that counts as "signal present".
var threshold: Short = 5000
// Number of consecutive silent buffers after which sending stops.
val SILENCE_DEGREE = 15

//buffer size - need be fixed, established value for IOS compatibility
val buffer = ShortArray(MIN_SIZE)
// Count of consecutive buffers without a peak, capped just above SILENCE_DEGREE.
var silenceDegree = 0

while (record) {
    // For a ShortArray the result is a number of shorts, or a negative error code.
    val shortsRead = audioRecord?.read(buffer, 0, MIN_SIZE)
    if (shortsRead != null) {
        if (shortsRead > 0) {
            val foundPeak = searchThreshold(buffer, threshold)

            if (foundPeak == -1) {
                if (silenceDegree <= SILENCE_DEGREE) {
                    silenceDegree++
                }

            } else {
                // Any peak resets the silence counter.
                silenceDegree = 0
            }

            // Stop sending only once the counter has reached SILENCE_DEGREE,
            // so short pauses between words are still transmitted.
            if (silenceDegree < SILENCE_DEGREE) {
                //SEND USEFUL DATA
                handler.sendDataOnRecord(INSTANCE.shorts2Bytes(buffer))
            }

        } else if (shortsRead < 0) {
            // Every negative value is an error code. ERROR_INVALID_OPERATION
            // can happen if there is already an active AudioRecord (e.g. in
            // another tab); the other codes are just as fatal, and ignoring
            // them would leave this loop spinning forever.
            record = false
        }
    }
}
Greige answered 15/1, 2018 at 13:41 Comment(0)
F
-7

Go through this documentation of CAPTURE

Then Follow this few links this might be helpful to find out your error.

http://www.devlper.com/2010/12/android-audio-recording-part-1/

http://www.devlper.com/2010/12/android-audio-recording-part-2/

Also finally Make sure you give your applications the Permissions it will need. At the very least it will need RECORD_AUDIO and WRITE_EXTERNAL_STORAGE.

Foveola answered 2/10, 2013 at 19:30 Comment(1)
I have no problem with recording, my problem is I want to stop recording when user stops speaking but not so quick, for example if microphone does not get any sound for 1 second then recording stopsWhitacre

© 2022 - 2024 — McMap. All rights reserved.