c# Pitch shift of wave files
Asked Answered
B

2

6

I'm currently trying to do pitch shifting of a wave file using this algorithm

https://sites.google.com/site/mikescoderama/pitch-shifting

Here is my code, which uses the above implementation, but with no luck: the output wave file seems to be corrupted or invalid.

The code is quite simple, except for the pitch shift algorithm :)

  1. It loads a wave file, reads the wave file data and puts it in a byte[] array.
  2. Then it "normalizes" the byte data into the -1.0f to 1.0f range (as required by the creator of the pitch shift algorithm).
  3. It applies the pitch shift algorithm and then converts the normalized data back into a byte[] array.
  4. Finally, it saves a wave file with the same header as the original wave file and the pitch-shifted data.

Am I missing something?

        // Round-trips "sound.wav" through the normalized float representation and writes the
        // result to "sound_low.wav". The pitch-shift call itself is commented out below, so this
        // only exercises the read -> normalize -> denormalize -> write path and reports whether
        // the sample bytes survived unchanged.
        static void Main(string[] args)
    {
        // Read the wave file data bytes

        byte[] waveheader = null;
        byte[] wavedata = null;
        using (BinaryReader reader = new BinaryReader(File.OpenRead("sound.wav")))
        {
            // Read first 44 bytes (header);
            // NOTE(review): the header is treated as exactly 44 bytes. Whatever the file stores
            // after byte 44 (including any non-sample bytes) ends up in wavedata.
            waveheader= reader.ReadBytes(44);

            // Read data
            wavedata = reader.ReadBytes((int)reader.BaseStream.Length - 44);
        }

        // Header fields read at fixed byte offsets 22, 24 and 34.
        // Despite its name, bitRate is compared against 16 and 8 below, i.e. it is used as the
        // number of bits per sample.
        short nChannels = BitConverter.ToInt16(waveheader, 22);
        int sampleRate = BitConverter.ToInt32(waveheader, 24);
        short bitRate = BitConverter.ToInt16(waveheader, 34);

        // Normalized data store. Store values in the format -1.0 to 1.0
        // Sized as one float per two bytes regardless of bitRate; the 8-bit branch below reads
        // one byte per element, so in that case only half of wavedata is consumed.
        float[] in_data = new float[wavedata.Length / 2];

        // Normalize wave data into -1.0 to 1.0 values
        using(BinaryReader reader = new BinaryReader(new MemoryStream(wavedata)))
        {
            for (int i = 0; i < in_data.Length; i++)
            {
                // 16-bit samples are signed: divide by 32768 to map onto -1.0 .. just below 1.0.
                if(bitRate == 16)
                    in_data[i] = reader.ReadInt16() / 32768f;

                // 8-bit samples are read as unsigned bytes: re-center around 128 before scaling.
                if (bitRate == 8)                
                    in_data[i] = (reader.ReadByte() - 128) / 128f;
            }
        }

        //PitchShifter.PitchShift(1f, in_data.Length, (long)1024, (long)32, sampleRate, in_data);

        // Backup wave data
        byte[] copydata = new byte[wavedata.Length];
        Array.Copy(wavedata, copydata, wavedata.Length);

        // Revert data to byte format
        // The MemoryStream wraps wavedata itself, so the writes below land in the existing array.
        Array.Clear(wavedata, 0, wavedata.Length);
        using (BinaryWriter writer = new BinaryWriter(new MemoryStream(wavedata)))
        {
            for (int i = 0; i < in_data.Length; i++)
            {
                // NOTE(review): a sample of exactly 1.0 scales to 32768, which is outside the
                // Int16 range; the cast is not range-checked here.
                if(bitRate == 16)
                    writer.Write((short)(in_data[i] * 32768f));

                if (bitRate == 8)
                    writer.Write((byte)((in_data[i] * 128f) + 128));
            }
        }

        // Compare new wavedata with copydata
        if (wavedata.SequenceEqual(copydata))
        {
            Console.WriteLine("Data has no changes");
        }
        else
        {
            Console.WriteLine("Data has changed!");
        }

        // Save modified wavedata

        // Delete any previous output first so the new file is not written over older content.
        string targetFilePath = "sound_low.wav";
        if (File.Exists(targetFilePath))
            File.Delete(targetFilePath);

        // The original 44 header bytes are written back verbatim, followed by the sample bytes.
        using (BinaryWriter writer = new BinaryWriter(File.OpenWrite(targetFilePath)))
        {
            writer.Write(waveheader);
            writer.Write(wavedata);
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
Bateman answered 1/10, 2013 at 8:53 Comment(2)
Are you sure the header for your audio file is 44 bytes? According to this page sonicspot.com/guide/wavefiles.html it depends on many things and needs to be parsed properly.Maccarthy
You are right! I'm going to auto answer my question to post the correct usage.Bateman
B
4

The algorithm here works fine

https://sites.google.com/site/mikescoderama/pitch-shifting

My mistake was in how I was reading the wave header and wave data. I'm posting the fully working code here.

WARNING: this code works only for 16-bit PCM (stereo/mono) wave files. It can easily be adapted to work with 8-bit PCM.

    /// <summary>
    /// Reads "sound.wav", pitch-shifts every channel that is present, reports whether the
    /// sample bytes changed, and writes the original header followed by the re-encoded
    /// samples to "sound_low.wav".
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        byte[] headerBytes;
        byte[] sampleBytes;
        int rate;
        float[] leftChannel;
        float[] rightChannel;

        // Split the file into header bytes, raw sample bytes and normalized (-1.0 to 1.0) channels.
        GetWaveData("sound.wav", out headerBytes, out sampleBytes, out rate, out leftChannel, out rightChannel);

        // Pitch-shift each channel that exists: left first, then right.
        foreach (float[] channel in new[] { leftChannel, rightChannel })
        {
            if (channel == null)
            {
                continue;
            }

            PitchShifter.PitchShift(2f, channel.Length, 1024L, 10L, rate, channel);
        }

        // Keep a copy of the untouched sample bytes for the comparison further down.
        byte[] originalBytes = (byte[])sampleBytes.Clone();

        // Re-encode the processed channels into the sample buffer.
        GetWaveData(leftChannel, rightChannel, ref sampleBytes);

        // Advance to the first byte that differs from the backup, if there is one.
        int index = 0;
        while (index < sampleBytes.Length && sampleBytes[index] == originalBytes[index])
        {
            index++;
        }

        bool unchanged = index == sampleBytes.Length;
        Console.WriteLine(unchanged ? "Data has no changes" : "Data has changed!");

        // Write the result, replacing any previous output file.
        string outputPath = "sound_low.wav";
        if (File.Exists(outputPath))
        {
            File.Delete(outputPath);
        }

        using (FileStream output = File.OpenWrite(outputPath))
        using (BinaryWriter writer = new BinaryWriter(output))
        {
            writer.Write(headerBytes);
            writer.Write(sampleBytes);
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }

    /// <summary>
    /// Loads a 16-bit PCM WAVE file and splits it into its header bytes, its raw sample bytes
    /// and per-channel samples normalized to the -1.0 to 1.0 range.
    /// </summary>
    /// <param name="filename">Path of the WAVE file to read.</param>
    /// <param name="header">Receives every byte that precedes the sample data, up to and including the data chunk's size field.</param>
    /// <param name="data">Receives the raw bytes of the data chunk.</param>
    /// <param name="sampleRate">Receives the 32-bit value stored at byte offset 24.</param>
    /// <param name="left">Receives the left channel (the only channel when the file is not stereo).</param>
    /// <param name="right">Receives the right channel, or null when the channel count is not 2.</param>
    /// <exception cref="InvalidDataException">
    /// The file is too short, has no "data" chunk, or declares a chunk larger than the file.
    /// </exception>
    public static void GetWaveData(string filename, out byte[] header, out byte[] data, out int sampleRate, out float[] left, out float[] right)
    {
        byte[] wav = File.ReadAllBytes(filename);

        // Offsets 22..27 (channel count and sample rate) are read below.
        if (wav.Length < 28)
        {
            throw new InvalidDataException("File is too short to contain a WAVE header.");
        }

        // Determine if mono or stereo. Only the low byte of the channel count is read;
        // any channel count other than 2 is handled as a single channel.
        int channels = wav[22];
        bool stereo = channels == 2;

        // Get sample rate
        sampleRate = BitConverter.ToInt32(wav, 24);

        // Walk the chunk list that follows the 12-byte RIFF/WAVE preamble until the
        // "data" chunk is found. Each chunk is a 4-byte id, a 4-byte size and the body.
        int pos = 12;
        int subchunk2Size;
        while (true)
        {
            if (pos > wav.Length - 8)
            {
                throw new InvalidDataException("No data chunk found in WAVE file.");
            }

            bool isDataChunk = wav[pos] == (byte)'d'
                            && wav[pos + 1] == (byte)'a'
                            && wav[pos + 2] == (byte)'t'
                            && wav[pos + 3] == (byte)'a';
            int chunkSize = BitConverter.ToInt32(wav, pos + 4);
            pos += 8;

            if (chunkSize < 0 || chunkSize > wav.Length - pos)
            {
                throw new InvalidDataException("WAVE chunk size exceeds the file length.");
            }

            if (isDataChunk)
            {
                subchunk2Size = chunkSize;
                break;
            }

            // Skip the chunk body. RIFF chunks are word-aligned: an odd-sized body is
            // followed by one pad byte that is not counted in the size field.
            pos += chunkSize;
            if ((chunkSize & 1) == 1)
            {
                pos++;
            }
        }

        // pos is now positioned at the first byte of actual sound data.
        int bytesPerFrame = stereo ? 4 : 2;     // 16-bit samples, one or two channels
        int samples = subchunk2Size / bytesPerFrame;

        // Allocate memory (right stays null when the sound is not stereo)
        left = new float[samples];
        right = stereo ? new float[samples] : null;

        header = new byte[pos];
        Array.Copy(wav, header, pos);

        data = new byte[subchunk2Size];
        Array.Copy(wav, pos, data, 0, subchunk2Size);

        // Decode every complete frame. Iterating by frame count (rather than comparing the
        // absolute file offset with the chunk size) ensures the final samples are decoded too.
        int offset = pos;
        for (int i = 0; i < samples; i++)
        {
            left[i] = BytesToNormalized_16(wav[offset], wav[offset + 1]);
            offset += 2;

            if (stereo)
            {
                right[i] = BytesToNormalized_16(wav[offset], wav[offset + 1]);
                offset += 2;
            }
        }
    }

    /// <summary>
    /// Encodes normalized channel data as interleaved 16-bit samples into <paramref name="data"/>.
    /// The output is rescaled so that the loudest sample of either channel maps to Int16.MaxValue.
    /// </summary>
    /// <param name="left">Left (or only) channel.</param>
    /// <param name="right">Right channel, or null when the sound is mono.</param>
    /// <param name="data">Destination buffer. It is cleared and then overwritten in place.</param>
    /// <exception cref="ArgumentNullException"><paramref name="left"/> or <paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="right"/> is shorter than <paramref name="left"/>, or <paramref name="data"/>
    /// is too small to hold the encoded samples.
    /// </exception>
    public static void GetWaveData(float[] left, float[] right, ref byte[] data)
    {
        if (left == null)
        {
            throw new ArgumentNullException("left");
        }

        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        if (right != null && right.Length < left.Length)
        {
            throw new ArgumentException("Right channel is shorter than the left channel.", "right");
        }

        int sampleCount = left.Length;
        int requiredBytes = sampleCount * (right != null ? 4 : 2);
        if (data.Length < requiredBytes)
        {
            throw new ArgumentException("Destination buffer is too small for the encoded samples.", "data");
        }

        // Find the peak magnitude over every sample that will be written. Both channels are
        // scanned so that a right channel louder than the left cannot overflow Int16.
        float peak = 0f;
        for (int i = 0; i < sampleCount; i++)
        {
            float magnitude = Math.Abs(left[i]);
            if (magnitude > peak)
            {
                peak = magnitude;
            }

            if (right != null)
            {
                magnitude = Math.Abs(right[i]);
                if (magnitude > peak)
                {
                    peak = magnitude;
                }
            }
        }

        // Calculate k
        // This value is used to convert float to Int16. It is derived from the peak instead of
        // using Int16.MaxValue directly, to avoid overflow when samples exceed the -1..1 range.
        // For all-zero (or empty) input there is nothing to rescale, so avoid dividing by zero.
        float k = peak > 0f ? Int16.MaxValue / peak : Int16.MaxValue;

        // Revert data to byte format. The MemoryStream wraps the caller's array, so the
        // writes below land directly in data.
        Array.Clear(data, 0, data.Length);
        using (BinaryWriter writer = new BinaryWriter(new MemoryStream(data)))
        {
            for (int i = 0; i < sampleCount; i++)
            {
                byte lowByte;
                byte highByte;

                NormalizedToBytes_16(left[i], k, out lowByte, out highByte);
                writer.Write(lowByte);
                writer.Write(highByte);

                if (right != null)
                {
                    NormalizedToBytes_16(right[i], k, out lowByte, out highByte);
                    writer.Write(lowByte);
                    writer.Write(highByte);
                }
            }
        }
    }

    /// <summary>
    /// Converts one little-endian 16-bit sample to a float in the range -1.0 (inclusive)
    /// to just below 1.0.
    /// </summary>
    /// <param name="firstByte">Low-order byte of the sample.</param>
    /// <param name="secondByte">High-order byte of the sample.</param>
    /// <returns>The signed 16-bit value divided by 32768.</returns>
    static float BytesToNormalized_16(byte firstByte, byte secondByte)
    {
        // Assemble the signed 16-bit value (little endian: low byte first). The narrowing
        // cast must wrap for values with the top bit set, so it is marked unchecked.
        short s = unchecked((short)((secondByte << 8) | firstByte));

        // 32768 (2^15) is the magnitude of Int16.MinValue, so the result spans -1.0 to
        // just below 1.0.
        return s / 32768f;
    }

    /// <summary>
    /// Converts a float sample into the two bytes of a little-endian signed 16-bit value.
    /// </summary>
    /// <param name="value">Sample to convert.</param>
    /// <param name="k">Scale factor applied to <paramref name="value"/> before conversion (used instead of Int16.MaxValue to avoid peaks).</param>
    /// <param name="firstByte">Receives the low-order byte.</param>
    /// <param name="secondByte">Receives the high-order byte.</param>
    static void NormalizedToBytes_16(float value, float k, out byte firstByte, out byte secondByte)
    {
        float scaled = value * k;

        // Saturate instead of relying on the float-to-short cast, whose result is not
        // specified for out-of-range or NaN inputs in an unchecked context.
        short s;
        if (float.IsNaN(scaled))
        {
            s = 0;
        }
        else if (scaled >= short.MaxValue)
        {
            s = short.MaxValue;
        }
        else if (scaled <= short.MinValue)
        {
            s = short.MinValue;
        }
        else
        {
            s = (short)scaled;
        }

        // Mask both halves so the byte casts are in range even for negative samples.
        firstByte = (byte)(s & 0x00FF);
        secondByte = (byte)((s >> 8) & 0x00FF);
    }
Bateman answered 1/10, 2013 at 14:48 Comment(1)
I am wondering if it is possible to speed up the pitch shifting algorithm. I'm using it on an iPhone (thanks to Xamarin.iOS) and it's really slow. I guess it depends on the fact that it uses float and double variables (which in turn stress the FPU?)Bateman
C
0

Sorry to revive this, but I tried that PitchShifter class and, while it works, I get crackles in the audio when pitching down (0.5f). Did you work out a way around that?

Carolinacaroline answered 22/5, 2014 at 13:38 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.