How to losslessly concatenate Ogg Vorbis files?
Asked Answered
G

1

7

I'm trying to concatenate multiple ogg vorbis files into one.

I know that theoretically it should be enough to do:

cat 1.ogg 2.ogg > combined.ogg

But this has disadvantages:

  • not all players support files created like this (gstreamer doesn't)
  • the players that do, do not smoothly concatenate them but create ugly split second pauses
  • seeking seems not to be possible

I don't want to lose quality, so I could re-encode them into a lossless format like flac, but that would make the file size explode.

There seems to be no tool which does this. For example oggCat will re-encode the audio and thus lead to a slight loss in quality and the ffmpeg concat demuxer will not work for all input files. I opened this superuser question to find a tool but wrote my own when I figured that there is none.

So I tried to use libogg and libvorbis to manually concatenate ogg packets from the input files into ogg pages of the output file. The assumption is, that all ogg input files were encoded using the exact same parameters.

I came up with the following code:

#include <ogg/ogg.h>
#include <vorbis/codec.h>
#include <stdio.h>
#include <unistd.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>

// Fetch the next complete ogg page from `state` into `page`, refilling the
// sync buffer from `fd` in fixed-size chunks whenever no whole page is
// available yet.
//
// Returns 0 when `page` has been filled in. Returns -1 on end of file, on a
// read error, or when libogg rejects the buffer request or the byte count;
// the caller cannot tell these cases apart from the return value alone.
int read_page(int fd, ogg_sync_state *state, ogg_page *page)
{
    enum { CHUNK_SIZE = 4096 };

    while (ogg_sync_pageout(state, page) != 1) {
        char *buffer = ogg_sync_buffer(state, CHUNK_SIZE);
        if (buffer == NULL) {
            fprintf(stderr, "ogg_sync_buffer failed\n");
            return -1;
        }

        ssize_t bytes = read(fd, buffer, CHUNK_SIZE);
        if (bytes < 0) {
            // A failed read must not be reported to ogg_sync_wrote as a
            // (negative) number of bytes written into the buffer.
            fprintf(stderr, "read failed\n");
            return -1;
        }
        if (bytes == 0) {
            // end of file before another full page could be assembled
            return -1;
        }

        if (ogg_sync_wrote(state, bytes) != 0) {
            fprintf(stderr, "ogg_sync_wrote failed\n");
            return -1;
        }
    }
    return 0;
}

// Write exactly `len` bytes from `data` to `fd`, retrying after short writes.
// Returns 0 on success and -1 if write() fails or makes no progress.
static int write_all(int fd, const unsigned char *data, long len)
{
    while (len > 0) {
        ssize_t written = write(fd, data, (size_t)len);
        if (written <= 0) {
            return -1;
        }
        data += written;
        len -= written;
    }
    return 0;
}

// Write one ogg page (header followed by body) to `fd`.
// Returns 0 on success; prints "write failed" and returns -1 otherwise.
static int write_page(int fd, const ogg_page *page)
{
    if (write_all(fd, page->header, page->header_len) != 0 ||
        write_all(fd, page->body, page->body_len) != 0) {
        fprintf(stderr, "write failed\n");
        return -1;
    }
    return 0;
}

// Concatenate the ogg vorbis files named on the command line into a single
// ogg stream on standard output without re-encoding.
//
// The three vorbis header packets are copied from the first file only; the
// header packets of every later file are parsed and then discarded. All
// remaining packets are re-paged into one output stream with a random serial
// number. Packets are copied unmodified, so their granule positions are the
// ones stored in the input files.
//
// Returns 0 on success and 1 on any error.
int main(int argc, char *argv[])
{
    int ret;
    ogg_sync_state state;
    ogg_page page;
    int serial;
    ogg_stream_state sstate;
    bool found_bos;
    ogg_packet packet;
    int fd;
    int i;
    vorbis_info info;
    vorbis_comment comment;
    int vorbis_header_read;
    ogg_stream_state out_stream;
    ogg_page out_page;

    if (argc < 2) {
        fprintf(stderr, "usage: %s file.ogg [file.ogg ...]\n", argv[0]);
        return 1;
    }

    srand(time(NULL));
    ret = ogg_stream_init(&out_stream, rand());
    if (ret != 0) {
        fprintf(stderr, "ogg_stream_init failed\n");
        return 1;
    }

    // go through all input files
    for (i = 1; i < argc; i++) {
        vorbis_header_read = 0;
        found_bos = false;

        fd = open(argv[i], O_RDONLY);
        if (fd < 0) {
            fprintf(stderr, "cannot open %s\n", argv[i]);
            return 1;
        }

        ret = ogg_sync_init(&state);
        if (ret != 0) {
            fprintf(stderr, "ogg_sync_init failed\n");
            return 1;
        }

        vorbis_info_init(&info);
        vorbis_comment_init(&comment);

        // go through all ogg pages
        while (read_page(fd, &state, &page) == 0) {
            serial = ogg_page_serialno(&page);

            if (ogg_page_bos(&page)) {
                if (found_bos) {
                    fprintf(stderr, "cannot handle more than one stream\n");
                    return 1;
                }
                ret = ogg_stream_init(&sstate, serial);
                if (ret != 0) {
                    fprintf(stderr, "ogg_stream_init failed\n");
                    return 1;
                }
                found_bos = true;
            }

            if (!found_bos) {
                fprintf(stderr, "cannot continue without bos\n");
                return 1;
            }

            ret = ogg_stream_pagein(&sstate, &page);
            if (ret != 0) {
                fprintf(stderr, "ogg_stream_pagein failed\n");
                return 1;
            }

            // if this is the last page, then only write it if we are in the
            // last file
            // NOTE(review): skipping here also skips packet extraction for
            // this page, so packets completed on the EOS page of a non-final
            // input are not copied to the output.
            if (ogg_page_eos(&page) && i != argc - 1) {
                continue;
            }

            // go through all (hopefully vorbis) packets
            while ((ret = ogg_stream_packetout(&sstate, &packet)) != 0) {
                if (ret != 1) {
                    fprintf(stderr, "ogg_stream_packetout failed\n");
                    return 1;
                }

                // test if this stream is vorbis
                if (vorbis_header_read == 0) {
                    ret = vorbis_synthesis_idheader(&packet);
                    if (ret == 0) {
                        fprintf(stderr, "stream is not vorbis\n");
                        return 1;
                    }
                }

                // read exactly three vorbis headers
                if (vorbis_header_read < 3) {
                    ret = vorbis_synthesis_headerin(&info, &comment, &packet);
                    if (ret != 0) {
                        fprintf(stderr, "vorbis_synthesis_headerin failed\n");
                        return 1;
                    }
                    // if this is the first file, copy the header packet to the
                    // output
                    if (i == 1) {
                        ret = ogg_stream_packetin(&out_stream, &packet);
                        if (ret != 0) {
                            fprintf(stderr, "ogg_stream_packetin failed\n");
                            return 1;
                        }
                    }
                    vorbis_header_read++;
                    continue;
                }

                // first non-header packet of the first file: force the
                // buffered header packets out onto their own page(s) before
                // any other packet is queued
                if (vorbis_header_read == 3 && i == 1) {
                    while (ogg_stream_flush(&out_stream, &out_page) != 0) {
                        if (write_page(STDOUT_FILENO, &out_page) != 0) {
                            return 1;
                        }
                    }
                    // bump the counter past 3 so this flush runs only once
                    vorbis_header_read++;
                }

                ret = ogg_stream_packetin(&out_stream, &packet);
                if (ret != 0) {
                    fprintf(stderr, "ogg_stream_packetin failed\n");
                    return 1;
                }

                // emit every page that is ready, stopping after an EOS page
                while (ogg_stream_pageout(&out_stream, &out_page) != 0) {
                    if (write_page(STDOUT_FILENO, &out_page) != 0) {
                        return 1;
                    }
                    if (ogg_page_eos(&out_page)) {
                        break;
                    }
                }
            }
        }

        vorbis_info_clear(&info);
        vorbis_comment_clear(&comment);

        ret = ogg_sync_clear(&state);
        if (ret != 0) {
            fprintf(stderr, "ogg_sync_clear failed\n");
            return 1;
        }

        // sstate is only initialised once a BOS page has been seen; an input
        // without one must not have its (uninitialised) state cleared
        if (found_bos) {
            ret = ogg_stream_clear(&sstate);
            if (ret != 0) {
                fprintf(stderr, "ogg_stream_clear failed\n");
                return 1;
            }
        }

        close(fd);
    }

    // write out whatever is still buffered in the output stream, e.g. when
    // the last input did not end with an EOS page
    while (ogg_stream_flush(&out_stream, &out_page) != 0) {
        if (write_page(STDOUT_FILENO, &out_page) != 0) {
            return 1;
        }
    }

    ogg_stream_clear(&out_stream);

    return 0;
}

This nearly works but inserts barely audible click sounds at the points where the vorbis streams are joined.

How to do this correctly?

Can it be done at all?

Glanders answered 16/1, 2015 at 9:37 Comment(7)
A related question is here - superuser.com/questions/367584/…Goodell
@Glanders doesn't the same "click" happen in the concatenated file? The waveform should be the same for it...Mcwhorter
For context, here's the initial discussion that led to this question: github.com/villermen/runescape-cache-tools/issues/8Mcwhorter
Why do you think the seeking is broken?Meissner
@VitalyZdanevich because all players I tried will refuse to seek beyond the first fileGlanders
@Glanders I mean, do you know the internal reason for the broken seeking? I have the same problem, but I concatenate Ogg on the client side (in the browser with JS). – Meissner
@VitalyZdanevich there are multiple reasons: why would an ogg decoder read anything beyond the ogg page that was marked as the last? How would an ogg decoder handle if the timestamp of the next page would start again at zero? Obviously seeking cannot work if you just concatenate two ogg files without changing at least something in the individual pages or correct the timestamps. Also, the ogg page sequence number will be totally off. Is all this not obvious?Glanders
J
2

This is a fun one... :)

If you can afford a few ms of silence / skew between the files, just drop a couple of silent packets in between the two streams (I'll have to check the spec for the exact bit pattern in each packet, but it shouldn't be difficult to figure out if you have access to a decoder's source code).

If you can't afford the silence / skew, you probably need to re-encode as the only other option is to twiddle the compressed data to change the slope of the connecting parts of the waveform...

edit

Another option is to apply a smoothing algorithm to the PCM data at the point where the files are concatenated. This is not simple to do, but the idea is that you want the waveform to be "smooth" between the files. That's about all I've got...

edit 2

Just to be clear, the question's example code will almost work perfectly, assuming the source files use the same parameters. The one thing it is missing is a way to keep the seams from being audible. My suggestion to put in a couple silent packets would handle it, but for those that can't afford to, one might (as a pure guess) consider reducing the multiplier on the floors of the two packets around the seam by one to make the seam less apparent.

Jurisprudence answered 16/1, 2015 at 19:46 Comment(3)
thanks but I cannot afford silence because this would not keep the concatenation "lossless" compared to the original.Glanders
How would a couple of silent packets fix the seeking? – Meissner
It won't really address seeking to an arbitrary position, but it will give the decoder some empty space in which to spread the energy between the end of the first file and the start of the second. That's the same as re-encoding the files with a ramp down & up between them. Note the second edit above, though.Jurisprudence

© 2022 - 2024 — McMap. All rights reserved.