Is It More Efficient to Use GL_TRIANGLE_STRIP or Indexed GL_TRIANGLES to Draw a Dynamic Number of Quads
Asked Answered
P

1

7

I'm developing a simple sprite-based 2D game in C++ that uses OpenGL for hardware-accelerated rendering, and SDL for window management and user input handling. Since it's a 2D game, I'm only ever going to need to draw quads, but because the number of sprites is dynamic, I can never rely on there being a constant number of quads. Consequently, I need to rebuffer all of the vertex data via my VBO each frame (since there may be more or fewer quads than there were in the last frame, and thus the buffer may be a different size).

The prototype program I have so far creates a window and allows the user to add and remove quads in a diagonal row by using the up and down arrow keys. Right now the quads I'm drawing are simple, untextured white squares. Here is the code I'm working with (compiles and works correctly under OS X 10.6.8 and Ubuntu 12.04 with OpenGL 2.1):

#if defined(__APPLE__)
    #include <OpenGL/OpenGL.h>
#endif
#if defined(__linux__)
    #define GL_GLEXT_PROTOTYPES
    #include <GL/glx.h>
#endif

#include <GL/gl.h>
#include <SDL.h>

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>


//A single 2D vertex position made of two GLint coordinates
struct Vertex
{
    GLint x;    //horizontal coordinate
    GLint y;    //vertical coordinate
};

//Window size and target framerate
const int SCREEN_WIDTH  = 1024;
const int SCREEN_HEIGHT = 768;
const int FPS           = 60;

//Shared program state
SDL_Surface *screen = 0;                //the surface returned by SDL_SetVideoMode
std::vector<Vertex> vertices;           //vertex positions of every quad, 4 consecutive entries per quad
std::vector<GLint> startingElements;    //per quad: index in 'vertices' where its 4 vertices begin
std::vector<GLint> counts;              //per quad: how many vertices it has
GLuint VBO = 0;                         //handle of the vertex buffer object


//Appends a vertex with coordinates (x, y) to the end of the global 'vertices' list
void createVertex(GLint x, GLint y)
{
    const Vertex corner = { x, y };
    vertices.push_back(corner);
}

//Adds one quad whose top-left corner is at (x, y) and which is w wide and h high (in pixels).
//Four vertices are appended to 'vertices' and one entry each to 'counts' and 'startingElements'.
void createQuad(GLint x, GLint y, GLint w, GLint h)
{
    const GLint left   = x;
    const GLint right  = x + w;
    const GLint top    = y;
    const GLint bottom = y + h;

    //The vertices are emitted in GL_TRIANGLE_STRIP order, i.e. column by
    //column, each column from top to bottom:
    //
    //    1-----3
    //    |     |
    //    |     |
    //    2-----4
    createVertex(left,  top);
    createVertex(left,  bottom);
    createVertex(right, top);
    createVertex(right, bottom);

    //every quad contributes exactly 4 vertices
    counts.push_back(4);

    //this quad's first vertex index: 4 vertices for each quad registered so far
    const GLint firstVertex = static_cast<GLint>(startingElements.size() * 4);
    startingElements.push_back(firstVertex);

    //report the new quad total
    std::cout << "Number of Quads: " << counts.size() << std::endl;
}

//Removes the quad that was created last, or prints a message when there is none
void removeQuad()
{
    //nothing to remove: tell the user and leave all state untouched
    if (counts.empty())
    {
        std::cout << "Sorry, you can't remove a quad if there are no quads to remove!" << std::endl;
        return;
    }

    //drop the last quad's 4 vertices in one go, then its bookkeeping entries
    vertices.erase(vertices.end() - 4, vertices.end());
    startingElements.pop_back();
    counts.pop_back();

    std::cout << "Number of Quads: " << counts.size() << std::endl;
}


void init()
{
    //initialize SDL
    SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER);

    screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);

#if defined(__APPLE__)
    //Enable vsync so that we don't get tearing when rendering
    GLint swapInterval = 1;
    CGLSetParameter(CGLGetCurrentContext(), kCGLCPSwapInterval, &swapInterval);
#endif

    //Disable depth testing, lighting, and dithering, since we're going to be doing 2D rendering only
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_LIGHTING);
    glDisable(GL_DITHER);
    glPushAttrib(GL_DEPTH_BUFFER_BIT | GL_LIGHTING_BIT);

    //Set the projection matrix
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0, SCREEN_WIDTH, SCREEN_HEIGHT, 0, -1.0, 1.0);

    //Set the modelview matrix
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    //Create VBO
    glGenBuffers(1, &VBO);
    glBindBuffer(GL_ARRAY_BUFFER, VBO);
}


void gameLoop()
{   
    int frameDuration = 1000/FPS;   //the set duration (in milliseconds) of a single frame      
    int currentTicks;       
    int pastTicks = SDL_GetTicks();
    bool done = false;  
    SDL_Event event;

    while(!done)
    {   
        //handle user input
        while(SDL_PollEvent(&event))
        {
            switch(event.type)
            {
                case SDL_KEYDOWN:
                    switch (event.key.keysym.sym)
                    {
                        case SDLK_UP:   //create a new quad every time the up arrow key is pressed
                            createQuad(64*counts.size(), 64*counts.size(), 64, 64);
                            break;
                        case SDLK_DOWN: //remove the most recently created quad every time the down arrow key is pressed
                            removeQuad();
                            break;
                        default:
                            break;
                    }
                    break;
                case SDL_QUIT:
                    done = true;
                    break;
                default:
                    break;
            }           
        }


        //Clear the color buffer
        glClear(GL_COLOR_BUFFER_BIT);

        glBindBuffer(GL_ARRAY_BUFFER, VBO);
        //replace the current contents of the VBO with a completely new set of data (possibly including either more or fewer quads)
        glBufferData(GL_ARRAY_BUFFER, vertices.size()*sizeof(Vertex), &vertices.front(), GL_DYNAMIC_DRAW);

        glEnableClientState(GL_VERTEX_ARRAY);

            //Set vertex data
            glVertexPointer(2, GL_INT, sizeof(Vertex), 0);
            //Draw the quads
            glMultiDrawArrays(GL_TRIANGLE_STRIP, &startingElements.front(), &counts.front(), counts.size());

        glDisableClientState(GL_VERTEX_ARRAY);

        glBindBuffer(GL_ARRAY_BUFFER, 0);


        //Check to see if we need to delay the duration of the current frame to match the set framerate
        currentTicks = SDL_GetTicks();
        int currentDuration = (currentTicks - pastTicks);   //the duration of the frame so far
        if (currentDuration < frameDuration)
        {
            SDL_Delay(frameDuration - currentDuration);
        }
        pastTicks = SDL_GetTicks();

        // flip the buffers
        SDL_GL_SwapBuffers();
    }
}


//Releases the vertex buffer object and shuts SDL down.
void cleanUp()
{
    glDeleteBuffers(1, &VBO);
    VBO = 0;

    //The surface returned by SDL_SetVideoMode is owned by SDL and is released
    //by SDL_Quit; it must not be passed to SDL_FreeSurface.
    SDL_Quit();
    screen = 0;     //the surface no longer exists, so don't keep a dangling pointer
}


//Program entry point: prints the usage hint, then runs setup, the main loop and teardown in order
int main(int argc, char *argv[])
{
    //the command-line arguments are not used
    (void)argc;
    (void)argv;

    static const char usage[] =
        "To create a quad, press the up arrow. To remove the most recently created quad, press the down arrow.";
    std::cout << usage << std::endl;

    init();
    gameLoop();
    cleanUp();

    return 0;
}

At the moment I'm using GL_TRIANGLE_STRIP with glMultiDrawArrays() to render my quads. This works, and seems to be pretty decent in terms of performance, but I have to wonder whether using GL_TRIANGLES in conjunction with an IBO to avoid duplicate vertices would be a more efficient way to render. I've done some research, and some people suggest that indexed GL_TRIANGLES generally outperform GL_TRIANGLE_STRIP, but they also seem to assume that the number of quads would remain constant, and thus the VBO and IBO would not have to be rebuffered each frame. That's my biggest hesitation with indexed GL_TRIANGLES: if I did implement indexed GL_TRIANGLES, I would have to rebuffer the entire index buffer each frame in addition to rebuffering the entire VBO each frame, again because of the dynamic number of quads.

So basically, my question is this: Given that I have to rebuffer all of my vertex data to the GPU each frame due to the dynamic number of quads, would it be more efficient to switch to indexed GL_TRIANGLES to draw the quads, or should I stick with my current GL_TRIANGLE_STRIP implementation?

Paresis answered 25/3, 2013 at 14:55 Comment(2)
I think that before you worry about GL_TRIANGLES vs. GL_TRIANGLE_STRIP, you should minimize your glBufferData() calls. Easiest optimization: keep a dirty flag that records whether createQuad/removeQuad was called since the last glBufferData() call, and only recreate the buffer when the flag is set.Arrington
That's a great suggestion, thanks! I'll certainly implement it.Paresis
T
3

You'll probably be fine using un-indexed GL_QUADS/GL_TRIANGLES and a glDrawArrays() call.


SDL_Surface *screen; 
...
screen = SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, 0, SDL_OPENGL);
...
SDL_FreeSurface(screen);

Don't do that:

The returned surface is freed by SDL_Quit and must not be freed by the caller. This rule also includes consecutive calls to SDL_SetVideoMode (i.e. resize or resolution change) because the existing surface will be released automatically.


EDIT: Simple vertex array demo:

// g++ main.cpp -lglut -lGL
#include <GL/glut.h>
#include <vector>
using namespace std;

// OpenGL Mathematics (GLM): http://glm.g-truc.net/
#include <glm/glm.hpp>
#include <glm/gtc/random.hpp>
using namespace glm;

struct SpriteWrangler
{
    SpriteWrangler( unsigned int aSpriteCount )
    {
        verts.resize( aSpriteCount * 6 );
        states.resize( aSpriteCount );

        for( size_t i = 0; i < states.size(); ++i )
        {
            states[i].pos = linearRand( vec2( -400, -400 ), vec2( 400, 400 ) );
            states[i].vel = linearRand( vec2( -30, -30 ), vec2( 30, 30 ) );

            Vertex vert;
            vert.r = (unsigned char)linearRand( 64.0f, 255.0f );
            vert.g = (unsigned char)linearRand( 64.0f, 255.0f );
            vert.b = (unsigned char)linearRand( 64.0f, 255.0f );
            vert.a = 255;
            verts[i*6 + 0] = verts[i*6 + 1] = verts[i*6 + 2] =
            verts[i*6 + 3] = verts[i*6 + 4] = verts[i*6 + 5] = vert;
        }
    }

    void wrap( const float minVal, float& val, const float maxVal )
    {
        if( val < minVal )
            val = maxVal - fmod( maxVal - val, maxVal - minVal );
        else
            val = minVal + fmod( val - minVal, maxVal - minVal );
    }

    void Update( float dt )
    {
        for( size_t i = 0; i < states.size(); ++i )
        {
            states[i].pos += states[i].vel * dt;
            wrap( -400.0f, states[i].pos.x, 400.0f );
            wrap( -400.0f, states[i].pos.y, 400.0f );

            float size = 20.0f;
            verts[i*6 + 0].pos = states[i].pos + vec2( -size, -size );
            verts[i*6 + 1].pos = states[i].pos + vec2(  size, -size );
            verts[i*6 + 2].pos = states[i].pos + vec2(  size,  size );
            verts[i*6 + 3].pos = states[i].pos + vec2(  size,  size );
            verts[i*6 + 4].pos = states[i].pos + vec2( -size,  size );
            verts[i*6 + 5].pos = states[i].pos + vec2( -size, -size );
        }
    }

    struct Vertex
    {
        vec2 pos;
        unsigned char r, g, b, a;
    };

    struct State
    {
        vec2 pos;
        vec2 vel;       // units per second
    };

    vector< Vertex > verts;
    vector< State > states;
};

void display()
{
    // timekeeping
    static int prvTime = glutGet(GLUT_ELAPSED_TIME);
    const int curTime = glutGet(GLUT_ELAPSED_TIME);
    const float dt = ( curTime - prvTime ) / 1000.0f;
    prvTime = curTime;

    // sprite updates
    static SpriteWrangler wrangler( 2000 );
    wrangler.Update( dt );
    vector< SpriteWrangler::Vertex >& verts = wrangler.verts;

    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

    // set up projection and camera
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    double w = glutGet( GLUT_WINDOW_WIDTH );
    double h = glutGet( GLUT_WINDOW_HEIGHT );
    double ar = w / h;
    glOrtho( -400 * ar, 400 * ar, -400, 400, -1, 1);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    glEnableClientState( GL_VERTEX_ARRAY );
    glEnableClientState( GL_COLOR_ARRAY );

    glVertexPointer( 2, GL_FLOAT, sizeof( SpriteWrangler::Vertex ), &verts[0].pos.x );
    glColorPointer( 4, GL_UNSIGNED_BYTE, sizeof( SpriteWrangler::Vertex ), &verts[0].r );
    glDrawArrays( GL_TRIANGLES, 0, verts.size() );

    glDisableClientState( GL_VERTEX_ARRAY );
    glDisableClientState( GL_COLOR_ARRAY );

    glutSwapBuffers();
}

// Timer callback: re-arms itself to fire again in 16 ms and requests a
// redraw, so display() runs roughly every 16 ms
void timer( int extra )
{
    (void)extra;    // the timer value is not used

    glutTimerFunc( 16, timer, 0 );
    glutPostRedisplay();
}

// Entry point of the demo: creates the GLUT window, registers the callbacks
// and hands control to the GLUT main loop
int main(int argc, char **argv)
{
    const int windowEdge = 600;     // initial window width and height

    // window setup
    glutInit( &argc, argv );
    glutInitWindowSize( windowEdge, windowEdge );
    glutInitDisplayMode( GLUT_RGBA | GLUT_DEPTH | GLUT_DOUBLE );
    glutCreateWindow( "Sprites" );

    // callbacks: draw function plus a timer that fires immediately
    glutDisplayFunc( display );
    glutTimerFunc( 0, timer, 0 );

    glutMainLoop();
    return 0;
}

You can get decent performance with just vertex arrays.

Ideally most/all of your dts should be <= 16 milliseconds.

Threecornered answered 25/3, 2013 at 15:17 Comment(4)
Thanks for the SDL tip, good to know! I'd prefer to avoid GL_QUADS if possible, since it's deprecated in modern OpenGL implementations. So if I go with non-indexed GL_TRIANGLES, adding an additional 2 vertices per quad probably won't have any significant impact on performance?Paresis
Probably, unless you're looking to move and draw >20,000 quads/triangle-pairs.Threecornered
Gimme a few and I can whip up a demo you can try on your hardware.Threecornered
Tested it out, and any performance decrease there is negligible, so it looks like it's quite reasonable to go with non-indexed GL_TRIANGLES (and they're definitely a lot cleaner to work with than fooling with glMultiDrawArrays and triangle strips!). Thanks a lot for your time and expertise, much appreciated! :)Paresis

© 2022 - 2024 — McMap. All rights reserved.