OpenGL Convolution performance on O2 (or lack thereof)

OpenGL Convolution performance on O2 (or lack thereof)

Post by Dave Erickso » Thu, 15 Oct 1998 04:00:00



Hi,
  I recently tried to replace some C++ code to low pass filter the
results of some OpenGL rendering with some of the OpenGL
filtering/convolution extensions. This generated a nice speedup on an
R4400 Impact but it actually ran slower on an O2. I would have expected
the OpenGL routines would be well tuned and optimized to beat the C++
code which was compiled with the -g option, minimal optimization. Here
are some timings for the filtering:
                                 O2              Impact R4400
Software (code not shown)        1.4             1.6
glConvolutionFilter2DEXT         3.5             0.27
glSeparableFilter2DEXT           2.2             0.14?

   One thing about these timings seems strange: the final call to
glReadPixels takes 0.5 to 1.0 seconds if the extensions are in use, but
less than 0.1 seconds if not. This may be due to glReadPixels blocking
until the preceding call to glDrawPixels or glCopyPixels has actually
completed.

I tried three different ways of doing the filter step:
1. glCopyPixels
2. glReadPixels followed by glDrawPixels using GL_RGBA
3. glReadPixels followed by glDrawPixels using GL_ABGR_EXT
These three variations all took about the same time.

All code was compiled on the Impact R4400 under IRIX5.3.
The O2 is running IRIX 6.3 with the September recommended patches.

Can anyone explain these O2 results or suggest how to improve them?

Thanks,

Dave

This is most of the code that uses the OpenGL extensions:
----------------------------------------------------------------------------------------------

void *saveImage(int width, int height, int flag)
{
    void *ibuff;
    int a, b, c;
    double startTime = getTime ();

    fastPixelState ();
    if (flag) {
#if SEPERABLE_FILTER
        printf("col: ");
        for (a = 0; a < XtNumber (filter); a++) {
            for (c = 0; c < XtNumber (filter [0] [0]); c++) {
                colKernel [a] [c] = (float) filter [a] [0] [c] / 34.0;
            }
            printf(" %f", colKernel [a] [0]);
        }
        printf("\n");
        printf("row: ");
        for (b = 0; b < XtNumber (filter [0]); b++) {
            for (c = 0; c < XtNumber (filter [0] [0]); c++) {
                rowKernel [b] [c] = (float) filter [0] [b] [c] / 6.0;
            }
            printf(" %f", rowKernel [b] [0]);
        }
        printf("\n");
        glSeparableFilter2DEXT( GL_SEPARABLE_2D_EXT, GL_RGBA,
                                    3, 5, GL_RGBA, GL_FLOAT,
                                    rowKernel, colKernel);
        glEnable(GL_SEPARABLE_2D_EXT);
#else
        for (a=0; a < XtNumber (filter); a++) {
            for (b=0; b < XtNumber (filter [0]); b++) {
                for (c=0; c < XtNumber (filter [0] [0]); c++) {
                    ffilter[a][b][c]=(float)filter[a][b][c]/204.0;
                }
            }
printf("Row %d %f %f %f\n", a, ffilter[a][0][0], ffilter[a][1][0],
ffilter[a][2][0]);
        }
        glConvolutionFilter2DEXT(GL_CONVOLUTION_2D_EXT, GL_RGBA,
                                3, 5, GL_RGBA, GL_FLOAT,
                                ffilter);
        glEnable(GL_CONVOLUTION_2D_EXT);
#endif
        glConvolutionParameterfvEXT( GL_CONVOLUTION_2D_EXT,
                                GL_CONVOLUTION_FILTER_SCALE_EXT,
                                scale);
        glConvolutionParameterfvEXT( GL_CONVOLUTION_2D_EXT,
                                GL_CONVOLUTION_FILTER_BIAS_EXT,
                                bias);
    }
    r_width=width;
    r_height=height;
    if ((ibuff=malloc(width*height*4)) == NULL) {
        printf("Unable to allocate buffer\n");
        return NULL;
    }
    setMatrixPix();
    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelZoom(1.0, 1.0);
    double filterTime = startTime;
    if (flag)
    {
#if 0
#ifndef GL_ABGR_EXT
        GLenum pixelFormat = GL_RGBA;
#else
        GLenum pixelFormat = GL_ABGR_EXT;
#endif
        glReadPixels(0, 0, width, height, pixelFormat, GL_UNSIGNED_BYTE,
ibuff);
        glRasterPos2i (0, 0);
        glDrawPixels (width, height, pixelFormat, GL_UNSIGNED_BYTE,
ibuff);
#else
        glCopyPixels(0, 0, width, height, GL_COLOR);
#endif
        filterTime = getTime ();
    }
    glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, ibuff);

    if (flag) {
        glDisable(GL_CONVOLUTION_2D_EXT);
        glDisable(GL_SEPARABLE_2D_EXT);
    }
    glPopAttrib ();
    double readTime = getTime ();
    VkSODebug (DEBUG_LAYER_TIMING, ("saveImage (%d, %d) filter %f read
%f\n",
        width,
        height,
        filterTime - startTime,
        readTime - filterTime
    ));
    return ibuff;

}

-----------------------------------------------------------------------------------------------------------------------------

// Attribute groups saved by glPushAttrib() in fastPixelState().
// Under each group is the state in it that this mask was written to cover.
static const GLbitfield fastPixelMask =
        // Depth-buffer group:
        //     glDepthFunc (GL_ALWAYS)   -- not currently called by
        //                                  fastPixelState()
        GL_DEPTH_BUFFER_BIT
        // Enable group:
        //     GL_ALPHA_TEST, GL_BLEND, GL_DEPTH_TEST, GL_DITHER, GL_FOG,
        //     GL_LIGHTING, GL_LOGIC_OP, GL_STENCIL_TEST,
        //     GL_TEXTURE_1D, GL_TEXTURE_2D
        | GL_ENABLE_BIT
        // Pixel-mode group:
        //     GL_MAP_COLOR and the scale/bias pairs for
        //     red, green, blue and alpha
        | GL_PIXEL_MODE_BIT;

void fastPixelState ()
{
/*
 * Disable stuff that's likely to slow down glDrawPixels.
 * (Omit as much of this as possible, when you know in advance
 * that the OpenGL state will already be set correctly.)
 */

    glPushAttrib (fastPixelMask);

    glDisable(GL_ALPHA_TEST);
    glDisable(GL_BLEND);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_DITHER);
    glDisable(GL_FOG);
    glDisable(GL_LIGHTING);
    glDisable(GL_LOGIC_OP);
    glDisable(GL_STENCIL_TEST);
    glDisable(GL_TEXTURE_1D);
    glDisable(GL_TEXTURE_2D);
    glPixelTransferi(GL_MAP_COLOR, GL_FALSE);
    glPixelTransferi(GL_RED_SCALE, 1);
    glPixelTransferi(GL_RED_BIAS, 0);
    glPixelTransferi(GL_GREEN_SCALE, 1);
    glPixelTransferi(GL_GREEN_BIAS, 0);
    glPixelTransferi(GL_BLUE_SCALE, 1);
    glPixelTransferi(GL_BLUE_BIAS, 0);
    glPixelTransferi(GL_ALPHA_SCALE, 1);
    glPixelTransferi(GL_ALPHA_BIAS, 0);

}

----------------------------------------------------------------------------------------------
 
 
 

1. Iris vs. OpenGL O2 performance, documentation

I'm sharing some comments (below) received from Paul Spencer of SGI
regarding the use of IrisGL and OpenGL on the O2, and on-line manuals.
--
======================================================================

McGill Vision Research  687 Pine Ave, Rm H4-14 Montreal, Quebec CANADA      
Voice: (514) 398-1019   Messages: (514) 843-1690   FAX: (514) 843-1691    
======================================================================


Date: Tue, 29 Oct 1996 16:43:33 -0800 (PST)

Lynn Ziegler said:

Yes; so do all of our new systems, and I believe IR graphics too.
igloo = (I)ris (GL) (O)n (O)penGL :-)

Not much. Yes, some, but not a big hit; and the new graphics are
pretty fast anyway, so you may get a net speedup. (Comparing new
to old systems).

There are two standard works you should probably get:

   "OpenGL Reference Manual" ISBN 0-201-63276-4 (blue book)
   "OpenGL Programming Guide" (red book)

That ISBN is for the first (1992) edition; it has probably been updated
by now. Someone stole my other book, so I don't have an ISBN for that
one. Both are published by Addison-Wesley.

The on-line books aren't too bad, either.

[LATER: ]
These two books *are* on-line (along with other useful info).

....paul

--
Paul Spencer                 Silicon Graphics Advanced Systems Division

2. 3d surface

3. O2 performance

4. LW 6.0B Patch

5. Video capture performance on O2

6. DepthOfField

7. glDrawPixels performance on O2

8. sell MAX get sued !!

9. New O2 performance patch?

10. O2 Performance with large textures

11. O2 graphics performances

12. Poor performance O2 for texture mapping?