QGLWidget + OpenGL + CUDA [Archive]

View Full Version : QGLWidget + OpenGL + CUDA

sic(6)SaNdMaN

19th October 2010, 14:56

Hi everyone.

I'm currently trying to get a combination of a QGLWidget showing an OpenGL texture which was modified by a CUDA kernel to work :cool:.

So, once again, I want to have a texture, modify it in some ways with CUDA kernels working on it, and display it in a QGLWidget, having OpenGL as the interface to both of them.

Basically, I was trying to get some of NVIDIA's examples (SobelFilter) to work in a QGLWidget.

My steps so far:
init():
- load the texture (tried QImage and cutLoadPPM4ub)
- generate pixel buffer object (pbo)
- bind pbo and copy the data
- register the buffer as cuda_resource
- generate and bind texture object

display():
- cudaGraphicsMapResources()
- cudaGraphicsResourceGetMappedPointer()
- CUDA kernel working on the data behind the pointer
- cudaGraphicsUnmapResources()
- bind texture and buffer
- use glTexImage2D()
- draw the texture onto a quad

Well, I tried many combinations of the above steps, going one line after another through the examples NVIDIA provided with their CUDA SDK.
But I can't get it to work.

- Has anybody done sth. like that already and can provide some useful hints?
- Is it possible, that the QGLWidget has some kind of problem with CUDA altering the texture data? Or this whole shared context thing between OpenGL and CUDA?
- I also have concerns about double buffering and the correct use of swapBuffers()...

Any help is appreciated :).

Here are the three most important functions of my QGLWidget subclass:

// One-time GL/CUDA setup: selects the CUDA device for GL interop, loads the
// image file into `pixels`, creates a pixel-unpack buffer object (PBO) sized
// from m_pQImage, registers that PBO with CUDA, and allocates an RGBA texture
// of the same dimensions with no initial data.
void GLWidget::initializeGL()
{
makeCurrent();

m_pQImage = QGLWidget::convertToGLFormat(m_pQImage);

// Pick the CUDA device before any other CUDA/GL interop call is made.
cudaGLSetGLDevice (cutGetMaxGflopsDeviceId() );

// NOTE(review): the return value of glewInit() is not checked.
glewInit();

unsigned int w, h;
const char* image_filename = getImagePath();

// Load the file into the member `pixels`; w/h receive the loader's reported size.
if (cutLoadPPM4ub(image_filename, &pixels, &w, &h) != CUTTrue) {
printf("Failed to load image file: %s\n", image_filename);
exit(-1);
}

GLint bsize;

// NOTE(review): w and h are not used below; every size comes from m_pQImage.
// This presumably relies on both images having identical dimensions - confirm.
setupTexture(m_pQImage.width(), m_pQImage.height(), pixels, 4);

// Zero the host buffer so the PBO created below starts out cleared.
memset(pixels, 0x0, 4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height());

// Create the PBO and fill it from the (now zeroed) host buffer.
glGenBuffers(1, &pbo_buffer);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
glBufferData(GL_PIXEL_UNPACK_BUFFER,
4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height(),
pixels, GL_STREAM_DRAW);

// Sanity check: ask GL for the buffer size and compare it with the request.
glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE, &bsize);
if ((GLuint)bsize != (4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height()))
{
printf("Buffer object (%d) has incorrect size (%d).\n", (unsigned)pbo_buffer, (unsigned)bsize);
cudaThreadExit();
exit(-1);
}

glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

// register this buffer object with CUDA
cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo_buffer, cudaGraphicsMapFlagsWriteDiscard));

// Allocate RGBA texture storage without uploading any pixels (data pointer is NULL).
glGenTextures (1, &m_pTexture);
glBindTexture (GL_TEXTURE_2D, m_pTexture);
glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, m_pQImage.width(), m_pQImage.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture (GL_TEXTURE_2D, 0);

// Byte-aligned rows for pixel transfers in both directions.
glPixelStorei (GL_UNPACK_ALIGNMENT, 1);
glPixelStorei (GL_PACK_ALIGNMENT, 1);
}

// Per-frame rendering: maps the PBO into CUDA, runs sobelFilter() on it,
// unmaps it, transfers the PBO contents into the texture and draws the
// texture on a quad spanning [-1, 1] in x and y.
void GLWidget::paintGL()
{
qDebug() << "paintGL()";

makeCurrent();

// Sobel operation
Pixel *data = NULL;

// map PBO to get CUDA device pointer
cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
size_t num_bytes;
cutilSafeCall(cudaGraphicsResourceGetMappedPointer ((void **)&data, &num_bytes,
cuda_pbo_resource));
qDebug() << QString("CUDA mapped PBO: May access %1 bytes").arg(num_bytes);

// The resource is unmapped again before GL touches the buffer.
sobelFilter(data, m_pQImage.width(), m_pQImage.height(), SOBELDISPLAY_IMAGE, 1.0f);
cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

glClear(GL_COLOR_BUFFER_BIT);

// With a PBO bound to GL_PIXEL_UNPACK_BUFFER the last argument of the upload
// call is a byte offset into that buffer, not a client-memory pointer.
glBindTexture(GL_TEXTURE_2D, m_pTexture);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
// NOTE(review): this argument list has the shape of glTexSubImage2D
// (xoffset, yoffset, width, height, format, type, data). glTexImage2D expects
// (internalformat, width, height, border, format, type, data), so here
// internalformat = 0, width = 0, height = image width and border = image
// height. The call compiles only because both functions take nine arguments.
glTexImage2D(GL_TEXTURE_2D, 0, 0, 0, m_pQImage.width(), m_pQImage.height(),
GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL /*= OFFSET*/);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

// These parameters apply to m_pTexture, which is still bound at this point.
glDisable(GL_DEPTH_TEST);
glEnable(GL_TEXTURE_2D);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

// Textured quad; each texture coordinate is set before the vertex it belongs to.
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f);
glVertex2f(-1.0f, -1.0f);
glTexCoord2f(1.0f, 0.0f);
glVertex2f(1.0f, -1.0f);
glTexCoord2f(1.0f, 1.0f);
glVertex2f(1.0f, 1.0f);
glTexCoord2f(0.0f, 1.0f);
glVertex2f(-1.0f, 1.0f);
glEnd();

glBindTexture(GL_TEXTURE_2D, 0);

// NOTE(review): QGLWidget swaps buffers itself after paintGL() while auto
// buffer swap is enabled (the default), which would make this a second swap -
// confirm whether setAutoBufferSwap(false) is called elsewhere.
swapBuffers();
}

// Resize handler: centres a square viewport (edge = the smaller widget
// dimension) inside the widget, installs an orthographic projection and then
// fixes the widget size to the dimensions it was just given.
void GLWidget::resizeGL(int iWidth, int iHeight)
{
    makeCurrent();

    // Largest square that fits, with the leftover space split evenly per axis.
    const int edge = qMin(iWidth, iHeight);
    const int offsetX = (iWidth - edge) / 2;
    const int offsetY = (iHeight - edge) / 2;
    glViewport(offsetX, offsetY, edge, edge);

    // Top and bottom are swapped (+0.5 / -0.5), which flips the y axis.
    // NOTE(review): near = 4 and far = 15 keep only eye-space z in [-15, -4];
    // geometry drawn at z = 0 under an identity modelview would be clipped -
    // confirm against the modelview actually in use.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(-0.5, +0.5, +0.5, -0.5, 4.0, 15.0);
    glMatrixMode(GL_MODELVIEW);

    setFixedSize(iWidth, iHeight);
}

I tried to build a little example, but you'll have to set some path variables and the CUDA Build rules must be used of course.
5363

THANKS!

wysota

19th October 2010, 22:05

Could you define "can't get it to work"?

sic(6)SaNdMaN

20th October 2010, 08:05

Thanks for your reply.

Well, yes, I should have done that already...

Most of the time I see a black square, nothing more. If I play around with the pointers to show and so on, I can sometimes see the picture of some uninitialized memory ;).
But I never saw the picture/texture after it was altered by the CUDA kernel, I never saw it after the sobel filter.

If I give the pointer to the texture pixels to glTexImage2D() and don't do the memset, I can see the picture. But without filtering, of course. Like this
glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, m_pQImage.width(), m_pQImage.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
So I believe, that the steps until this point are not totally wrong.

Could you imagine something?
Thanks!

Edit:
Here's an updated version of my example... I hope it's easier to get it to work. One should only need to set the right Qt Version in the projects properties, at least in VS2008.
Unfortunately i had to upload it somewhere else, but its less than 600KB:
http://filestore.to/?d=HXW1B22TJ9

sic(6)SaNdMaN

20th October 2010, 15:22

Well... after nearly three weeks of unsuccessful work on this I decided to create a topic here...
Now, one day later I found my "mistake"... you have to use glTexSubImage2D() instead of glTexImage2D() with the same parameters... even if you want to read the whole image.

I don't get it... but here's some working code, for anybody, that comes across this thread (this time in CODE-tags instead of QUOTE-tags ;)):

void GLWidget::initializeGL()
{
makeCurrent();

cudaGLSetGLDevice(cutGetMaxGflopsDeviceId() );

int c=1;
char* dummy = "";
glutInit( &c, &dummy );

glewInit();

unsigned int w, h;
char data[1024];
strncpy(data, getImagePath().toLatin1(), sizeof(data) - 1);
const char* image_filename = data;

if (cutLoadPPM4ub(image_filename, &pixels, &w, &h) != CUTTrue) {
printf("Failed to load image file: %s\n", image_filename);
exit(-1);
}

GLint bsize;
setupTexture(m_pQImage.width(), m_pQImage.height(), pixels, 4);

memset(pixels, 0x0, 4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height());

// use OpenGL Path
glGenBuffers(1, &pbo_buffer);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
glBufferData(GL_PIXEL_UNPACK_BUFFER,
4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height(),
pixels, GL_STREAM_DRAW);

glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE, &bsize);
if ((GLuint)bsize != (4 * sizeof(Pixel) * m_pQImage.width() * m_pQImage.height()))
{
printf("Buffer object (%d) has incorrect size (%d).\n", (unsigned)pbo_buffer, (unsigned)bsize);
cudaThreadExit();
exit(-1);
}

glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

// register this buffer object with CUDA
cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo_buffer, cudaGraphicsMapFlagsWriteDiscard));

glGenTextures (1, &m_pTexture);
glBindTexture (GL_TEXTURE_2D, m_pTexture);
glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, m_pQImage.width(), m_pQImage.height(), 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glBindTexture (GL_TEXTURE_2D, 0);

glPixelStorei (GL_UNPACK_ALIGNMENT, 1);
glPixelStorei (GL_PACK_ALIGNMENT, 1);
}

void GLWidget::paintGL()
{
makeCurrent();

glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);

// Sobel operation
Pixel *data = NULL;

// map PBO to get CUDA device pointer
cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
size_t num_bytes;
cutilSafeCall(cudaGraphicsResourceGetMappedPointer ((void **)&data, &num_bytes,
cuda_pbo_resource));
qDebug() << QString("CUDA mapped PBO: May access %1 bytes").arg(num_bytes);

sobelFilter(data, m_pQImage.width(), m_pQImage.height(), SOBELDISPLAY_IMAGE, 1.0f);
cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

glClear(GL_COLOR_BUFFER_BIT);

glBindTexture(GL_TEXTURE_2D, m_pTexture);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_pQImage.width(), m_pQImage.height(),
GL_LUMINANCE, GL_UNSIGNED_BYTE, (char *)NULL);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

glDisable(GL_DEPTH_TEST);
glEnable(GL_TEXTURE_2D);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

glBegin(GL_QUADS);
glVertex2f(-1, -1); glTexCoord2f(0, 0);
glVertex2f(-1, 1); glTexCoord2f(1, 0);
glVertex2f( 1, 1); glTexCoord2f(1, 1);
glVertex2f( 1, -1); glTexCoord2f(0, 1);
glEnd();

//glDisable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, 0);

swapBuffers();
}

Cheers!

jpapon

10th January 2011, 13:03

Quick question for you about compiling this; where exactly do you use the nvcc compiler in all this?
Do you need to compile the Sobel filter separately using nvcc and then link to the library?

Thanks in advance!

sic(6)SaNdMaN

10th January 2011, 13:14

jpapon

10th January 2011, 13:26

Yeah, I just didn't see any reference to a pre compiled library in the .pro file.
I'm just adding a subdirectory, placing the kernel in it, compiling to a static lib.
Then I'm just linking to the library in the .pro file. Does that sound like the correct procedure?

Then I need to figure out how to put a command in a .pro file so the qmake generated makefile also runs the make in the subdirectory.

Zeus13i

25th January 2011, 01:20

Hi,

I am trying to do something similar, but I'm finding that I cannot register the cuda resource from outside of the GLWidget... if I call cudaGraphicsGLRegisterBuffer from my QGLWidget everything is fine, but I wanted to separate the logic into a different class, say 'Worker'.

So I tried sending a signal to the main window when the QGLWidget has initialized OpenGL/VBO's and then spawn my Worker class (which is a thread and in which I wish to run my CUDA kernels).

I just get a segfault. Does anyone see why I shouldn't be able to do this?

Thanks