Commit 563c4e97 authored by Ravi

Allocate device arrays only once, at initialization

parent 6a4ddb5e
......@@ -17,6 +17,7 @@ GlWidget::GlWidget(QWidget *parent)
// Destructor: hands the widget's device-side input buffer (the d_in member)
// back to the CUDA runtime.
GlWidget::~GlWidget()
{
    cudaFree(this->d_in);
}
QSize GlWidget::sizeHint() const
......@@ -35,6 +36,9 @@ void GlWidget::initializeGL()
size_t size = camera.width() * camera.height() * 4 * sizeof(unsigned char);
gl.glBufferData(GL_PIXEL_UNPACK_BUFFER, size, 0, GL_DYNAMIC_DRAW);
cudaGraphicsGLRegisterBuffer(&pixelsVBO_CUDA, pixelsVBO, cudaGraphicsMapFlagsWriteDiscard);
size_t inBytes = camera.width() * camera.height() * sizeof(float);
cudaMalloc(&d_in, inBytes);
}
void GlWidget::paintGL()
......@@ -46,8 +50,6 @@ void GlWidget::paintGL()
cudaGraphicsResourceGetMappedPointer(&d_out, &size, pixelsVBO_CUDA);
size_t inBytes = camera.width() * camera.height() * sizeof(float);
void *d_in;
cudaMalloc(&d_in, inBytes);
{
QMutexLocker locker(&camera.mutex);
cudaMemcpy(d_in, camera.data(), inBytes, cudaMemcpyHostToDevice);
......@@ -58,8 +60,6 @@ void GlWidget::paintGL()
// Execute kernel
executeKernel(d_in, d_out, camera.width(), camera.height());
cudaFree(d_in);
// Unmap buffer object
cudaGraphicsUnmapResources(1, &pixelsVBO_CUDA, 0);
......
......@@ -19,6 +19,7 @@ protected:
private:
QGLFunctions gl;
void *d_in;
};
#endif // GLWIDGET_H
......@@ -116,6 +116,17 @@ __global__ void createVertices(float *in, uchar4* pixel, int w, int h)
pixel[i].w = 255;
}
// Device scratch buffers filled in by allocate_device_memory(); each one holds
// w*h floats. They are file-scope so they can be reused instead of being
// re-allocated on every frame.
static float *d_T, *d_F, *d_Xi, *d_Xj;

// Frees all four scratch buffers and resets their pointers to null.
static void release_scratch_buffers()
{
    float **slots[4] = { &d_T, &d_F, &d_Xi, &d_Xj };
    for (int i = 0; i < 4; ++i) {
        cudaFree(*slots[i]);   // freeing a null pointer is a no-op
        *slots[i] = 0;
    }
}

// Allocates the four w*h float scratch buffers on the device.
//
// Parameters:
//   w, h - image width and height in pixels; each buffer gets
//          w * h * sizeof(float) bytes.
//
// Calling this again releases the previous buffers first, so a repeated call
// (e.g. after a resolution change) does not leak device memory.
// If any allocation fails, everything allocated so far is released and all
// four pointers are left null, so the buffers are never in a half-allocated
// state.
void allocate_device_memory(size_t w, size_t h)
{
    // Drop buffers from an earlier call; the statics start out null, so this
    // is harmless on the first call.
    release_scratch_buffers();

    const size_t imageBytes = w * h * sizeof(float);

    float **slots[4] = { &d_T, &d_F, &d_Xi, &d_Xj };
    for (int i = 0; i < 4; ++i) {
        if (cudaMalloc(slots[i], imageBytes) != cudaSuccess) {
            // Roll back so callers never see a mix of valid and invalid pointers.
            release_scratch_buffers();
            return;
        }
    }
}
void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
{
float *d_U = reinterpret_cast<float *>(d_in);
......@@ -137,12 +148,7 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
float c1 = 1.0;
float c2 = 0.00;
float *d_T, *d_F, *d_Xi, *d_Xj;
size_t imageBytes = w*h*sizeof(float);
cudaMalloc(&d_T, imageBytes);
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_Xi, imageBytes);
cudaMalloc(&d_Xj, imageBytes);
cudaMemcpy(d_T, d_U, imageBytes, cudaMemcpyDeviceToDevice);
cudaMemset(d_Xi, 0, imageBytes);
cudaMemset(d_Xj, 0, imageBytes);
......@@ -155,8 +161,4 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
update_U<<< dimGrid, dimBlock >>>(d_T, d_Xi, d_Xj, d_F, d_U, w, h, tau);
}
update_Output<<< dimGrid, dimBlock >>>(pixel, d_U, w, h);
cudaFree(d_T);
cudaFree(d_F);
cudaFree(d_Xi);
cudaFree(d_Xj);
}
#ifndef KERNEL_H
#define KERNEL_H
// Host-side entry points of the CUDA processing code, declared with C linkage.

// Allocates the device scratch buffers (four buffers of width*height floats)
// used by the processing code. main() calls this once with the camera
// dimensions before the camera is started.
extern "C" void allocate_device_memory(size_t width, size_t height);
// Runs the image-processing kernels for one frame.
//   d_in  - device pointer to the input image; it is reinterpreted as float*.
//   d_out - device pointer to the output buffer; the GL widget passes the
//           mapped pixel-buffer pointer here.
//   width, height - image dimensions in pixels.
// NOTE(review): presumably must use the same width/height that were passed to
// allocate_device_memory() - confirm against the kernel source.
extern "C" void executeKernel(void *d_in, void *d_out, size_t width, size_t height);
#endif // KERNEL_H
#include "camera.h"
#include "kernel.h"
#include "glwidget.h"
#include <iostream>
......@@ -16,6 +17,7 @@ int main(int argc, char *argv[])
}
QObject::connect(&camera, SIGNAL(newFrame()), &w, SLOT(updateGL()));
allocate_device_memory(camera.width(), camera.height());
camera.start();
w.show();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment