Commit 563c4e97 authored by Ravi's avatar Ravi

memory allocation of device arrays only once at initialize

parent 6a4ddb5e
...@@ -17,6 +17,7 @@ GlWidget::GlWidget(QWidget *parent) ...@@ -17,6 +17,7 @@ GlWidget::GlWidget(QWidget *parent)
GlWidget::~GlWidget() GlWidget::~GlWidget()
{ {
cudaFree(d_in);
} }
QSize GlWidget::sizeHint() const QSize GlWidget::sizeHint() const
...@@ -35,6 +36,9 @@ void GlWidget::initializeGL() ...@@ -35,6 +36,9 @@ void GlWidget::initializeGL()
size_t size = camera.width() * camera.height() * 4 * sizeof(unsigned char); size_t size = camera.width() * camera.height() * 4 * sizeof(unsigned char);
gl.glBufferData(GL_PIXEL_UNPACK_BUFFER, size, 0, GL_DYNAMIC_DRAW); gl.glBufferData(GL_PIXEL_UNPACK_BUFFER, size, 0, GL_DYNAMIC_DRAW);
cudaGraphicsGLRegisterBuffer(&pixelsVBO_CUDA, pixelsVBO, cudaGraphicsMapFlagsWriteDiscard); cudaGraphicsGLRegisterBuffer(&pixelsVBO_CUDA, pixelsVBO, cudaGraphicsMapFlagsWriteDiscard);
size_t inBytes = camera.width() * camera.height() * sizeof(float);
cudaMalloc(&d_in, inBytes);
} }
void GlWidget::paintGL() void GlWidget::paintGL()
...@@ -46,8 +50,6 @@ void GlWidget::paintGL() ...@@ -46,8 +50,6 @@ void GlWidget::paintGL()
cudaGraphicsResourceGetMappedPointer(&d_out, &size, pixelsVBO_CUDA); cudaGraphicsResourceGetMappedPointer(&d_out, &size, pixelsVBO_CUDA);
size_t inBytes = camera.width() * camera.height() * sizeof(float); size_t inBytes = camera.width() * camera.height() * sizeof(float);
void *d_in;
cudaMalloc(&d_in, inBytes);
{ {
QMutexLocker locker(&camera.mutex); QMutexLocker locker(&camera.mutex);
cudaMemcpy(d_in, camera.data(), inBytes, cudaMemcpyHostToDevice); cudaMemcpy(d_in, camera.data(), inBytes, cudaMemcpyHostToDevice);
...@@ -58,8 +60,6 @@ void GlWidget::paintGL() ...@@ -58,8 +60,6 @@ void GlWidget::paintGL()
// Execute kernel // Execute kernel
executeKernel(d_in, d_out, camera.width(), camera.height()); executeKernel(d_in, d_out, camera.width(), camera.height());
cudaFree(d_in);
// Unmap buffer object // Unmap buffer object
cudaGraphicsUnmapResources(1, &pixelsVBO_CUDA, 0); cudaGraphicsUnmapResources(1, &pixelsVBO_CUDA, 0);
......
...@@ -19,6 +19,7 @@ protected: ...@@ -19,6 +19,7 @@ protected:
private: private:
QGLFunctions gl; QGLFunctions gl;
void *d_in;
}; };
#endif // GLWIDGET_H #endif // GLWIDGET_H
...@@ -116,6 +116,17 @@ __global__ void createVertices(float *in, uchar4* pixel, int w, int h) ...@@ -116,6 +116,17 @@ __global__ void createVertices(float *in, uchar4* pixel, int w, int h)
pixel[i].w = 255; pixel[i].w = 255;
} }
static float *d_T, *d_F, *d_Xi, *d_Xj;
void allocate_device_memory(size_t w, size_t h)
{
size_t imageBytes = w*h*sizeof(float);
cudaMalloc(&d_T, imageBytes);
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_Xi, imageBytes);
cudaMalloc(&d_Xj, imageBytes);
}
void executeKernel(void *d_in, void *d_out, size_t w, size_t h) void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
{ {
float *d_U = reinterpret_cast<float *>(d_in); float *d_U = reinterpret_cast<float *>(d_in);
...@@ -137,12 +148,7 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h) ...@@ -137,12 +148,7 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
float c1 = 1.0; float c1 = 1.0;
float c2 = 0.00; float c2 = 0.00;
float *d_T, *d_F, *d_Xi, *d_Xj;
size_t imageBytes = w*h*sizeof(float); size_t imageBytes = w*h*sizeof(float);
cudaMalloc(&d_T, imageBytes);
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_Xi, imageBytes);
cudaMalloc(&d_Xj, imageBytes);
cudaMemcpy(d_T, d_U, imageBytes, cudaMemcpyDeviceToDevice); cudaMemcpy(d_T, d_U, imageBytes, cudaMemcpyDeviceToDevice);
cudaMemset(d_Xi, 0, imageBytes); cudaMemset(d_Xi, 0, imageBytes);
cudaMemset(d_Xj, 0, imageBytes); cudaMemset(d_Xj, 0, imageBytes);
...@@ -155,8 +161,4 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h) ...@@ -155,8 +161,4 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
update_U<<< dimGrid, dimBlock >>>(d_T, d_Xi, d_Xj, d_F, d_U, w, h, tau); update_U<<< dimGrid, dimBlock >>>(d_T, d_Xi, d_Xj, d_F, d_U, w, h, tau);
} }
update_Output<<< dimGrid, dimBlock >>>(pixel, d_U, w, h); update_Output<<< dimGrid, dimBlock >>>(pixel, d_U, w, h);
cudaFree(d_T);
cudaFree(d_F);
cudaFree(d_Xi);
cudaFree(d_Xj);
} }
#ifndef KERNEL_H #ifndef KERNEL_H
#define KERNEL_H #define KERNEL_H
extern "C" void allocate_device_memory(size_t width, size_t height);
extern "C" void executeKernel(void *d_in, void *d_out, size_t width, size_t height); extern "C" void executeKernel(void *d_in, void *d_out, size_t width, size_t height);
#endif // KERNEL_H #endif // KERNEL_H
#include "camera.h" #include "camera.h"
#include "kernel.h"
#include "glwidget.h" #include "glwidget.h"
#include <iostream> #include <iostream>
...@@ -16,6 +17,7 @@ int main(int argc, char *argv[]) ...@@ -16,6 +17,7 @@ int main(int argc, char *argv[])
} }
QObject::connect(&camera, SIGNAL(newFrame()), &w, SLOT(updateGL())); QObject::connect(&camera, SIGNAL(newFrame()), &w, SLOT(updateGL()));
allocate_device_memory(camera.width(), camera.height());
camera.start(); camera.start();
w.show(); w.show();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment