Allocate device arrays only once, at initialization

563c4e97 · Ravi · 6a4ddb5e · 563c4e97 · 563c4e97 · 563c4e97
Commit 563c4e97 authored Mar 27, 2014 by Ravi
5 changed files
--- a/miklos/project_integration/glwidget.cpp
+++ b/miklos/project_integration/glwidget.cpp
@@ -17,6 +17,7 @@ GlWidget::GlWidget(QWidget *parent)

 GlWidget::~GlWidget()
 {
+	cudaFree(d_in);
 }

 QSize GlWidget::sizeHint() const
@@ -35,6 +36,9 @@ void GlWidget::initializeGL()
    size_t size = camera.width() * camera.height() * 4 * sizeof(unsigned char);
    gl.glBufferData(GL_PIXEL_UNPACK_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    cudaGraphicsGLRegisterBuffer(&pixelsVBO_CUDA, pixelsVBO, cudaGraphicsMapFlagsWriteDiscard);
+
+    size_t inBytes = camera.width() * camera.height() * sizeof(float);
+    cudaMalloc(&d_in, inBytes);
 }

 void GlWidget::paintGL()
@@ -46,8 +50,6 @@ void GlWidget::paintGL()
    cudaGraphicsResourceGetMappedPointer(&d_out, &size,  pixelsVBO_CUDA);

    size_t inBytes = camera.width() * camera.height() * sizeof(float);
-    void *d_in;
-    cudaMalloc(&d_in, inBytes);
    {
        QMutexLocker locker(&camera.mutex);
        cudaMemcpy(d_in, camera.data(), inBytes, cudaMemcpyHostToDevice);
@@ -58,8 +60,6 @@ void GlWidget::paintGL()
    // Execute kernel
    executeKernel(d_in, d_out, camera.width(), camera.height());

-    cudaFree(d_in);
-
    // Unmap buffer object
    cudaGraphicsUnmapResources(1, &pixelsVBO_CUDA, 0);


--- a/miklos/project_integration/glwidget.h
+++ b/miklos/project_integration/glwidget.h
@@ -19,6 +19,7 @@ protected:

 private:
    QGLFunctions gl;
+    void *d_in;
 };

 #endif // GLWIDGET_H
--- a/miklos/project_integration/kernel.cu
+++ b/miklos/project_integration/kernel.cu
@@ -116,6 +116,17 @@ __global__ void createVertices(float *in, uchar4* pixel, int w, int h)
    pixel[i].w = 255;
 }

+static float *d_T, *d_F, *d_Xi, *d_Xj;
+
+void allocate_device_memory(size_t w, size_t h)
+{
+	size_t imageBytes = w*h*sizeof(float);
+	cudaMalloc(&d_T, imageBytes);
+	cudaMalloc(&d_F, imageBytes);
+	cudaMalloc(&d_Xi, imageBytes);
+	cudaMalloc(&d_Xj, imageBytes);
+}
+
 void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
 {
    float *d_U = reinterpret_cast<float *>(d_in);
@@ -137,12 +148,7 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
    float c1 = 1.0;
    float c2 = 0.00;

-    float *d_T, *d_F, *d_Xi, *d_Xj;
    size_t imageBytes = w*h*sizeof(float);
-    cudaMalloc(&d_T, imageBytes);
-    cudaMalloc(&d_F, imageBytes);
-    cudaMalloc(&d_Xi, imageBytes);
-    cudaMalloc(&d_Xj, imageBytes);
    cudaMemcpy(d_T, d_U, imageBytes, cudaMemcpyDeviceToDevice);
    cudaMemset(d_Xi, 0, imageBytes);
    cudaMemset(d_Xj, 0, imageBytes);
@@ -155,8 +161,4 @@ void executeKernel(void *d_in, void *d_out, size_t w, size_t h)
        update_U<<< dimGrid, dimBlock >>>(d_T, d_Xi, d_Xj, d_F, d_U, w, h, tau);
    }
    update_Output<<< dimGrid, dimBlock >>>(pixel, d_U, w, h);
-    cudaFree(d_T);
-    cudaFree(d_F);
-    cudaFree(d_Xi);
-    cudaFree(d_Xj);
 }
--- a/miklos/project_integration/kernel.h
+++ b/miklos/project_integration/kernel.h
 #ifndef KERNEL_H
 #define KERNEL_H

+extern "C" void allocate_device_memory(size_t width, size_t height);
 extern "C" void executeKernel(void *d_in, void *d_out, size_t width, size_t height);

 #endif // KERNEL_H
--- a/miklos/project_integration/main.cpp
+++ b/miklos/project_integration/main.cpp
 #include "camera.h"
+#include "kernel.h"
 #include "glwidget.h"

 #include <iostream>
@@ -16,6 +17,7 @@ int main(int argc, char *argv[])
    }

    QObject::connect(&camera, SIGNAL(newFrame()), &w, SLOT(updateGL()));
+    allocate_device_memory(camera.width(), camera.height());

    camera.start();
    w.show();