Slightly better, not working nonetheless code for interop

Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>

Slightly better, not working nonetheless code for interop
Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>
639b6601 · Gaurav Kukreja · 4490dbf2 · 639b6601 · 639b6601
Commit 639b6601 authored Mar 22, 2014 by Gaurav Kukreja
Hide whitespace changes
Inline Side-by-side

Showing with 114 additions and 147 deletions

Makefile miklos/project/Makefile +1 -1

main.cu miklos/project/main.cu +113 -146

No files found.
--- a/miklos/project/Makefile
+++ b/miklos/project/Makefile
 main: main.cu aux.cu aux.h Makefile
-	nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core -lGL -lglut
+	nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core -lopencv_imgproc -lGL `pkg-config --cflags glfw3` `pkg-config --static --libs glfw3`
--- a/miklos/project/main.cu
+++ b/miklos/project/main.cu
@@ -26,45 +26,18 @@
 #include "aux.h"
 #include <iostream>
-// #include <GLTools.h>            // OpenGL toolkit
-// #include <GLShaderManager.h>    // Shader Manager Class
 #define GL_GLEXT_PROTOTYPES
 #include <GL/gl.h>
-#include <GL/glut.h>            // Windows FreeGlut equivalent
 #include <GL/glext.h>
+#include <GLFW/glfw3.h>
 #include "cuda_gl_interop.h"
 using namespace std;
 // uncomment to use the camera
-// #define CAMERA
+//#define CAMERA
-/*********************************************************************
- *** Global Variables                                           ******
- *********************************************************************/
-// Pointers to Device Memory for Input, Output and Intermediate images
-float *d_F, *d_U, *d_vx, *d_vy;
-// Variables for OpenGL Interoperability
-GLuint outputVBO;
-struct cudaGraphicsResource* outputVBO_CUDA;
-int repeats;
-bool gray;
-float lambda;
-float tau;
-int N;
-cv::VideoCapture camera(0);
-cv::Mat mIn;
-float *imgIn;
-int w, h, nc;
 template<typename T>
 __device__ __host__ T min(T a, T b)
@@ -85,8 +58,6 @@ __device__ __host__ T clamp(T m, T x, T M)
 }
 /**
 * Computes the normalized gradient.
 *
@@ -119,7 +90,6 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
 /**
 * Update approximation.
 *
- * @param output
 * @param U approximation of solution (single-channel)
 * @param F original input image (single-channel)
 * @param vx normalized gradient of U (x-coordinate)
@@ -129,8 +99,13 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
 * @param lambda weight of 'similarity' energy component
 * @param tau update coefficient
 */
-__global__ void update(float* output, float *U, float *F, float *vx, float *vy,
+#ifdef CAMERA
+__global__ void update(float4 *output, float *U, float *F, float *vx, float *vy,
                       int w, int h, float lambda, float tau)
+#else
+__global__ void update(float *U, float *F, float *vx, float *vy,
+                       int w, int h, float lambda, float tau)
+#endif
 {
    int x = threadIdx.x + blockDim.x * blockIdx.x;
    int y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -150,7 +125,14 @@ __global__ void update(float* output, float *U, float *F, float *vx, float *vy,
        float div_v = dx_vx + dy_vy;
        // explicit Euler update rule
-        output[i] = U[i] + tau * (lambda * d + div_v);
+        U[i] += tau * (lambda * d + div_v);
+#ifdef CAMERA
+        float u = x / (float)w;
+        float v = y / (float)h;
+        output[i] = make_float4(u, U[i], v, 1.0f);
+#else        
+#endif
    }
 }
@@ -163,11 +145,6 @@ inline dim3 make_grid(dim3 whole, dim3 block)
                div_ceil(whole.z, block.z));
 }
-void display();
-void bye() {
-    cout << "bye!\n";
-}
 int main(int argc, char **argv)
 {
@@ -176,6 +153,9 @@ int main(int argc, char **argv)
    // We will do it right here, so that the run time measurements are accurate
    cudaDeviceSynchronize();  CUDA_CHECK;
    // Reading command line parameters:
    // getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
    // If "-param" is not specified, the value of "var" remains unchanged
@@ -190,47 +170,50 @@ int main(int argc, char **argv)
    if (!ret) cerr << "ERROR: no image specified" << endl;
    if (argc <= 1) { cout << "Usage: " << argv[0] << " -i <image> [-repeats <repeats>] [-gray]" << endl; return 1; }
 #endif
    // number of computation repetitions to get a better run time measurement
-    repeats = 1;
+    int repeats = 1;
    getParam("repeats", repeats, argc, argv);
    cout << "repeats: " << repeats << endl;
    // load the input image as grayscale if "-gray" is specifed
-    gray = true;
+    bool gray = true;
    // always true: getParam("gray", gray, argc, argv);
    cout << "gray: " << gray << endl;
    // ### Define your own parameters here as needed    
-    lambda = 0.8;
+    float lambda = 0.8;
    getParam("lambda", lambda, argc, argv);
    cout << "λ: " << lambda << endl;
-    tau = 0.01;
+    float tau = 0.01;
    getParam("tau", tau, argc, argv);
    cout << "τ: " << tau << endl;
-    N = 2000;
+    int N = 2000;
    getParam("N", N, argc, argv);
    cout << "N: " << N << endl;
    // Init camera / Load input image
 #ifdef CAMERA
    // Init camera
-  	// cv::VideoCapture camera(0);
+  	cv::VideoCapture camera(0);
  	if(!camera.isOpened()) { cerr << "ERROR: Could not open camera" << endl; return 1; }
    int camW = 640;
    int camH = 480;
  	camera.set(CV_CAP_PROP_FRAME_WIDTH,camW);
  	camera.set(CV_CAP_PROP_FRAME_HEIGHT,camH);
    // read in first frame to get the dimensions
-    // cv::Mat mIn;
+    cv::Mat mIn;
    camera >> mIn;
+    if(gray)
+        cvtColor (mIn, mIn, CV_BGR2GRAY);
 #else
    // Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale))
-    // cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
+    cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
-    mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
    // check
    if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; }
@@ -241,9 +224,9 @@ int main(int argc, char **argv)
    // convert range of each channel to [0,1] (opencv default is [0,255])
    mIn /= 255.f;
    // get image dimensions
-    w = mIn.cols;         // width
+    int w = mIn.cols;         // width
-    h = mIn.rows;         // height
+    int h = mIn.rows;         // height
-    nc = mIn.channels();  // number of channels
+    int nc = mIn.channels();  // number of channels
    cout << "image: " << w << " x " << h << endl;
@@ -263,42 +246,69 @@ int main(int argc, char **argv)
    // output image number of channels: mOut.channels(), as defined above (nc, 3, or 1)
    // allocate raw input image array
-    imgIn  = new float[(size_t)w*h*nc];
+    float *imgIn  = new float[(size_t)w*h*nc];
+    size_t imageBytes = (size_t)w*h*nc*sizeof(float);
    // allocate raw output array (the computation result will be stored in this array, then later converted to mOut for displaying)
    float *imgOut = new float[(size_t)w*h*mOut.channels()];
+    // For camera mode: Make a loop to read in camera frames
 #ifdef CAMERA
-    // Initialize OpenGL and GLUT for device 0
+    // KUKU: OpenGL GLFW Code
-    // and make the OpenGL context current
+    glfwInit();
-    glutInit(&argc, argv);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
+    GLFWwindow* window = glfwCreateWindow(640, 480, "Project", NULL, NULL); // Windowed
+    glfwMakeContextCurrent(window);
+    // KUKU: Interoperability Code  
-    glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA);
+    GLuint outputVBO;
-    glutInitWindowPosition(100,100);
+    struct cudaGraphicsResource* outputVBO_CUDA;
-    glutInitWindowSize(640,480);
-    glutCreateWindow("Project");
-    glutDisplayFunc(&display);
    // Explicitly set device 0
    cudaGLSetGLDevice(0);
    // Create buffer object and register it with CUDA
    glGenBuffers(1, &outputVBO);
-    glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
+    glBindBuffer(GL_ARRAY_BUFFER, outputVBO);   
-    unsigned int size = w * h * nc * sizeof(float);
+    unsigned int size = w * h * 4 * sizeof(float);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    cudaGraphicsGLRegisterBuffer(&outputVBO_CUDA,
                                 outputVBO,
                                 cudaGraphicsMapFlagsWriteDiscard);
-    // Launch rendering loop
+    // Read a camera image frame every 30 milliseconds:
-    glutMainLoop();
+    // cv::waitKey(30) waits 30 milliseconds for a keyboard input,
-    atexit(bye);
+    // returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
+    while (cv::waitKey(30) < 0 || !glfwWindowShouldClose(window))
+    {
+    // Get camera image
+    camera >> mIn;
+    cvtColor(mIn, mIn, CV_BGR2GRAY);
+    // convert to float representation (opencv loads image values as single bytes by default)
+    mIn.convertTo(mIn,CV_32F);
+    // convert range of each channel to [0,1] (opencv default is [0,255])
+    mIn /= 255.f;
+    float4* d_output;
+    cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
+    size_t num_bytes; 
+    cudaGraphicsResourceGetMappedPointer((void**)&d_output,
+                                         &num_bytes,  
+                                         outputVBO_CUDA);
-#else // CAMERA
+#endif
-    size_t imageBytes = (size_t)w*h*nc*sizeof(float);
    // Init raw input image array
    // opencv images are interleaved: rgb rgb rgb...  (actually bgr bgr bgr...)
@@ -310,7 +320,7 @@ int main(int argc, char **argv)
    dim3 grid = make_grid(dim3(w, h, 1), block);
    Timer timer; timer.start();
-    // float *d_F, *d_U, *d_vx, *d_vy;
+    float *d_F, *d_U, *d_vx, *d_vy;
    cudaMalloc(&d_F, imageBytes);
    cudaMalloc(&d_U, imageBytes);
    cudaMalloc(&d_vx, imageBytes);
@@ -320,10 +330,19 @@ int main(int argc, char **argv)
    for (int n = 0; n < N; n++) {
        norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
-        update<<< grid, block >>>(d_U, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
+#ifdef CAMERA
+        update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
+#else
+        update<<< grid, block >>>(d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
+#endif
    }
+#ifdef CAMERA
+    // Unmap buffer object
+    cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
+#else
    cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
+#endif    
    cudaFree(d_F);
    cudaFree(d_U);
    cudaFree(d_vx);
@@ -332,21 +351,43 @@ int main(int argc, char **argv)
    cout << "time: " << t*1000 << " ms" << endl;
+#ifdef CAMERA
+    // Render from buffer object
+    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+    glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
+    glVertexPointer(4, GL_FLOAT, 0, 0);
+    glEnableClientState(GL_VERTEX_ARRAY);
+    glDrawArrays(GL_POINTS, 0, w * h);
+    glDisableClientState(GL_VERTEX_ARRAY);
+#else
    // show input image
    showImage("Input", mIn, 100, 100);  // show at position (x_from_left=100,y_from_above=100)
    // show output image: first convert to interleaved opencv format from the layered raw array
    convert_layered_to_mat(mOut, imgOut);
    showImage("Output", mOut, 100+w+40, 100);
+#endif
    // ### Display your own output images here as needed
+#ifdef CAMERA
+    glfwSwapBuffers(window);
+    glfwPollEvents();
+    if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
+        glfwSetWindowShouldClose(window, GL_TRUE);
+    // end of camera loop
+    }
+    glfwTerminate();
+#else
    // wait for key inputs
    cv::waitKey(0);
    // save input and result
    cv::imwrite("image_input.png",mIn*255.f);  // "imwrite" assumes channel range [0,255]
    cv::imwrite("image_result.png",mOut*255.f);
+#endif
    // free allocated arrays
    delete[] imgIn;
@@ -354,82 +395,8 @@ int main(int argc, char **argv)
    // close all opencv windows
    cvDestroyAllWindows();
    return 0;
-#endif
 }
-void display()
-{
-    cout << __func__ << ": "<< __LINE__ << ": " << endl;
-    float *d_output;
-    cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
-    size_t num_bytes; 
-    cudaGraphicsResourceGetMappedPointer((void**)&d_output, &num_bytes, outputVBO_CUDA);
-    size_t imageBytes = (size_t)w*h*nc*sizeof(float);
-    // Read a camera image frame every 30 milliseconds:
-    // cv::waitKey(30) waits 30 milliseconds for a keyboard input,
-    // returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
-    cv::waitKey(30);
-    // Get camera image
-    camera >> mIn;
-    // convert to float representation (opencv loads image values as single bytes by default)
-    mIn.convertTo(mIn,CV_32F);
-    // convert range of each channel to [0,1] (opencv default is [0,255])
-    mIn /= 255.f;
-    // Init raw input image array
-    // opencv images are interleaved: rgb rgb rgb...  (actually bgr bgr bgr...)
-    // But for CUDA it's better to work with layered images: rrr... ggg... bbb...
-    // So we will convert as necessary, using interleaved "cv::Mat" for loading/saving/displaying, and layered "float*" for CUDA computations
-    convert_mat_to_layered (imgIn, mIn);
-    dim3 block(32, 16);
-    dim3 grid = make_grid(dim3(w, h, 1), block);
-    Timer timer; timer.start();
-    // float *d_F, *d_U, *d_vx, *d_vy;
-    cudaMalloc(&d_F, imageBytes);
-    cudaMalloc(&d_U, imageBytes);
-    cudaMalloc(&d_vx, imageBytes);
-    cudaMalloc(&d_vy, imageBytes);
-    cudaMemcpy(d_F, imgIn, imageBytes, cudaMemcpyHostToDevice);
-    cudaMemcpy(d_U, imgIn, imageBytes, cudaMemcpyHostToDevice);
-    for (int n = 0; n < N; n++) {
-        norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
-        update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
-    }
-    // cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
-    cudaFree(d_F);
-    cudaFree(d_U);
-    cudaFree(d_vx);
-    cudaFree(d_vy);
-    timer.end();  float t = timer.get();  // elapsed time in seconds
-    cout << "time: " << t*1000 << " ms" << endl;
-    // Unmap buffer object
-    cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
-    // Render from buffer object
-    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-    glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
-    glVertexPointer(4, GL_FLOAT, 0, 0);
-    glEnableClientState(GL_VERTEX_ARRAY);
-    glDrawArrays(GL_POINTS, 0, w * h);
-    glDisableClientState(GL_VERTEX_ARRAY);
-    // Swap buffers
-    glutSwapBuffers();
-    glutPostRedisplay();
-}