Commit 639b6601 authored by Gaurav Kukreja's avatar Gaurav Kukreja

Slightly better, not working nonetheless code for interop

Signed-off-by: 's avatarGaurav Kukreja <gmkukreja@gmail.com>
parent 4490dbf2
main: main.cu aux.cu aux.h Makefile main: main.cu aux.cu aux.h Makefile
nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core -lGL -lglut nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core -lopencv_imgproc -lGL `pkg-config --cflags glfw3` `pkg-config --static --libs glfw3`
...@@ -26,45 +26,18 @@ ...@@ -26,45 +26,18 @@
#include "aux.h" #include "aux.h"
#include <iostream> #include <iostream>
// #include <GLTools.h> // OpenGL toolkit
// #include <GLShaderManager.h> // Shader Manager Class
#define GL_GLEXT_PROTOTYPES #define GL_GLEXT_PROTOTYPES
#include <GL/gl.h> #include <GL/gl.h>
#include <GL/glut.h> // Windows FreeGlut equivalent
#include <GL/glext.h> #include <GL/glext.h>
#include <GLFW/glfw3.h>
#include "cuda_gl_interop.h" #include "cuda_gl_interop.h"
using namespace std; using namespace std;
// uncomment to use the camera // uncomment to use the camera
// #define CAMERA //#define CAMERA
/*********************************************************************
*** Global Variables ******
*********************************************************************/
// Pointers to Device Memory for Input, Output and Intermediate images
float *d_F, *d_U, *d_vx, *d_vy;
// Variables for OpenGL Interoperability
GLuint outputVBO;
struct cudaGraphicsResource* outputVBO_CUDA;
int repeats;
bool gray;
float lambda;
float tau;
int N;
cv::VideoCapture camera(0);
cv::Mat mIn;
float *imgIn;
int w, h, nc;
template<typename T> template<typename T>
__device__ __host__ T min(T a, T b) __device__ __host__ T min(T a, T b)
...@@ -85,8 +58,6 @@ __device__ __host__ T clamp(T m, T x, T M) ...@@ -85,8 +58,6 @@ __device__ __host__ T clamp(T m, T x, T M)
} }
/** /**
* Computes the normalized gradient. * Computes the normalized gradient.
* *
...@@ -119,7 +90,6 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h) ...@@ -119,7 +90,6 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
/** /**
* Update approximation. * Update approximation.
* *
* @param output
* @param U approximation of solution (single-channel) * @param U approximation of solution (single-channel)
* @param F original input image (single-channel) * @param F original input image (single-channel)
* @param vx normalized gradient of U (x-coordinate) * @param vx normalized gradient of U (x-coordinate)
...@@ -129,8 +99,13 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h) ...@@ -129,8 +99,13 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
* @param lambda weight of 'similarity' energy component * @param lambda weight of 'similarity' energy component
* @param tau update coefficient * @param tau update coefficient
*/ */
__global__ void update(float* output, float *U, float *F, float *vx, float *vy, #ifdef CAMERA
__global__ void update(float4 *output, float *U, float *F, float *vx, float *vy,
int w, int h, float lambda, float tau) int w, int h, float lambda, float tau)
#else
__global__ void update(float *U, float *F, float *vx, float *vy,
int w, int h, float lambda, float tau)
#endif
{ {
int x = threadIdx.x + blockDim.x * blockIdx.x; int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y; int y = threadIdx.y + blockDim.y * blockIdx.y;
...@@ -150,7 +125,14 @@ __global__ void update(float* output, float *U, float *F, float *vx, float *vy, ...@@ -150,7 +125,14 @@ __global__ void update(float* output, float *U, float *F, float *vx, float *vy,
float div_v = dx_vx + dy_vy; float div_v = dx_vx + dy_vy;
// explicit Euler update rule // explicit Euler update rule
output[i] = U[i] + tau * (lambda * d + div_v); U[i] += tau * (lambda * d + div_v);
#ifdef CAMERA
float u = x / (float)w;
float v = y / (float)h;
output[i] = make_float4(u, U[i], v, 1.0f);
#else
#endif
} }
} }
...@@ -163,11 +145,6 @@ inline dim3 make_grid(dim3 whole, dim3 block) ...@@ -163,11 +145,6 @@ inline dim3 make_grid(dim3 whole, dim3 block)
div_ceil(whole.z, block.z)); div_ceil(whole.z, block.z));
} }
void display();
void bye() {
cout << "bye!\n";
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
...@@ -176,6 +153,9 @@ int main(int argc, char **argv) ...@@ -176,6 +153,9 @@ int main(int argc, char **argv)
// We will do it right here, so that the run time measurements are accurate // We will do it right here, so that the run time measurements are accurate
cudaDeviceSynchronize(); CUDA_CHECK; cudaDeviceSynchronize(); CUDA_CHECK;
// Reading command line parameters: // Reading command line parameters:
// getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var" // getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
// If "-param" is not specified, the value of "var" remains unchanged // If "-param" is not specified, the value of "var" remains unchanged
...@@ -190,47 +170,50 @@ int main(int argc, char **argv) ...@@ -190,47 +170,50 @@ int main(int argc, char **argv)
if (!ret) cerr << "ERROR: no image specified" << endl; if (!ret) cerr << "ERROR: no image specified" << endl;
if (argc <= 1) { cout << "Usage: " << argv[0] << " -i <image> [-repeats <repeats>] [-gray]" << endl; return 1; } if (argc <= 1) { cout << "Usage: " << argv[0] << " -i <image> [-repeats <repeats>] [-gray]" << endl; return 1; }
#endif #endif
// number of computation repetitions to get a better run time measurement // number of computation repetitions to get a better run time measurement
repeats = 1; int repeats = 1;
getParam("repeats", repeats, argc, argv); getParam("repeats", repeats, argc, argv);
cout << "repeats: " << repeats << endl; cout << "repeats: " << repeats << endl;
// load the input image as grayscale if "-gray" is specifed // load the input image as grayscale if "-gray" is specifed
gray = true; bool gray = true;
// always true: getParam("gray", gray, argc, argv); // always true: getParam("gray", gray, argc, argv);
cout << "gray: " << gray << endl; cout << "gray: " << gray << endl;
// ### Define your own parameters here as needed // ### Define your own parameters here as needed
lambda = 0.8; float lambda = 0.8;
getParam("lambda", lambda, argc, argv); getParam("lambda", lambda, argc, argv);
cout << "λ: " << lambda << endl; cout << "λ: " << lambda << endl;
tau = 0.01; float tau = 0.01;
getParam("tau", tau, argc, argv); getParam("tau", tau, argc, argv);
cout << "τ: " << tau << endl; cout << "τ: " << tau << endl;
N = 2000; int N = 2000;
getParam("N", N, argc, argv); getParam("N", N, argc, argv);
cout << "N: " << N << endl; cout << "N: " << N << endl;
// Init camera / Load input image // Init camera / Load input image
#ifdef CAMERA #ifdef CAMERA
// Init camera // Init camera
// cv::VideoCapture camera(0); cv::VideoCapture camera(0);
if(!camera.isOpened()) { cerr << "ERROR: Could not open camera" << endl; return 1; } if(!camera.isOpened()) { cerr << "ERROR: Could not open camera" << endl; return 1; }
int camW = 640; int camW = 640;
int camH = 480; int camH = 480;
camera.set(CV_CAP_PROP_FRAME_WIDTH,camW); camera.set(CV_CAP_PROP_FRAME_WIDTH,camW);
camera.set(CV_CAP_PROP_FRAME_HEIGHT,camH); camera.set(CV_CAP_PROP_FRAME_HEIGHT,camH);
// read in first frame to get the dimensions // read in first frame to get the dimensions
// cv::Mat mIn; cv::Mat mIn;
camera >> mIn; camera >> mIn;
if(gray)
cvtColor (mIn, mIn, CV_BGR2GRAY);
#else #else
// Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale)) // Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale))
// cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1)); cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
// check // check
if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; } if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; }
...@@ -241,9 +224,9 @@ int main(int argc, char **argv) ...@@ -241,9 +224,9 @@ int main(int argc, char **argv)
// convert range of each channel to [0,1] (opencv default is [0,255]) // convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f; mIn /= 255.f;
// get image dimensions // get image dimensions
w = mIn.cols; // width int w = mIn.cols; // width
h = mIn.rows; // height int h = mIn.rows; // height
nc = mIn.channels(); // number of channels int nc = mIn.channels(); // number of channels
cout << "image: " << w << " x " << h << endl; cout << "image: " << w << " x " << h << endl;
...@@ -263,42 +246,69 @@ int main(int argc, char **argv) ...@@ -263,42 +246,69 @@ int main(int argc, char **argv)
// output image number of channels: mOut.channels(), as defined above (nc, 3, or 1) // output image number of channels: mOut.channels(), as defined above (nc, 3, or 1)
// allocate raw input image array // allocate raw input image array
imgIn = new float[(size_t)w*h*nc]; float *imgIn = new float[(size_t)w*h*nc];
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
// allocate raw output array (the computation result will be stored in this array, then later converted to mOut for displaying) // allocate raw output array (the computation result will be stored in this array, then later converted to mOut for displaying)
float *imgOut = new float[(size_t)w*h*mOut.channels()]; float *imgOut = new float[(size_t)w*h*mOut.channels()];
// For camera mode: Make a loop to read in camera frames
#ifdef CAMERA #ifdef CAMERA
// Initialize OpenGL and GLUT for device 0 // KUKU: OpenGL GLFW Code
// and make the OpenGL context current glfwInit();
glutInit(&argc, argv); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(640, 480, "Project", NULL, NULL); // Windowed
glfwMakeContextCurrent(window);
// KUKU: Interoperability Code
glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA); GLuint outputVBO;
glutInitWindowPosition(100,100); struct cudaGraphicsResource* outputVBO_CUDA;
glutInitWindowSize(640,480);
glutCreateWindow("Project");
glutDisplayFunc(&display);
// Explicitly set device 0 // Explicitly set device 0
cudaGLSetGLDevice(0); cudaGLSetGLDevice(0);
// Create buffer object and register it with CUDA // Create buffer object and register it with CUDA
glGenBuffers(1, &outputVBO); glGenBuffers(1, &outputVBO);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO); glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
unsigned int size = w * h * nc * sizeof(float); unsigned int size = w * h * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
cudaGraphicsGLRegisterBuffer(&outputVBO_CUDA, cudaGraphicsGLRegisterBuffer(&outputVBO_CUDA,
outputVBO, outputVBO,
cudaGraphicsMapFlagsWriteDiscard); cudaGraphicsMapFlagsWriteDiscard);
// Launch rendering loop // Read a camera image frame every 30 milliseconds:
glutMainLoop(); // cv::waitKey(30) waits 30 milliseconds for a keyboard input,
atexit(bye); // returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
while (cv::waitKey(30) < 0 || !glfwWindowShouldClose(window))
{
// Get camera image
camera >> mIn;
cvtColor(mIn, mIn, CV_BGR2GRAY);
// convert to float representation (opencv loads image values as single bytes by default)
mIn.convertTo(mIn,CV_32F);
// convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f;
float4* d_output;
cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&d_output,
&num_bytes,
outputVBO_CUDA);
#else // CAMERA #endif
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
// Init raw input image array // Init raw input image array
// opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...) // opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...)
...@@ -310,7 +320,7 @@ int main(int argc, char **argv) ...@@ -310,7 +320,7 @@ int main(int argc, char **argv)
dim3 grid = make_grid(dim3(w, h, 1), block); dim3 grid = make_grid(dim3(w, h, 1), block);
Timer timer; timer.start(); Timer timer; timer.start();
// float *d_F, *d_U, *d_vx, *d_vy; float *d_F, *d_U, *d_vx, *d_vy;
cudaMalloc(&d_F, imageBytes); cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_U, imageBytes); cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_vx, imageBytes); cudaMalloc(&d_vx, imageBytes);
...@@ -320,10 +330,19 @@ int main(int argc, char **argv) ...@@ -320,10 +330,19 @@ int main(int argc, char **argv)
for (int n = 0; n < N; n++) { for (int n = 0; n < N; n++) {
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h); norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
update<<< grid, block >>>(d_U, d_U, d_F, d_vx, d_vy, w, h, lambda, tau); #ifdef CAMERA
update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
#else
update<<< grid, block >>>(d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
#endif
} }
#ifdef CAMERA
// Unmap buffer object
cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
#else
cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost); cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
#endif
cudaFree(d_F); cudaFree(d_F);
cudaFree(d_U); cudaFree(d_U);
cudaFree(d_vx); cudaFree(d_vx);
...@@ -332,21 +351,43 @@ int main(int argc, char **argv) ...@@ -332,21 +351,43 @@ int main(int argc, char **argv)
cout << "time: " << t*1000 << " ms" << endl; cout << "time: " << t*1000 << " ms" << endl;
#ifdef CAMERA
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, w * h);
glDisableClientState(GL_VERTEX_ARRAY);
#else
// show input image // show input image
showImage("Input", mIn, 100, 100); // show at position (x_from_left=100,y_from_above=100) showImage("Input", mIn, 100, 100); // show at position (x_from_left=100,y_from_above=100)
// show output image: first convert to interleaved opencv format from the layered raw array // show output image: first convert to interleaved opencv format from the layered raw array
convert_layered_to_mat(mOut, imgOut); convert_layered_to_mat(mOut, imgOut);
showImage("Output", mOut, 100+w+40, 100); showImage("Output", mOut, 100+w+40, 100);
#endif
// ### Display your own output images here as needed // ### Display your own output images here as needed
#ifdef CAMERA
glfwSwapBuffers(window);
glfwPollEvents();
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
// end of camera loop
}
glfwTerminate();
#else
// wait for key inputs // wait for key inputs
cv::waitKey(0); cv::waitKey(0);
// save input and result // save input and result
cv::imwrite("image_input.png",mIn*255.f); // "imwrite" assumes channel range [0,255] cv::imwrite("image_input.png",mIn*255.f); // "imwrite" assumes channel range [0,255]
cv::imwrite("image_result.png",mOut*255.f); cv::imwrite("image_result.png",mOut*255.f);
#endif
// free allocated arrays // free allocated arrays
delete[] imgIn; delete[] imgIn;
...@@ -354,82 +395,8 @@ int main(int argc, char **argv) ...@@ -354,82 +395,8 @@ int main(int argc, char **argv)
// close all opencv windows // close all opencv windows
cvDestroyAllWindows(); cvDestroyAllWindows();
return 0; return 0;
#endif
} }
void display()
{
cout << __func__ << ": "<< __LINE__ << ": " << endl;
float *d_output;
cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&d_output, &num_bytes, outputVBO_CUDA);
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
// Read a camera image frame every 30 milliseconds:
// cv::waitKey(30) waits 30 milliseconds for a keyboard input,
// returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
cv::waitKey(30);
// Get camera image
camera >> mIn;
// convert to float representation (opencv loads image values as single bytes by default)
mIn.convertTo(mIn,CV_32F);
// convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f;
// Init raw input image array
// opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...)
// But for CUDA it's better to work with layered images: rrr... ggg... bbb...
// So we will convert as necessary, using interleaved "cv::Mat" for loading/saving/displaying, and layered "float*" for CUDA computations
convert_mat_to_layered (imgIn, mIn);
dim3 block(32, 16);
dim3 grid = make_grid(dim3(w, h, 1), block);
Timer timer; timer.start();
// float *d_F, *d_U, *d_vx, *d_vy;
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_vx, imageBytes);
cudaMalloc(&d_vy, imageBytes);
cudaMemcpy(d_F, imgIn, imageBytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, imgIn, imageBytes, cudaMemcpyHostToDevice);
for (int n = 0; n < N; n++) {
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
}
// cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
cudaFree(d_F);
cudaFree(d_U);
cudaFree(d_vx);
cudaFree(d_vy);
timer.end(); float t = timer.get(); // elapsed time in seconds
cout << "time: " << t*1000 << " ms" << endl;
// Unmap buffer object
cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, w * h);
glDisableClientState(GL_VERTEX_ARRAY);
// Swap buffers
glutSwapBuffers();
glutPostRedisplay();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment