Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
639b6601
Commit
639b6601
authored
Mar 22, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Slightly better, not working nonetheless code for interop
Signed-off-by:
Gaurav Kukreja
<
gmkukreja@gmail.com
>
parent
4490dbf2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
114 additions
and
147 deletions
+114
-147
Makefile
miklos/project/Makefile
+1
-1
main.cu
miklos/project/main.cu
+113
-146
No files found.
miklos/project/Makefile
View file @
639b6601
main
:
main.cu aux.cu aux.h Makefile
nvcc
-o
main main.cu aux.cu
--ptxas-options
=
-v
--use_fast_math
--compiler-options
-Wall
-lopencv_highgui
-lopencv_core
-l
GL
-lglut
nvcc
-o
main main.cu aux.cu
--ptxas-options
=
-v
--use_fast_math
--compiler-options
-Wall
-lopencv_highgui
-lopencv_core
-l
opencv_imgproc
-lGL
`
pkg-config
--cflags
glfw3
`
`
pkg-config
--static
--libs
glfw3
`
miklos/project/main.cu
View file @
639b6601
...
...
@@ -26,45 +26,18 @@
#include "aux.h"
#include <iostream>
// #include <GLTools.h> // OpenGL toolkit
// #include <GLShaderManager.h> // Shader Manager Class
#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glut.h> // Windows FreeGlut equivalent
#include <GL/glext.h>
#include <GLFW/glfw3.h>
#include "cuda_gl_interop.h"
using namespace std;
// uncomment to use the camera
// #define CAMERA
/*********************************************************************
*** Global Variables ******
*********************************************************************/
// Pointers to Device Memory for Input, Output and Intermediate images
float *d_F, *d_U, *d_vx, *d_vy;
// Variables for OpenGL Interoperability
GLuint outputVBO;
struct cudaGraphicsResource* outputVBO_CUDA;
int repeats;
bool gray;
float lambda;
float tau;
int N;
cv::VideoCapture camera(0);
cv::Mat mIn;
float *imgIn;
int w, h, nc;
//#define CAMERA
template<typename T>
__device__ __host__ T min(T a, T b)
...
...
@@ -85,8 +58,6 @@ __device__ __host__ T clamp(T m, T x, T M)
}
/**
* Computes the normalized gradient.
*
...
...
@@ -119,7 +90,6 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
/**
* Update approximation.
*
* @param output
* @param U approximation of solution (single-channel)
* @param F original input image (single-channel)
* @param vx normalized gradient of U (x-coordinate)
...
...
@@ -129,8 +99,13 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
* @param lambda weight of 'similarity' energy component
* @param tau update coefficient
*/
__global__ void update(float* output, float *U, float *F, float *vx, float *vy,
#ifdef CAMERA
__global__ void update(float4 *output, float *U, float *F, float *vx, float *vy,
int w, int h, float lambda, float tau)
#else
__global__ void update(float *U, float *F, float *vx, float *vy,
int w, int h, float lambda, float tau)
#endif
{
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;
...
...
@@ -150,7 +125,14 @@ __global__ void update(float* output, float *U, float *F, float *vx, float *vy,
float div_v = dx_vx + dy_vy;
// explicit Euler update rule
output[i] = U[i] + tau * (lambda * d + div_v);
U[i] += tau * (lambda * d + div_v);
#ifdef CAMERA
float u = x / (float)w;
float v = y / (float)h;
output[i] = make_float4(u, U[i], v, 1.0f);
#else
#endif
}
}
...
...
@@ -163,11 +145,6 @@ inline dim3 make_grid(dim3 whole, dim3 block)
div_ceil(whole.z, block.z));
}
void display();
void bye() {
cout << "bye!\n";
}
int main(int argc, char **argv)
{
...
...
@@ -176,6 +153,9 @@ int main(int argc, char **argv)
// We will do it right here, so that the run time measurements are accurate
cudaDeviceSynchronize(); CUDA_CHECK;
// Reading command line parameters:
// getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
// If "-param" is not specified, the value of "var" remains unchanged
...
...
@@ -192,45 +172,48 @@ int main(int argc, char **argv)
#endif
// number of computation repetitions to get a better run time measurement
repeats = 1;
int
repeats = 1;
getParam("repeats", repeats, argc, argv);
cout << "repeats: " << repeats << endl;
// load the input image as grayscale if "-gray" is specifed
gray = true;
bool
gray = true;
// always true: getParam("gray", gray, argc, argv);
cout << "gray: " << gray << endl;
// ### Define your own parameters here as needed
lambda = 0.8;
float
lambda = 0.8;
getParam("lambda", lambda, argc, argv);
cout << "λ: " << lambda << endl;
tau = 0.01;
float
tau = 0.01;
getParam("tau", tau, argc, argv);
cout << "τ: " << tau << endl;
N = 2000;
int
N = 2000;
getParam("N", N, argc, argv);
cout << "N: " << N << endl;
// Init camera / Load input image
#ifdef CAMERA
// Init camera
//
cv::VideoCapture camera(0);
cv::VideoCapture camera(0);
if(!camera.isOpened()) { cerr << "ERROR: Could not open camera" << endl; return 1; }
int camW = 640;
int camH = 480;
camera.set(CV_CAP_PROP_FRAME_WIDTH,camW);
camera.set(CV_CAP_PROP_FRAME_HEIGHT,camH);
// read in first frame to get the dimensions
//
cv::Mat mIn;
cv::Mat mIn;
camera >> mIn;
if(gray)
cvtColor (mIn, mIn, CV_BGR2GRAY);
#else
// Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale))
// cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
// check
if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; }
...
...
@@ -241,9 +224,9 @@ int main(int argc, char **argv)
// convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f;
// get image dimensions
w = mIn.cols; // width
h = mIn.rows; // height
nc = mIn.channels(); // number of channels
int
w = mIn.cols; // width
int
h = mIn.rows; // height
int
nc = mIn.channels(); // number of channels
cout << "image: " << w << " x " << h << endl;
...
...
@@ -263,22 +246,34 @@ int main(int argc, char **argv)
// output image number of channels: mOut.channels(), as defined above (nc, 3, or 1)
// allocate raw input image array
imgIn = new float[(size_t)w*h*nc];
float *imgIn = new float[(size_t)w*h*nc];
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
// allocate raw output array (the computation result will be stored in this array, then later converted to mOut for displaying)
float *imgOut = new float[(size_t)w*h*mOut.channels()];
// For camera mode: Make a loop to read in camera frames
#ifdef CAMERA
// Initialize OpenGL and GLUT for device 0
// and make the OpenGL context current
glutInit(&argc, argv);
// KUKU: OpenGL GLFW Code
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(640, 480, "Project", NULL, NULL); // Windowed
glfwMakeContextCurrent(window);
// KUKU: Interoperability Code
glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA);
glutInitWindowPosition(100,100);
glutInitWindowSize(640,480);
glutCreateWindow("Project");
glutDisplayFunc(&display);
GLuint outputVBO;
struct cudaGraphicsResource* outputVBO_CUDA;
// Explicitly set device 0
cudaGLSetGLDevice(0);
...
...
@@ -286,19 +281,34 @@ int main(int argc, char **argv)
// Create buffer object and register it with CUDA
glGenBuffers(1, &outputVBO);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
unsigned int size = w * h *
nc
* sizeof(float);
unsigned int size = w * h *
4
* sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
cudaGraphicsGLRegisterBuffer(&outputVBO_CUDA,
outputVBO,
cudaGraphicsMapFlagsWriteDiscard);
// Launch rendering loop
glutMainLoop();
atexit(bye);
// Read a camera image frame every 30 milliseconds:
// cv::waitKey(30) waits 30 milliseconds for a keyboard input,
// returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
while (cv::waitKey(30) < 0 || !glfwWindowShouldClose(window))
{
// Get camera image
camera >> mIn;
cvtColor(mIn, mIn, CV_BGR2GRAY);
// convert to float representation (opencv loads image values as single bytes by default)
mIn.convertTo(mIn,CV_32F);
// convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f;
#else // CAMERA
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
float4* d_output;
cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&d_output,
&num_bytes,
outputVBO_CUDA);
#endif
// Init raw input image array
// opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...)
...
...
@@ -310,7 +320,7 @@ int main(int argc, char **argv)
dim3 grid = make_grid(dim3(w, h, 1), block);
Timer timer; timer.start();
//
float *d_F, *d_U, *d_vx, *d_vy;
float *d_F, *d_U, *d_vx, *d_vy;
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_vx, imageBytes);
...
...
@@ -320,10 +330,19 @@ int main(int argc, char **argv)
for (int n = 0; n < N; n++) {
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
update<<< grid, block >>>(d_U, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
#ifdef CAMERA
update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
#else
update<<< grid, block >>>(d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
#endif
}
#ifdef CAMERA
// Unmap buffer object
cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
#else
cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
#endif
cudaFree(d_F);
cudaFree(d_U);
cudaFree(d_vx);
...
...
@@ -332,21 +351,43 @@ int main(int argc, char **argv)
cout << "time: " << t*1000 << " ms" << endl;
#ifdef CAMERA
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, w * h);
glDisableClientState(GL_VERTEX_ARRAY);
#else
// show input image
showImage("Input", mIn, 100, 100); // show at position (x_from_left=100,y_from_above=100)
// show output image: first convert to interleaved opencv format from the layered raw array
convert_layered_to_mat(mOut, imgOut);
showImage("Output", mOut, 100+w+40, 100);
#endif
// ### Display your own output images here as needed
#ifdef CAMERA
glfwSwapBuffers(window);
glfwPollEvents();
if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
// end of camera loop
}
glfwTerminate();
#else
// wait for key inputs
cv::waitKey(0);
// save input and result
cv::imwrite("image_input.png",mIn*255.f); // "imwrite" assumes channel range [0,255]
cv::imwrite("image_result.png",mOut*255.f);
#endif
// free allocated arrays
delete[] imgIn;
...
...
@@ -354,82 +395,8 @@ int main(int argc, char **argv)
// close all opencv windows
cvDestroyAllWindows();
return 0;
#endif
}
void display()
{
cout << __func__ << ": "<< __LINE__ << ": " << endl;
float *d_output;
cudaGraphicsMapResources(1, &outputVBO_CUDA, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&d_output, &num_bytes, outputVBO_CUDA);
size_t imageBytes = (size_t)w*h*nc*sizeof(float);
// Read a camera image frame every 30 milliseconds:
// cv::waitKey(30) waits 30 milliseconds for a keyboard input,
// returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
cv::waitKey(30);
// Get camera image
camera >> mIn;
// convert to float representation (opencv loads image values as single bytes by default)
mIn.convertTo(mIn,CV_32F);
// convert range of each channel to [0,1] (opencv default is [0,255])
mIn /= 255.f;
// Init raw input image array
// opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...)
// But for CUDA it's better to work with layered images: rrr... ggg... bbb...
// So we will convert as necessary, using interleaved "cv::Mat" for loading/saving/displaying, and layered "float*" for CUDA computations
convert_mat_to_layered (imgIn, mIn);
dim3 block(32, 16);
dim3 grid = make_grid(dim3(w, h, 1), block);
Timer timer; timer.start();
// float *d_F, *d_U, *d_vx, *d_vy;
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_vx, imageBytes);
cudaMalloc(&d_vy, imageBytes);
cudaMemcpy(d_F, imgIn, imageBytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, imgIn, imageBytes, cudaMemcpyHostToDevice);
for (int n = 0; n < N; n++) {
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
update<<< grid, block >>>(d_output, d_U, d_F, d_vx, d_vy, w, h, lambda, tau);
}
// cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
cudaFree(d_F);
cudaFree(d_U);
cudaFree(d_vx);
cudaFree(d_vy);
timer.end(); float t = timer.get(); // elapsed time in seconds
cout << "time: " << t*1000 << " ms" << endl;
// Unmap buffer object
cudaGraphicsUnmapResources(1, &outputVBO_CUDA, 0);
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, outputVBO);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, w * h);
glDisableClientState(GL_VERTEX_ARRAY);
// Swap buffers
glutSwapBuffers();
glutPostRedisplay();
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment