Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
2023ccb8
Commit
2023ccb8
authored
Mar 09, 2014
by
Ravikishore
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ex15 added
parent
8039190a
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
651 additions
and
0 deletions
+651
-0
Makefile
ravi/ex15/Makefile
+5
-0
aux.cu
ravi/ex15/aux.cu
+146
-0
aux.h
ravi/ex15/aux.h
+109
-0
main.cu
ravi/ex15/main.cu
+391
-0
No files found.
ravi/ex15/Makefile
0 → 100644
View file @
2023ccb8
# Default target: build the exercise binary.
all: main

# Rebuild whenever a source file, the shared header, or this Makefile changes.
# --ptxas-options=-v prints per-kernel register/shared-memory usage at compile time.
main: main.cu aux.cu aux.h Makefile
	nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
ravi/ex15/aux.cu
0 → 100644
View file @
2023ccb8
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#include "aux.h"
#include <cstdlib>
#include <iostream>
using std::stringstream;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
// parameter processing: template specialization for T=bool
//
// Scans argv from the last argument towards the first for "-<param>".
//  - If the flag is the final argument, or the next argument itself starts
//    with '-', the flag is treated as a bare switch: var becomes true.
//  - Otherwise the next argument is parsed into var through a stringstream.
// Returns true when the flag was found and (if a value was given) parsed
// successfully; returns false when the flag is absent or the value is not
// parseable. var is left untouched when the flag is absent.
template<>
bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
{
    const char *wanted = param.c_str();
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        if (arg[0] != '-' || strcmp(arg + 1, wanted) != 0) continue;

        // A value is only present if there is a following argument that is not another flag.
        const bool hasValue = (pos + 1 < argc) && (argv[pos + 1][0] != '-');
        if (!hasValue)
        {
            var = true;
            return true;
        }

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return !parser.fail();
    }
    return false;
}
// opencv helpers
// Converts a layered image (all values of channel 0, then channel 1, ...)
// into an interleaved one (all channels of pixel 0, then pixel 1, ...).
//
//   aOut : destination, w*h*nc floats, interleaved
//   aIn  : source, w*h*nc floats, layered
//   w, h : image width and height in pixels
//   nc   : number of channels
//
// The channel order is reversed on the way: layer c ends up in interleaved
// slot (nc-1-c) of every pixel. For a single channel both layouts coincide,
// so the data is copied verbatim.
void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
{
    // Pixel count is computed in size_t so that large images do not overflow int
    // (this also applies to the single-channel memcpy size below).
    const size_t nOmega = (size_t)w*h;
    if (nc==1) { memcpy(aOut, aIn, nOmega*sizeof(float)); return; }

    for (int y=0; y<h; y++)
    {
        for (int x=0; x<w; x++)
        {
            const size_t pixel = x + (size_t)w*y;
            for (int c=0; c<nc; c++)
            {
                aOut[(nc-1-c) + nc*pixel] = aIn[pixel + nOmega*c];
            }
        }
    }
}
// Fills the OpenCV matrix mOut from the layered float array aIn, using the
// matrix's own width, height and channel count.
// NOTE(review): mOut.data is reinterpreted as float* and indexed densely, so
// this presumably expects a continuous CV_32F matrix - confirm at call sites.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
{
    float *dst = (float*)mOut.data;
    const int width = mOut.cols;
    const int height = mOut.rows;
    const int channels = mOut.channels();
    convert_layered_to_interleaved(dst, aIn, width, height, channels);
}
// Converts an interleaved image (all channels of pixel 0, then pixel 1, ...)
// into a layered one (all values of channel 0, then channel 1, ...).
//
//   aOut : destination, w*h*nc floats, layered
//   aIn  : source, w*h*nc floats, interleaved
//   w, h : image width and height in pixels
//   nc   : number of channels
//
// The channel order is reversed on the way: interleaved slot (nc-1-c) of every
// pixel ends up in layer c. For a single channel both layouts coincide, so the
// data is copied verbatim.
void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
{
    // Pixel count is computed in size_t so that large images do not overflow int
    // (this also applies to the single-channel memcpy size below).
    const size_t nOmega = (size_t)w*h;
    if (nc==1) { memcpy(aOut, aIn, nOmega*sizeof(float)); return; }

    for (int y=0; y<h; y++)
    {
        for (int x=0; x<w; x++)
        {
            const size_t pixel = x + (size_t)w*y;
            for (int c=0; c<nc; c++)
            {
                aOut[pixel + nOmega*c] = aIn[(nc-1-c) + nc*pixel];
            }
        }
    }
}
// Fills the layered float array aOut from the OpenCV matrix mIn, using the
// matrix's own width, height and channel count.
// NOTE(review): mIn.data is reinterpreted as float* and indexed densely, so
// this presumably expects a continuous CV_32F matrix - confirm at call sites.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
{
    const float *src = (float*)mIn.data;
    const int width = mIn.cols;
    const int height = mIn.rows;
    const int channels = mIn.channels();
    convert_interleaved_to_layered(aOut, src, width, height, channels);
}
// Opens (or reuses) an auto-sized window named `title`, moves it to screen
// position (x, y) and displays `mat` in it.
void showImage(string title, const cv::Mat &mat, int x, int y)
{
    cv::namedWindow(title, CV_WINDOW_AUTOSIZE);
    // cvMoveWindow is the C API and needs a C string.
    cvMoveWindow(title.c_str(), x, y);
    cv::imshow(title, mat);
}
// adding Gaussian noise
// Returns one pseudo-random sample scaled by sigma, generated with the
// Box-Muller transform from two uniform rand() draws.
float noise(float sigma)
{
    // Two uniform draws in [0,1]; the order of the rand() calls is part of the behavior.
    const float u1 = (float)rand()/RAND_MAX;
    const float u2 = (float)rand()/RAND_MAX;

    // u1 is clamped away from zero so the logarithm stays finite.
    return sigma * sqrtf(-2*log(std::max(u1,0.000001f)))*cosf(2*M_PI*u2);
}
// Adds an independent noise(sigma) sample to every float element of m
// (cols * rows * channels values, visited in memory order).
// NOTE(review): m.data is reinterpreted as float* and walked densely, so this
// presumably expects a continuous CV_32F matrix - confirm at call sites.
void addNoise(cv::Mat &m, float sigma)
{
    const size_t total = (size_t)m.cols * m.rows * m.channels();
    float *cursor = (float*)m.data;
    for (float *const stop = cursor + total; cursor != stop; ++cursor)
    {
        *cursor += noise(sigma);
    }
}
// cuda error checking

// Location of the most recent cuda_check() call that found no error.
// Reported alongside a failure to help narrow down where the error originated.
string prev_file = "";
int prev_line = 0;

// Queries (and clears) the last CUDA error. On success the given location is
// remembered; on failure the error, its numeric code and the previously
// remembered location are printed to stdout and the process exits with code 1.
void cuda_check(string file, int line)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        prev_file = file;
        prev_line = line;
        return;
    }

    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
    if (prev_line>0)
    {
        cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
    }
    exit(1);
}
ravi/ex15/aux.h
0 → 100644
View file @
2023ccb8
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#ifndef AUX_H
#define AUX_H
#include <cuda_runtime.h>
#include <ctime>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <sstream>
// parameter processing
// Looks for "-<param> <value>" on the command line and parses <value> into var.
//
// argv is scanned from the last argument towards the first (argv[0] is
// skipped). An occurrence of the flag that is the very last argument has no
// value and is skipped, so an earlier occurrence can still match.
//
// Returns true if a value was found and parsed successfully, false if the flag
// is absent or the value could not be parsed as T. When the flag is absent,
// var is left unchanged.
template<typename T>
bool getParam(std::string param, T &var, int argc, char **argv)
{
    const char *wanted = param.c_str();
    for (int pos = argc - 1; pos >= 1; --pos)
    {
        const char *arg = argv[pos];
        const bool matches = (arg[0] == '-') && (strcmp(arg + 1, wanted) == 0);
        if (!matches) continue;

        // Flag without a following argument: nothing to parse here, keep scanning.
        if (pos + 1 >= argc) continue;

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return !parser.fail();
    }
    return false;
}
// opencv helpers

// Copies the interleaved OpenCV matrix mIn into the layered float array aOut.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);

// Copies the layered float array aIn into the interleaved OpenCV matrix mOut.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);

// Shows mat in a window named title, placed at screen position (x, y).
void showImage(std::string title, const cv::Mat &mat, int x, int y);

// adding Gaussian noise

// Adds a random noise sample scaled by sigma to every element of m.
void addNoise(cv::Mat &m, float sigma);
// measuring time

// Stopwatch based on clock(). end() calls cudaDeviceSynchronize() before
// reading the clock, so work launched on the GPU before end() is included in
// the measured interval.
class Timer
{
public:
    Timer() : tStart(0), running(false), sec(0.f) {}

    // Starts (or restarts) the measurement.
    void start()
    {
        tStart = clock();
        running = true;
    }

    // Stops the measurement and stores the elapsed time in seconds.
    // Calling end() while the timer is not running resets the stored time to 0.
    void end()
    {
        if (running)
        {
            cudaDeviceSynchronize();
            const clock_t tEnd = clock();
            sec = (float)(tEnd - tStart) / CLOCKS_PER_SEC;
            running = false;
        }
        else
        {
            sec = 0;
        }
    }

    // Returns the elapsed time in seconds; stops the timer first if it is still running.
    float get()
    {
        if (running)
        {
            end();
        }
        return sec;
    }

private:
    clock_t tStart;  // clock() value captured by start()
    bool running;    // true between start() and end()
    float sec;       // last measured interval, in seconds
};
// cuda error checking

// Place CUDA_CHECK; after a CUDA call: it forwards the current source
// location to cuda_check().
#define CUDA_CHECK cuda_check(__FILE__,__LINE__)

// Checks the last CUDA error; file and line identify the calling location.
void cuda_check(std::string file, int line);
#endif // AUX_H
ravi/ex15/main.cu
0 → 100644
View file @
2023ccb8
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ###
// ###
// ### TODO: For every student of your group, please provide here:
// ###
// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058
// ###
// ###
#include "aux.h"
#include <iostream>
#include <math.h>
using namespace std;
// uncomment to use the camera
//#define CAMERA
#define USING_GPU
// Diffusivity used by the solver: g(s) = 1 / max(epsilon, s).
// epsilon bounds the denominator from below, so small s does not blow up g.
__device__ __host__ float huber(float s, float epsilon)
{
    const float bounded = max(epsilon, s);
    return 1.0F / bounded;

    // Alternative diffusivities that were tried:
    //return 1.0F;
    //return expf(-s*s / epsilon) / epsilon;
}
// Marks the pixels that are to be inpainted.
//
// A pixel counts as "green" when its three layered channel values are exactly
// (0, 1, 0), i.e. the middle layer is 1 and the two outer layers are 0.
// For every pixel (x, y) of a 3-channel image the kernel
//   - writes d_mask[x + w*y] = true/false accordingly, and
//   - overwrites green pixels with (0.5, 0.5, 0.5) as the initial guess.
// For any other channel count no pixel can be green, so the whole mask is set
// to false (previously it was left uninitialised in that case).
//
//   imgIn    : layered image, w*h*nc floats, modified in place
//   d_mask   : output mask, w*h bools
//   n_pixels : unused; kept for interface compatibility
//   w, h, nc : image width, height and channel count
//
// Launch: 2D grid of 2D blocks covering at least w x h threads; surplus
// threads exit immediately. No shared memory is used.
__global__ void findGreen(float* imgIn, bool* d_mask, size_t n_pixels, int w, int h, int nc)
{
    (void)n_pixels;

    const int ix = threadIdx.x + blockDim.x * blockIdx.x;
    const int iy = threadIdx.y + blockDim.y * blockIdx.y;
    if (ix >= w || iy >= h) return;

    // All offsets are computed in size_t to avoid int overflow on large images.
    const size_t layer = (size_t)w * h;
    const size_t idx0 = ix + (size_t)iy * w;

    if (nc != 3)
    {
        d_mask[idx0] = false;
        return;
    }

    const size_t idx1 = idx0 + layer;   // green layer
    const size_t idx2 = idx1 + layer;

    // Exact comparison is intended: the marker color is pure green.
    const bool isGreen = imgIn[idx1] == 1.0f && imgIn[idx0] == 0.0f && imgIn[idx2] == 0.0f;
    d_mask[idx0] = isGreen;
    if (isGreen)
    {
        imgIn[idx0] = 0.5f;
        imgIn[idx1] = 0.5f;
        imgIn[idx2] = 0.5f;
    }
}
// Computes the per-pixel diffusivity g = huber(|grad u|, epsilon).
//
// The gradient uses forward differences in x and y, set to zero on the last
// column / last row, and its squared magnitude is summed over all channels.
//
//   image   : layered input image, w*h*nc floats (read only here)
//   g       : output, w*h floats
//   w, h, nc: image width, height and channel count
//   epsilon : lower bound passed on to huber()
//
// Launch: 2D grid of 2D blocks covering at least w x h threads; surplus
// threads exit immediately. No shared memory is used.
__global__ void compute_g(float *image, float *g, int w, int h, int nc, float epsilon)
{
    const int x = threadIdx.x + blockDim.x * blockIdx.x;
    const int y = threadIdx.y + blockDim.y * blockIdx.y;
    if (x >= w || y >= h) return;

    const size_t pixel = x + (size_t)w*y;
    const size_t layer = (size_t)w*h;
    const bool hasRight = x < w-1;
    const bool hasBelow = y < h-1;

    float G2 = 0.0f;
    for (int c = 0; c < nc; c++) {
        const size_t idx = pixel + layer*c;
        float ux = 0.0f;
        float uy = 0.0f;
        if (hasRight) ux = image[idx + 1] - image[idx];
        if (hasBelow) uy = image[idx + w] - image[idx];
        G2 += ux*ux + uy*uy;
    }
    g[pixel] = huber(sqrtf(G2), epsilon);
}
// One half-sweep of a red-black Gauss-Seidel update with SOR
// over-relaxation, applied only to masked pixels.
//
// For every masked pixel whose checkerboard color ((x + y) % 2) equals
// red_black, each channel value u is replaced by
//     u_gs  = (lambda * sum_n g_n * u_n) / (lambda * sum_n g_n)
//     u_new = u_gs + theta * (u_gs - u_old)
// where n runs over the 4-neighbors that lie inside the image.
//
//   image     : layered image, w*h*nc floats, updated in place
//   g         : per-pixel diffusivity, w*h floats
//   mask      : w*h bools; only pixels with mask == true are updated
//   lambda    : weight applied to numerator and denominator alike
//   theta     : over-relaxation parameter
//   red_black : checkerboard color (0 or 1) to update in this launch
//
// Neighbors outside the image are never dereferenced: the previous version
// read them and multiplied by 0, which is an out-of-bounds access and yields
// NaN whenever the stray value is NaN/Inf. A pixel whose denominator is zero
// is left unchanged instead of becoming NaN.
//
// Launch: 2D grid of 2D blocks covering at least w x h threads. Pixels of one
// color only read pixels of the other color, so a single launch is race free.
__global__ void rb_gs_update(float *image, float *g, bool* mask, int w, int h, int nc, float lambda, float theta, int red_black)
{
    const int x = threadIdx.x + blockDim.x * blockIdx.x;
    const int y = threadIdx.y + blockDim.y * blockIdx.y;
    if (x >= w || y >= h) return;
    if (((x + y) % 2) != red_black) return;

    const size_t idx_2d = x + (size_t)w*y;
    if (!mask[idx_2d]) return;

    const bool has_right = (x+1) < w;
    const bool has_left  = x > 0;
    const bool has_down  = (y+1) < h;
    const bool has_up    = y > 0;

    // Neighbor weights; the same for every channel, so computed once.
    const float g_right = has_right ? g[idx_2d + 1] : 0.0f;
    const float g_left  = has_left  ? g[idx_2d - 1] : 0.0f;
    const float g_down  = has_down  ? g[idx_2d + w] : 0.0f;
    const float g_up    = has_up    ? g[idx_2d - w] : 0.0f;

    const float gsum = g_right + g_left + g_down + g_up;
    const float denom = lambda * gsum;
    if (denom == 0.0f) return;

    const size_t layer = (size_t)w*h;
    for (int c = 0; c < nc; c++) {
        const size_t idx = idx_2d + layer*c;
        const float temp_uxy = image[idx];

        const float gsum_u = (has_right ? g_right * image[idx + 1] : 0.0f) +
                             (has_left  ? g_left  * image[idx - 1] : 0.0f) +
                             (has_down  ? g_down  * image[idx + w] : 0.0f) +
                             (has_up    ? g_up    * image[idx - w] : 0.0f);

        const float gs_result = (lambda * gsum_u) / denom;

        // SOR step
        image[idx] = gs_result + theta * (gs_result - temp_uxy);
    }
}
// Program entry point.
//
// Loads an image (or grabs camera frames when CAMERA is defined), converts it
// to a layered float array, and on the GPU
//   1. marks pure-green pixels as the region to fill (findGreen),
//   2. runs N iterations of compute_g followed by a red and a black
//      rb_gs_update half-sweep,
// then shows and saves the input and the result.
//
// Command line: -i <image> [-repeats n] [-gray] [-sigma s] [-lambda l]
//               [-theta t] [-epsilon e] [-N iterations]
// Returns 0 on success, 1 on usage error or when the image/camera cannot be opened.
int main(int argc, char **argv)
{
#ifdef USING_GPU
    // Before the GPU can process kernels, a "CUDA context" must be initialized.
    // This happens on the very first call to a CUDA function and takes some time,
    // so it is triggered here to keep it out of the run time measurements.
    cudaDeviceSynchronize(); CUDA_CHECK;
#endif // USING_GPU

    // Reading command line parameters:
    // getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
    // If "-param" is not specified, the value of "var" remains unchanged
    //
    // return value: getParam("param", ...) returns true if "-param" is specified, and false otherwise
#ifdef CAMERA
#else
    // input image
    string image = "";
    bool ret = getParam("i", image, argc, argv);
    if (!ret) cerr << "ERROR: no image specified" << endl;
    // The usage text now states the actual default of sigma (0.1, see below).
    if (argc <= 1) { cout << "Usage: " << argv[0] << " -i <image> [-repeats <repeats>] [-gray] [-sigma <sigma>]" << endl << "\t Default Value of sigma = 0.1" << endl; return 1; }
#endif

    // number of computation repetitions to get a better run time measurement
    int repeats = 1;
    getParam("repeats", repeats, argc, argv);
    cout << "repeats: " << repeats << endl;

    // load the input image as grayscale if "-gray" is specified
    bool gray = false;
    getParam("gray", gray, argc, argv);
    cout << "gray: " << gray << endl;

    // Solver parameters
    float sigma = 0.1f;      // parsed and printed only; not used by the computation below
    getParam("sigma", sigma, argc, argv);
    cout << "σ: " << sigma << endl;

    float lambda = 0.2f;     // passed to rb_gs_update
    getParam("lambda", lambda, argc, argv);
    cout << "lambda: " << lambda << endl;

    float theta = 0.8f;      // SOR over-relaxation parameter
    getParam("theta", theta, argc, argv);
    cout << "theta: " << theta << endl;

    float epsilon = 0.01f;   // lower bound used by the diffusivity
    getParam("epsilon", epsilon, argc, argv);
    cout << "ε: " << epsilon << endl;

    int N = 100;             // number of solver iterations
    getParam("N", N, argc, argv);
    cout << "N: " << N << endl;

    // Init camera / Load input image
#ifdef CAMERA
    // Init camera
    cv::VideoCapture camera(0);
    if(!camera.isOpened()) { cerr << "ERROR: Could not open camera" << endl; return 1; }
    int camW = 640;
    int camH = 480;
    camera.set(CV_CAP_PROP_FRAME_WIDTH,camW);
    camera.set(CV_CAP_PROP_FRAME_HEIGHT,camH);
    // read in first frame to get the dimensions
    cv::Mat mIn;
    camera >> mIn;
#else
    // Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale))
    cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
    // check
    if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; }
#endif

    // convert to float representation (opencv loads image values as single bytes by default)
    mIn.convertTo(mIn,CV_32F);
    // convert range of each channel to [0,1] (opencv default is [0,255])
    mIn /= 255.f;

    // get image dimensions
    int w = mIn.cols;         // width
    int h = mIn.rows;         // height
    int nc = mIn.channels();  // number of channels
    cout << "image: " << w << " x " << h << endl;

    // Output image: same size and type (hence channel count) as the input
    cv::Mat mOut(h,w,mIn.type());
    //cv::Mat mOut(h,w,CV_32FC3);    // mOut will be a color image, 3 layers
    //cv::Mat mOut(h,w,CV_32FC1);    // mOut will be a grayscale image, 1 layer

    // allocate raw input image array
    float *imgIn = new float[(size_t)w*h*nc];
    // allocate raw output array (the computation result will be stored in this array, then later converted to mOut for displaying)
    float *imgOut = new float[(size_t)w*h*mOut.channels()];

    // For camera mode: Make a loop to read in camera frames
#ifdef CAMERA
    // Read a camera image frame every 30 milliseconds:
    // cv::waitKey(30) waits 30 milliseconds for a keyboard input,
    // returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
    while (cv::waitKey(30) < 0)
    {
    // Get camera image
    camera >> mIn;
    // convert to float representation (opencv loads image values as single bytes by default)
    mIn.convertTo(mIn,CV_32F);
    // convert range of each channel to [0,1] (opencv default is [0,255])
    mIn /= 255.f;
#endif

    // Init raw input image array
    // opencv images are interleaved: rgb rgb rgb... (actually bgr bgr bgr...)
    // But for CUDA it's better to work with layered images: rrr... ggg... bbb...
    // So we convert as necessary, using interleaved "cv::Mat" for loading/saving/displaying, and layered "float*" for CUDA computations
    convert_mat_to_layered (imgIn, mIn);

    Timer timer;
    float t;

#ifdef USING_GPU
    timer.start();
    // Repetitions Loop
    for(int rep = 0; rep < repeats; rep++)
    {
        // Sizes are computed in size_t so large images do not overflow int.
        const size_t n_pixels = (size_t)w * h;
        const size_t count = n_pixels * nc;

        // Thread Dimensions
        dim3 block = dim3(16, 16, 1);
        dim3 grid = dim3((w + block.x - 1) / block.x, (h + block.y - 1) / block.y, 1);

        // Allocating memory on the device
        float *d_imgIn = NULL;
        bool *d_mask = NULL;
        float *d_g = NULL;
        cudaMalloc(&d_imgIn, count * sizeof(float)); CUDA_CHECK;
        // One bool per pixel: the kernels index d_mask[x + w*y] directly, so the
        // buffer must hold n_pixels elements (it was previously 8x too small).
        cudaMalloc(&d_mask, n_pixels * sizeof(bool)); CUDA_CHECK;
        cudaMalloc(&d_g, n_pixels * sizeof(float)); CUDA_CHECK;
        cout << "n_pixels = " << n_pixels << "sizeof(bool)" << sizeof(bool) ;

        // Copying Input image to device
        cudaMemcpy(d_imgIn, imgIn, count * sizeof(float), cudaMemcpyHostToDevice); CUDA_CHECK;

        // Mark the region to fill and give it a neutral initial value
        findGreen <<<grid, block>>> (d_imgIn, d_mask, n_pixels, w, h, nc); CUDA_CHECK;

        for (int n = 0; n < N; n++) {
            // None of the kernels uses shared memory, so none is requested.
            compute_g<<< grid, block >>>(d_imgIn, d_g, w, h, nc, epsilon); CUDA_CHECK;
            rb_gs_update<<< grid, block >>>(d_imgIn, d_g, d_mask, w, h, nc, lambda, theta, 0); CUDA_CHECK;
            rb_gs_update<<< grid, block >>>(d_imgIn, d_g, d_mask, w, h, nc, lambda, theta, 1); CUDA_CHECK;
        }

        // Copying result back (blocking copy, so kernel execution errors surface here)
        cudaMemcpy(imgOut, d_imgIn, count * sizeof(float), cudaMemcpyDeviceToHost);
        CUDA_CHECK;

        // Freeing Memory
        cudaFree(d_imgIn); CUDA_CHECK;
        cudaFree(d_mask); CUDA_CHECK;
        cudaFree(d_g); CUDA_CHECK;
    }
    timer.end();
    t = timer.get();
#else // USING_GPU
    // CPU Implementation
    // NOTE(review): this branch references "rad" and "kernel", which are not
    // declared anywhere in this function; it looks like a leftover from a
    // convolution exercise and will not compile if USING_GPU is undefined.
    timer.start();
    // Repetitions Loop
    for(int rep = 0; rep < repeats; rep++)
    {
        for(int ix = 0; ix < w; ix++)
        {
            for(int iy = 0; iy < h; iy++)
            {
                for(int iz = 0; iz < nc; iz++)
                {
                    int idx = ix + (iy * w) + (iz * w * h);
                    imgOut[idx] = 0; // initialize
                    float value = 0;
                    for(int j = -rad; j <= rad; j++) // for each row in kernel
                    {
                        int iny = max(0, min(iy+j, h-1));
                        for(int i = -rad; i <= rad; i++) // for each element in the kernel row
                        {
                            int inx = max(0, min(ix+i, w-1));
                            int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
                            value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * rad)];
                        }
                    }
                    imgOut[idx] = value;
                }
            }
        }
    }
    timer.end();
    t = timer.get(); // elapsed time in seconds
#endif
    cout << "time: " << t*1000 << " ms" << endl;

    // show input image
    showImage("Input", mIn, 100, 100); // show at position (x_from_left=100,y_from_above=100)

    // show output image: first convert to interleaved opencv format from the layered raw array
    convert_layered_to_mat(mOut, imgOut);
    showImage("Output", mOut, 100+w+40, 100);

#ifdef CAMERA
    // end of camera loop
    }
#else
    // wait for key inputs
    cv::waitKey(0);
#endif

    // save input and result
    cv::imwrite("image_input.png",mIn*255.f); // "imwrite" assumes channel range [0,255]
    cv::imwrite("image_result.png",mOut*255.f);

    // free allocated arrays
    delete[] imgIn;
    delete[] imgOut;

    // close all opencv windows
    cvDestroyAllWindows();
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment