Commit f7960fc2 authored by Ravikishore

Merge branch 'master' of https://github.com/gkernel/cuda_lab

parents 79a7bd92 06d27721
main main
image_input.png
image_result.png
*.swp
...@@ -66,18 +66,18 @@ __device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int ra ...@@ -66,18 +66,18 @@ __device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int ra
if (idx < w * h * nc) if (idx < w * h * nc)
{ {
imgOut[idx] = 0; // initialize imgOut[idx] = 0; // initialize
float value = 0; float value = 0;
for(int j = -rad; j < rad; j++) // for each row in kernel for(int j = -rad; j <= rad; j++) // for each row in kernel
{ {
int iny = gpu_max(0, gpu_min(iy+j, h-1)); int iny = gpu_max(0, gpu_min(iy+j, h-1));
for(int i = -rad; i < rad; i++) // for each element in the kernel row for(int i = -rad; i <= rad; i++) // for each element in the kernel row
{ {
int inx = gpu_max(0, gpu_min(ix+i, w-1)); int inx = gpu_max(0, gpu_min(ix+i, w-1));
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * rad)]; value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * (2 * rad + 1))];
}
} }
} imgOut[idx] = value;
imgOut[idx] = value;
} }
} }
...@@ -191,9 +191,11 @@ int main(int argc, char **argv) ...@@ -191,9 +191,11 @@ int main(int argc, char **argv)
float *imgOut = new float[(size_t)w*h*mOut.channels()]; float *imgOut = new float[(size_t)w*h*mOut.channels()];
int rad = ceil(3 * sigma); // kernel radius int rad = ceil(3 * sigma); // kernel radius
int kw = 2 * rad; // kernel width int kw = 2 * rad + 1; // kernel width
float c = 1. / (2. * 3.142857 * sigma * sigma); // constant float c = 1. / (2. * 3.142857 * sigma * sigma); // constant
cout << "c = " << c << endl;
float *kernel = new float[(size_t) (kw * kw)]; // kernel float *kernel = new float[(size_t) (kw * kw)]; // kernel
float *kernelOut = new float[(size_t) (kw * kw)]; // kernel to be displayed float *kernelOut = new float[(size_t) (kw * kw)]; // kernel to be displayed
...@@ -232,9 +234,9 @@ int main(int argc, char **argv) ...@@ -232,9 +234,9 @@ int main(int argc, char **argv)
} }
// Display Kernel // Display Kernel
cv::Mat cvKernelOut(2*rad, 2*rad, CV_32F); cv::Mat cvKernelOut(kw, kw, CV_32FC1);
convert_layered_to_mat(cvKernelOut, kernelOut); convert_layered_to_mat(cvKernelOut, kernelOut);
showImage("Kernel", cvKernelOut, 100, 10); showImage("Kernel", cvKernelOut, 100, 100);
// For camera mode: Make a loop to read in camera frames // For camera mode: Make a loop to read in camera frames
...@@ -280,10 +282,10 @@ int main(int argc, char **argv) ...@@ -280,10 +282,10 @@ int main(int argc, char **argv)
// Allocating memory on the device // Allocating memory on the device
float *d_imgIn = NULL; float *d_imgIn = NULL;
float *d_imgOut = NULL; float *d_imgOut = NULL;
float *d_kernel = NULL; float *d_kernel = NULL;
cudaMalloc(&d_imgIn, count * sizeof(float)); cudaMalloc(&d_imgIn, count * sizeof(float));
cudaMalloc(&d_imgOut, count * sizeof(float)); cudaMalloc(&d_imgOut, count * sizeof(float));
cudaMalloc(&d_kernel, kw * kw * sizeof(float)); cudaMalloc(&d_kernel, kw * kw * sizeof(float));
// Copying Input image to device, and initializing result to 0 // Copying Input image to device, and initializing result to 0
cudaMemcpy(d_imgIn, imgIn, count * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(d_imgIn, imgIn, count * sizeof(float), cudaMemcpyHostToDevice);
...@@ -323,10 +325,10 @@ int main(int argc, char **argv) ...@@ -323,10 +325,10 @@ int main(int argc, char **argv)
int idx = ix + (iy * w) + (iz * w * h); int idx = ix + (iy * w) + (iz * w * h);
imgOut[idx] = 0; // initialize imgOut[idx] = 0; // initialize
float value = 0; float value = 0;
for(int j = -rad; j < rad; j++) // for each row in kernel for(int j = -rad; j <= rad; j++) // for each row in kernel
{ {
int iny = max(0, min(iy+j, h-1)); int iny = max(0, min(iy+j, h-1));
for(int i = -rad; i < rad; i++) // for each element in the kernel row for(int i = -rad; i <= rad; i++) // for each element in the kernel row
{ {
int inx = max(0, min(ix+i, w-1)); int inx = max(0, min(ix+i, w-1));
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
......
# Builds the executable 'main' from main.cu and aux.cu with nvcc. The target is
# rebuilt whenever main.cu, aux.cu, aux.h or this Makefile changes.
# Flags: verbose ptxas output, fast-math, -Wall for the host compiler, and
# linking against the OpenCV highgui and core libraries.
main: main.cu aux.cu aux.h Makefile
nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#include "aux.h"
#include <cstdlib>
#include <iostream>
using std::stringstream;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
// parameter processing: template specialization for T=bool
// Specialization of getParam for boolean switches.
// Looks for "-<param>" in argv[argc-1] down to argv[1], so the right-most
// occurrence is the one that is used.
//   - If the switch is the final argument, or the argument after it starts
//     with '-', var is set to true and true is returned.
//   - Otherwise the following argument is streamed into var and the stream's
//     success state is returned.
// Returns false when the switch does not occur at all.
template<>
bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
{
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        const bool matches = (arg[0] == '-') && (strcmp(arg + 1, param.c_str()) == 0);
        if (!matches) continue;

        // A bare switch (nothing after it, or another option after it) means "true".
        const bool valueFollows = (pos + 1 < argc) && (argv[pos + 1][0] != '-');
        if (!valueFollows)
        {
            var = true;
            return true;
        }

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Converts a layered (planar) float image into an interleaved one.
//   aIn  : nc consecutive planes of w*h floats each.
//   aOut : receives w*h pixels of nc floats each.
// The channel order is reversed while copying: plane c of aIn is written to
// interleaved slot (nc-1-c). Both buffers must hold at least w*h*nc floats.
// Non-positive w, h or nc make the call a no-op.
void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[(nc - 1 - c) + nc * pixel] = aIn[pixel + nOmega * c];
            }
        }
    }
}
// Fills mOut from the layered float buffer aIn by calling
// convert_layered_to_interleaved with mOut's cols, rows and channel count.
// aIn must hold mOut.cols * mOut.rows * mOut.channels() floats.
// An empty matrix is left untouched.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
{
    if (mOut.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mOut.depth() == CV_32F && mOut.isContinuous());

    convert_layered_to_interleaved((float*)mOut.data, aIn, mOut.cols, mOut.rows, mOut.channels());
}
// Converts an interleaved float image into a layered (planar) one.
//   aIn  : w*h pixels of nc floats each.
//   aOut : receives nc consecutive planes of w*h floats each.
// The channel order is reversed while copying: interleaved slot (nc-1-c) of
// aIn is written to plane c of aOut. Both buffers must hold at least w*h*nc
// floats. Non-positive w, h or nc make the call a no-op.
void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[pixel + nOmega * c] = aIn[(nc - 1 - c) + nc * pixel];
            }
        }
    }
}
// Copies the contents of mIn into the layered float buffer aOut by calling
// convert_interleaved_to_layered with mIn's cols, rows and channel count.
// aOut must have room for mIn.cols * mIn.rows * mIn.channels() floats.
// An empty matrix produces no output.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
{
    if (mIn.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mIn.depth() == CV_32F && mIn.isContinuous());

    convert_interleaved_to_layered(aOut, (float*)mIn.data, mIn.cols, mIn.rows, mIn.channels());
}
// Creates a window called title with the CV_WINDOW_AUTOSIZE flag, moves it to
// screen position (x, y) and shows mat in it.
void showImage(string title, const cv::Mat &mat, int x, int y)
{
    cv::namedWindow(title, CV_WINDOW_AUTOSIZE);
    cvMoveWindow(title.c_str(), x, y);   // C API call, needs a C string
    cv::imshow(title, mat);
}
// adding Gaussian noise
// Returns one Gaussian-distributed sample scaled by sigma, produced with the
// Box-Muller formula from two consecutive rand() draws.
float noise(float sigma)
{
    const float u1 = (float)rand()/RAND_MAX;
    const float u2 = (float)rand()/RAND_MAX;
    // u1 is clamped to at least 1e-6 so the logarithm stays finite.
    const float radius = sqrtf(-2*log(std::max(u1,0.000001f)));
    // Narrowed to float here, exactly as happens when passing it to cosf.
    const float angle = 2*M_PI*u2;
    return sigma * radius * cosf(angle);
}
// Adds an independent noise(sigma) sample to every float element of m, in place.
// m must have 32-bit float depth; an empty matrix is left untouched.
void addNoise(cv::Mat &m, float sigma)
{
    if (m.empty()) return;

    // Elements are accessed as float below.
    CV_Assert(m.depth() == CV_32F);

    // Walk the matrix row by row through ptr<float>() so that matrices with
    // padding between rows are handled as well. For a continuous matrix the
    // elements are visited in the same order as a flat walk over m.data.
    const size_t rowLen = (size_t)m.cols * m.channels();
    for (int y = 0; y < m.rows; y++)
    {
        float *row = m.ptr<float>(y);
        for (size_t i = 0; i < rowLen; i++)
        {
            row[i] += noise(sigma);
        }
    }
}
// cuda error checking
// Location (file, line) recorded by the most recent cuda_check call that
// found no error; prev_line stays 0 until the first such call.
string prev_file = "";
int prev_line = 0;
// Queries cudaGetLastError().
//   - No error: remembers (file, line) in prev_file / prev_line and returns.
//   - Error: prints the location, the error string and its numeric code to
//     cout, followed by the previously remembered location if there is one,
//     then terminates the process with exit(1).
void cuda_check(string file, int line)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        prev_file = file;
        prev_line = line;
        return;
    }

    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
    if (prev_line>0) cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
    exit(1);
}
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#ifndef AUX_H
#define AUX_H
#include <cuda_runtime.h>
#include <ctime>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <sstream>
// parameter processing
// Generic command-line option lookup.
// Scans argv[argc-1] down to argv[1] for "-<param>". The first match (i.e. the
// right-most one) that is followed by another argument has that argument
// streamed into var; the return value tells whether the extraction succeeded.
// A match in the very last position has no value and is skipped.
// Returns false if no usable match exists; var is not written in that case.
template<typename T>
bool getParam(std::string param, T &var, int argc, char **argv)
{
    const char *name = param.c_str();
    for (int pos = argc - 1; pos >= 1; --pos)
    {
        const char *arg = argv[pos];
        const bool isMatch = (arg[0] == '-') && (strcmp(arg + 1, name) == 0);
        const bool hasValue = (pos + 1 < argc);
        if (!isMatch || !hasValue) continue;

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Copies the float data of mIn into aOut as consecutive channel planes
// (layered layout), reversing the channel order. aOut must have room for
// cols * rows * channels floats.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);
// Fills mOut from the layered float buffer aIn, interleaving the channels and
// reversing their order. aIn must hold cols * rows * channels floats.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);
// Shows mat in a window named title, positioned at screen coordinates (x, y).
void showImage(std::string title, const cv::Mat &mat, int x, int y);
// adding Gaussian noise
// Adds a Gaussian noise sample scaled by sigma to every float element of m, in place.
void addNoise(cv::Mat &m, float sigma);
// measuring time
// Stopwatch based on clock(). Stopping the timer first calls
// cudaDeviceSynchronize() and then reads the clock.
// Note: clock() reports processor time used by the program.
class Timer
{
public:
    Timer() : startTicks(0), active(false), elapsed(0.f) {}

    // Remembers the current clock() value and marks the timer as running.
    void start()
    {
        startTicks = clock();
        active = true;
    }

    // Stops a running timer and stores the elapsed seconds.
    // If the timer was not running, the stored time is reset to 0.
    void end()
    {
        if (active)
        {
            cudaDeviceSynchronize();
            const clock_t stopTicks = clock();
            elapsed = (float)(stopTicks - startTicks) / CLOCKS_PER_SEC;
            active = false;
        }
        else
        {
            elapsed = 0;
        }
    }

    // Returns the stored time in seconds, stopping the timer first if it is
    // still running.
    float get()
    {
        if (active) end();
        return elapsed;
    }

private:
    clock_t startTicks;   // clock() value captured by start()
    bool active;          // true between start() and end()
    float elapsed;        // seconds measured by the last end()
};
// cuda error checking
// Expands to a cuda_check call tagged with the current source file and line.
// The macro takes no arguments; write it as a statement: CUDA_CHECK;
#define CUDA_CHECK cuda_check(__FILE__,__LINE__)
// Reads cudaGetLastError(); on an error it prints the given location and the
// error text and exits with status 1, otherwise it remembers (file, line) as
// the previous checked location.
void cuda_check(std::string file, int line);
#endif // AUX_H
This diff is collapsed.
Assign1 @ cfbd98ec
Subproject commit cfbd98ece1b84d654d3eb9c73714b421616a4199
# Builds the executable 'main' from main.cu and aux.cu with nvcc. The target is
# rebuilt whenever main.cu, aux.cu, aux.h or this Makefile changes.
# Flags: verbose ptxas output, fast-math, -Wall for the host compiler, and
# linking against the OpenCV highgui and core libraries.
main: main.cu aux.cu aux.h Makefile
nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#include "aux.h"
#include <cstdlib>
#include <iostream>
using std::stringstream;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
// parameter processing: template specialization for T=bool
// Specialization of getParam for boolean switches.
// Looks for "-<param>" in argv[argc-1] down to argv[1], so the right-most
// occurrence is the one that is used.
//   - If the switch is the final argument, or the argument after it starts
//     with '-', var is set to true and true is returned.
//   - Otherwise the following argument is streamed into var and the stream's
//     success state is returned.
// Returns false when the switch does not occur at all.
template<>
bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
{
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        const bool matches = (arg[0] == '-') && (strcmp(arg + 1, param.c_str()) == 0);
        if (!matches) continue;

        // A bare switch (nothing after it, or another option after it) means "true".
        const bool valueFollows = (pos + 1 < argc) && (argv[pos + 1][0] != '-');
        if (!valueFollows)
        {
            var = true;
            return true;
        }

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Converts a layered (planar) float image into an interleaved one.
//   aIn  : nc consecutive planes of w*h floats each.
//   aOut : receives w*h pixels of nc floats each.
// The channel order is reversed while copying: plane c of aIn is written to
// interleaved slot (nc-1-c). Both buffers must hold at least w*h*nc floats.
// Non-positive w, h or nc make the call a no-op.
void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[(nc - 1 - c) + nc * pixel] = aIn[pixel + nOmega * c];
            }
        }
    }
}
// Fills mOut from the layered float buffer aIn by calling
// convert_layered_to_interleaved with mOut's cols, rows and channel count.
// aIn must hold mOut.cols * mOut.rows * mOut.channels() floats.
// An empty matrix is left untouched.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
{
    if (mOut.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mOut.depth() == CV_32F && mOut.isContinuous());

    convert_layered_to_interleaved((float*)mOut.data, aIn, mOut.cols, mOut.rows, mOut.channels());
}
// Converts an interleaved float image into a layered (planar) one.
//   aIn  : w*h pixels of nc floats each.
//   aOut : receives nc consecutive planes of w*h floats each.
// The channel order is reversed while copying: interleaved slot (nc-1-c) of
// aIn is written to plane c of aOut. Both buffers must hold at least w*h*nc
// floats. Non-positive w, h or nc make the call a no-op.
void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[pixel + nOmega * c] = aIn[(nc - 1 - c) + nc * pixel];
            }
        }
    }
}
// Copies the contents of mIn into the layered float buffer aOut by calling
// convert_interleaved_to_layered with mIn's cols, rows and channel count.
// aOut must have room for mIn.cols * mIn.rows * mIn.channels() floats.
// An empty matrix produces no output.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
{
    if (mIn.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mIn.depth() == CV_32F && mIn.isContinuous());

    convert_interleaved_to_layered(aOut, (float*)mIn.data, mIn.cols, mIn.rows, mIn.channels());
}
// Creates a window called title with the CV_WINDOW_AUTOSIZE flag, moves it to
// screen position (x, y) and shows mat in it.
void showImage(string title, const cv::Mat &mat, int x, int y)
{
    cv::namedWindow(title, CV_WINDOW_AUTOSIZE);
    cvMoveWindow(title.c_str(), x, y);   // C API call, needs a C string
    cv::imshow(title, mat);
}
// adding Gaussian noise
// Returns one Gaussian-distributed sample scaled by sigma, produced with the
// Box-Muller formula from two consecutive rand() draws.
float noise(float sigma)
{
    const float u1 = (float)rand()/RAND_MAX;
    const float u2 = (float)rand()/RAND_MAX;
    // u1 is clamped to at least 1e-6 so the logarithm stays finite.
    const float radius = sqrtf(-2*log(std::max(u1,0.000001f)));
    // Narrowed to float here, exactly as happens when passing it to cosf.
    const float angle = 2*M_PI*u2;
    return sigma * radius * cosf(angle);
}
// Adds an independent noise(sigma) sample to every float element of m, in place.
// m must have 32-bit float depth; an empty matrix is left untouched.
void addNoise(cv::Mat &m, float sigma)
{
    if (m.empty()) return;

    // Elements are accessed as float below.
    CV_Assert(m.depth() == CV_32F);

    // Walk the matrix row by row through ptr<float>() so that matrices with
    // padding between rows are handled as well. For a continuous matrix the
    // elements are visited in the same order as a flat walk over m.data.
    const size_t rowLen = (size_t)m.cols * m.channels();
    for (int y = 0; y < m.rows; y++)
    {
        float *row = m.ptr<float>(y);
        for (size_t i = 0; i < rowLen; i++)
        {
            row[i] += noise(sigma);
        }
    }
}
// cuda error checking
// Location (file, line) recorded by the most recent cuda_check call that
// found no error; prev_line stays 0 until the first such call.
string prev_file = "";
int prev_line = 0;
// Queries cudaGetLastError().
//   - No error: remembers (file, line) in prev_file / prev_line and returns.
//   - Error: prints the location, the error string and its numeric code to
//     cout, followed by the previously remembered location if there is one,
//     then terminates the process with exit(1).
void cuda_check(string file, int line)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        prev_file = file;
        prev_line = line;
        return;
    }

    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
    if (prev_line>0) cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
    exit(1);
}
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#ifndef AUX_H
#define AUX_H
#include <cuda_runtime.h>
#include <ctime>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <sstream>
// parameter processing
// Generic command-line option lookup.
// Scans argv[argc-1] down to argv[1] for "-<param>". The first match (i.e. the
// right-most one) that is followed by another argument has that argument
// streamed into var; the return value tells whether the extraction succeeded.
// A match in the very last position has no value and is skipped.
// Returns false if no usable match exists; var is not written in that case.
template<typename T>
bool getParam(std::string param, T &var, int argc, char **argv)
{
    const char *name = param.c_str();
    for (int pos = argc - 1; pos >= 1; --pos)
    {
        const char *arg = argv[pos];
        const bool isMatch = (arg[0] == '-') && (strcmp(arg + 1, name) == 0);
        const bool hasValue = (pos + 1 < argc);
        if (!isMatch || !hasValue) continue;

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Copies the float data of mIn into aOut as consecutive channel planes
// (layered layout), reversing the channel order. aOut must have room for
// cols * rows * channels floats.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);
// Fills mOut from the layered float buffer aIn, interleaving the channels and
// reversing their order. aIn must hold cols * rows * channels floats.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);
// Shows mat in a window named title, positioned at screen coordinates (x, y).
void showImage(std::string title, const cv::Mat &mat, int x, int y);
// adding Gaussian noise
// Adds a Gaussian noise sample scaled by sigma to every float element of m, in place.
void addNoise(cv::Mat &m, float sigma);
// measuring time
// Stopwatch based on clock(). Stopping the timer first calls
// cudaDeviceSynchronize() and then reads the clock.
// Note: clock() reports processor time used by the program.
class Timer
{
public:
    Timer() : startTicks(0), active(false), elapsed(0.f) {}

    // Remembers the current clock() value and marks the timer as running.
    void start()
    {
        startTicks = clock();
        active = true;
    }

    // Stops a running timer and stores the elapsed seconds.
    // If the timer was not running, the stored time is reset to 0.
    void end()
    {
        if (active)
        {
            cudaDeviceSynchronize();
            const clock_t stopTicks = clock();
            elapsed = (float)(stopTicks - startTicks) / CLOCKS_PER_SEC;
            active = false;
        }
        else
        {
            elapsed = 0;
        }
    }

    // Returns the stored time in seconds, stopping the timer first if it is
    // still running.
    float get()
    {
        if (active) end();
        return elapsed;
    }

private:
    clock_t startTicks;   // clock() value captured by start()
    bool active;          // true between start() and end()
    float elapsed;        // seconds measured by the last end()
};
// cuda error checking
// Expands to a cuda_check call tagged with the current source file and line.
// The macro takes no arguments; write it as a statement: CUDA_CHECK;
#define CUDA_CHECK cuda_check(__FILE__,__LINE__)
// Reads cudaGetLastError(); on an error it prints the given location and the
// error text and exits with status 1, otherwise it remembers (file, line) as
// the previous checked location.
void cuda_check(std::string file, int line);
#endif // AUX_H
This diff is collapsed.
# Builds the executable 'main' from main.cu and aux.cu with nvcc. The target is
# rebuilt whenever main.cu, aux.cu, aux.h or this Makefile changes.
# Flags: verbose ptxas output, fast-math, -Wall for the host compiler, and
# linking against the OpenCV highgui and core libraries.
main: main.cu aux.cu aux.h Makefile
nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#include "aux.h"
#include <cstdlib>
#include <iostream>
using std::stringstream;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
// parameter processing: template specialization for T=bool
// Specialization of getParam for boolean switches.
// Looks for "-<param>" in argv[argc-1] down to argv[1], so the right-most
// occurrence is the one that is used.
//   - If the switch is the final argument, or the argument after it starts
//     with '-', var is set to true and true is returned.
//   - Otherwise the following argument is streamed into var and the stream's
//     success state is returned.
// Returns false when the switch does not occur at all.
template<>
bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
{
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        const bool matches = (arg[0] == '-') && (strcmp(arg + 1, param.c_str()) == 0);
        if (!matches) continue;

        // A bare switch (nothing after it, or another option after it) means "true".
        const bool valueFollows = (pos + 1 < argc) && (argv[pos + 1][0] != '-');
        if (!valueFollows)
        {
            var = true;
            return true;
        }

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Converts a layered (planar) float image into an interleaved one.
//   aIn  : nc consecutive planes of w*h floats each.
//   aOut : receives w*h pixels of nc floats each.
// The channel order is reversed while copying: plane c of aIn is written to
// interleaved slot (nc-1-c). Both buffers must hold at least w*h*nc floats.
// Non-positive w, h or nc make the call a no-op.
void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[(nc - 1 - c) + nc * pixel] = aIn[pixel + nOmega * c];
            }
        }
    }
}
// Fills mOut from the layered float buffer aIn by calling
// convert_layered_to_interleaved with mOut's cols, rows and channel count.
// aIn must hold mOut.cols * mOut.rows * mOut.channels() floats.
// An empty matrix is left untouched.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
{
    if (mOut.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mOut.depth() == CV_32F && mOut.isContinuous());

    convert_layered_to_interleaved((float*)mOut.data, aIn, mOut.cols, mOut.rows, mOut.channels());
}
// Converts an interleaved float image into a layered (planar) one.
//   aIn  : w*h pixels of nc floats each.
//   aOut : receives nc consecutive planes of w*h floats each.
// The channel order is reversed while copying: interleaved slot (nc-1-c) of
// aIn is written to plane c of aOut. Both buffers must hold at least w*h*nc
// floats. Non-positive w, h or nc make the call a no-op.
void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is formed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;

    // A single channel has the same layout in both formats.
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[pixel + nOmega * c] = aIn[(nc - 1 - c) + nc * pixel];
            }
        }
    }
}
// Copies the contents of mIn into the layered float buffer aOut by calling
// convert_interleaved_to_layered with mIn's cols, rows and channel count.
// aOut must have room for mIn.cols * mIn.rows * mIn.channels() floats.
// An empty matrix produces no output.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
{
    if (mIn.empty()) return;

    // The raw buffer is treated as one dense float array below, so the matrix
    // must store 32-bit floats and must not have padding between rows.
    CV_Assert(mIn.depth() == CV_32F && mIn.isContinuous());

    convert_interleaved_to_layered(aOut, (float*)mIn.data, mIn.cols, mIn.rows, mIn.channels());
}
// Creates a window called title with the CV_WINDOW_AUTOSIZE flag, moves it to
// screen position (x, y) and shows mat in it.
void showImage(string title, const cv::Mat &mat, int x, int y)
{
    cv::namedWindow(title, CV_WINDOW_AUTOSIZE);
    cvMoveWindow(title.c_str(), x, y);   // C API call, needs a C string
    cv::imshow(title, mat);
}
// adding Gaussian noise
// Returns one Gaussian-distributed sample scaled by sigma, produced with the
// Box-Muller formula from two consecutive rand() draws.
float noise(float sigma)
{
    const float u1 = (float)rand()/RAND_MAX;
    const float u2 = (float)rand()/RAND_MAX;
    // u1 is clamped to at least 1e-6 so the logarithm stays finite.
    const float radius = sqrtf(-2*log(std::max(u1,0.000001f)));
    // Narrowed to float here, exactly as happens when passing it to cosf.
    const float angle = 2*M_PI*u2;
    return sigma * radius * cosf(angle);
}
// Adds an independent noise(sigma) sample to every float element of m, in place.
// m must have 32-bit float depth; an empty matrix is left untouched.
void addNoise(cv::Mat &m, float sigma)
{
    if (m.empty()) return;

    // Elements are accessed as float below.
    CV_Assert(m.depth() == CV_32F);

    // Walk the matrix row by row through ptr<float>() so that matrices with
    // padding between rows are handled as well. For a continuous matrix the
    // elements are visited in the same order as a flat walk over m.data.
    const size_t rowLen = (size_t)m.cols * m.channels();
    for (int y = 0; y < m.rows; y++)
    {
        float *row = m.ptr<float>(y);
        for (size_t i = 0; i < rowLen; i++)
        {
            row[i] += noise(sigma);
        }
    }
}
// cuda error checking
// Location (file, line) recorded by the most recent cuda_check call that
// found no error; prev_line stays 0 until the first such call.
string prev_file = "";
int prev_line = 0;
// Queries cudaGetLastError().
//   - No error: remembers (file, line) in prev_file / prev_line and returns.
//   - Error: prints the location, the error string and its numeric code to
//     cout, followed by the previously remembered location if there is one,
//     then terminates the process with exit(1).
void cuda_check(string file, int line)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        prev_file = file;
        prev_line = line;
        return;
    }

    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
    if (prev_line>0) cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
    exit(1);
}
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#ifndef AUX_H
#define AUX_H
#include <cuda_runtime.h>
#include <ctime>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <sstream>
// parameter processing
// Generic command-line option lookup.
// Scans argv[argc-1] down to argv[1] for "-<param>". The first match (i.e. the
// right-most one) that is followed by another argument has that argument
// streamed into var; the return value tells whether the extraction succeeded.
// A match in the very last position has no value and is skipped.
// Returns false if no usable match exists; var is not written in that case.
template<typename T>
bool getParam(std::string param, T &var, int argc, char **argv)
{
    const char *name = param.c_str();
    for (int pos = argc - 1; pos >= 1; --pos)
    {
        const char *arg = argv[pos];
        const bool isMatch = (arg[0] == '-') && (strcmp(arg + 1, name) == 0);
        const bool hasValue = (pos + 1 < argc);
        if (!isMatch || !hasValue) continue;

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return (bool)parser;
    }
    return false;
}
// opencv helpers
// Copies the float data of mIn into aOut as consecutive channel planes
// (layered layout), reversing the channel order. aOut must have room for
// cols * rows * channels floats.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);
// Fills mOut from the layered float buffer aIn, interleaving the channels and
// reversing their order. aIn must hold cols * rows * channels floats.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);
// Shows mat in a window named title, positioned at screen coordinates (x, y).
void showImage(std::string title, const cv::Mat &mat, int x, int y);
// adding Gaussian noise
// Adds a Gaussian noise sample scaled by sigma to every float element of m, in place.
void addNoise(cv::Mat &m, float sigma);
// measuring time
// Stopwatch based on clock(). Stopping the timer first calls
// cudaDeviceSynchronize() and then reads the clock.
// Note: clock() reports processor time used by the program.
class Timer
{
public:
    Timer() : startTicks(0), active(false), elapsed(0.f) {}

    // Remembers the current clock() value and marks the timer as running.
    void start()
    {
        startTicks = clock();
        active = true;
    }

    // Stops a running timer and stores the elapsed seconds.
    // If the timer was not running, the stored time is reset to 0.
    void end()
    {
        if (active)
        {
            cudaDeviceSynchronize();
            const clock_t stopTicks = clock();
            elapsed = (float)(stopTicks - startTicks) / CLOCKS_PER_SEC;
            active = false;
        }
        else
        {
            elapsed = 0;
        }
    }

    // Returns the stored time in seconds, stopping the timer first if it is
    // still running.
    float get()
    {
        if (active) end();
        return elapsed;
    }

private:
    clock_t startTicks;   // clock() value captured by start()
    bool active;          // true between start() and end()
    float elapsed;        // seconds measured by the last end()
};
// cuda error checking
// Expands to a cuda_check call tagged with the current source file and line.
// The macro takes no arguments; write it as a statement: CUDA_CHECK;
#define CUDA_CHECK cuda_check(__FILE__,__LINE__)
// Reads cudaGetLastError(); on an error it prints the given location and the
// error text and exits with status 1, otherwise it remembers (file, line) as
// the previous checked location.
void cuda_check(std::string file, int line);
#endif // AUX_H
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment