Commit a3e34e88 authored by Ravikishore

exercise 12 bonus mandelbrot set added

parent 9c772fb8
# Default goal: build the `main` executable.
all: main
# `main` is rebuilt whenever a CUDA source, the shared header, or this Makefile changes.
main: main.cu aux.cu aux.h Makefile
# One nvcc invocation compiles and links both .cu files. --ptxas-options=-v makes ptxas
# print its resource statistics, -Wall is forwarded to the host compiler, and the OpenCV
# highgui and core libraries are linked.
# NOTE(review): the recipe line below has no leading tab in this listing -- confirm the
# real Makefile has one.
nvcc -o main main.cu aux.cu --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#include "aux.h"
#include <cstdlib>
#include <iostream>
using std::stringstream;
using std::cerr;
using std::cout;
using std::endl;
using std::string;
// parameter processing: template specialization for T=bool
// getParam specialization for boolean switches.
//
// Walks argv from the last argument down to argv[1] looking for "-<param>".
//   * If the switch is the final argument, or the token after it starts
//     with '-', var is set to true and true is returned.
//   * Otherwise the following token is streamed into var and the stream's
//     success state is returned.
// Returns false (var untouched) when the switch does not occur at all.
template<>
bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
{
    const char *wanted = param.c_str();
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        const bool matches = (arg[0] == '-') && (strcmp(arg + 1, wanted) == 0);
        if (!matches) continue;

        // A value is only consumed when a next token exists and is not itself a switch.
        const bool hasValue = (pos + 1 < argc) && (argv[pos + 1][0] != '-');
        if (!hasValue)
        {
            var = true;
            return true;
        }

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return !parser.fail();
    }
    return false;
}
// opencv helpers
// Converts a layered float image into an interleaved one.
//
// Layered layout:     channel planes stored back to back, each plane row-major
//                     (index x + w*y + w*h*c).
// Interleaved layout: nc consecutive values per pixel (index k + nc*(x + w*y)).
// The channel order is reversed during the copy: layered channel c is written
// to interleaved slot nc-1-c. For nc == 1 both layouts coincide and the data
// is copied verbatim.
//
//   aOut  destination buffer, w*h*nc floats
//   aIn   source buffer, w*h*nc floats
//   w, h  image width and height in pixels
//   nc    number of channels
//
// Nothing is written when w, h or nc is not positive.
void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is computed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[(nc - 1 - c) + nc * pixel] = aIn[pixel + nOmega * c];
            }
        }
    }
}
// Fills mOut from the layered float array aIn.
// Width, height and channel count are taken from mOut itself, and mOut.data
// is reinterpreted as a float buffer for the interleaved result.
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
{
    float *dst = (float*)mOut.data;
    const int w = mOut.cols;
    const int h = mOut.rows;
    const int nc = mOut.channels();
    convert_layered_to_interleaved(dst, aIn, w, h, nc);
}
// Converts an interleaved float image into a layered one.
//
// Interleaved layout: nc consecutive values per pixel (index k + nc*(x + w*y)).
// Layered layout:     channel planes stored back to back, each plane row-major
//                     (index x + w*y + w*h*c).
// The channel order is reversed during the copy: interleaved slot nc-1-c is
// written to layered channel c. For nc == 1 both layouts coincide and the data
// is copied verbatim.
//
//   aOut  destination buffer, w*h*nc floats
//   aIn   source buffer, w*h*nc floats
//   w, h  image width and height in pixels
//   nc    number of channels
//
// Nothing is written when w, h or nc is not positive.
void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
{
    if (w <= 0 || h <= 0 || nc <= 0) return;

    // Pixel count is computed in size_t so that w*h cannot overflow int.
    const size_t nOmega = (size_t)w * h;
    if (nc == 1) { memcpy(aOut, aIn, nOmega * sizeof(float)); return; }

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            const size_t pixel = x + (size_t)w * y;
            for (int c = 0; c < nc; c++)
            {
                aOut[pixel + nOmega * c] = aIn[(nc - 1 - c) + nc * pixel];
            }
        }
    }
}
// Writes the contents of mIn into aOut in layered form.
// Width, height and channel count are taken from mIn, and mIn.data is
// reinterpreted as a float buffer holding the interleaved source.
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
{
    const float *src = (float*)mIn.data;
    const int w = mIn.cols;
    const int h = mIn.rows;
    const int nc = mIn.channels();
    convert_interleaved_to_layered(aOut, src, w, h, nc);
}
// Creates an auto-sized window named `title`, moves it to screen position
// (x, y) and shows `mat` in it.
void showImage(string title, const cv::Mat &mat, int x, int y)
{
    cv::namedWindow(title.c_str(), CV_WINDOW_AUTOSIZE);
    cvMoveWindow(title.c_str(), x, y);
    cv::imshow(title.c_str(), mat);
}
// adding Gaussian noise
// Draws one noise sample scaled by sigma.
// Two values in [0, 1] are taken from rand(); the first is clamped from below
// to 1e-6 before the logarithm, the second selects the angle of the cosine
// term.
float noise(float sigma)
{
    const float u1 = (float)rand()/RAND_MAX;
    const float u2 = (float)rand()/RAND_MAX;
    const float clamped = std::max(u1, 0.000001f);   // keeps log() away from 0
    const float magnitude = sqrtf(-2*log(clamped));
    const float direction = cosf(2*M_PI*u2);
    return sigma * magnitude * direction;
}
// Adds an independent noise(sigma) sample to every float element of m.
// m.data is treated as a float buffer of cols*rows*channels() elements.
void addNoise(cv::Mat &m, float sigma)
{
    const size_t total = (size_t)m.cols * m.rows * m.channels();
    float *cursor = (float*)m.data;
    for (float *stop = cursor + total; cursor != stop; ++cursor)
    {
        *cursor += noise(sigma);
    }
}
// cuda error checking
// Location of the most recent check that passed; printed as context when a
// later check fails.
string prev_file = "";
int prev_line = 0;

// Queries cudaGetLastError(). On success the given location is remembered.
// On failure the location, the error string and the numeric code are printed
// to stdout, followed by the last location that passed (if any), and the
// process exits with status 1.
void cuda_check(string file, int line)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        prev_file = file;
        prev_line = line;
        return;
    }

    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
    if (prev_line > 0)
    {
        cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
    }
    exit(1);
}
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
// ###
// ###
#ifndef AUX_H
#define AUX_H
#include <cuda_runtime.h>
#include <ctime>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <string>
#include <sstream>
// parameter processing
// Looks for "-<param> <value>" on the command line and parses <value> into var.
//
// argv is scanned from the last argument down to argv[1], so the last
// occurrence with a value wins. An occurrence that is the final argument (no
// value after it) is skipped and the scan continues.
//
// Returns true when a value was found and streamed into var successfully,
// false when the switch is absent or the value could not be parsed.
template<typename T>
bool getParam(std::string param, T &var, int argc, char **argv)
{
    const char *wanted = param.c_str();
    int pos = argc;
    while (--pos >= 1)
    {
        const char *arg = argv[pos];
        if (arg[0] != '-' || strcmp(arg + 1, wanted) != 0) continue;
        if (pos + 1 >= argc) continue;   // switch without a value: keep looking

        std::stringstream parser;
        parser << argv[pos + 1];
        parser >> var;
        return !parser.fail();
    }
    return false;
}
// opencv helpers
void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);
void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);
void showImage(std::string title, const cv::Mat &mat, int x, int y);
// adding Gaussian noise
void addNoise(cv::Mat &m, float sigma);
// measuring time
// Stopwatch built on clock().
//   start()  records the current clock() value and marks the timer running.
//   end()    calls cudaDeviceSynchronize(), then stores the elapsed clock()
//            ticks converted to seconds; if the timer was not running the
//            stored time is reset to 0.
//   get()    stops the timer first if it is still running, then returns the
//            stored time in seconds.
class Timer
{
public:
    Timer() : tStart(0), running(false), sec(0.f) {}

    void start()
    {
        tStart = clock();
        running = true;
    }

    void end()
    {
        if (running)
        {
            // Wait for outstanding device work before reading the clock.
            cudaDeviceSynchronize();
            const clock_t tEnd = clock();
            sec = (float)(tEnd - tStart) / CLOCKS_PER_SEC;
            running = false;
        }
        else
        {
            sec = 0;
        }
    }

    float get()
    {
        if (running)
        {
            end();
        }
        return sec;
    }

private:
    clock_t tStart;   // clock() value captured by start()
    bool running;     // true between start() and end()
    float sec;        // last measured duration in seconds
};
// cuda error checking
#define CUDA_CHECK cuda_check(__FILE__,__LINE__)
void cuda_check(std::string file, int line);
#endif // AUX_H
// ###
// ###
// ### Practical Course: GPU Programming in Computer Vision
// ###
// ###
// ### Technical University Munich, Computer Vision Group
// ### Winter Semester 2013/2014, March 3 - April 4
// ###
// ###
// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
// ###
// ###
// ###
// ###
// ###
// ### TODO: For every student of your group, please provide here:
// ###
// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058
// ### Miklos Homolya, miklos.homolya@tum.de, p056
// ### Ravikishore Kommajosyula, r.kommajosyula, p057
// ###
#include "aux.h"
#include <iostream>
#include <math.h>
using namespace std;
// uncomment to use the camera
//#define CAMERA
#define USING_GPU
// Modulus of the complex number z, with z.x as real and z.y as imaginary part.
__host__ __device__ float absolute_value ( float2 z ) {
    const float reSquared = z.x * z.x;
    const float imSquared = z.y * z.y;
    return sqrtf(reSquared + imSquared);
}
// Component-wise sum of the complex numbers z1 and z2.
__host__ __device__ float2 add_complex ( float2 z1, float2 z2 ) {
    float2 sum;
    sum.x = z1.x + z2.x;
    sum.y = z1.y + z2.y;
    return sum;
}
// Complex square of z: (x + iy)^2 = (x^2 - y^2) + i(2xy).
__host__ __device__ float2 square_complex ( float2 z ) {
    float2 squared;
    squared.x = (z.x*z.x) - (z.y*z.y);
    squared.y = 2.0f * z.x * z.y;
    return squared;
}
// Mandelbrot escape-time kernel: one thread per output pixel.
//
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads; surplus threads return immediately. No shared memory is used.
//
//   imgOut      device buffer of width*height floats, row-major
//   width       image width in pixels
//   height      image height in pixels
//   center      complex coordinate mapped to the middle of the image
//   radius      half-extent of the viewed region; both axes span
//               [center - radius, center + radius) regardless of aspect ratio
//   iterations  maximum number of iterations per pixel
//
// Each pixel is mapped to a complex number c, and z <- z^2 + c is iterated
// starting from z = c while |z| < 2 and fewer than `iterations` steps were
// taken. The pixel receives 1 - n/iterations, where n is the number of steps
// performed, so points that never escape are written as 0.
// With iterations <= 0 no step is performed and 1.0f is written, which avoids
// the 0/0 = NaN the plain formula would produce for iterations == 0.
__global__ void callKernel(float* imgOut, int width, int height, float2 center, float radius, int iterations) {
    const int ix = blockIdx.x * blockDim.x + threadIdx.x; // column, 0 .. width-1
    const int iy = blockIdx.y * blockDim.y + threadIdx.y; // row,    0 .. height-1
    if (ix >= width || iy >= height) return;

    // Flat index in size_t so that iy*width cannot overflow int on large images.
    const size_t idx = (size_t)iy * width + ix;

    if (iterations <= 0) {
        imgOut[idx] = 1.0f;
        return;
    }

    float2 c, z;
    c.x = ((float)ix / width) * (2.0f * radius) + center.x - radius;
    c.y = ((float)iy / height) * (2.0f * radius) + center.y - radius;
    z = c;

    int n = 0;
    while ((absolute_value(z) < 2.0f) && (n < iterations))
    {
        z = add_complex(square_complex(z), c);
        n++;
    }
    imgOut[idx] = 1.0f - (float)n / iterations;
}
// Renders the Mandelbrot set on the GPU, reports the time taken, displays the
// result in a window and writes it to "image_result.png".
//
// Command line switches (all optional, "-name value"):
//   -width       image width in pixels          (default 640)
//   -height      image height in pixels         (default 480)
//   -radius      half-extent of the viewed area (default 1.5)
//   -iterations  iteration limit per pixel      (default 100)
//   -repeats     number of timed repetitions    (default 100)
//
// Returns 0 on success and 1 when a size or count parameter is not positive.
int main(int argc, char **argv)
{
#ifdef USING_GPU
    // Before the GPU can process kernels, a "CUDA context" must be initialized.
    // This happens on the very first call to a CUDA function and takes some time,
    // so it is done here to keep it out of the run time measurement.
    cudaDeviceSynchronize(); CUDA_CHECK;
#endif // USING_GPU

    // Reading command line parameters:
    // getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
    // If "-param" is not specified, the value of "var" remains unchanged
    //
    // return value: getParam("param", ...) returns true if "-param" is specified, and false otherwise

    // Image dimensions are pixel counts, hence integers.
    int width = 640;
    getParam("width", width, argc, argv);
    cout << "width = " << width << endl;

    int height = 480;
    getParam("height", height, argc, argv);
    cout << "height = " << height<< endl;

    // The center of the viewed region is fixed; it is not a command line parameter.
    float2 center = {-0.5f, 0.0f};

    float radius = 1.5f;
    getParam("radius", radius, argc, argv);
    cout << "radius = " << radius << endl;

    int iterations = 100;
    getParam("iterations", iterations, argc, argv);
    cout << "iterations = " << iterations << endl;

    int repeats = 100;
    getParam("repeats", repeats, argc, argv);
    cout << "repeats = " << repeats << endl;

    if (width <= 0 || height <= 0 || iterations <= 0 || repeats <= 0)
    {
        cerr << "ERROR: width, height, iterations and repeats must all be positive" << endl;
        return 1;
    }

    // Output image: single-channel float (grayscale)
    cv::Mat mOut(height, width, CV_32FC1);

    // Raw output array (the computation result is stored here, then converted to mOut for displaying).
    // Value-initialized so that it never holds indeterminate data.
    const size_t count = (size_t)width * height;
    float *imgOut = new float[count]();

    // Launch configuration: one thread per pixel, grid rounded up to cover the image
    const dim3 block(32, 8, 1);
    const dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y, 1);

    // Device buffer is allocated once and reused by every repetition
    float *d_imgOut = NULL;
    cudaMalloc(&d_imgOut, count * sizeof(float)); CUDA_CHECK;

    Timer timer;
    timer.start();
    // Repetitions loop: kernel launch plus copy back to the host
    for (int rep = 0; rep < repeats; rep++)
    {
        // Computing the Mandelbrot image
        callKernel <<< grid, block >>> (d_imgOut, width, height, center, radius, iterations);
        CUDA_CHECK;

        // Copying result back (blocks until the kernel has finished)
        cudaMemcpy(imgOut, d_imgOut, count * sizeof(float), cudaMemcpyDeviceToHost);
        CUDA_CHECK;
    }
    timer.end();
    const float t = timer.get();
    cout << "time: " << t*1000 << " ms" << endl;

    // Freeing device memory
    cudaFree(d_imgOut); CUDA_CHECK;

    // show output image: first convert to interleaved opencv format from the layered raw array
    convert_layered_to_mat(mOut, imgOut);
    showImage("Output", mOut, 100, 100);

    // wait for key inputs
    cv::waitKey(0);

    // save result
    cv::imwrite("image_result.png",mOut*255.f);

    // free allocated arrays
    delete[] imgOut;

    // close all opencv windows
    cvDestroyAllWindows();
    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment