exercise 12 bonus mandelbrot set added

a3e34e88 · Ravikishore · 9c772fb8 · a3e34e88 · a3e34e88 · a3e34e88
Commit a3e34e88 authored Mar 06, 2014 by Ravikishore
Hide whitespace changes
Inline Side-by-side

Showing with 441 additions and 0 deletions

Makefile ravi/ex12/Makefile +5 -0

aux.cu ravi/ex12/aux.cu +146 -0

aux.h ravi/ex12/aux.h +109 -0

main.cu ravi/ex12/main.cu +181 -0

No files found.
--- a/ravi/ex12/Makefile
+++ b/ravi/ex12/Makefile
+# Default target: build the `main` executable.
+all: main
+
+# Compiles and links both CUDA sources in a single nvcc invocation.
+# aux.h and this Makefile are listed as prerequisites, so editing either one
+# triggers a rebuild. -v is forwarded to ptxas, -Wall to the host compiler,
+# and the OpenCV highgui and core libraries are linked in.
+main: main.cu aux.cu aux.h Makefile
+	nvcc -o main main.cu aux.cu  --ptxas-options=-v --use_fast_math --compiler-options -Wall -lopencv_highgui -lopencv_core
+
--- a/ravi/ex12/aux.cu
+++ b/ravi/ex12/aux.cu
+// ###
+// ###
+// ### Practical Course: GPU Programming in Computer Vision
+// ###
+// ###
+// ### Technical University Munich, Computer Vision Group
+// ### Winter Semester 2013/2014, March 3 - April 4
+// ###
+// ###
+// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
+// ###
+// ###
+// ###
+// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
+// ###
+// ###
+
+
+#include "aux.h"
+#include <cstdlib>
+#include <iostream>
+using std::stringstream;
+using std::cerr;
+using std::cout;
+using std::endl;
+using std::string;
+
+
+
+
+// parameter processing: template specialization for T=bool
+// Boolean flavour of getParam: a bare "-param" (last argument, or followed by
+// another "-option") switches var on; otherwise the following token is parsed
+// as the flag's value. Arguments are scanned from the back, so the last
+// occurrence of "-param" is the one that counts.
+// Returns true if the flag was found and its value (if any) parsed cleanly.
+template<>
+bool getParam<bool>(std::string param, bool &var, int argc, char **argv)
+{
+    int i = argc;
+    while (--i >= 1)
+    {
+        const char *arg = argv[i];
+        const bool isOption = (arg[0] == '-');
+        if (!isOption || strcmp(arg+1, param.c_str()) != 0) continue;
+
+        // A value is present only if there is a next token that is not itself an option.
+        const bool hasValue = (i+1 < argc) && (argv[i+1][0] != '-');
+        if (!hasValue)
+        {
+            var = true;
+            return true;
+        }
+
+        std::stringstream parser;
+        parser << argv[i+1];
+        parser >> var;
+        return (bool)parser;
+    }
+    return false;
+}
+
+
+
+
+// opencv helpers
+// Converts a layered (planar) float image into an interleaved one.
+//
+//   aIn  : w*h*nc floats; channel c occupies the contiguous plane starting at c*w*h
+//   aOut : w*h*nc floats; the nc values of each pixel are stored next to each other
+//
+// The channel order is reversed on the way: input channel c lands in slot
+// nc-1-c of each output pixel. With a single channel both layouts coincide
+// and the data is copied unchanged. Non-positive dimensions are a no-op.
+void convert_layered_to_interleaved(float *aOut, const float *aIn, int w, int h, int nc)
+{
+    if (w<=0 || h<=0 || nc<=0) return;
+
+    // Pixel count is computed in size_t up front: a plain int w*h would
+    // overflow for very large images before being widened for memcpy.
+    const size_t nOmega = (size_t)w*h;
+    if (nc==1) { memcpy(aOut, aIn, nOmega*sizeof(float)); return; }
+
+    // pixel == x + w*y, visited in row-major order
+    for (size_t pixel=0; pixel<nOmega; pixel++)
+    {
+        for (int c=0; c<nc; c++)
+        {
+            aOut[(nc-1-c) + nc*pixel] = aIn[pixel + nOmega*c];
+        }
+    }
+}
+// Fills mOut's pixel buffer from the layered array aIn; width, height and
+// channel count are taken from mOut itself.
+// NOTE(review): the buffer is written through a float pointer, so mOut is
+// presumably expected to hold contiguous 32-bit float data - nothing here checks that.
+void convert_layered_to_mat(cv::Mat &mOut, const float *aIn)
+{
+    float *dst = (float*)mOut.data;
+    const int w = mOut.cols;
+    const int h = mOut.rows;
+    const int nc = mOut.channels();
+    convert_layered_to_interleaved(dst, aIn, w, h, nc);
+}
+
+
+// Converts an interleaved float image into a layered (planar) one.
+//
+//   aIn  : w*h*nc floats; the nc values of each pixel are stored next to each other
+//   aOut : w*h*nc floats; channel c occupies the contiguous plane starting at c*w*h
+//
+// The channel order is reversed on the way: slot nc-1-c of each input pixel
+// becomes output channel c. With a single channel both layouts coincide and
+// the data is copied unchanged. Non-positive dimensions are a no-op.
+void convert_interleaved_to_layered(float *aOut, const float *aIn, int w, int h, int nc)
+{
+    if (w<=0 || h<=0 || nc<=0) return;
+
+    // Pixel count is computed in size_t up front: a plain int w*h would
+    // overflow for very large images before being widened for memcpy.
+    const size_t nOmega = (size_t)w*h;
+    if (nc==1) { memcpy(aOut, aIn, nOmega*sizeof(float)); return; }
+
+    // pixel == x + w*y, visited in row-major order
+    for (size_t pixel=0; pixel<nOmega; pixel++)
+    {
+        for (int c=0; c<nc; c++)
+        {
+            aOut[pixel + nOmega*c] = aIn[(nc-1-c) + nc*pixel];
+        }
+    }
+}
+// Copies mIn's pixel buffer into the layered array aOut; width, height and
+// channel count are taken from mIn itself.
+// NOTE(review): the buffer is read through a float pointer, so mIn is
+// presumably expected to hold contiguous 32-bit float data - nothing here checks that.
+void convert_mat_to_layered(float *aOut, const cv::Mat &mIn)
+{
+    const float *src = (float*)mIn.data;
+    const int w = mIn.cols;
+    const int h = mIn.rows;
+    const int nc = mIn.channels();
+    convert_interleaved_to_layered(aOut, src, w, h, nc);
+}
+
+
+
+// Opens (or reuses) an auto-sized window named `title`, moves it to
+// position (x, y) and displays `mat` in it.
+void showImage(string title, const cv::Mat &mat, int x, int y)
+{
+    cv::namedWindow(title, CV_WINDOW_AUTOSIZE);
+    cvMoveWindow(title.c_str(), x, y);
+    cv::imshow(title, mat);
+}
+
+
+
+
+// adding Gaussian noise
+// Draws one noise sample scaled by sigma, built from two rand() values:
+// sigma * sqrt(-2*log(u1)) * cos(2*pi*u2).
+float noise(float sigma)
+{
+    // Two uniform samples in [0,1]; the order of the rand() calls matters
+    // for reproducibility under a fixed seed.
+    const float u1 = (float)rand()/RAND_MAX;
+    const float u2 = (float)rand()/RAND_MAX;
+
+    // Clamp away from zero so the logarithm stays finite.
+    const float u1Safe = std::max(u1, 0.000001f);
+
+    const float magnitude = sqrtf(-2*log(u1Safe));
+    const float direction = cosf(2*M_PI*u2);
+    return sigma * magnitude * direction;
+}
+// Adds an independent noise(sigma) sample to every float element of m
+// (cols * rows * channels values, accessed through m.data).
+void addNoise(cv::Mat &m, float sigma)
+{
+    const size_t total = (size_t)m.cols * m.rows * m.channels();
+    float *p = (float*)m.data;
+    for (float *const stop = p + total; p != stop; ++p)
+    {
+        *p += noise(sigma);
+    }
+}
+
+
+
+
+// cuda error checking
+// Location of the most recent check that passed; printed next to a failure
+// to help narrow down which call went wrong.
+string prev_file = "";
+int prev_line = 0;
+
+// Queries cudaGetLastError(). On success the given location is remembered as
+// the last good check. On failure the error string and code are printed along
+// with the current and the previously remembered location, and the process
+// exits with status 1.
+void cuda_check(string file, int line)
+{
+    const cudaError_t status = cudaGetLastError();
+    if (status == cudaSuccess)
+    {
+        prev_file = file;
+        prev_line = line;
+        return;
+    }
+
+    cout << endl << file << ", line " << line << ": " << cudaGetErrorString(status) << " (" << status << ")" << endl;
+    if (prev_line>0)
+    {
+        cout << "Previous CUDA call:" << endl << prev_file << ", line " << prev_line << endl;
+    }
+    exit(1);
+}
--- a/ravi/ex12/aux.h
+++ b/ravi/ex12/aux.h
+// ###
+// ###
+// ### Practical Course: GPU Programming in Computer Vision
+// ###
+// ###
+// ### Technical University Munich, Computer Vision Group
+// ### Winter Semester 2013/2014, March 3 - April 4
+// ###
+// ###
+// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
+// ###
+// ###
+// ###
+// ### THIS FILE IS SUPPOSED TO REMAIN UNCHANGED
+// ###
+// ###
+
+
+#ifndef AUX_H
+#define AUX_H
+
+#include <cuda_runtime.h>
+#include <ctime>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <string>
+#include <sstream>
+
+
+
+
+// parameter processing
+// Looks for "-<param> <value>" on the command line and parses <value> into var
+// using stream extraction (operator>>).
+//
+//   param      : option name without the leading '-'
+//   var        : receives the parsed value; left untouched unless parsing succeeds
+//   argc, argv : the program's command line (argv[0] is never inspected)
+//
+// Arguments are scanned from the back, so the last occurrence of "-<param>"
+// that has a following token wins. Returns true only if the option was found
+// and its value parsed cleanly.
+template<typename T>
+bool getParam(std::string param, T &var, int argc, char **argv)
+{
+    for(int i=argc-1; i>=1; i--)
+    {
+        const char *arg = argv[i];
+        if (arg[0]!='-') continue;
+        // Compare via std::string so this header does not depend on <cstring>
+        // being pulled in by some other include.
+        if (param.compare(arg+1)!=0) continue;
+        if (!(i+1<argc)) continue;
+
+        // Parse into a copy first: since C++11 a failed numeric extraction
+        // stores 0 in its target, which would silently clobber the caller's default.
+        T parsed = var;
+        std::stringstream ss;
+        ss << argv[i+1];
+        ss >> parsed;
+        if (ss.fail()) return false;
+        var = parsed;
+        return true;
+    }
+    return false;
+}
+
+
+
+
+// opencv helpers
+// Copies mIn's interleaved float data into the layered array aOut
+// (one contiguous plane per channel, channel order reversed).
+void convert_mat_to_layered(float *aOut, const cv::Mat &mIn);
+// Inverse direction: fills mOut's interleaved float data from the layered array aIn.
+void convert_layered_to_mat(cv::Mat &mOut, const float *aIn);
+// Shows mat in an auto-sized window named title, moved to position (x, y).
+void showImage(std::string title, const cv::Mat &mat, int x, int y);
+
+
+
+
+// adding Gaussian noise
+// Adds a noise sample scaled by sigma to every float element of m, in place.
+void addNoise(cv::Mat &m, float sigma);
+
+
+
+
+// measuring time
+// Simple stopwatch built on clock(). end() calls cudaDeviceSynchronize()
+// before reading the clock, so GPU work that is still in flight is waited
+// for before the measurement is taken.
+// NOTE(review): clock() reports processor time rather than wall-clock time.
+class Timer
+{
+public:
+    Timer() : tStart(0), running(false), sec(0.f) {}
+
+    // Starts (or restarts) the measurement.
+    void start()
+    {
+        tStart = clock();
+        running = true;
+    }
+
+    // Stops the measurement and stores the elapsed seconds.
+    // Calling end() on a timer that is not running resets the stored time to 0.
+    void end()
+    {
+        if (running)
+        {
+            cudaDeviceSynchronize();
+            const clock_t tEnd = clock();
+            sec = (float)(tEnd - tStart) / CLOCKS_PER_SEC;
+            running = false;
+        }
+        else
+        {
+            sec = 0;
+        }
+    }
+
+    // Returns the elapsed seconds, stopping the timer first if it is still running.
+    float get()
+    {
+        if (running)
+        {
+            end();
+        }
+        return sec;
+    }
+
+private:
+    clock_t tStart;   // clock() value captured by start()
+    bool running;     // true between start() and end()
+    float sec;        // result of the last completed measurement
+};
+
+
+
+
+// cuda error checking
+// Usage: write `CUDA_CHECK;` after a CUDA call. The macro forwards the
+// current source file and line number to cuda_check().
+#define CUDA_CHECK cuda_check(__FILE__,__LINE__)
+// Checks cudaGetLastError(); on an error it prints the message together with
+// `file`/`line` and terminates the program with exit(1).
+void cuda_check(std::string file, int line);
+
+
+
+#endif  // AUX_H
--- a/ravi/ex12/main.cu
+++ b/ravi/ex12/main.cu
+// ###
+// ###
+// ### Practical Course: GPU Programming in Computer Vision
+// ###
+// ###
+// ### Technical University Munich, Computer Vision Group
+// ### Winter Semester 2013/2014, March 3 - April 4
+// ###
+// ###
+// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
+// ###
+// ###
+// ###
+
+
+
+// ###
+// ###
+// ### TODO: For every student of your group, please provide here:
+// ###
+// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058
+// ### Miklos Homolya, miklos.homolya@tum.de, p056 
+// ### Ravikishore Kommajosyula, r.kommajosyula, p057
+// ###
+
+
+#include "aux.h"
+#include <iostream>
+#include <math.h>
+using namespace std;
+
+// uncomment to use the camera
+//#define CAMERA
+
+#define USING_GPU
+
+// Magnitude |z| of the complex number z = z.x + i*z.y.
+__host__ __device__ float absolute_value ( float2 z ) {
+	const float reSquared = z.x * z.x;
+	const float imSquared = z.y * z.y;
+	return sqrtf(reSquared + imSquared);
+}
+
+// Complex addition: returns z1 + z2, with .x holding the real and .y the
+// imaginary part. Built with make_float2 rather than a braced return value,
+// so the function does not rely on C++11 list-initialization being enabled.
+__host__ __device__ float2 add_complex ( float2 z1, float2 z2 ) {
+	return make_float2(z1.x + z2.x, z1.y + z2.y);
+}
+
+// Complex square: (x + iy)^2 = (x^2 - y^2) + i*(2xy), with .x holding the
+// real and .y the imaginary part. Built with make_float2 rather than a braced
+// return value, so the function does not rely on C++11 list-initialization
+// being enabled.
+__host__ __device__ float2 square_complex ( float2 z ) {
+	return make_float2((z.x*z.x) - (z.y*z.y), 2.0f * z.x * z.y);
+}
+
+
+// Mandelbrot escape-time kernel: one thread per output pixel.
+//
+// Launch layout: a 2D grid of 2D blocks providing at least width x height
+// threads; the x dimension indexes columns and the y dimension indexes rows.
+// Threads that fall outside the image return immediately. No shared memory is used.
+//
+//   imgOut     : output buffer of width*height floats, row-major
+//   center     : complex-plane point at the middle of the viewed square
+//   radius     : half the side length of the viewed square
+//   iterations : upper bound on the number of iterations per pixel
+//
+// Pixel (ix, iy) is mapped to c = center - radius + 2*radius*(ix/width, iy/height).
+// Starting from z = c, z <- z^2 + c is iterated while |z| < 2 and fewer than
+// `iterations` steps were taken; the pixel receives 1 - n/iterations, where n
+// is the number of steps performed.
+__global__ void callKernel(float* imgOut, int width, int height, float2 center, float radius, int iterations) {
+	const int ix = blockIdx.x * blockDim.x + threadIdx.x;  // column, 0 .. width-1
+	const int iy = blockIdx.y * blockDim.y + threadIdx.y;  // row,    0 .. height-1
+	if(ix >= width || iy >= height) return;
+
+	// Linear index is formed only for in-range threads, in size_t so that it
+	// cannot overflow int for very large images.
+	const size_t idx = (size_t)iy * width + ix;
+
+	float2 c, z;
+	c.x = ((float)ix / width) * (2.0f * radius) + center.x - radius;
+	c.y = ((float)iy / height) * (2.0f * radius) + center.y - radius;
+	z = c;
+	int n = 0;
+	while( (absolute_value(z) < 2.0f) && (n < iterations))
+	{
+		z = add_complex ( square_complex(z), c);
+		n++;
+	}
+
+	// With iterations <= 0 no step is ever taken; write 1 directly instead of
+	// evaluating 0/0, which would store NaN for iterations == 0.
+	imgOut[idx] = (iterations > 0) ? 1.0f - (float)n / iterations : 1.0f;
+}
+
+// Renders the Mandelbrot set on the GPU, reports the time taken for `repeats`
+// renderings, shows the result in a window and writes it to image_result.png.
+//
+// Command line options (all optional): -width, -height, -radius, -iterations, -repeats.
+// Returns 0 on success and 1 if a size or count parameter is not positive.
+int main(int argc, char **argv)
+{
+#ifdef USING_GPU
+	// Before the GPU can process your kernels, a so called "CUDA context" must be initialized
+	// This happens on the very first call to a CUDA function, and takes some time (around half a second)
+	// We will do it right here, so that the run time measurements are accurate
+	cudaDeviceSynchronize();  CUDA_CHECK;
+#endif // USING_GPU
+
+	// Reading command line parameters:
+	// getParam("param", var, argc, argv) looks whether "-param xyz" is specified, and if so stores the value "xyz" in "var"
+	// If "-param" is not specified, the value of "var" remains unchanged
+	//
+	// return value: getParam("param", ...) returns true if "-param" is specified, and false otherwise
+
+	// Image dimensions are pixel counts, so they are parsed as integers.
+	int width = 640;
+	getParam("width", width, argc, argv);
+	cout << "width = " << width << endl;
+
+	int height = 480;
+	getParam("height", height, argc, argv);
+	cout << "height = " << height<< endl;
+
+	// The center is fixed: getParam parses via operator>>, which float2 does not provide.
+	float2 center = {-0.5f, 0.0f};
+
+	float radius = 1.5f;
+	getParam("radius", radius, argc, argv);
+	cout << "radius = " << radius << endl;
+
+	int iterations = 100;
+	getParam("iterations", iterations, argc, argv);
+	cout << "iterations = " << iterations << endl;
+
+	int repeats = 100;
+	getParam("repeats", repeats, argc, argv);
+	cout << "repeats = " << repeats << endl;
+
+	// Reject values that would yield an empty image, a zero-sized allocation,
+	// or a result buffer that is never written (repeats == 0).
+	if (width <= 0 || height <= 0 || iterations <= 0 || repeats <= 0)
+	{
+		cerr << "width, height, iterations and repeats must all be positive" << endl;
+		return 1;
+	}
+
+	// Set the output image format
+	cv::Mat mOut(height, width, CV_32FC1);    // mOut will be a grayscale image, 1 layer
+
+	// Host-side raw output array (the computation result is stored here, then converted to mOut for displaying)
+	const size_t count = (size_t)width * height;
+	float *imgOut = new float[count];
+
+	// Thread Dimensions: 32x8 threads per block, enough blocks to cover the whole image
+	const dim3 block(32, 8, 1);
+	const dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y, 1);
+
+	// Device buffer is allocated once and reused by every repetition, so the
+	// timed region below covers only the kernel and the copy back to the host.
+	float *d_imgOut = NULL;
+	cudaMalloc(&d_imgOut, count * sizeof(float));  CUDA_CHECK;
+
+	Timer timer;
+	timer.start();
+
+	// Repetitions Loop
+	for(int rep = 0; rep < repeats; rep++)
+	{
+		// Calling the Mandelbrot kernel; CUDA_CHECK right after the launch
+		// reports an invalid launch configuration at its source.
+		callKernel <<< grid, block >>> (d_imgOut, width, height, center, radius, iterations );  CUDA_CHECK;
+
+		// Copying result back (blocking, so it also waits for the kernel to finish)
+		cudaMemcpy(imgOut, d_imgOut, count * sizeof(float), cudaMemcpyDeviceToHost);  CUDA_CHECK;
+	}
+
+	timer.end();
+	const float t = timer.get();
+
+	// Total time over all repetitions
+	cout << "time: " << t*1000 << " ms" << endl;
+
+	// Freeing device memory
+	cudaFree(d_imgOut);  CUDA_CHECK;
+
+	// show output image: first convert to interleaved opencv format from the layered raw array
+	convert_layered_to_mat(mOut, imgOut);
+	showImage("Output", mOut, 100, 100);
+
+	// wait for key inputs
+	cv::waitKey(0);
+	// save result (values are scaled by 255 before writing)
+	cv::imwrite("image_result.png",mOut*255.f);
+
+	// free allocated arrays
+	delete[] imgOut;
+
+	// close all opencv windows
+	cvDestroyAllWindows();
+	return 0;
+}
+
+