Constant kernel based on Gaurav's convolution

Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>

Constant kernel based on Gaurav's convolution
Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>
35d91d13 · Gaurav Kukreja · fb855284 · 35d91d13
Commit 35d91d13 authored Mar 05, 2014 by Gaurav Kukreja
Show whitespace changes
Inline Side-by-side

Showing with 352 additions and 352 deletions

main.cu gaurav/2_Assign/ex4/main.cu +352 -352

No files found.
--- a/gaurav/2_Assign/ex4/main.cu
+++ b/gaurav/2_Assign/ex4/main.cu
-    // ###
-    // ###
-    // ### Practical Course: GPU Programming in Computer Vision
-    // ###
-    // ###
-    // ### Technical University Munich, Computer Vision Group
-    // ### Winter Semester 2013/2014, March 3 - April 4
-    // ###
-    // ###
-    // ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
-    // ###
-    // ###
-    // ###
+// ###
+// ###
+// ### Practical Course: GPU Programming in Computer Vision
+// ###
+// ###
+// ### Technical University Munich, Computer Vision Group
+// ### Winter Semester 2013/2014, March 3 - April 4
+// ###
+// ###
+// ### Evgeny Strekalovskiy, Maria Klodt, Jan Stuehmer, Mohamed Souiai
+// ###
+// ###
+// ###



-    // ###
-    // ###
-    // ### TODO: For every student of your group, please provide here:
-    // ###
-    // ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058 
-    // ###
-    // ###
+// ###
+// ###
+// ### TODO: For every student of your group, please provide here:
+// ###
+// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058 
+// ###
+// ###


-    #include "aux.h"
-    #include <iostream>
-    #include <math.h>
-    using namespace std;
+#include "aux.h"
+#include <iostream>
+#include <math.h>
+using namespace std;

-    #define MAX_KERNEL_WIDTH 20
+#define MAX_KERNEL_WIDTH 20

-    __constant__ float constKernel[MAX_KERNEL_WIDTH * MAX_KERNEL_WIDTH];
+__constant__ float constKernel[MAX_KERNEL_WIDTH * MAX_KERNEL_WIDTH];

-    // uncomment to use the camera
-    //#define CAMERA
+// uncomment to use the camera
+//#define CAMERA

-    #define USING_GPU
+#define USING_GPU

-    template<typename T>
-    __device__ T gpu_min(T a, T b)
-    {
+template<typename T>
+__device__ T gpu_min(T a, T b)
+{
    if (a < b)
        return a;
    else
        return b;
-    }
+}

-    template<typename T>
-    __device__ T gpu_max(T a, T b)
-    {
+template<typename T>
+__device__ T gpu_max(T a, T b)
+{
    if (a < b)
        return b;
    else
        return a;
-    }
+}


-    // Image Gradient 
-    __device__ void convolveImage(float* imgIn, float* imgOut, int rad, int w, int h, int nc) 
-    {
+// Image Gradient 
+__device__ void convolveImage(float* imgIn, float* imgOut, int rad, int w, int h, int nc) 
+{
    int ix = threadIdx.x + blockDim.x * blockIdx.x;
    int iy = threadIdx.y + blockDim.y * blockIdx.y;
    int iz = threadIdx.z + blockDim.z * blockIdx.z;
@@ -71,33 +71,33 @@
    { 
        imgOut[idx] = 0;						    // initialize
        float value = 0;
-            for(int j = -rad; j < rad; j++)					    // for each row in kernel
+        for(int j = -rad; j <= rad; j++)					    // for each row in kernel
        {   
            int iny = gpu_max(0, gpu_min(iy+j, h-1));			    
-    	    for(int i = -rad; i < rad; i++)				    // for each element in the kernel row
+            for(int i = -rad; i <= rad; i++)				    // for each element in the kernel row
            {
                int inx = gpu_max(0, gpu_min(ix+i, w-1));
                int inIdx = inx + (iny * w) + (iz * w * h);		    // Index of Input Image to be multiplied by corresponding element in kernel
-    		value += imgIn[inIdx] * constKernel[i+rad + ((j+rad) * rad)];
+                value += imgIn[inIdx] * constKernel[i+rad + ((j+rad) * (2 * rad + 1))];
            }
        }
        imgOut[idx] = value;
    }
-    }
+}

-    __global__ void callKernel(float* imgIn, float* imgOut, int rad, int w, int h, int nc)
-    {
+__global__ void callKernel(float* imgIn, float* imgOut, int rad, int w, int h, int nc)
+{
    convolveImage(imgIn, imgOut, rad, w, h, nc);    
-    }
+}

-    int main(int argc, char **argv)
-    {
-    #ifdef USING_GPU
+int main(int argc, char **argv)
+{
+#ifdef USING_GPU
    // Before the GPU can process your kernels, a so called "CUDA context" must be initialized
    // This happens on the very first call to a CUDA function, and takes some time (around half a second)
    // We will do it right here, so that the run time measurements are accurate
    cudaDeviceSynchronize();  CUDA_CHECK;
-    #endif // USING_GPU
+#endif // USING_GPU



@@ -107,14 +107,14 @@
    //
    // return value: getParam("param", ...) returns true if "-param" is specified, and false otherwise

-    #ifdef CAMERA
-    #else
+#ifdef CAMERA
+#else
    // input image
    string image = "";
    bool ret = getParam("i", image, argc, argv);
    if (!ret) cerr << "ERROR: no image specified" << endl;
    if (argc <= 1) { cout << "Usage: " << argv[0] << " -i <image> [-repeats <repeats>] [-gray] [-sigma <sigma>]" << endl << "\t Default Value of sigma = 0.5" << endl; return 1; }
-    #endif
+#endif
    
    // number of computation repetitions to get a better run time measurement
    int repeats = 1;
@@ -133,7 +133,7 @@
    // ### Define your own parameters here as needed    

    // Init camera / Load input image
-    #ifdef CAMERA
+#ifdef CAMERA

    // Init camera
  	cv::VideoCapture camera(0);
@@ -146,14 +146,14 @@
    cv::Mat mIn;
    camera >> mIn;
    
-    #else
+#else

    // Load the input image using opencv (load as grayscale if "gray==true", otherwise as is (may be color or grayscale))
    cv::Mat mIn = cv::imread(image.c_str(), (gray? CV_LOAD_IMAGE_GRAYSCALE : -1));
    // check
    if (mIn.data == NULL) { cerr << "ERROR: Could not load image " << image << endl; return 1; }
    
-    #endif
+#endif

    // convert to float representation (opencv loads image values as single bytes by default)
    mIn.convertTo(mIn,CV_32F);
@@ -195,7 +195,7 @@
    float *imgOut = new float[(size_t)w*h*mOut.channels()];

    int rad = ceil(3 * sigma); // kernel radius
-        int kw = 2 * rad; // kernel width
+    int kw = 2 * rad + 1; // kernel width
    
    if (kw > MAX_KERNEL_WIDTH)
    {
@@ -243,13 +243,13 @@
    }

    // Display Kernel
-        cv::Mat cvKernelOut(2*rad, 2*rad, CV_32F);
+    cv::Mat cvKernelOut(kw, kw, CV_32F);
    convert_layered_to_mat(cvKernelOut, kernelOut);
    showImage("Kernel", cvKernelOut, 100, 10);


    // For camera mode: Make a loop to read in camera frames
-    #ifdef CAMERA
+#ifdef CAMERA
    // Read a camera image frame every 30 milliseconds:
    // cv::waitKey(30) waits 30 milliseconds for a keyboard input,
    // returns a value <0 if no key is pressed during this time, returns immediately with a value >=0 if a key is pressed
@@ -261,7 +261,7 @@
    mIn.convertTo(mIn,CV_32F);
    // convert range of each channel to [0,1] (opencv default is [0,255])
    mIn /= 255.f;
-    #endif
+#endif

    // Init raw input image array
    // opencv images are interleaved: rgb rgb rgb...  (actually bgr bgr bgr...)
@@ -335,10 +335,10 @@
 		    int idx = ix + (iy * w) + (iz * w * h);
 	            imgOut[idx] = 0;                                                    // initialize
 	            float value = 0;
-    	            for(int j = -rad; j < rad; j++)                                     // for each row in kernel
+	            for(int j = -rad; j <= rad; j++)                                     // for each row in kernel
 	            {
 	                int iny = max(0, min(iy+j, h-1));
-    	                for(int i = -rad; i < rad; i++)                                 // for each element in the kernel row
+	                for(int i = -rad; i <= rad; i++)                                 // for each element in the kernel row
 	                {
 	                    int inx = max(0, min(ix+i, w-1));
 	                    int inIdx = inx + (iny * w) + (iz * w * h);                 // Index of Input Image to be multiplied by corresponding element in kernel
@@ -368,13 +368,13 @@

    // ### Display your own output images here as needed

-    #ifdef CAMERA
+#ifdef CAMERA
    // end of camera loop
    }
-    #else
+#else
    // wait for key inputs
    cv::waitKey(0);
-    #endif
+#endif



@@ -392,7 +392,7 @@
    // close all opencv windows
    cvDestroyAllWindows();
    return 0;
-    }
+}