Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
bf2c11be
Commit
bf2c11be
authored
Mar 05, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed convolution
Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>
parent
3c0ac52c
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
12 deletions
+14
-12
main.cu
gaurav/1_Assign/ex6/main.cu
+14
-12
No files found.
gaurav/1_Assign/ex6/main.cu
View file @
bf2c11be
...
@@ -67,14 +67,14 @@ __device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int ra
...
@@ -67,14 +67,14 @@ __device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int ra
{
{
imgOut[idx] = 0; // initialize
imgOut[idx] = 0; // initialize
float value = 0;
float value = 0;
for(int j = -rad; j < rad; j++) // for each row in kernel
for(int j = -rad; j <= rad; j++) // for each row in kernel
{
{
int iny = gpu_max(0, gpu_min(iy+j, h-1));
int iny = gpu_max(0, gpu_min(iy+j, h-1));
for(int i = -rad; i < rad; i++) // for each element in the kernel row
for(int i = -rad; i <= rad; i++) // for each element in the kernel row
{
{
int inx = gpu_max(0, gpu_min(ix+i, w-1));
int inx = gpu_max(0, gpu_min(ix+i, w-1));
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * rad)];
value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * (2 * rad + 1))];
}
}
}
}
imgOut[idx] = value;
imgOut[idx] = value;
...
@@ -191,9 +191,11 @@ int main(int argc, char **argv)
...
@@ -191,9 +191,11 @@ int main(int argc, char **argv)
float *imgOut = new float[(size_t)w*h*mOut.channels()];
float *imgOut = new float[(size_t)w*h*mOut.channels()];
int rad = ceil(3 * sigma); // kernel radius
int rad = ceil(3 * sigma); // kernel radius
int kw = 2 * rad; // kernel width
int kw = 2 * rad + 1; // kernel width
float c = 1. / (2. * 3.142857 * sigma * sigma); // constant
float c = 1. / (2. * 3.142857 * sigma * sigma); // constant
cout << "c = " << c << endl;
float *kernel = new float[(size_t) (kw * kw)]; // kernel
float *kernel = new float[(size_t) (kw * kw)]; // kernel
float *kernelOut = new float[(size_t) (kw * kw)]; // kernel to be displayed
float *kernelOut = new float[(size_t) (kw * kw)]; // kernel to be displayed
...
@@ -232,9 +234,9 @@ int main(int argc, char **argv)
...
@@ -232,9 +234,9 @@ int main(int argc, char **argv)
}
}
// Display Kernel
// Display Kernel
cv::Mat cvKernelOut(2*rad, 2*rad, CV_32F);
cv::Mat cvKernelOut(kw, kw, CV_32FC1);
convert_layered_to_mat(cvKernelOut, kernelOut);
convert_layered_to_mat(cvKernelOut, kernelOut);
showImage("Kernel", cvKernelOut, 100, 10);
showImage("Kernel", cvKernelOut, 100, 100);
// For camera mode: Make a loop to read in camera frames
// For camera mode: Make a loop to read in camera frames
...
@@ -280,10 +282,10 @@ int main(int argc, char **argv)
...
@@ -280,10 +282,10 @@ int main(int argc, char **argv)
// Allocating memory on the device
// Allocating memory on the device
float *d_imgIn = NULL;
float *d_imgIn = NULL;
float *d_imgOut = NULL;
float *d_imgOut = NULL;
float *d_kernel = NULL;
float *d_kernel = NULL;
cudaMalloc(&d_imgIn, count * sizeof(float));
cudaMalloc(&d_imgIn, count * sizeof(float));
cudaMalloc(&d_imgOut, count * sizeof(float));
cudaMalloc(&d_imgOut, count * sizeof(float));
cudaMalloc(&d_kernel, kw * kw * sizeof(float));
cudaMalloc(&d_kernel, kw * kw * sizeof(float));
// Copying Input image to device, and initializing result to 0
// Copying Input image to device, and initializing result to 0
cudaMemcpy(d_imgIn, imgIn, count * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_imgIn, imgIn, count * sizeof(float), cudaMemcpyHostToDevice);
...
@@ -323,10 +325,10 @@ int main(int argc, char **argv)
...
@@ -323,10 +325,10 @@ int main(int argc, char **argv)
int idx = ix + (iy * w) + (iz * w * h);
int idx = ix + (iy * w) + (iz * w * h);
imgOut[idx] = 0; // initialize
imgOut[idx] = 0; // initialize
float value = 0;
float value = 0;
for(int j = -rad; j < rad; j++) // for each row in kernel
for(int j = -rad; j <= rad; j++) // for each row in kernel
{
{
int iny = max(0, min(iy+j, h-1));
int iny = max(0, min(iy+j, h-1));
for(int i = -rad; i < rad; i++) // for each element in the kernel row
for(int i = -rad; i <= rad; i++) // for each element in the kernel row
{
{
int inx = max(0, min(ix+i, w-1));
int inx = max(0, min(ix+i, w-1));
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment