Gaurav Kukreja / cuda_lab / Commits / 9313ebfa

Commit 9313ebfa, authored Mar 04, 2014 by Gaurav Kukreja
Working Convolution

Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>

Parent: 5cf429a7
Showing 5 changed files with 32 additions and 10 deletions.
Changed files:
- gaurav/1_Assign/ex6/.main.cu.swp (+0 / -0)
- gaurav/1_Assign/ex6/image_input.png (+0 / -0)
- gaurav/1_Assign/ex6/image_result.png (+0 / -0)
- gaurav/1_Assign/ex6/main (+0 / -0)
- gaurav/1_Assign/ex6/main.cu (+32 / -10)
Deleted files:
- gaurav/1_Assign/ex6/.main.cu.swp (mode 100644 → 0): file deleted
- gaurav/1_Assign/ex6/image_input.png (mode 100644 → 0): 175 KB
- gaurav/1_Assign/ex6/image_result.png (mode 100644 → 0): 90.8 KB
- gaurav/1_Assign/ex6/main (mode 100755 → 0): file deleted
gaurav/1_Assign/ex6/main.cu:

@@ -33,8 +33,27 @@ using namespace std;
 
 #define USING_GPU
 
+template<typename T>
+__device__ T gpu_min(T a, T b)
+{
+    if (a < b)
+        return a;
+    else
+        return b;
+}
+
+template<typename T>
+__device__ T gpu_max(T a, T b)
+{
+    if (a < b)
+        return b;
+    else
+        return a;
+}
+
 // Image Gradient
-__device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int rad, int w, size_t h, size_t nc)
+__device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int rad, int w, int h, int nc)
 {
     int ix = threadIdx.x + blockDim.x * blockIdx.x;
     int iy = threadIdx.y + blockDim.y * blockIdx.y;
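Note: the gpu_min/gpu_max templates added in this hunk stand in for std::min/std::max on the device; the convolution loop in the next hunk uses them to clamp neighbor coordinates to the image border. Purely for illustration, the two calls could be folded into a single helper; gpu_clamp below is a sketch and is not part of the commit, and it relies on the gpu_min/gpu_max defined above.

// Illustration only: a clamp helper built from the gpu_min/gpu_max added above.
// Equivalent to the gpu_max(0, gpu_min(v, hi)) pattern used in the next hunk.
template<typename T>
__device__ T gpu_clamp(T v, T lo, T hi)
{
    return gpu_max(lo, gpu_min(v, hi));
}
// Usage sketch: int inx = gpu_clamp(ix + i, 0, w - 1);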
@@ -42,24 +61,27 @@ __device__ void convolveImage(float* imgIn, float* kernel, float* imgOut, int ra
     int idx = ix + (iy * w) + (iz * w * h);
 
-    if (idx < count)
+    if (ix < w && iy < h && iz < nc)
     {
         imgOut[idx] = 0;
+        float value = 0;
         for(int j = -rad; j < rad; j++)
         {
+            int iny = gpu_max(0, gpu_min(iy+j, h-1));
             for(int i = -rad; i < rad; i++)
             {
-                size_t inIdx = idx + i + (w * j);
-                if ((ix + i + (w * j)) > 0 && (ix + i + (w * j)) < n_pixels)
-                    imgOut[idx] += imgIn[inIdx] * kernel[i+rad + ((j+rad) * rad)];
+                int inx = gpu_max(0, gpu_min(ix+i, w-1));
+                int inIdx = inx + iny * w + iz * w * h;
+                value += imgIn[inIdx] * kernel[i+rad + ((j+rad) * rad)];
             }
         }
+        imgOut[idx] = value;
     }
 }
 
-__global__ void callKernel(float* imgIn, float* kernel, float* imgOut, int rad, int w, size_t n_pixels, size_t count)
+__global__ void callKernel(float* imgIn, float* kernel, float* imgOut, int rad, int w, int h, int nc)
 {
-    convolveImage(imgIn, kernel, imgOut, rad, w, n_pixels, count);
+    convolveImage(imgIn, kernel, imgOut, rad, w, h, nc);
 }
 
 int main(int argc, char **argv)
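For reference, what each GPU thread computes after this change can be checked against a plain CPU loop. The sketch below is not part of the commit; it assumes the same layout the diff implies (row-major pixels, one plane per channel) and indexes the filter exactly as the device code does.

// CPU reference sketch of the clamped ("replicate border") convolution above.
// Assumes imgIn/imgOut hold w*h*nc floats, channel-planar, row-major.
#include <algorithm>

void convolveReference(const float* imgIn, const float* kernel, float* imgOut,
                       int rad, int w, int h, int nc)
{
    for (int iz = 0; iz < nc; iz++)
        for (int iy = 0; iy < h; iy++)
            for (int ix = 0; ix < w; ix++)
            {
                float value = 0.0f;
                for (int j = -rad; j < rad; j++)
                {
                    int iny = std::max(0, std::min(iy + j, h - 1));     // clamp row
                    for (int i = -rad; i < rad; i++)
                    {
                        int inx = std::max(0, std::min(ix + i, w - 1)); // clamp column
                        int inIdx = inx + iny * w + iz * w * h;
                        // Filter indexed exactly as in the device code above.
                        value += imgIn[inIdx] * kernel[i + rad + ((j + rad) * rad)];
                    }
                }
                imgOut[ix + iy * w + iz * w * h] = value;
            }
}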
@@ -250,8 +272,8 @@ int main(int argc, char **argv)
     size_t count = w * h * nc;
 
     // Thread Dimensions
-    dim3 block = dim3(128, nc, 1);
-    dim3 grid = dim3((n_pixels + block.x - 1) / block.x, 1, 1);
+    dim3 block = dim3(16, 4, nc);
+    dim3 grid = dim3((w + block.x - 1) / block.x, (h + block.y - 1) / block.y, 1);
 
     // Allocating memory on the device
     float *d_imgIn = NULL;
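The launch geometry moves from a 1D sweep over n_pixels to a 2D tiling of the image: each block covers a 16x4 pixel tile across all nc channels, and the grid rounds the image dimensions up to whole tiles. A small worked sketch of that arithmetic follows; the 640x480, 3-channel size is only an example, not taken from the commit.

// Example only: launch geometry for a hypothetical 640 x 480 RGB image (nc = 3).
int w = 640, h = 480, nc = 3;
dim3 block(16, 4, nc);                    // 16 * 4 * 3 = 192 threads per block
dim3 grid((w + block.x - 1) / block.x,    // ceil(640 / 16) = 40 tiles across
          (h + block.y - 1) / block.y,    // ceil(480 / 4)  = 120 tiles down
          1);
// Every pixel/channel gets one thread; threads past the right or bottom edge
// are filtered out by the new "ix < w && iy < h && iz < nc" guard.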
@@ -266,7 +288,7 @@ int main(int argc, char **argv)
     cudaMemcpy(d_kernel, kernel, 2 * rad * 2 * rad * sizeof(float), cudaMemcpyHostToDevice);
 
     // Calling Kernel
-    callKernel <<< grid, block >>> (d_imgIn, d_kernel, d_imgOut, rad, w, n_pixels, count);
+    callKernel <<< grid, block >>> (d_imgIn, d_kernel, d_imgOut, rad, w, h, nc);
 
     // Copying result back
     cudaMemcpy(imgOut, d_imgOut, count * sizeof(float), cudaMemcpyDeviceToHost);
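Pieced together from the hunks above, the host-side flow around the new launch looks roughly like the sketch below. The cudaMalloc calls and the input copy are not shown by the commit; their sizes are assumptions inferred from the cudaMemcpy lines in the diff, and w, h, nc, rad, imgIn, imgOut, and kernel are taken to be in scope in main.

// Host-side sketch (assumptions noted above); error checking omitted for brevity.
size_t count = w * h * nc;                            // floats per image, all channels

float *d_imgIn = NULL, *d_imgOut = NULL, *d_kernel = NULL;
cudaMalloc((void**)&d_imgIn,  count * sizeof(float));
cudaMalloc((void**)&d_imgOut, count * sizeof(float));
cudaMalloc((void**)&d_kernel, 2 * rad * 2 * rad * sizeof(float));

cudaMemcpy(d_imgIn,  imgIn,  count * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel, 2 * rad * 2 * rad * sizeof(float), cudaMemcpyHostToDevice);

dim3 block(16, 4, nc);
dim3 grid((w + block.x - 1) / block.x, (h + block.y - 1) / block.y, 1);
callKernel<<<grid, block>>>(d_imgIn, d_kernel, d_imgOut, rad, w, h, nc);

cudaMemcpy(imgOut, d_imgOut, count * sizeof(float), cudaMemcpyDeviceToHost);

cudaFree(d_imgIn);
cudaFree(d_imgOut);
cudaFree(d_kernel);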