Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
c97f960e
Commit
c97f960e
authored
Mar 05, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimized exercise 5
Signed-off-by:
Gaurav Kukreja
<
gmkukreja@gmail.com
>
parent
f88fdaee
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
6 deletions
+7
-6
main.cu
gaurav/1_Assign/ex5/main.cu
+7
-6
No files found.
gaurav/1_Assign/ex5/main.cu
View file @
c97f960e
...
@@ -38,14 +38,15 @@ __device__ void gradImage(float* imgIn, float* forwardDiffX, float* forwardDiffY
...
@@ -38,14 +38,15 @@ __device__ void gradImage(float* imgIn, float* forwardDiffX, float* forwardDiffY
int ix = threadIdx.x + blockDim.x * blockIdx.x;
int ix = threadIdx.x + blockDim.x * blockIdx.x;
int iy = threadIdx.y + blockDim.y * blockIdx.y;
int iy = threadIdx.y + blockDim.y * blockIdx.y;
imgOut[ix + (iy * w)] = 0;
float value = 0.;
for(int i = 0; i < nc; i++)
for(int i = 0; i < nc; i++)
{
{
forwardDiffX[ix + (iy * w) + (i * w * h)] = (ix < (w-2)) ? (imgIn[ix + (iy * w) + (i * w * h) + 1] - imgIn[ix + (iy * w) + (i * w * h)]) : 0;
int idx = ix + iy*w + i*w*h;
forwardDiffY[ix + (iy * w) + (i * w * h)] = (iy < (h-2)) ? (imgIn[ix + ((iy + 1) * w) + (i * w * h)] - imgIn[ix + (iy * w) + (i * w * h)]) : 0;
forwardDiffX[idx] = (ix < (w-2)) ? (imgIn[idx + 1] - imgIn[idx]) : 0;
imgOut[ix + (iy * w)] += pow(forwardDiffX[ix + (iy * w) + (i * w * h)] , 2) + pow(forwardDiffY[ix + (iy * w) + (i * w * h)] , 2);
forwardDiffY[idx] = (iy < (h-2)) ? (imgIn[idx + w] - imgIn[idx]) : 0;
value += pow(forwardDiffX[idx] , 2) + pow(forwardDiffY[idx] , 2);
}
}
imgOut[ix + (iy * w)] = sqrt(
imgOut[ix + (iy * w)]
);
imgOut[ix + (iy * w)] = sqrt(
value
);
}
}
__global__ void callKernel(float* imgIn, float* forwardDiffX, float* forwardDiffY, float* imgOut, int w, int h, int nc)
__global__ void callKernel(float* imgIn, float* forwardDiffX, float* forwardDiffY, float* imgOut, int w, int h, int nc)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment