Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
0d8fa5a9
Commit
0d8fa5a9
authored
Mar 05, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Constant Kernel Memory on Ex6
Signed-off-by:
Gaurav Kukreja
<
gaurav@gauravk.in
>
parent
b0bc636c
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
15 deletions
+19
-15
main.cu
gaurav/2_Assign/constant_gaurav_convolution/main.cu
+2
-1
main.cu
gaurav/2_Assign/global_memory+constant_kernel/main.cu
+17
-14
No files found.
gaurav/2_Assign/constant_gaurav_convolution/main.cu
View file @
0d8fa5a9
...
...
@@ -65,6 +65,7 @@ __device__ void convolveImage(float* imgIn, float* imgOut, int rad, int w, int h
// Index of the output-image element that this kernel works on
int idx = ix + (iy * w) + (iz * w * h);
int kw = 2 * rad + 1;
// check limits
if (idx < w * h * nc)
...
...
@@ -78,7 +79,7 @@ __device__ void convolveImage(float* imgIn, float* imgOut, int rad, int w, int h
{
int inx = gpu_max(0, gpu_min(ix+i, w-1));
int inIdx = inx + (iny * w) + (iz * w * h); // Index of Input Image to be multiplied by corresponding element in kernel
value += imgIn[inIdx] * constKernel[i+rad + ((j+rad) * (
2 * rad + 1
))];
value += imgIn[inIdx] * constKernel[i+rad + ((j+rad) * (
kw
))];
}
}
imgOut[idx] = value;
...
...
gaurav/2_Assign/global_memory+constant_kernel/main.cu
View file @
0d8fa5a9
...
...
@@ -18,8 +18,9 @@
// ###
// ### TODO: For every student of your group, please provide here:
// ###
// ### name, email, login username (for example p123)
// ###
// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058
// ### Miklos Homolya, miklos.homolya@tum.de, p056
// ### Ravikishore Kommajosyula, r.kommajosyula, p057
// ###
...
...
@@ -29,6 +30,7 @@
using namespace std;
#define MAX_KERNEL_WIDTH 20
__constant__ float constKernel[MAX_KERNEL_WIDTH * MAX_KERNEL_WIDTH];
// uncomment to use the camera
...
...
@@ -165,12 +167,6 @@ int main(int argc, char **argv)
int r = ceil(3 * sigma);
int ksize = 2*r + 1;
if(ksize > MAX_KERNEL_WIDTH)
{
cout << "Kernel width more than Max Kernel width viz. 20" << endl;
return -1;
}
float *kern = new float[ksize * ksize];
for (int i = 0; i < 2*r+1; i++) {
double a = i - r;
...
...
@@ -243,7 +239,8 @@ int main(int argc, char **argv)
Timer timer; timer.start();
#define CPU
for (int measurement = 0; measurement < repeats; measurement++) {
//#define CPU
#ifdef CPU
for (int c = 0; c < nc; c++) {
for (int y = 0; y < h; y++) {
...
...
@@ -261,13 +258,13 @@ int main(int argc, char **argv)
}
}
#else
float *d_in, *d_out
, *d_kern
;
float *d_in, *d_out;
size_t nbytes = (size_t)w*h*nc*sizeof(float);
cudaMalloc(&d_in, nbytes);
cudaMalloc(&d_out, nbytes);
//
cudaMalloc(&d_kern, (size_t)ksize*ksize*sizeof(float));
//
cudaMalloc(&d_kern, (size_t)ksize*ksize*sizeof(float));
cudaMemcpy(d_in, imgIn, nbytes, cudaMemcpyHostToDevice);
//
cudaMemcpy(d_kern, kern, (size_t)ksize*ksize*sizeof(float), cudaMemcpyHostToDevice);
//
cudaMemcpy(d_kern, kern, (size_t)ksize*ksize*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(constKernel, kern, (size_t)ksize*ksize*sizeof(float));
dim3 block(16, 8, 3);
dim3 grid = make_grid(dim3(w, h, nc), block);
...
...
@@ -275,10 +272,16 @@ int main(int argc, char **argv)
cudaMemcpy(imgOut, d_out, nbytes, cudaMemcpyDeviceToHost);
cudaFree(d_in);
cudaFree(d_out);
//
cudaFree(d_kern);
//
cudaFree(d_kern);
#endif
}
timer.end(); float t = timer.get(); // elapsed time in seconds
cout << "time: " << t*1000 << " ms" << endl;
cout << "time: " << (t / repeats)*1000 << " ms" << endl;
// show input image
showImage("Input", mIn, 100, 100); // show at position (x_from_left=100,y_from_above=100)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment