Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
b0bc636c
Commit
b0bc636c
authored
Mar 05, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Miklos's working code for convolution
Signed-off-by:
Gaurav Kukreja
<
gaurav@gauravk.in
>
parent
636c1600
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
13 deletions
+7
-13
main.cu
miklos/ex06/main.cu
+7
-13
No files found.
miklos/ex06/main.cu
View file @
b0bc636c
...
@@ -18,8 +18,9 @@
...
@@ -18,8 +18,9 @@
// ###
// ###
// ### TODO: For every student of your group, please provide here:
// ### TODO: For every student of your group, please provide here:
// ###
// ###
// ### name, email, login username (for example p123)
// ### Gaurav Kukreja, gaurav.kukreja@tum.de, p058
// ###
// ### Miklos Homolya, miklos.homolya@tum.de, p056
// ### Ravikishore Kommajosyula, r.kommajosyula, p057
// ###
// ###
...
@@ -234,7 +235,8 @@ int main(int argc, char **argv)
...
@@ -234,7 +235,8 @@ int main(int argc, char **argv)
Timer timer; timer.start();
Timer timer; timer.start();
#define CPU
for (int measurement = 0; measurement < repeats; measurement++) {
//#define CPU
#ifdef CPU
#ifdef CPU
for (int c = 0; c < nc; c++) {
for (int c = 0; c < nc; c++) {
for (int y = 0; y < h; y++) {
for (int y = 0; y < h; y++) {
...
@@ -255,29 +257,21 @@ int main(int argc, char **argv)
...
@@ -255,29 +257,21 @@ int main(int argc, char **argv)
float *d_in, *d_out, *d_kern;
float *d_in, *d_out, *d_kern;
size_t nbytes = (size_t)w*h*nc*sizeof(float);
size_t nbytes = (size_t)w*h*nc*sizeof(float);
cudaMalloc(&d_in, nbytes);
cudaMalloc(&d_in, nbytes);
CUDA_CHECK;
cudaMalloc(&d_out, nbytes);
cudaMalloc(&d_out, nbytes);
CUDA_CHECK;
cudaMalloc(&d_kern, (size_t)ksize*ksize*sizeof(float));
cudaMalloc(&d_kern, (size_t)ksize*ksize*sizeof(float));
CUDA_CHECK;
cudaMemcpy(d_in, imgIn, nbytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_in, imgIn, nbytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kern, kern, (size_t)ksize*ksize*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_kern, kern, (size_t)ksize*ksize*sizeof(float), cudaMemcpyHostToDevice);
CUDA_CHECK;
dim3 block(16, 8, 3);
dim3 block(16, 8, 3);
dim3 grid = make_grid(dim3(w, h, nc), block);
dim3 grid = make_grid(dim3(w, h, nc), block);
convolution<<<grid, block>>>(d_in, d_out, d_kern, w, h, nc, r);
convolution<<<grid, block>>>(d_in, d_out, d_kern, w, h, nc, r);
CUDA_CHECK;
cudaMemcpy(imgOut, d_out, nbytes, cudaMemcpyDeviceToHost);
cudaMemcpy(imgOut, d_out, nbytes, cudaMemcpyDeviceToHost);
CUDA_CHECK;
cudaFree(d_in);
cudaFree(d_in);
CUDA_CHECK;
cudaFree(d_out);
cudaFree(d_out);
CUDA_CHECK;
cudaFree(d_kern);
cudaFree(d_kern);
CUDA_CHECK;
#endif
#endif
}
timer.end(); float t = timer.get(); // elapsed time in seconds
timer.end(); float t = timer.get(); // elapsed time in seconds
cout << "time: " <<
t
*1000 << " ms" << endl;
cout << "time: " <<
(t / repeats)
*1000 << " ms" << endl;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment