Commit 87170c69 authored by Gaurav Kukreja

Exercise 10, working

Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>
parent 6b572f44
...@@ -2,3 +2,5 @@ main ...@@ -2,3 +2,5 @@ main
image_input.png image_input.png
image_result.png image_result.png
*.swp *.swp
*.cproject
*.project
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="com.nvidia.cuda.ide.toolchain.base.1894445088">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.toolchain.base.1894445088" moduleId="org.eclipse.cdt.core.settings" name="Default">
<externalSettings/>
<extensions>
<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration buildProperties="" id="com.nvidia.cuda.ide.toolchain.base.1894445088" name="Default" parent="org.eclipse.cdt.build.core.emptycfg">
<folderInfo id="com.nvidia.cuda.ide.toolchain.base.1894445088.1260089397" name="/" resourcePath="">
<toolChain id="com.nvidia.cuda.ide.toolchain.base.45116147" name="com.nvidia.cuda.ide.toolchain.base" superClass="com.nvidia.cuda.ide.toolchain.base">
<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.1109642219" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
<builder id="com.nvidia.cuda.ide.builder.526294828" managedBuildOn="false" name="CUDA Toolkit 5.5 Builder.Default" superClass="com.nvidia.cuda.ide.builder"/>
<tool id="nvcc.compiler.base.752460411" name="NVCC Compiler" superClass="nvcc.compiler.base">
<option id="nvcc.compiler.pic.1335045133" superClass="nvcc.compiler.pic"/>
</tool>
<tool id="nvcc.linker.base.1798929077" name="NVCC Linker" superClass="nvcc.linker.base"/>
<tool id="nvcc.archiver.base.1941168379" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
<tool id="com.nvidia.host.assembler.316411086" name="Host Assembler" superClass="com.nvidia.host.assembler"/>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration"/>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="ex10.null.82547195" name="ex10"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
</cproject>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ex10</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>
...@@ -50,7 +50,7 @@ __device__ T gpu_max(T a, T b) ...@@ -50,7 +50,7 @@ __device__ T gpu_max(T a, T b)
return a; return a;
} }
__device__ void calculate_laplacian(float *image, float *jacobian, int w, int h, int nc, float tau) { __global__ void calculate_laplacian(float *image, float *jacobian, int w, int h, int nc, float tau) {
int x = threadIdx.x + blockDim.x * blockIdx.x; int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y; int y = threadIdx.y + blockDim.y * blockIdx.y;
int c = threadIdx.z + blockDim.z * blockIdx.z; int c = threadIdx.z + blockDim.z * blockIdx.z;
...@@ -66,7 +66,7 @@ __device__ void calculate_laplacian(float *image, float *jacobian, int w, int h, ...@@ -66,7 +66,7 @@ __device__ void calculate_laplacian(float *image, float *jacobian, int w, int h,
} }
__device__ void update_operator(float *image, float *jacobian, int w, int h, int nc, float tau) { __global__ void update_operator(float *image, float *jacobian, int w, int h, int nc, float tau) {
int x = threadIdx.x + blockDim.x * blockIdx.x; int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y; int y = threadIdx.y + blockDim.y * blockIdx.y;
int c = threadIdx.z + blockDim.z * blockIdx.z; int c = threadIdx.z + blockDim.z * blockIdx.z;
...@@ -76,15 +76,6 @@ __device__ void update_operator(float *image, float *jacobian, int w, int h, int ...@@ -76,15 +76,6 @@ __device__ void update_operator(float *image, float *jacobian, int w, int h, int
} }
} }
// Kernel that drives the diffusion loop from device code: every thread that
// executes it calls calculate_laplacian and then update_operator once per loop
// pass, forwarding image, jacobian, w, h, nc and tau unchanged.
// NOTE(review): nothing in this body synchronizes threads between the two calls
// or between passes — confirm whether update_operator relies on values that
// other threads write in calculate_laplacian.
__global__ void linear_diffusion(float *image, float *jacobian, int w, int h, int nc,
float tau, int iterations) {
// The counter starts at 1, so the body runs iterations - 1 times
// (not at all when iterations <= 1). NOTE(review): confirm this is intended.
for( int i = 1; i < iterations; i++ ) {
calculate_laplacian(image, jacobian, w, h, nc, tau);
update_operator(image, jacobian, w, h, nc, tau);
}
}
inline int divc(int n, int b) { return (n + b - 1) / b; } inline int divc(int n, int b) { return (n + b - 1) / b; }
inline dim3 make_grid(dim3 whole, dim3 block) inline dim3 make_grid(dim3 whole, dim3 block)
...@@ -224,8 +215,15 @@ int main(int argc, char **argv) ...@@ -224,8 +215,15 @@ int main(int argc, char **argv)
dim3 block(16, 8, 3); dim3 block(16, 8, 3);
dim3 grid = make_grid(dim3(w, h, nc), block); dim3 grid = make_grid(dim3(w, h, nc), block);
linear_diffusion<<<grid, block>>>(d_in, d_out, w, h, nc, tau, iterations);
for(int iter = 0; iter < iterations; iter++)
{
calculate_laplacian <<<grid, block>>> (d_in, d_out, w, h, nc, tau);
update_operator <<<grid, block>>> (d_in, d_out, w, h, nc, tau);
}
cudaMemcpy(imgOut, d_in, nbytes, cudaMemcpyDeviceToHost); cudaMemcpy(imgOut, d_in, nbytes, cudaMemcpyDeviceToHost);
cudaFree(d_in); cudaFree(d_in);
cudaFree(d_out); cudaFree(d_out);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment