Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
cuda_lab
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
cuda_lab
Commits
b4ddfe2e
Commit
b4ddfe2e
authored
Mar 23, 2014
by
Miklós Homolya
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
algorithm for grayscale images
parent
b6af0a33
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
26 deletions
+61
-26
main.cu
miklos/project_nogui/main.cu
+61
-26
No files found.
miklos/project_nogui/main.cu
View file @
b4ddfe2e
...
@@ -66,15 +66,39 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
...
@@ -66,15 +66,39 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
size_t i = x + (size_t)w*y;
size_t i = x + (size_t)w*y;
float ux = ((x+1 < w) ? (U[i + 1] - U[i]) : 0);
float ux = ((x+1 < w) ? (U[i + 1] - U[i]) : 0);
float uy = ((y+1 < h) ? (U[i + w] - U[i]) : 0);
float uy = ((y+1 < h) ? (U[i + w] - U[i]) : 0);
float gn = sqrtf(ux*ux + uy*uy);
float gn = sqrtf(ux*ux + uy*uy + FLT_EPSILON);
if (fabsf(gn) < FLT_EPSILON) {
// Prevent division by zero. Result will be null vector.
vx[i] = 0.0;
vy[i] = 0.0;
} else {
vx[i] = ux / gn;
vx[i] = ux / gn;
vy[i] = uy / gn;
vy[i] = uy / gn;
}
}
}
/**
* nu (Greek letter) function penalizes being outside the interval [0; 1].
*/
__device__ float nu(float u)
{
if (u < 0.f)
return -2.f;
if (u > 1.f)
return +2.f;
return 0.f;
}
/**
* Calculate s(x) = (c1 - f(x))^2 - (c2 - f(x))^2.
*
* @param F original input image (single-channel)
* @param S result (single-channel)
* @param w width of image (pixels)
* @param h height of image (pixels)
*/
__global__ void calculate_S(float *F, float *S, int w, int h, float c1, float c2)
{
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;
if (x < w && y < h) {
size_t i = x + (size_t)w*y;
S[i] = (c1 - F[i])*(c1 - F[i]) - (c2 - F[i])*(c2 - F[i]);
}
}
}
}
...
@@ -82,36 +106,30 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
...
@@ -82,36 +106,30 @@ __global__ void norm_grad(float *U, float *vx, float *vy, int w, int h)
* Update approximation.
* Update approximation.
*
*
* @param U approximation of solution (single-channel)
* @param U approximation of solution (single-channel)
* @param
F original
input image (single-channel)
* @param
S update component from
input image (single-channel)
* @param vx normalized gradient of U (x-coordinate)
* @param vx normalized gradient of U (x-coordinate)
* @param vy normalized gradient of U (y-coordinate)
* @param vy normalized gradient of U (y-coordinate)
* @param w width of image (pixels)
* @param w width of image (pixels)
* @param h height of image (pixels)
* @param h height of image (pixels)
* @param lambda weight of 'similarity' energy component
* @param lambda weight of S
* @param alpha weight of nu
* @param tau update coefficient
* @param tau update coefficient
*/
*/
__global__ void update(float *U, float *
F
, float *vx, float *vy,
__global__ void update(float *U, float *
S
, float *vx, float *vy,
int w, int h, float lambda, float tau)
int w, int h, float lambda, float
alpha, float
tau)
{
{
int x = threadIdx.x + blockDim.x * blockIdx.x;
int x = threadIdx.x + blockDim.x * blockIdx.x;
int y = threadIdx.y + blockDim.y * blockIdx.y;
int y = threadIdx.y + blockDim.y * blockIdx.y;
if (x < w && y < h) {
if (x < w && y < h) {
size_t i = x + (size_t)w*y;
size_t i = x + (size_t)w*y;
// similarity to input image (functional derivative of energy)
float d = F[i] - U[i];
if (fabsf(d) < FLT_EPSILON * U[i])
d = 0.f;
else
d = ((d > 0) ? 1.f : -1.f);
// smoothness (functional derivative of energy)
// smoothness (functional derivative of energy)
float dx_vx = ((x+1 < w) ? vx[i] : 0) - ((x > 0) ? vx[i - 1] : 0);
float dx_vx = ((x+1 < w) ? vx[i] : 0) - ((x > 0) ? vx[i - 1] : 0);
float dy_vy = ((y+1 < h) ? vy[i] : 0) - ((y > 0) ? vy[i - w] : 0);
float dy_vy = ((y+1 < h) ? vy[i] : 0) - ((y > 0) ? vy[i - w] : 0);
float div_v = dx_vx + dy_vy;
float div_v = dx_vx + dy_vy;
// explicit Euler update rule
// explicit Euler update rule
U[i] += tau * (
lambda * d + div_v
);
U[i] += tau * (
div_v - lambda * S[i] - alpha * nu(U[i])
);
}
}
}
}
...
@@ -173,6 +191,14 @@ int main(int argc, char **argv)
...
@@ -173,6 +191,14 @@ int main(int argc, char **argv)
getParam("N", N, argc, argv);
getParam("N", N, argc, argv);
cout << "N: " << N << endl;
cout << "N: " << N << endl;
float c1 = 0.65;
getParam("c1", c1, argc, argv);
cout << "c1: " << c1 << endl;
float c2 = 0.00;
getParam("c2", c2, argc, argv);
cout << "c2: " << c2 << endl;
// Init camera / Load input image
// Init camera / Load input image
#ifdef CAMERA
#ifdef CAMERA
...
@@ -257,22 +283,31 @@ int main(int argc, char **argv)
...
@@ -257,22 +283,31 @@ int main(int argc, char **argv)
dim3 grid = make_grid(dim3(w, h, 1), block);
dim3 grid = make_grid(dim3(w, h, 1), block);
Timer timer; timer.start();
Timer timer; timer.start();
float *d_F, *d_U, *d_vx, *d_vy;
float *d_U, *d_S, *d_vx, *d_vy;
cudaMalloc(&d_F, imageBytes);
cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_U, imageBytes);
cudaMalloc(&d_S, imageBytes);
cudaMalloc(&d_vx, imageBytes);
cudaMalloc(&d_vx, imageBytes);
cudaMalloc(&d_vy, imageBytes);
cudaMalloc(&d_vy, imageBytes);
cudaMemcpy(d_F, imgIn, imageBytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, imgIn, imageBytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, imgIn, imageBytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_S, imgIn, imageBytes, cudaMemcpyHostToDevice);
calculate_S<<< grid, block >>>(d_U, d_S, w, h, c1, c2);
float *S = new float[(size_t)w*h];
cudaMemcpy(S, d_S, imageBytes, cudaMemcpyDeviceToHost);
float S_max = 0.0;
for (size_t i = 0; i < (size_t)w*h; i++)
S_max = max(S_max, fabs(S[i])); // TODO: CPU thing
delete[] S;
float alpha = 0.5 * lambda * S_max;
for (int n = 0; n < N; n++) {
for (int n = 0; n < N; n++) {
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
norm_grad<<< grid, block >>>(d_U, d_vx, d_vy, w, h);
update<<< grid, block >>>(d_U, d_
F, d_vx, d_vy, w, h, lambd
a, tau);
update<<< grid, block >>>(d_U, d_
S, d_vx, d_vy, w, h, lambda, alph
a, tau);
}
}
cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
cudaMemcpy(imgOut, d_U, imageBytes, cudaMemcpyDeviceToHost);
cudaFree(d_F);
cudaFree(d_U);
cudaFree(d_U);
cudaFree(d_S);
cudaFree(d_vx);
cudaFree(d_vx);
cudaFree(d_vy);
cudaFree(d_vy);
timer.end(); float t = timer.get(); // elapsed time in seconds
timer.end(); float t = timer.get(); // elapsed time in seconds
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment