Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
S
swe_solvers
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
swe_solvers
Commits
998480e1
Commit
998480e1
authored
Jan 19, 2014
by
Gaurav Kukreja
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed bugs in FWaveVec.hpp
Signed-off-by:
Gaurav Kukreja
<
gaurav.kukreja@tum.de
>
parent
c81126f6
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
406 additions
and
93 deletions
+406
-93
FWaveVec.hpp
src/solver/FWaveVec.hpp
+406
-93
No files found.
src/solver/FWaveVec.hpp
View file @
998480e1
/**
* FWave
Cuda
.h
* FWave
Vec
.h
*
****
**** This is a
C++ wrapper for the Cuda
implementation of the F-Wave solver (FWave.hpp).
**** This is a
vectorizable C++
implementation of the F-Wave solver (FWave.hpp).
****
*
* Created on: Nov 13, 2012
* Last Update:
Nov 16, 2012
* Last Update:
Dec 28, 2013
*
****
*
* Author: Sebastian Rettenberger
* Homepage: http://www5.in.tum.de/wiki/index.php/Sebastian_Rettenberger,_M.Sc.
* E-Mail: rettenbs AT in.tum.de
* Some optimzations: Michael Bader
* Homepage: http://www5.in.tum.de/wiki/index.php/Michael_Bader
* E-Mail: bader AT in.tum.de
*
****
*
...
...
@@ -25,13 +28,10 @@
* volume={24},
* number={3},
* pages={955--978},
* year={2002},
* publisher={Citeseer}}
* year={2002}}
*
* @book{leveque2002finite,
* Author = {LeVeque, R. J.},
* Date-Added = {2011-09-13 14:09:31 +0000},
* Date-Modified = {2011-10-31 09:46:40 +0000},
* Publisher = {Cambridge University Press},
* Title = {Finite Volume Methods for Hyperbolic Problems},
* Volume = {31},
...
...
@@ -49,7 +49,12 @@
#ifndef FWAVEVEC_HPP_
#define FWAVEVEC_HPP_
//define vlength - test and then push to separate header file, common to solver and block classes
#define vlength 8
#include <cmath>
#include <cilk/cilk.h>
#include <malloc.h>
namespace
solver
{
...
...
@@ -62,19 +67,93 @@ class FWaveVec
{
private
:
const
T
dryTol
;
const
T
gravity
;
const
T
half_gravity
;
// 0.5 * gravity constant
const
T
sqrt_gravity
;
// square root of the gravity constant
const
T
zeroTol
;
public
:
FWaveVec
(
T
i_dryTol
=
(
T
)
100
,
T
*
uLeft
;
T
*
uRight
;
T
*
waveSpeeds0
;
T
*
waveSpeeds1
;
T
*
fWaves0
;
T
*
fWaves1
;
T
*
sqrt_hLeft
;
T
*
sqrt_hRight
;
T
*
characteristicSpeed0
;
T
*
characteristicSpeed1
;
T
*
hRoe
;
T
*
sqrt_hRoe
;
T
*
uRoe
;
T
*
roeSpeed0
;
T
*
roeSpeed1
;
T
*
fDif0
;
T
*
fDif1
;
T
*
inverseSpeedDiff
;
//public:
/**
* FWaveVec Constructor, takes three problem parameters
* @param dryTol "dry tolerance": if the water height falls below dryTol, wall boundary conditions are applied (default value is 100)
* @param gravity takes the value of the gravity constant (default value is 9.81 m/s^2)
* @param zeroTol computed f-waves with an absolute value < zeroTol are treated as static waves (default value is 10^{-7})
*/
FWaveVec
(
T
i_dryTol
=
(
T
)
1.0
,
T
i_gravity
=
(
T
)
9.81
,
T
i_zeroTol
=
(
T
)
0.0000001
)
:
dryTol
(
i_dryTol
),
gravity
(
i_gravity
),
half_gravity
(
(
T
)
.5
*
i_gravity
),
sqrt_gravity
(
std
::
sqrt
(
i_gravity
)
),
zeroTol
(
i_zeroTol
)
{
uLeft
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
uRight
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
waveSpeeds0
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
waveSpeeds1
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
fWaves0
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
fWaves1
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
sqrt_hLeft
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
sqrt_hRight
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
characteristicSpeed0
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
characteristicSpeed1
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
hRoe
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
sqrt_hRoe
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
uRoe
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
roeSpeed0
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
roeSpeed1
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
fDif0
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
fDif1
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
inverseSpeedDiff
=
(
T
*
)
malloc
(
sizeof
(
T
)
*
vlength
);
}
~
FWaveVec
()
{
free
(
uLeft
);
free
(
uRight
);
free
(
waveSpeeds0
);
free
(
waveSpeeds1
);
free
(
fWaves0
);
free
(
fWaves1
);
free
(
sqrt_hLeft
);
free
(
sqrt_hRight
);
free
(
characteristicSpeed0
);
free
(
characteristicSpeed1
);
free
(
hRoe
);
free
(
sqrt_hRoe
);
free
(
uRoe
);
free
(
roeSpeed0
);
free
(
roeSpeed1
);
free
(
fDif0
);
free
(
fDif1
);
free
(
inverseSpeedDiff
);
}
/**
* takes the water height, discharge and bathymatry in the left and right cell
* and computes net updates (left and right going waves) according to the f-wave approach.
* It also returns the maximum wave speed.
*/
void
computeNetUpdates
(
T
i_hLeft
,
T
i_hRight
,
T
i_huLeft
,
T
i_huRight
,
T
i_bLeft
,
T
i_bRight
,
...
...
@@ -86,27 +165,33 @@ public:
T
&
o_maxWaveSpeed
)
const
{
// determine the wet dry state and corr. values, if necessary.
if
(
i_hLeft
<
dryTol
&&
i_hRight
<
dryTol
)
{
// Dry/Dry case
// Set dummy values such that the result is zero
i_hLeft
=
dryTol
;
i_huLeft
=
0.
;
i_bLeft
=
0.
;
i_hRight
=
dryTol
;
i_huRight
=
0.
;
i_bRight
=
0.
;
}
else
if
(
i_hLeft
<
dryTol
)
{
if
(
i_hLeft
>=
dryTol
)
{
if
(
i_hRight
<
dryTol
)
{
// Wet/Dry case
// Set values according to wall boundary condition
i_hLeft
=
i_hRight
;
i_huLeft
=
-
i_huRight
;
i_bLeft
=
i_bRight
;
}
else
if
(
i_hRight
<
dryTol
)
{
}
}
else
if
(
i_hRight
>=
dryTol
)
{
// Dry/Wet case
// Set values according to wall boundary condition
i_hRight
=
i_hLeft
;
i_huRight
=
-
i_huLeft
;
i_bRight
=
i_bLeft
;
}
}
else
{
// Dry/Dry case
// Set dummy values such that the result is zero
i_hLeft
=
dryTol
;
i_huLeft
=
0.
;
i_bLeft
=
0.
;
i_hRight
=
dryTol
;
i_huRight
=
0.
;
i_bRight
=
0.
;
};
//! velocity on the left side of the edge
T
uLeft
=
i_huLeft
/
i_hLeft
;
T
uLeft
=
i_huLeft
/
i_hLeft
;
// 1 FLOP (div)
//! velocity on the right side of the edge
T
uRight
=
i_huRight
/
i_hRight
;
T
uRight
=
i_huRight
/
i_hRight
;
// 1 FLOP (div)
//! wave speeds of the f-waves
T
waveSpeeds0
=
0.
,
waveSpeeds1
=
0.
;
...
...
@@ -117,9 +202,9 @@ public:
uLeft
,
uRight
,
i_bLeft
,
i_bRight
,
waveSpeeds0
,
waveSpeeds1
);
waveSpeeds0
,
waveSpeeds1
);
// 20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
//!
where
to store the two f-waves
//!
variables
to store the two f-waves
T
fWaves0
=
0.
,
fWaves1
=
0.
;
//compute the decomposition into f-waves
...
...
@@ -129,56 +214,197 @@ public:
i_bLeft
,
i_bRight
,
waveSpeeds0
,
waveSpeeds1
,
fWaves0
,
fWaves1
);
fWaves0
,
fWaves1
);
// 23 FLOPs (incl. 1 div)
//compute the net-updates
T
hUpdateLeft
=
0.
;
T
hUpdateRight
=
0.
;
T
huUpdateLeft
=
0.
;
T
huUpdateRight
=
0.
;
o_
hUpdateLeft
=
0.
;
o_
hUpdateRight
=
0.
;
o_
huUpdateLeft
=
0.
;
o_
huUpdateRight
=
0.
;
//1st wave family
if
(
waveSpeeds0
<
-
zeroTol
)
{
//left going
hUpdateLeft
+=
fWaves0
;
huUpdateLeft
+=
fWaves0
*
waveSpeeds0
;
o_
hUpdateLeft
+=
fWaves0
;
o_huUpdateLeft
+=
fWaves0
*
waveSpeeds0
;
// 3 FLOPs (assume left going wave ...)
}
else
if
(
waveSpeeds0
>
zeroTol
)
{
//right going
hUpdateRight
+=
fWaves0
;
huUpdateRight
+=
fWaves0
*
waveSpeeds0
;
o_
hUpdateRight
+=
fWaves0
;
o_
huUpdateRight
+=
fWaves0
*
waveSpeeds0
;
}
else
{
//split waves
hUpdateLeft
+=
(
T
)
.5
*
fWaves0
;
huUpdateLeft
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
hUpdateRight
+=
(
T
)
.5
*
fWaves0
;
huUpdateRight
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
else
{
//split waves
, if waveSpeeds0 close to 0
o_
hUpdateLeft
+=
(
T
)
.5
*
fWaves0
;
o_
huUpdateLeft
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
o_
hUpdateRight
+=
(
T
)
.5
*
fWaves0
;
o_
huUpdateRight
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
}
//2nd wave family
if
(
waveSpeeds1
>
zeroTol
)
{
//right going
hUpdateRight
+=
fWaves1
;
huUpdateRight
+=
fWaves1
*
waveSpeeds1
;
o_
hUpdateRight
+=
fWaves1
;
o_huUpdateRight
+=
fWaves1
*
waveSpeeds1
;
// 3 FLOPs (assume right going wave ...)
}
else
if
(
waveSpeeds1
<
-
zeroTol
)
{
//left going
hUpdateLeft
+=
fWaves1
;
huUpdateLeft
+=
fWaves1
*
waveSpeeds1
;
o_
hUpdateLeft
+=
fWaves1
;
o_
huUpdateLeft
+=
fWaves1
*
waveSpeeds1
;
}
else
{
//split waves
hUpdateLeft
+=
(
T
)
.5
*
fWaves1
;
huUpdateLeft
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
hUpdateRight
+=
(
T
)
.5
*
fWaves1
;
huUpdateRight
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
o_hUpdateLeft
+=
(
T
)
.5
*
fWaves1
;
o_huUpdateLeft
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
o_hUpdateRight
+=
(
T
)
.5
*
fWaves1
;
o_huUpdateRight
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
}
//compute maximum wave speed (-> CFL-condition)
o_maxWaveSpeed
=
std
::
max
(
std
::
abs
(
waveSpeeds0
)
,
std
::
abs
(
waveSpeeds1
)
);
// 3 FLOPs (2 abs, 1 max)
//========================
// 54 FLOPs (3 sqrt, 4 div, 2 abs, 3 min/max)
}
//E.Drossos - vectorized code using Intel Cilk
//E.Drossos - all flop counts refer to one iteration, for total num flops has to be calculated by vlength
/**
* takes the water height, discharge and bathymatry in the left and right cell
* and computes net updates (left and right going waves) according to the f-wave approach.
* It also returns the maximum wave speed.
*/
void
computeNetUpdates_cilk
(
T
*
i_hLeft
,
T
*
i_hRight
,
T
*
i_huLeft
,
T
*
i_huRight
,
T
*
i_bLeft
,
T
*
i_bRight
,
T
*
o_hUpdateLeft
,
T
*
o_hUpdateRight
,
T
*
o_huUpdateLeft
,
T
*
o_huUpdateRight
,
T
&
o_maxWaveSpeed
)
const
{
// determine the wet dry state and corr. values, if necessary.
if
(
i_hLeft
[
0
:
vlength
]
>=
dryTol
)
{
if
(
i_hRight
[
0
:
vlength
]
<
dryTol
)
{
// Wet/Dry case
// Set values according to wall boundary condition
i_hLeft
[
0
:
vlength
]
=
i_hRight
[
0
:
vlength
];
i_huLeft
[
0
:
vlength
]
=
-
i_huRight
[
0
:
vlength
];
i_bLeft
[
0
:
vlength
]
=
i_bRight
[
0
:
vlength
];
}
}
else
if
(
i_hRight
[
0
:
vlength
]
>=
dryTol
)
{
// Dry/Wet case
// Set values according to wall boundary condition
i_hRight
[
0
:
vlength
]
=
i_hLeft
[
0
:
vlength
];
i_huRight
[
0
:
vlength
]
=
-
i_huLeft
[
0
:
vlength
];
i_bRight
[
0
:
vlength
]
=
i_bLeft
[
0
:
vlength
];
}
else
{
// Dry/Dry case
// Set dummy values such that the result is zero
i_hLeft
[
0
:
vlength
]
=
dryTol
;
i_huLeft
[
0
:
vlength
]
=
0.
;
i_bLeft
[
0
:
vlength
]
=
0.
;
//can use __sec_reduce_all_zero(a[0:vlength]) instead
i_hRight
[
0
:
vlength
]
=
dryTol
;
i_huRight
[
0
:
vlength
]
=
0.
;
i_bRight
[
0
:
vlength
]
=
0.
;
};
//! velocity on the left side of the edge
//TODO: Use intrinsics to allocate and assure alligned
// T* uLeft = (T*)malloc(vlength*sizeof(T));
uLeft
[
0
:
vlength
]
=
(
i_huLeft
[
0
:
vlength
])
/
(
i_hLeft
[
0
:
vlength
]);
// 1 FLOP (div)
//! velocity on the right side of the edge
//TODO: Use intrinsics to allocate
// T* uRight = (T*)malloc(vlength*sizeof(T));
uRight
[
0
:
vlength
]
=
(
i_huRight
[
0
:
vlength
])
/
(
i_hRight
[
0
:
vlength
]);
// 1 FLOP (div)
//! wave speeds of the f-waves
//TODO: Use intrinsics to allocate
// T* waveSpeeds0 = (T*)malloc(vlength*sizeof(T));
// T* waveSpeeds1 = (T*)malloc(vlength*sizeof(T));
waveSpeeds0
[
0
:
vlength
]
=
0.
;
waveSpeeds1
[
0
:
vlength
]
=
0.
;
//compute the wave speeds
// E.Drossos - function vectorized
fWaveComputeWaveSpeeds_cilk
(
&
i_hLeft
[
0
],
&
i_hRight
[
0
],
&
i_huLeft
[
0
],
&
i_huRight
[
0
],
&
uLeft
[
0
],
&
uRight
[
0
],
&
i_bLeft
[
0
],
&
i_bRight
[
0
],
&
waveSpeeds0
[
0
],
&
waveSpeeds1
[
0
]
);
// 20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
//! variables to store the two f-waves
// T* fWaves0 = (T*)malloc(vlength*sizeof(T));
// T* fWaves1 = (T*)malloc(vlength*sizeof(T));
fWaves0
[
0
:
vlength
]
=
0.
;
fWaves1
[
0
:
vlength
]
=
0.
;
//compute the decomposition into f-waves
// E.Drossos - function vectorized
fWaveComputeWaveDecomposition_cilk
(
&
i_hLeft
[
0
],
&
i_hRight
[
0
],
&
i_huLeft
[
0
],
&
i_huRight
[
0
],
&
uLeft
[
0
],
&
uRight
[
0
],
&
i_bLeft
[
0
],
&
i_bRight
[
0
],
&
waveSpeeds0
[
0
],
&
waveSpeeds1
[
0
],
&
fWaves0
[
0
],
&
fWaves1
[
0
]);
// 23 FLOPs (incl. 1 div)
//compute the net-updates
//alternatively use __sec_reduce_all_zero(a[0:vlength])
o_hUpdateLeft
[
0
:
vlength
]
=
0.
;
o_hUpdateRight
[
0
:
vlength
]
=
0.
;
o_huUpdateLeft
[
0
:
vlength
]
=
0.
;
o_huUpdateRight
[
0
:
vlength
]
=
0.
;
//1st wave family
if
(
waveSpeeds0
[
0
:
vlength
]
<
-
zeroTol
)
{
//left going
o_hUpdateLeft
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
];
o_huUpdateLeft
[
0
:
vlength
]
+=
(
fWaves0
[
0
:
vlength
])
*
(
waveSpeeds0
[
0
:
vlength
]);
// 3 FLOPs (assume left going wave ...)
}
else
if
(
waveSpeeds0
[
0
:
vlength
]
>
zeroTol
)
{
//right going
o_hUpdateRight
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
];
o_huUpdateRight
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
];
}
else
{
//split waves, if waveSpeeds0 close to 0
o_hUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]);
o_huUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
]);
o_hUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]);
o_huUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
]);
}
// Set output variables
o_hUpdateLeft
=
hUpdateLeft
;
o_hUpdateRight
=
hUpdateRight
;
o_huUpdateLeft
=
huUpdateLeft
;
o_huUpdateRight
=
huUpdateRight
;
//2nd wave family
if
(
waveSpeeds1
[
0
:
vlength
]
>
zeroTol
)
{
//right going
o_hUpdateRight
[
0
:
vlength
]
+=
fWaves1
[
0
:
vlength
];
o_huUpdateRight
[
0
:
vlength
]
+=
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
]);
// 3 FLOPs (assume right going wave ...)
}
else
if
(
waveSpeeds1
[
0
:
vlength
]
<
-
zeroTol
)
{
//left going
o_hUpdateLeft
[
0
:
vlength
]
+=
fWaves1
[
0
:
vlength
];
o_huUpdateLeft
[
0
:
vlength
]
+=
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
]);
}
else
{
//split waves
o_hUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]);
o_huUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
]);
o_hUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]);
o_huUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
]);
}
//compute maximum wave speed (-> CFL-condition)
o_maxWaveSpeed
=
std
::
max
(
std
::
abs
(
waveSpeeds0
)
,
std
::
abs
(
waveSpeeds1
)
);
//o_maxWaveSpeed = std::max( std::abs(waveSpeeds0) , std::abs(waveSpeeds1) );
//o_maxWaveSpeed = __sec_reduce_max(std::max( std::abs(waveSpeeds0[0:vlength]) , std::abs(waveSpeeds1[0:vlength]) ));
waveSpeeds0
[
0
:
vlength
]
=
std
::
abs
(
waveSpeeds0
[
0
:
vlength
]);
waveSpeeds1
[
0
:
vlength
]
=
std
::
abs
(
waveSpeeds1
[
0
:
vlength
]);
T
o_maxWaveSpeed0
;
T
o_maxWaveSpeed1
;
o_maxWaveSpeed0
=
__sec_reduce_max
(
waveSpeeds0
[
0
:
vlength
]);
o_maxWaveSpeed1
=
__sec_reduce_max
(
waveSpeeds1
[
0
:
vlength
]);
o_maxWaveSpeed
=
std
::
max
(
o_maxWaveSpeed0
,
o_maxWaveSpeed1
);
// 3 FLOPs (2 abs, 1 max)
//========================
// 54 FLOPs (3 sqrt, 4 div, 2 abs, 3 min/max)
}
inline
void
fWaveComputeWaveSpeeds
(
const
T
i_hLeft
,
const
T
i_hRight
,
...
...
@@ -188,25 +414,81 @@ public:
T
&
o_waveSpeed0
,
T
&
o_waveSpeed1
)
const
{
//compute eigenvalues of the jacobian matrices in states Q_{i-1} and Q_{i}
T
characteristicSpeed0
=
0.
,
characteristicSpeed1
=
0.
;
characteristicSpeed0
=
i_uLeft
-
std
::
sqrt
(
gravity
*
i_hLeft
);
characteristicSpeed1
=
i_uRight
+
std
::
sqrt
(
gravity
*
i_hRight
);
//compute "Roe speeds"
T
hRoe
=
(
T
)
.5
*
(
i_hRight
+
i_hLeft
);
T
uRoe
=
i_uLeft
*
std
::
sqrt
(
i_hLeft
)
+
i_uRight
*
std
::
sqrt
(
i_hRight
);
uRoe
/=
std
::
sqrt
(
i_hLeft
)
+
std
::
sqrt
(
i_hRight
);
T
roeSpeed0
=
0.
,
roeSpeed1
=
0.
;
roeSpeed0
=
uRoe
-
std
::
sqrt
(
gravity
*
hRoe
);
roeSpeed1
=
uRoe
+
std
::
sqrt
(
gravity
*
hRoe
);
//computer eindfeldt speeds
o_waveSpeed0
=
std
::
min
(
characteristicSpeed0
,
roeSpeed0
);
o_waveSpeed1
=
std
::
max
(
characteristicSpeed1
,
roeSpeed1
);
// helper variables for sqrt of h:
T
sqrt_hLeft
=
std
::
sqrt
(
i_hLeft
);
// 1 FLOP (sqrt)
T
sqrt_hRight
=
std
::
sqrt
(
i_hRight
);
// 1 FLOP (sqrt)
// compute eigenvalues of the jacobian matrices
// in states Q_{i-1} and Q_{i}
T
characteristicSpeed0
=
i_uLeft
-
sqrt_gravity
*
sqrt_hLeft
;
// 2 FLOPs
T
characteristicSpeed1
=
i_uRight
+
sqrt_gravity
*
sqrt_hRight
;
// 2 FLOPs
// compute "Roe averages"
T
hRoe
=
(
T
)
.5
*
(
i_hRight
+
i_hLeft
);
// 2 FLOPs
T
sqrt_hRoe
=
std
::
sqrt
(
hRoe
);
// 1 FLOP (sqrt)
T
uRoe
=
i_uLeft
*
sqrt_hLeft
+
i_uRight
*
sqrt_hRight
;
// 3 FLOPs
uRoe
/=
sqrt_hLeft
+
sqrt_hRight
;
// 2 FLOPs (1 div)
// compute "Roe speeds" from Roe averages
T
roeSpeed0
=
uRoe
-
sqrt_gravity
*
sqrt_hRoe
;
// 2 FLOPs
T
roeSpeed1
=
uRoe
+
sqrt_gravity
*
sqrt_hRoe
;
// 2 FLOPs
// compute Eindfeldt speeds (returned as output parameters)
o_waveSpeed0
=
std
::
min
(
characteristicSpeed0
,
roeSpeed0
);
// 1 FLOP (min)
o_waveSpeed1
=
std
::
max
(
characteristicSpeed1
,
roeSpeed1
);
// 1 FLOP (max)
//==============
//20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
}
inline
void
fWaveComputeWaveSpeeds_cilk
(
const
T
*
i_hLeft
,
const
T
*
i_hRight
,
const
T
*
i_huLeft
,
const
T
*
i_huRight
,
const
T
*
i_uLeft
,
const
T
*
i_uRight
,
const
T
*
i_bLeft
,
const
T
*
i_bRight
,
T
*
o_waveSpeed0
,
T
*
o_waveSpeed1
)
const
{
// helper variables for sqrt of h:
// define arrays for helpers
// T* sqrt_hLeft = (T*)malloc(vlength*sizeof(T));
// T* sqrt_hRight = (T*)malloc(vlength*sizeof(T));
sqrt_hLeft
[
0
:
vlength
]
=
std
::
sqrt
(
i_hLeft
[
0
:
vlength
]);
// 1 FLOP (sqrt)
sqrt_hRight
[
0
:
vlength
]
=
std
::
sqrt
(
i_hRight
[
0
:
vlength
]);
// 1 FLOP (sqrt)
// compute eigenvalues of the jacobian matrices
// in states Q_{i-1} and Q_{i}
// T* characteristicSpeed0 = (T*)malloc(vlength*sizeof(T));
// T* characteristicSpeed1 = (T*)malloc(vlength*sizeof(T));
characteristicSpeed0
[
0
:
vlength
]
=
i_uLeft
[
0
:
vlength
]
-
sqrt_gravity
*
sqrt_hLeft
[
0
:
vlength
];
// 2 FLOPs
characteristicSpeed1
[
0
:
vlength
]
=
i_uRight
[
0
:
vlength
]
+
sqrt_gravity
*
sqrt_hRight
[
0
:
vlength
];
// 2 FLOPs
// compute "Roe averages"
// E.Drossos - Define arrays for hRoe, sqrt_hRoe, uRoe
// T* hRoe = (T*)malloc(vlength*sizeof(T));
// T* sqrt_hRoe = (T*)malloc(vlength*sizeof(T));
// T* uRoe = (T*)malloc(vlength*sizeof(T));
hRoe
[
0
:
vlength
]
=
(
T
)
.5
*
(
i_hRight
[
0
:
vlength
]
+
i_hLeft
[
0
:
vlength
]);
// 2 FLOPs
sqrt_hRoe
[
0
:
vlength
]
=
std
::
sqrt
(
hRoe
[
0
:
vlength
]);
// 1 FLOP (sqrt)
uRoe
[
0
:
vlength
]
=
i_uLeft
[
0
:
vlength
]
*
sqrt_hLeft
[
0
:
vlength
]
+
i_uRight
[
0
:
vlength
]
*
sqrt_hRight
[
0
:
vlength
];
// 3 FLOPs
uRoe
[
0
:
vlength
]
/=
(
sqrt_hLeft
[
0
:
vlength
]
+
sqrt_hRight
[
0
:
vlength
]);
// 2 FLOPs (1 div)
// compute "Roe speeds" from Roe averages
// define arrays for "Roe speeds"
// T* roeSpeed0 = (T*)malloc(vlength*sizeof(T));
// T* roeSpeed1 = (T*)malloc(vlength*sizeof(T));
roeSpeed0
[
0
:
vlength
]
=
uRoe
[
0
:
vlength
]
-
sqrt_gravity
*
sqrt_hRoe
[
0
:
vlength
];
// 2 FLOPs
roeSpeed1
[
0
:
vlength
]
=
uRoe
[
0
:
vlength
]
+
sqrt_gravity
*
sqrt_hRoe
[
0
:
vlength
];
// 2 FLOPs
// compute Eindfeldt speeds (returned as output parameters)
o_waveSpeed0
[
0
:
vlength
]
=
std
::
min
(
characteristicSpeed0
[
0
:
vlength
],
roeSpeed0
[
0
:
vlength
]);
// 1 FLOP (min)
o_waveSpeed1
[
0
:
vlength
]
=
std
::
max
(
characteristicSpeed1
[
0
:
vlength
],
roeSpeed1
[
0
:
vlength
]);
// 1 FLOP (max)
//20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
}
inline
void
fWaveComputeWaveDecomposition
(
const
T
i_hLeft
,
const
T
i_hRight
,
...
...
@@ -217,37 +499,68 @@ public:
T
&
o_fWave0
,
T
&
o_fWave1
)
const
{
T
lambdaDif
=
i_waveSpeed1
-
i_waveSpeed0
;
//compute the inverse matrix R^{-1}
T
Rinv00
=
0.
,
Rinv01
=
0.
,
Rinv10
=
0.
,
Rinv11
=
0.
;
T
oneDivLambdaDif
=
(
T
)
1.
/
lambdaDif
;
Rinv00
=
oneDivLambdaDif
*
i_waveSpeed1
;
Rinv01
=
-
oneDivLambdaDif
;
Rinv10
=
oneDivLambdaDif
*
-
i_waveSpeed0
;
Rinv11
=
oneDivLambdaDif
;
//calculate modified (bathymetry!) flux difference
// f(Q_i) - f(Q_{i-1}) -> serve as right hand sides
T
fDif0
=
i_huRight
-
i_huLeft
;
// 1 FLOP
T
fDif1
=
i_huRight
*
i_uRight
+
half_gravity
*
i_hRight
*
i_hRight
-
(
i_huLeft
*
i_uLeft
+
half_gravity
*
i_hLeft
*
i_hLeft
);
// 9 FLOPs
//right hand side
T
fDif0
=
0.
,
fDif1
=
0.
;
// \delta x \Psi[2]
fDif1
+=
half_gravity
*
(
i_hRight
+
i_hLeft
)
*
(
i_bRight
-
i_bLeft
);
// 5 FLOPs
// solve linear system of equations to obtain f-waves:
// ( 1 1 ) ( o_fWave0 ) = ( fDif0 )
// ( i_waveSpeed0 i_waveSpeed1 ) ( o_fWave1 ) ( fDif1 )
// compute the inverse of the wave speed difference:
T
inverseSpeedDiff
=
(
T
)
1.
/
(
i_waveSpeed1
-
i_waveSpeed0
);
// 2 FLOPs (1 div)
// compute f-waves:
o_fWave0
=
(
i_waveSpeed1
*
fDif0
-
fDif1
)
*
inverseSpeedDiff
;
// 3 FLOPs
o_fWave1
=
(
-
i_waveSpeed0
*
fDif0
+
fDif1
)
*
inverseSpeedDiff
;
// 3 FLOPs
//=========
//23 FLOPs in total (incl. 1 div)
}
inline
void
fWaveComputeWaveDecomposition_cilk
(
const
T
*
i_hLeft
,
const
T
*
i_hRight
,
const
T
*
i_huLeft
,
const
T
*
i_huRight
,
const
T
*
i_uLeft
,
const
T
*
i_uRight
,
const
T
*
i_bLeft
,
const
T
*
i_bRight
,
const
T
*
i_waveSpeed0
,
const
T
*
i_waveSpeed1
,
T
*
o_fWave0
,
T
*
o_fWave1
)
const
{
//calculate modified (bathymetry!) flux difference
// f(Q_i) - f(Q_{i-1})
fDif0
=
i_huRight
-
i_huLeft
;
fDif1
=
i_huRight
*
i_uRight
+
(
T
)
.5
*
gravity
*
i_hRight
*
i_hRight
-
(
i_huLeft
*
i_uLeft
+
(
T
)
.5
*
gravity
*
i_hLeft
*
i_hLeft
);
// f(Q_i) - f(Q_{i-1}) -> serve as right hand sides
//allocate mem for fDif0, fDif1 arrays
// T* fDif0 = (T*)malloc(vlength*sizeof(T));
// T* fDif1 = (T*)malloc(vlength*sizeof(T));
fDif0
[
0
:
vlength
]
=
i_huRight
[
0
:
vlength
]
-
i_huLeft
[
0
:
vlength
];
// 1 FLOP
fDif1
[
0
:
vlength
]
=
i_huRight
[
0
:
vlength
]
*
i_uRight
[
0
:
vlength
]
+
half_gravity
*
i_hRight
[
0
:
vlength
]
*
i_hRight
[
0
:
vlength
]
-
(
i_huLeft
[
0
:
vlength
]
*
i_uLeft
[
0
:
vlength
]
+
half_gravity
*
i_hLeft
[
0
:
vlength
]
*
i_hLeft
[
0
:
vlength
]);
// 9 FLOPs
// \delta x \Psi[2]
T
psi
=
-
gravity
*
(
T
)
.5
*
(
i_hRight
+
i_hLeft
)
*
(
i_bRight
-
i_bLeft
);
fDif1
-=
psi
;
//solve linear equations
o_fWave0
=
Rinv00
*
fDif0
+
Rinv01
*
fDif1
;
o_fWave1
=
Rinv10
*
fDif0
+
Rinv11
*
fDif1
;
fDif1
[
0
:
vlength
]
+=
half_gravity
*
(
i_hRight
[
0
:
vlength
]
+
i_hLeft
[
0
:
vlength
])
*
(
i_bRight
[
0
:
vlength
]
-
i_bLeft
[
0
:
vlength
]);
// 5 FLOPs
// solve linear system of equations to obtain f-waves:
// ( 1 1 ) ( o_fWave0 ) = ( fDif0 )
// ( i_waveSpeed0 i_waveSpeed1 ) ( o_fWave1 ) ( fDif1 )
// compute the inverse of the wave speed difference:
// T* inverseSpeedDiff = (T*)malloc(vlength*sizeof(T));
inverseSpeedDiff
[
0
:
vlength
]
=
(
T
)
1.
/
(
i_waveSpeed1
[
0
:
vlength
]
-
i_waveSpeed0
[
0
:
vlength
]
);
// 2 FLOPs (1 div)
// compute f-waves:
o_fWave0
[
0
:
vlength
]
=
(
i_waveSpeed1
[
0
:
vlength
]
*
fDif0
[
0
:
vlength
]
-
fDif1
[
0
:
vlength
]
)
*
inverseSpeedDiff
[
0
:
vlength
];
// 3 FLOPs
o_fWave1
[
0
:
vlength
]
=
(
-
i_waveSpeed0
[
0
:
vlength
]
*
fDif0
[
0
:
vlength
]
+
fDif1
[
0
:
vlength
]
)
*
inverseSpeedDiff
[
0
:
vlength
];
// 3 FLOPs
//=========
//23 FLOPs in total (incl. 1 div)
}
};
}
#endif // FWAVEVEC_HPP_
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment