Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
S
swe_solvers
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
swe_solvers
Commits
c22e9dbc
Commit
c22e9dbc
authored
Jan 19, 2014
by
drossostum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Using Intel Cilk for Fwave
Signed-off-by:
drossostum
<
drevangel@mytum.de
>
parent
c81126f6
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
192 additions
and
134 deletions
+192
-134
FWaveVec.hpp
src/solver/FWaveVec.hpp
+192
-134
No files found.
src/solver/FWaveVec.hpp
View file @
c22e9dbc
/**
/**
* FWave
Cuda
.h
* FWave
Vec
.h
*
*
****
****
**** This is a
C++ wrapper for the Cuda
implementation of the F-Wave solver (FWave.hpp).
**** This is a
vectorizable C++
implementation of the F-Wave solver (FWave.hpp).
****
****
*
*
* Created on: Nov 13, 2012
* Created on: Nov 13, 2012
* Last Update:
Nov 16, 2012
* Last Update:
Dec 28, 2013
*
*
****
****
*
*
* Author: Sebastian Rettenberger
* Author: Sebastian Rettenberger
* Homepage: http://www5.in.tum.de/wiki/index.php/Sebastian_Rettenberger,_M.Sc.
* Homepage: http://www5.in.tum.de/wiki/index.php/Sebastian_Rettenberger,_M.Sc.
* E-Mail: rettenbs AT in.tum.de
* E-Mail: rettenbs AT in.tum.de
* Some optimizations: Michael Bader
* Homepage: http://www5.in.tum.de/wiki/index.php/Michael_Bader
* E-Mail: bader AT in.tum.de
*
*
****
****
*
*
...
@@ -25,13 +28,10 @@
...
@@ -25,13 +28,10 @@
* volume={24},
* volume={24},
* number={3},
* number={3},
* pages={955--978},
* pages={955--978},
* year={2002},
* year={2002}}
* publisher={Citeseer}}
*
*
* @book{leveque2002finite,
* @book{leveque2002finite,
* Author = {LeVeque, R. J.},
* Author = {LeVeque, R. J.},
* Date-Added = {2011-09-13 14:09:31 +0000},
* Date-Modified = {2011-10-31 09:46:40 +0000},
* Publisher = {Cambridge University Press},
* Publisher = {Cambridge University Press},
* Title = {Finite Volume Methods for Hyperbolic Problems},
* Title = {Finite Volume Methods for Hyperbolic Problems},
* Volume = {31},
* Volume = {31},
...
@@ -49,202 +49,260 @@
...
@@ -49,202 +49,260 @@
#ifndef FWAVEVEC_HPP_
#ifndef FWAVEVEC_HPP_
#define FWAVEVEC_HPP_
#define FWAVEVEC_HPP_
//define vlength - test and then push to separate header file, common to solver and block classes
#define vlength 4
#include <cmath>
#include <cmath>
#include <cilk/cilk.h>
#include <malloc.h>
#define T float
namespace
solver
namespace
solver
{
{
/**
/**
*
*
*/
*/
template
<
typename
T
>
//
template<typename T>
class
FWaveVec
class
FWaveVec
{
{
private
:
private
:
const
T
dryTol
;
const
T
dryTol
;
const
T
gravity
;
const
T
half_gravity
;
// 0.5 * gravity constant
const
T
sqrt_gravity
;
// square root of the gravity constant
const
T
zeroTol
;
const
T
zeroTol
;
public
:
public
:
FWaveVec
(
T
i_dryTol
=
(
T
)
100
,
/**
* FWaveVec Constructor, takes three problem parameters
* @param dryTol "dry tolerance": if the water height falls below dryTol, wall boundary conditions are applied (default value is 1.0)
* @param gravity takes the value of the gravity constant (default value is 9.81 m/s^2)
* @param zeroTol computed f-waves with an absolute value < zeroTol are treated as static waves (default value is 10^{-7})
*/
FWaveVec
(
T
i_dryTol
=
(
T
)
1.0
,
T
i_gravity
=
(
T
)
9.81
,
T
i_gravity
=
(
T
)
9.81
,
T
i_zeroTol
=
(
T
)
0.0000001
)
T
i_zeroTol
=
(
T
)
0.0000001
)
:
dryTol
(
i_dryTol
),
:
dryTol
(
i_dryTol
),
gravity
(
i_gravity
),
half_gravity
(
(
T
)
.5
*
i_gravity
),
sqrt_gravity
(
std
::
sqrt
(
i_gravity
)
),
zeroTol
(
i_zeroTol
)
zeroTol
(
i_zeroTol
)
{
{
}
}
void
computeNetUpdates
(
T
i_hLeft
,
T
i_hRight
,
//E.Drossos - vectorized code using Intel Cilk
T
i_huLeft
,
T
i_huRight
,
//E.Drossos - all FLOP counts refer to one iteration (one vector element); multiply by vlength to obtain the total number of FLOPs
T
i_bLeft
,
T
i_bRight
,
/**
T
&
o_hUpdateLeft
,
* takes the water height, discharge and bathymetry in the left and right cell
T
&
o_hUpdateRight
,
* and computes net updates (left and right going waves) according to the f-wave approach.
T
&
o_huUpdateLeft
,
* It also returns the maximum wave speed.
T
&
o_huUpdateRight
,
*/
void
computeNetUpdates
(
T
*
i_hLeft
,
T
*
i_hRight
,
T
*
i_huLeft
,
T
*
i_huRight
,
T
*
i_bLeft
,
T
*
i_bRight
,
T
*
o_hUpdateLeft
,
T
*
o_hUpdateRight
,
T
*
o_huUpdateLeft
,
T
*
o_huUpdateRight
,
T
&
o_maxWaveSpeed
)
const
T
&
o_maxWaveSpeed
)
const
{
{
// determine the wet dry state and corr. values, if necessary.
// determine the wet dry state and corr. values, if necessary.
if
(
i_hLeft
<
dryTol
&&
i_hRight
<
dryTol
)
{
if
(
i_hLeft
[
0
:
vlength
]
>=
dryTol
)
{
if
(
i_hRight
[
0
:
vlength
]
<
dryTol
)
{
// Wet/Dry case
// Set values according to wall boundary condition
i_hLeft
[
0
:
vlength
]
=
i_hRight
[
0
:
vlength
];
i_huLeft
[
0
:
vlength
]
=
-
i_huRight
[
0
:
vlength
];
i_bLeft
[
0
:
vlength
]
=
i_bRight
[
0
:
vlength
];
}
}
else
if
(
i_hRight
[
0
:
vlength
]
>=
dryTol
)
{
// Dry/Wet case
// Set values according to wall boundary condition
i_hRight
[
0
:
vlength
]
=
i_hLeft
[
0
:
vlength
];
i_huRight
[
0
:
vlength
]
=
-
i_huLeft
[
0
:
vlength
];
i_bRight
[
0
:
vlength
]
=
i_bLeft
[
0
:
vlength
];
}
else
{
// Dry/Dry case
// Dry/Dry case
// Set dummy values such that the result is zero
// Set dummy values such that the result is zero
i_hLeft
=
dryTol
;
i_hLeft
[
0
:
vlength
]
=
dryTol
;
i_huLeft
=
0.
;
i_bLeft
=
0.
;
i_huLeft
[
0
:
vlength
]
=
0.
;
i_bLeft
[
0
:
vlength
]
=
0.
;
//can use __sec_reduce_all_zero(a[0:vlength]) instead
i_hRight
=
dryTol
;
i_hRight
[
0
:
vlength
]
=
dryTol
;
i_huRight
=
0.
;
i_bRight
=
0.
;
i_huRight
[
0
:
vlength
]
=
0.
;
i_bRight
[
0
:
vlength
]
=
0.
;
}
else
if
(
i_hLeft
<
dryTol
)
{
};
i_hLeft
=
i_hRight
;
i_huLeft
=
-
i_huRight
;
i_bLeft
=
i_bRight
;
}
else
if
(
i_hRight
<
dryTol
)
{
i_hRight
=
i_hLeft
;
i_huRight
=
-
i_huLeft
;
i_bRight
=
i_bLeft
;
}
//! velocity on the left side of the edge
//! velocity on the left side of the edge
T
uLeft
=
i_huLeft
/
i_hLeft
;
//TODO: Use intrinsics to allocate and ensure alignment
T
*
uLeft
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
uLeft
[
0
:
vlength
]
=
(
i_huLeft
[
0
:
vlength
])
/
(
i_hLeft
[
0
:
vlength
]);
// 1 FLOP (div)
//! velocity on the right side of the edge
//! velocity on the right side of the edge
T
uRight
=
i_huRight
/
i_hRight
;
//TODO: Use intrinsics to allocate
T
*
uRight
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
uRight
[
0
:
vlength
]
=
(
i_huRight
[
0
:
vlength
])
/
(
i_hRight
[
0
:
vlength
]);
// 1 FLOP (div)
//! wave speeds of the f-waves
//! wave speeds of the f-waves
T
waveSpeeds0
=
0.
,
waveSpeeds1
=
0.
;
//TODO: Use intrinsics to allocate
T
*
waveSpeeds0
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
waveSpeeds1
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
waveSpeeds0
[
0
:
vlength
]
=
0.
;
waveSpeeds1
[
0
:
vlength
]
=
0.
;
//compute the wave speeds
//compute the wave speeds
fWaveComputeWaveSpeeds
(
i_hLeft
,
i_hRight
,
// E.Drossos - function vectorized
i_huLeft
,
i_huRight
,
fWaveComputeWaveSpeeds
(
&
i_hLeft
[
0
],
&
i_hRight
[
0
],
uLeft
,
uRight
,
&
i_huLeft
[
0
],
&
i_huRight
[
0
],
i_bLeft
,
i_bRight
,
&
uLeft
[
0
],
&
uRight
[
0
],
&
i_bLeft
[
0
],
&
i_bRight
[
0
],
waveSpeeds0
,
waveSpeeds1
);
&
waveSpeeds0
[
0
],
&
waveSpeeds1
[
0
]
);
// 20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
//! where to store the two f-waves
//! variables to store the two f-waves
T
fWaves0
=
0.
,
fWaves1
=
0.
;
T
*
fWaves0
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
fWaves1
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
fWaves0
[
0
:
vlength
]
=
0.
;
fWaves1
[
0
:
vlength
]
=
0.
;
//compute the decomposition into f-waves
//compute the decomposition into f-waves
fWaveComputeWaveDecomposition
(
i_hLeft
,
i_hRight
,
// E.Drossos - function vectorized
i_huLeft
,
i_huRight
,
fWaveComputeWaveDecomposition
(
&
i_hLeft
[
0
],
&
i_hRight
[
0
],
uLeft
,
uRight
,
&
i_huLeft
[
0
],
&
i_huRight
[
0
],
i_bLeft
,
i_bRight
,
&
uLeft
[
0
],
&
uRight
[
0
],
&
i_bLeft
[
0
],
&
i_bRight
[
0
],
waveSpeeds0
,
waveSpeeds1
,
&
waveSpeeds0
[
0
],
&
waveSpeeds1
[
0
]
,
fWaves0
,
fWaves1
);
&
fWaves0
[
0
],
&
fWaves1
[
0
]);
// 23 FLOPs (incl. 1 div)
//compute the net-updates
//compute the net-updates
T
hUpdateLeft
=
0.
;
//alternatively use __sec_reduce_all_zero(a[0:vlength])
T
hUpdateRight
=
0.
;
o_hUpdateLeft
[
0
:
vlength
]
=
0.
;
T
huUpdateLeft
=
0.
;
o_hUpdateRight
[
0
:
vlength
]
=
0.
;
T
huUpdateRight
=
0.
;
o_huUpdateLeft
[
0
:
vlength
]
=
0.
;
o_huUpdateRight
[
0
:
vlength
]
=
0.
;
//1st wave family
//1st wave family
if
(
waveSpeeds0
<
-
zeroTol
)
{
//left going
if
(
waveSpeeds0
[
0
:
vlength
]
<
-
zeroTol
)
{
//left going
hUpdateLeft
+=
fWaves0
;
o_hUpdateLeft
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
]
;
huUpdateLeft
+=
fWaves0
*
waveSpeeds0
;
o_huUpdateLeft
[
0
:
vlength
]
+=
(
fWaves0
[
0
:
vlength
])
*
(
waveSpeeds0
[
0
:
vlength
]);
// 3 FLOPs (assume left going wave ...)
}
}
else
if
(
waveSpeeds0
>
zeroTol
)
{
//right going
else
if
(
waveSpeeds0
[
0
:
vlength
]
>
zeroTol
)
{
//right going
hUpdateRight
+=
fWaves0
;
o_hUpdateRight
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
]
;
huUpdateRight
+=
fWaves0
*
waveSpeeds0
;
o_huUpdateRight
[
0
:
vlength
]
+=
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
]
;
}
}
else
{
//split waves
else
{
//split waves
, if waveSpeeds0 close to 0
hUpdateLeft
+=
(
T
)
.5
*
fWaves0
;
o_hUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
])
;
huUpdateLeft
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
o_huUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
])
;
hUpdateRight
+=
(
T
)
.5
*
fWaves0
;
o_hUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
])
;
huUpdateRight
+=
(
T
)
.5
*
fWaves0
*
waveSpeeds0
;
o_huUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves0
[
0
:
vlength
]
*
waveSpeeds0
[
0
:
vlength
])
;
}
}
//2nd wave family
//2nd wave family
if
(
waveSpeeds1
>
zeroTol
)
{
//right going
if
(
waveSpeeds1
[
0
:
vlength
]
>
zeroTol
)
{
//right going
hUpdateRight
+=
fWaves1
;
o_hUpdateRight
[
0
:
vlength
]
+=
fWaves1
[
0
:
vlength
]
;
huUpdateRight
+=
fWaves1
*
waveSpeeds1
;
o_huUpdateRight
[
0
:
vlength
]
+=
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
]);
// 3 FLOPs (assume right going wave ...)
}
}
else
if
(
waveSpeeds1
<
-
zeroTol
)
{
//left going
else
if
(
waveSpeeds1
[
0
:
vlength
]
<
-
zeroTol
)
{
//left going
hUpdateLeft
+=
fWaves1
;
o_hUpdateLeft
[
0
:
vlength
]
+=
fWaves1
[
0
:
vlength
]
;
huUpdateLeft
+=
fWaves1
*
waveSpeeds1
;
o_huUpdateLeft
[
0
:
vlength
]
+=
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
])
;
}
}
else
{
//split waves
else
{
//split waves
hUpdateLeft
+=
(
T
)
.5
*
fWaves1
;
o_hUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
])
;
huUpdateLeft
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
o_huUpdateLeft
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
])
;
hUpdateRight
+=
(
T
)
.5
*
fWaves1
;
o_hUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
])
;
huUpdateRight
+=
(
T
)
.5
*
fWaves1
*
waveSpeeds1
;
o_huUpdateRight
[
0
:
vlength
]
+=
(
T
)
.5
*
(
fWaves1
[
0
:
vlength
]
*
waveSpeeds1
[
0
:
vlength
])
;
}
}
// Set output variables
o_hUpdateLeft
=
hUpdateLeft
;
o_hUpdateRight
=
hUpdateRight
;
o_huUpdateLeft
=
huUpdateLeft
;
o_huUpdateRight
=
huUpdateRight
;
//compute maximum wave speed (-> CFL-condition)
//compute maximum wave speed (-> CFL-condition)
o_maxWaveSpeed
=
std
::
max
(
std
::
abs
(
waveSpeeds0
)
,
std
::
abs
(
waveSpeeds1
)
);
//o_maxWaveSpeed = std::max( std::abs(waveSpeeds0) , std::abs(waveSpeeds1) );
o_maxWaveSpeed
=
__sec_reduce_max
(
std
::
max
(
std
::
abs
(
waveSpeeds0
[
0
:
vlength
])
,
std
::
abs
(
waveSpeeds1
[
0
:
vlength
])
));
// 3 FLOPs (2 abs, 1 max)
//========================
// 54 FLOPs (3 sqrt, 4 div, 2 abs, 3 min/max)
}
}
inline
inline
void
fWaveComputeWaveSpeeds
(
void
fWaveComputeWaveSpeeds
(
const
T
i_hLeft
,
const
T
i_hRight
,
const
T
*
i_hLeft
,
const
T
*
i_hRight
,
const
T
i_huLeft
,
const
T
i_huRight
,
const
T
*
i_huLeft
,
const
T
*
i_huRight
,
const
T
i_uLeft
,
const
T
i_uRight
,
const
T
*
i_uLeft
,
const
T
*
i_uRight
,
const
T
i_bLeft
,
const
T
i_bRight
,
const
T
*
i_bLeft
,
const
T
*
i_bRight
,
T
&
o_waveSpeed0
,
T
&
o_waveSpeed1
)
const
T
*
o_waveSpeed0
,
T
*
o_waveSpeed1
)
const
{
{
//compute eigenvalues of the jacobian matrices in states Q_{i-1} and Q_{i}
// helper variables for sqrt of h:
T
characteristicSpeed0
=
0.
,
characteristicSpeed1
=
0.
;
// define arrays for helpers
characteristicSpeed0
=
i_uLeft
-
std
::
sqrt
(
gravity
*
i_hLeft
);
T
*
sqrt_hLeft
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
characteristicSpeed1
=
i_uRight
+
std
::
sqrt
(
gravity
*
i_hRight
);
T
*
sqrt_hRight
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
sqrt_hLeft
[
0
:
vlength
]
=
std
::
sqrt
(
i_hLeft
[
0
:
vlength
]);
// 1 FLOP (sqrt)
//compute "Roe speeds"
sqrt_hRight
[
0
:
vlength
]
=
std
::
sqrt
(
i_hRight
[
0
:
vlength
]);
// 1 FLOP (sqrt)
T
hRoe
=
(
T
)
.5
*
(
i_hRight
+
i_hLeft
);
T
uRoe
=
i_uLeft
*
std
::
sqrt
(
i_hLeft
)
+
i_uRight
*
std
::
sqrt
(
i_hRight
);
// compute eigenvalues of the jacobian matrices
uRoe
/=
std
::
sqrt
(
i_hLeft
)
+
std
::
sqrt
(
i_hRight
);
// in states Q_{i-1} and Q_{i}
T
*
characteristicSpeed0
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
roeSpeed0
=
0.
,
roeSpeed1
=
0.
;
T
*
characteristicSpeed1
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
roeSpeed0
=
uRoe
-
std
::
sqrt
(
gravity
*
hRoe
);
characteristicSpeed0
[
0
:
vlength
]
=
i_uLeft
[
0
:
vlength
]
-
sqrt_gravity
*
sqrt_hLeft
[
0
:
vlength
];
// 2 FLOPs
roeSpeed1
=
uRoe
+
std
::
sqrt
(
gravity
*
hRoe
);
characteristicSpeed1
[
0
:
vlength
]
=
i_uRight
[
0
:
vlength
]
+
sqrt_gravity
*
sqrt_hRight
[
0
:
vlength
];
// 2 FLOPs
//compute Einfeldt speeds
// compute "Roe averages"
o_waveSpeed0
=
std
::
min
(
characteristicSpeed0
,
roeSpeed0
);
// E.Drossos - Define arrays for hRoe, sqrt_hRoe, uRoe
o_waveSpeed1
=
std
::
max
(
characteristicSpeed1
,
roeSpeed1
);
T
*
hRoe
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
sqrt_hRoe
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
uRoe
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
hRoe
[
0
:
vlength
]
=
(
T
)
.5
*
(
i_hRight
[
0
:
vlength
]
+
i_hLeft
[
0
:
vlength
]);
// 2 FLOPs
sqrt_hRoe
[
0
:
vlength
]
=
std
::
sqrt
(
hRoe
[
0
:
vlength
]);
// 1 FLOP (sqrt)
uRoe
[
0
:
vlength
]
=
i_uLeft
[
0
:
vlength
]
*
sqrt_hLeft
[
0
:
vlength
]
+
i_uRight
[
0
:
vlength
]
*
sqrt_hRight
[
0
:
vlength
];
// 3 FLOPs
uRoe
[
0
:
vlength
]
/=
(
sqrt_hLeft
[
0
:
vlength
]
+
sqrt_hRight
[
0
:
vlength
]);
// 2 FLOPs (1 div)
// compute "Roe speeds" from Roe averages
// define arrays for "Roe speeds"
T
*
roeSpeed0
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
roeSpeed1
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
roeSpeed0
[
0
:
vlength
]
=
uRoe
[
0
:
vlength
]
-
sqrt_gravity
*
sqrt_hRoe
[
0
:
vlength
];
// 2 FLOPs
roeSpeed1
[
0
:
vlength
]
=
uRoe
[
0
:
vlength
]
+
sqrt_gravity
*
sqrt_hRoe
[
0
:
vlength
];
// 2 FLOPs
// compute Einfeldt speeds (returned as output parameters)
o_waveSpeed0
[
0
:
vlength
]
=
std
::
min
(
characteristicSpeed0
[
0
:
vlength
],
roeSpeed0
[
0
:
vlength
]);
// 1 FLOP (min)
o_waveSpeed1
[
0
:
vlength
]
=
std
::
max
(
characteristicSpeed1
[
0
:
vlength
],
roeSpeed1
[
0
:
vlength
]);
// 1 FLOP (max)
//==============
//20 FLOPs (incl. 3 sqrt, 1 div, 2 min/max)
}
}
inline
inline
void
fWaveComputeWaveDecomposition
(
void
fWaveComputeWaveDecomposition
(
const
T
i_hLeft
,
const
T
i_hRight
,
const
T
*
i_hLeft
,
const
T
*
i_hRight
,
const
T
i_huLeft
,
const
T
i_huRight
,
const
T
*
i_huLeft
,
const
T
*
i_huRight
,
const
T
i_uLeft
,
const
T
i_uRight
,
const
T
*
i_uLeft
,
const
T
*
i_uRight
,
const
T
i_bLeft
,
const
T
i_bRight
,
const
T
*
i_bLeft
,
const
T
*
i_bRight
,
const
T
i_waveSpeed0
,
const
T
i_waveSpeed1
,
const
T
*
i_waveSpeed0
,
const
T
*
i_waveSpeed1
,
T
&
o_fWave0
,
T
&
o_fWave1
)
const
T
*
o_fWave0
,
T
*
o_fWave1
)
const
{
{
T
lambdaDif
=
i_waveSpeed1
-
i_waveSpeed0
;
//compute the inverse matrix R^{-1}
T
Rinv00
=
0.
,
Rinv01
=
0.
,
Rinv10
=
0.
,
Rinv11
=
0.
;
T
oneDivLambdaDif
=
(
T
)
1.
/
lambdaDif
;
Rinv00
=
oneDivLambdaDif
*
i_waveSpeed1
;
Rinv01
=
-
oneDivLambdaDif
;
Rinv10
=
oneDivLambdaDif
*
-
i_waveSpeed0
;
Rinv11
=
oneDivLambdaDif
;
//right hand side
T
fDif0
=
0.
,
fDif1
=
0.
;
//calculate modified (bathymetry!) flux difference
//calculate modified (bathymetry!) flux difference
// f(Q_i) - f(Q_{i-1})
// f(Q_i) - f(Q_{i-1}) -> serve as right hand sides
fDif0
=
i_huRight
-
i_huLeft
;
fDif1
=
i_huRight
*
i_uRight
+
(
T
)
.5
*
gravity
*
i_hRight
*
i_hRight
//allocate mem for fDif0, fDif1 arrays
-
(
i_huLeft
*
i_uLeft
+
(
T
)
.5
*
gravity
*
i_hLeft
*
i_hLeft
);
T
*
fDif0
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
T
*
fDif1
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
fDif0
[
0
:
vlength
]
=
i_huRight
[
0
:
vlength
]
-
i_huLeft
[
0
:
vlength
];
// 1 FLOP
fDif1
[
0
:
vlength
]
=
i_huRight
[
0
:
vlength
]
*
i_uRight
[
0
:
vlength
]
+
half_gravity
*
i_hRight
[
0
:
vlength
]
*
i_hRight
[
0
:
vlength
]
-
(
i_huLeft
[
0
:
vlength
]
*
i_uLeft
[
0
:
vlength
]
+
half_gravity
*
i_hLeft
[
0
:
vlength
]
*
i_hLeft
[
0
:
vlength
]);
// 9 FLOPs
// \delta x \Psi[2]
// \delta x \Psi[2]
T
psi
=
-
gravity
*
(
T
)
.5
*
(
i_hRight
+
i_hLeft
)
*
(
i_bRight
-
i_bLeft
);
fDif1
[
0
:
vlength
]
+=
half_gravity
*
(
i_hRight
[
0
:
vlength
]
+
i_hLeft
[
0
:
vlength
])
*
(
i_bRight
[
0
:
vlength
]
-
i_bLeft
[
0
:
vlength
]);
// 5 FLOPs
fDif1
-=
psi
;
// solve linear system of equations to obtain f-waves:
//solve linear equations
// ( 1 1 ) ( o_fWave0 ) = ( fDif0 )
o_fWave0
=
Rinv00
*
fDif0
+
Rinv01
*
fDif1
;
// ( i_waveSpeed0 i_waveSpeed1 ) ( o_fWave1 ) ( fDif1 )
o_fWave1
=
Rinv10
*
fDif0
+
Rinv11
*
fDif1
;
// compute the inverse of the wave speed difference:
T
*
inverseSpeedDiff
=
(
T
*
)
malloc
(
vlength
*
sizeof
(
T
));
inverseSpeedDiff
[
0
:
vlength
]
=
(
T
)
1.
/
(
i_waveSpeed1
[
0
:
vlength
]
-
i_waveSpeed0
[
0
:
vlength
]
);
// 2 FLOPs (1 div)
// compute f-waves:
o_fWave0
[
0
:
vlength
]
=
(
i_waveSpeed1
[
0
:
vlength
]
*
fDif0
[
0
:
vlength
]
-
fDif1
[
0
:
vlength
]
)
*
inverseSpeedDiff
[
0
:
vlength
];
// 3 FLOPs
o_fWave1
[
0
:
vlength
]
=
(
-
i_waveSpeed0
[
0
:
vlength
]
*
fDif0
[
0
:
vlength
]
+
fDif1
[
0
:
vlength
]
)
*
inverseSpeedDiff
[
0
:
vlength
];
// 3 FLOPs
//=========
//23 FLOPs in total (incl. 1 div)
}
}
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment