more fine-grained single multiplication functions, reuse of buffer -> faster implementation, configured for performance measurement
This commit is contained in:
parent
ce033a24a3
commit
d981a96048
|
|
@ -114,6 +114,16 @@ class MultiDimVector {
|
||||||
|
|
||||||
It getJumpIterator() const { return it; }
|
It getJumpIterator() const { return it; }
|
||||||
|
|
||||||
|
void resetWithJumpIterator(It const &other_it) {
|
||||||
|
it = other_it;
|
||||||
|
size_t n_1 = it.firstIndexBound();
|
||||||
|
data.resize(n_1);
|
||||||
|
auto sizes = it.numValuesPerFirstIndex();
|
||||||
|
for (size_t i = 0; i < n_1; ++i) {
|
||||||
|
data[i].resize(sizes[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Iterator begin() { return Iterator(*this, it); }
|
Iterator begin() { return Iterator(*this, it); }
|
||||||
|
|
||||||
Iterator end() {
|
Iterator end() {
|
||||||
|
|
@ -165,99 +175,203 @@ double squared_l2_norm(MultiDimVector<It> const &v) {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void multiply_single_lower_triangular_inplace(boost::numeric::ublas::matrix<double> const &L,
|
||||||
|
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
|
It it = v.getJumpIterator();
|
||||||
|
It cycled_it = it.cycle();
|
||||||
|
buffer.resetWithJumpIterator(cycled_it);
|
||||||
|
|
||||||
|
std::vector<size_t> indexes(buffer.data.size(), 0);
|
||||||
|
|
||||||
|
it.reset();
|
||||||
|
|
||||||
|
size_t first_v_index = 0;
|
||||||
|
size_t second_v_index = 0;
|
||||||
|
|
||||||
|
double *data_pointer = &v.data[0][0];
|
||||||
|
size_t data_size = v.data[0].size();
|
||||||
|
|
||||||
|
while (it.valid()) {
|
||||||
|
size_t last_dim_count = it.lastDimensionCount();
|
||||||
|
double *offset_data_pointer = data_pointer + second_v_index;
|
||||||
|
for (size_t i = 0; i < last_dim_count; ++i) {
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t j = 0; j <= i; ++j) {
|
||||||
|
sum += L(i, j) * (*(offset_data_pointer + j));
|
||||||
|
}
|
||||||
|
buffer.data[i][indexes[i]++] = sum;
|
||||||
|
}
|
||||||
|
second_v_index += last_dim_count;
|
||||||
|
if (second_v_index >= data_size) {
|
||||||
|
second_v_index = 0;
|
||||||
|
first_v_index += 1;
|
||||||
|
data_pointer = &v.data[first_v_index][0];
|
||||||
|
data_size = v.data[first_v_index].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
it.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
v.swap(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void multiply_single_upper_triangular_inplace(boost::numeric::ublas::matrix<double> const &U,
|
||||||
|
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
|
It it = v.getJumpIterator();
|
||||||
|
It cycled_it = it.cycle();
|
||||||
|
buffer.resetWithJumpIterator(cycled_it);
|
||||||
|
|
||||||
|
std::vector<size_t> indexes(buffer.data.size(), 0);
|
||||||
|
|
||||||
|
it.reset();
|
||||||
|
|
||||||
|
size_t first_v_index = 0;
|
||||||
|
size_t second_v_index = 0;
|
||||||
|
|
||||||
|
double *data_pointer = &v.data[0][0];
|
||||||
|
size_t data_size = v.data[0].size();
|
||||||
|
|
||||||
|
while (it.valid()) {
|
||||||
|
size_t last_dim_count = it.lastDimensionCount();
|
||||||
|
double *offset_data_pointer = data_pointer + second_v_index;
|
||||||
|
for (size_t i = 0; i < last_dim_count; ++i) {
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t j = i; j < last_dim_count; ++j) {
|
||||||
|
sum += U(i, j) * (*(offset_data_pointer + j));
|
||||||
|
}
|
||||||
|
buffer.data[i][indexes[i]++] = sum;
|
||||||
|
}
|
||||||
|
second_v_index += last_dim_count;
|
||||||
|
if (second_v_index >= data_size) {
|
||||||
|
second_v_index = 0;
|
||||||
|
first_v_index += 1;
|
||||||
|
data_pointer = &v.data[first_v_index][0];
|
||||||
|
data_size = v.data[first_v_index].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
it.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
v.swap(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void multiply_single_arbitrary_inplace(boost::numeric::ublas::matrix<double> const &M,
|
||||||
|
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
|
It it = v.getJumpIterator();
|
||||||
|
It cycled_it = it.cycle();
|
||||||
|
buffer.resetWithJumpIterator(cycled_it);
|
||||||
|
|
||||||
|
std::vector<size_t> indexes(buffer.data.size(), 0);
|
||||||
|
|
||||||
|
it.reset();
|
||||||
|
|
||||||
|
size_t first_v_index = 0;
|
||||||
|
size_t second_v_index = 0;
|
||||||
|
|
||||||
|
double *data_pointer = &v.data[0][0];
|
||||||
|
size_t data_size = v.data[0].size();
|
||||||
|
|
||||||
|
while (it.valid()) {
|
||||||
|
size_t last_dim_count = it.lastDimensionCount();
|
||||||
|
double *offset_data_pointer = data_pointer + second_v_index;
|
||||||
|
for (size_t i = 0; i < last_dim_count; ++i) {
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t j = 0; j < last_dim_count; ++j) {
|
||||||
|
sum += M(i, j) * (*(offset_data_pointer + j));
|
||||||
|
}
|
||||||
|
buffer.data[i][indexes[i]++] = sum;
|
||||||
|
}
|
||||||
|
second_v_index += last_dim_count;
|
||||||
|
if (second_v_index >= data_size) {
|
||||||
|
second_v_index = 0;
|
||||||
|
first_v_index += 1;
|
||||||
|
data_pointer = &v.data[first_v_index][0];
|
||||||
|
data_size = v.data[first_v_index].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
it.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
v.swap(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void multiply_single_identity_inplace(MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
|
It it = v.getJumpIterator();
|
||||||
|
It cycled_it = it.cycle();
|
||||||
|
buffer.resetWithJumpIterator(cycled_it);
|
||||||
|
|
||||||
|
std::vector<size_t> indexes(buffer.data.size(), 0);
|
||||||
|
|
||||||
|
it.reset();
|
||||||
|
|
||||||
|
size_t first_v_index = 0;
|
||||||
|
size_t second_v_index = 0;
|
||||||
|
|
||||||
|
double *data_pointer = &v.data[0][0];
|
||||||
|
size_t data_size = v.data[0].size();
|
||||||
|
|
||||||
|
while (it.valid()) {
|
||||||
|
size_t last_dim_count = it.lastDimensionCount();
|
||||||
|
double *offset_data_pointer = data_pointer + second_v_index;
|
||||||
|
for (size_t i = 0; i < last_dim_count; ++i) {
|
||||||
|
buffer.data[i][indexes[i]++] = (*(offset_data_pointer + i));
|
||||||
|
}
|
||||||
|
second_v_index += last_dim_count;
|
||||||
|
if (second_v_index >= data_size) {
|
||||||
|
second_v_index = 0;
|
||||||
|
first_v_index += 1;
|
||||||
|
data_pointer = &v.data[first_v_index][0];
|
||||||
|
data_size = v.data[first_v_index].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
it.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
v.swap(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
|
for (size_t i = 0; i < num_times; ++i) {
|
||||||
|
multiply_single_identity_inplace(v, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename It>
|
||||||
|
void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v) {
|
||||||
|
MultiDimVector<It> buffer(v.getJumpIterator());
|
||||||
|
for (size_t i = 0; i < num_times; ++i) {
|
||||||
|
multiply_single_identity_inplace(v, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename It>
|
template <typename It>
|
||||||
void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L,
|
void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L,
|
||||||
MultiDimVector<It> &v) {
|
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
|
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
|
||||||
// the first index of w
|
// the first index of w
|
||||||
size_t d = L.size();
|
size_t d = L.size();
|
||||||
It it = v.getJumpIterator();
|
It it = v.getJumpIterator();
|
||||||
|
|
||||||
for (int k = d - 1; k >= 0; --k) {
|
for (int k = d - 1; k >= 0; --k) {
|
||||||
MultiDimVector<It> w(it);
|
multiply_single_lower_triangular_inplace(L[k], v, buffer);
|
||||||
std::vector<size_t> indexes(w.data.size(), 0);
|
|
||||||
|
|
||||||
auto &Lk = L[k];
|
|
||||||
it.reset();
|
|
||||||
|
|
||||||
size_t first_v_index = 0;
|
|
||||||
size_t second_v_index = 0;
|
|
||||||
|
|
||||||
double *data_pointer = &v.data[0][0];
|
|
||||||
size_t data_size = v.data[0].size();
|
|
||||||
|
|
||||||
while (it.valid()) {
|
|
||||||
size_t last_dim_count = it.lastDimensionCount();
|
|
||||||
double *offset_data_pointer = data_pointer + second_v_index;
|
|
||||||
for (size_t i = 0; i < last_dim_count; ++i) {
|
|
||||||
double sum = 0.0;
|
|
||||||
for (size_t j = 0; j <= i; ++j) {
|
|
||||||
sum += Lk(i, j) * (*(offset_data_pointer + j));
|
|
||||||
}
|
|
||||||
w.data[i][indexes[i]++] = sum;
|
|
||||||
}
|
|
||||||
second_v_index += last_dim_count;
|
|
||||||
if (second_v_index >= data_size) {
|
|
||||||
second_v_index = 0;
|
|
||||||
first_v_index += 1;
|
|
||||||
data_pointer = &v.data[first_v_index][0];
|
|
||||||
data_size = v.data[first_v_index].size();
|
|
||||||
}
|
|
||||||
|
|
||||||
it.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
v.swap(w);
|
|
||||||
|
|
||||||
it = it.cycle();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename It>
|
template <typename It>
|
||||||
void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U,
|
void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U,
|
||||||
MultiDimVector<It> &v) {
|
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
|
||||||
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
|
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
|
||||||
// the first index of w
|
// the first index of w
|
||||||
size_t d = U.size();
|
size_t d = U.size();
|
||||||
It it = v.getJumpIterator();
|
It it = v.getJumpIterator();
|
||||||
|
|
||||||
for (int k = d - 1; k >= 0; --k) {
|
for (int k = d - 1; k >= 0; --k) {
|
||||||
MultiDimVector<It> w(it);
|
multiply_single_upper_triangular_inplace(U[k], v, buffer);
|
||||||
std::vector<size_t> indexes(w.data.size(), 0);
|
|
||||||
|
|
||||||
auto &Uk = U[k];
|
|
||||||
it.reset();
|
|
||||||
|
|
||||||
size_t first_v_index = 0;
|
|
||||||
size_t second_v_index = 0;
|
|
||||||
|
|
||||||
double *data_pointer = &v.data[0][0];
|
|
||||||
size_t data_size = v.data[0].size();
|
|
||||||
|
|
||||||
while (it.valid()) {
|
|
||||||
size_t last_dim_count = it.lastDimensionCount();
|
|
||||||
double *offset_data_pointer = data_pointer + second_v_index;
|
|
||||||
for (size_t i = 0; i < last_dim_count; ++i) {
|
|
||||||
double sum = 0.0;
|
|
||||||
for (size_t j = i; j < last_dim_count; ++j) {
|
|
||||||
sum += Uk(i, j) * (*(offset_data_pointer + j));
|
|
||||||
}
|
|
||||||
w.data[i][indexes[i]++] = sum;
|
|
||||||
}
|
|
||||||
second_v_index += last_dim_count;
|
|
||||||
if (second_v_index >= data_size) {
|
|
||||||
second_v_index = 0;
|
|
||||||
first_v_index += 1;
|
|
||||||
data_pointer = &v.data[first_v_index][0];
|
|
||||||
data_size = v.data[first_v_index].size();
|
|
||||||
}
|
|
||||||
|
|
||||||
it.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
it = it.cycle();
|
|
||||||
|
|
||||||
v.swap(w);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -273,11 +387,12 @@ class SparseTPOperator {
|
||||||
std::vector<boost::numeric::ublas::matrix<double>> U;
|
std::vector<boost::numeric::ublas::matrix<double>> U;
|
||||||
std::vector<boost::numeric::ublas::matrix<double>> Linv;
|
std::vector<boost::numeric::ublas::matrix<double>> Linv;
|
||||||
std::vector<boost::numeric::ublas::matrix<double>> Uinv;
|
std::vector<boost::numeric::ublas::matrix<double>> Uinv;
|
||||||
|
MultiDimVector<It> buffer;
|
||||||
size_t d;
|
size_t d;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices)
|
SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices)
|
||||||
: it(it), M(matrices), d(matrices.size()){};
|
: it(it), M(matrices), buffer(it), d(matrices.size()){};
|
||||||
|
|
||||||
void prepareCommon() {
|
void prepareCommon() {
|
||||||
if (LU.size() > 0) {
|
if (LU.size() > 0) {
|
||||||
|
|
@ -354,15 +469,15 @@ class SparseTPOperator {
|
||||||
|
|
||||||
MultiDimVector<It> apply(MultiDimVector<It> input) {
|
MultiDimVector<It> apply(MultiDimVector<It> input) {
|
||||||
prepareApply();
|
prepareApply();
|
||||||
multiply_upper_triangular_inplace(U, input);
|
multiply_upper_triangular_inplace(U, input, buffer);
|
||||||
multiply_lower_triangular_inplace(L, input);
|
multiply_lower_triangular_inplace(L, input, buffer);
|
||||||
return input;
|
return input;
|
||||||
}
|
}
|
||||||
|
|
||||||
MultiDimVector<It> solve(MultiDimVector<It> rhs) {
|
MultiDimVector<It> solve(MultiDimVector<It> rhs) {
|
||||||
prepareSolve();
|
prepareSolve();
|
||||||
multiply_lower_triangular_inplace(Linv, rhs);
|
multiply_lower_triangular_inplace(Linv, rhs, buffer);
|
||||||
multiply_upper_triangular_inplace(Uinv, rhs);
|
multiply_upper_triangular_inplace(Uinv, rhs, buffer);
|
||||||
return rhs;
|
return rhs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,8 @@ limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void runFunctions() {
|
void runFunctions() {
|
||||||
constexpr size_t d = 30;
|
constexpr size_t d = 8;
|
||||||
size_t bound = 8;
|
size_t bound = 30;
|
||||||
// fsi::TemplateBoundedSumIterator<d> it(bound);
|
// fsi::TemplateBoundedSumIterator<d> it(bound);
|
||||||
fsi::BoundedSumIterator it(d, bound);
|
fsi::BoundedSumIterator it(d, bound);
|
||||||
std::vector<MonomialFunctions> phi(d);
|
std::vector<MonomialFunctions> phi(d);
|
||||||
|
|
@ -53,6 +53,10 @@ void runFunctions() {
|
||||||
std::cout << "Time for apply(): " << timer.elapsed() << " s\n";
|
std::cout << "Time for apply(): " << timer.elapsed() << " s\n";
|
||||||
// std::cout << b << "\n";
|
// std::cout << b << "\n";
|
||||||
|
|
||||||
|
// timer.reset();
|
||||||
|
// fsi::cycle_vector_inplace(d, b);
|
||||||
|
// std::cout << "Time for cycling: " << timer.elapsed() << "s\n";
|
||||||
|
|
||||||
std::cout << "Number of points: " << it.numValues() << "\n";
|
std::cout << "Number of points: " << it.numValues() << "\n";
|
||||||
|
|
||||||
std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n";
|
std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n";
|
||||||
|
|
|
||||||
|
|
@ -36,17 +36,17 @@ void measurePerformance() {
|
||||||
|
|
||||||
std::vector<size_t> dimensions = {2, 4, 8, 16, 32, 64};
|
std::vector<size_t> dimensions = {2, 4, 8, 16, 32, 64};
|
||||||
// size_t maxNumPoints = 20000000;
|
// size_t maxNumPoints = 20000000;
|
||||||
size_t maxNumPoints = 30000000;
|
size_t maxNumPoints = 100000000;
|
||||||
|
|
||||||
std::ostringstream stream;
|
std::ostringstream stream;
|
||||||
|
|
||||||
// minimum quotient of number of points of current configuration to
|
// minimum quotient of number of points of current configuration to
|
||||||
// previous configuration
|
// previous configuration
|
||||||
double minQuotient = 4.0;
|
double minQuotient = 2.0;
|
||||||
|
|
||||||
// maximum runtime in seconds
|
// maximum runtime in seconds
|
||||||
// if exceeded, we do not try higher numbers of points
|
// we try not to exceed this runtime (based on previous predictions)
|
||||||
double maxRuntime = 2.0;
|
double maxRuntime = 10.0;
|
||||||
|
|
||||||
// small value for denominator of a fraction
|
// small value for denominator of a fraction
|
||||||
double epsilon = 0.01;
|
double epsilon = 0.01;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue