more fine-grained single multiplication functions, reuse of buffer -> faster implementation, configured for performance measurement

This commit is contained in:
David Holzmüller 2019-04-13 14:42:55 +02:00
parent ce033a24a3
commit d981a96048
3 changed files with 204 additions and 85 deletions

View File

@ -114,6 +114,16 @@ class MultiDimVector {
It getJumpIterator() const { return it; } It getJumpIterator() const { return it; }
void resetWithJumpIterator(It const &other_it) {
it = other_it;
size_t n_1 = it.firstIndexBound();
data.resize(n_1);
auto sizes = it.numValuesPerFirstIndex();
for (size_t i = 0; i < n_1; ++i) {
data[i].resize(sizes[i]);
}
}
Iterator begin() { return Iterator(*this, it); } Iterator begin() { return Iterator(*this, it); }
Iterator end() { Iterator end() {
@ -166,18 +176,14 @@ double squared_l2_norm(MultiDimVector<It> const &v) {
} }
template <typename It> template <typename It>
void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L, void multiply_single_lower_triangular_inplace(boost::numeric::ublas::matrix<double> const &L,
MultiDimVector<It> &v) { MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
// the first index of w
size_t d = L.size();
It it = v.getJumpIterator(); It it = v.getJumpIterator();
It cycled_it = it.cycle();
buffer.resetWithJumpIterator(cycled_it);
for (int k = d - 1; k >= 0; --k) { std::vector<size_t> indexes(buffer.data.size(), 0);
MultiDimVector<It> w(it);
std::vector<size_t> indexes(w.data.size(), 0);
auto &Lk = L[k];
it.reset(); it.reset();
size_t first_v_index = 0; size_t first_v_index = 0;
@ -192,9 +198,9 @@ void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix
for (size_t i = 0; i < last_dim_count; ++i) { for (size_t i = 0; i < last_dim_count; ++i) {
double sum = 0.0; double sum = 0.0;
for (size_t j = 0; j <= i; ++j) { for (size_t j = 0; j <= i; ++j) {
sum += Lk(i, j) * (*(offset_data_pointer + j)); sum += L(i, j) * (*(offset_data_pointer + j));
} }
w.data[i][indexes[i]++] = sum; buffer.data[i][indexes[i]++] = sum;
} }
second_v_index += last_dim_count; second_v_index += last_dim_count;
if (second_v_index >= data_size) { if (second_v_index >= data_size) {
@ -207,25 +213,18 @@ void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix
it.next(); it.next();
} }
v.swap(w); v.swap(buffer);
it = it.cycle();
}
} }
template <typename It> template <typename It>
void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U, void multiply_single_upper_triangular_inplace(boost::numeric::ublas::matrix<double> const &U,
MultiDimVector<It> &v) { MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
// the first index of w
size_t d = U.size();
It it = v.getJumpIterator(); It it = v.getJumpIterator();
It cycled_it = it.cycle();
buffer.resetWithJumpIterator(cycled_it);
for (int k = d - 1; k >= 0; --k) { std::vector<size_t> indexes(buffer.data.size(), 0);
MultiDimVector<It> w(it);
std::vector<size_t> indexes(w.data.size(), 0);
auto &Uk = U[k];
it.reset(); it.reset();
size_t first_v_index = 0; size_t first_v_index = 0;
@ -240,9 +239,9 @@ void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix
for (size_t i = 0; i < last_dim_count; ++i) { for (size_t i = 0; i < last_dim_count; ++i) {
double sum = 0.0; double sum = 0.0;
for (size_t j = i; j < last_dim_count; ++j) { for (size_t j = i; j < last_dim_count; ++j) {
sum += Uk(i, j) * (*(offset_data_pointer + j)); sum += U(i, j) * (*(offset_data_pointer + j));
} }
w.data[i][indexes[i]++] = sum; buffer.data[i][indexes[i]++] = sum;
} }
second_v_index += last_dim_count; second_v_index += last_dim_count;
if (second_v_index >= data_size) { if (second_v_index >= data_size) {
@ -255,9 +254,124 @@ void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix
it.next(); it.next();
} }
it = it.cycle(); v.swap(buffer);
}
v.swap(w); template <typename It>
void multiply_single_arbitrary_inplace(boost::numeric::ublas::matrix<double> const &M,
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
It it = v.getJumpIterator();
It cycled_it = it.cycle();
buffer.resetWithJumpIterator(cycled_it);
std::vector<size_t> indexes(buffer.data.size(), 0);
it.reset();
size_t first_v_index = 0;
size_t second_v_index = 0;
double *data_pointer = &v.data[0][0];
size_t data_size = v.data[0].size();
while (it.valid()) {
size_t last_dim_count = it.lastDimensionCount();
double *offset_data_pointer = data_pointer + second_v_index;
for (size_t i = 0; i < last_dim_count; ++i) {
double sum = 0.0;
for (size_t j = 0; j < last_dim_count; ++j) {
sum += M(i, j) * (*(offset_data_pointer + j));
}
buffer.data[i][indexes[i]++] = sum;
}
second_v_index += last_dim_count;
if (second_v_index >= data_size) {
second_v_index = 0;
first_v_index += 1;
data_pointer = &v.data[first_v_index][0];
data_size = v.data[first_v_index].size();
}
it.next();
}
v.swap(buffer);
}
template <typename It>
void multiply_single_identity_inplace(MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
It it = v.getJumpIterator();
It cycled_it = it.cycle();
buffer.resetWithJumpIterator(cycled_it);
std::vector<size_t> indexes(buffer.data.size(), 0);
it.reset();
size_t first_v_index = 0;
size_t second_v_index = 0;
double *data_pointer = &v.data[0][0];
size_t data_size = v.data[0].size();
while (it.valid()) {
size_t last_dim_count = it.lastDimensionCount();
double *offset_data_pointer = data_pointer + second_v_index;
for (size_t i = 0; i < last_dim_count; ++i) {
buffer.data[i][indexes[i]++] = (*(offset_data_pointer + i));
}
second_v_index += last_dim_count;
if (second_v_index >= data_size) {
second_v_index = 0;
first_v_index += 1;
data_pointer = &v.data[first_v_index][0];
data_size = v.data[first_v_index].size();
}
it.next();
}
v.swap(buffer);
}
template <typename It>
void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
for (size_t i = 0; i < num_times; ++i) {
multiply_single_identity_inplace(v, buffer);
}
}
template <typename It>
void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v) {
MultiDimVector<It> buffer(v.getJumpIterator());
for (size_t i = 0; i < num_times; ++i) {
multiply_single_identity_inplace(v, buffer);
}
}
template <typename It>
void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L,
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
// the first index of w
size_t d = L.size();
It it = v.getJumpIterator();
for (int k = d - 1; k >= 0; --k) {
multiply_single_lower_triangular_inplace(L[k], v, buffer);
}
}
template <typename It>
void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U,
MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
// the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
// the first index of w
size_t d = U.size();
It it = v.getJumpIterator();
for (int k = d - 1; k >= 0; --k) {
multiply_single_upper_triangular_inplace(U[k], v, buffer);
} }
} }
@ -273,11 +387,12 @@ class SparseTPOperator {
std::vector<boost::numeric::ublas::matrix<double>> U; std::vector<boost::numeric::ublas::matrix<double>> U;
std::vector<boost::numeric::ublas::matrix<double>> Linv; std::vector<boost::numeric::ublas::matrix<double>> Linv;
std::vector<boost::numeric::ublas::matrix<double>> Uinv; std::vector<boost::numeric::ublas::matrix<double>> Uinv;
MultiDimVector<It> buffer;
size_t d; size_t d;
public: public:
SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices) SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices)
: it(it), M(matrices), d(matrices.size()){}; : it(it), M(matrices), buffer(it), d(matrices.size()){};
void prepareCommon() { void prepareCommon() {
if (LU.size() > 0) { if (LU.size() > 0) {
@ -354,15 +469,15 @@ class SparseTPOperator {
MultiDimVector<It> apply(MultiDimVector<It> input) { MultiDimVector<It> apply(MultiDimVector<It> input) {
prepareApply(); prepareApply();
multiply_upper_triangular_inplace(U, input); multiply_upper_triangular_inplace(U, input, buffer);
multiply_lower_triangular_inplace(L, input); multiply_lower_triangular_inplace(L, input, buffer);
return input; return input;
} }
MultiDimVector<It> solve(MultiDimVector<It> rhs) { MultiDimVector<It> solve(MultiDimVector<It> rhs) {
prepareSolve(); prepareSolve();
multiply_lower_triangular_inplace(Linv, rhs); multiply_lower_triangular_inplace(Linv, rhs, buffer);
multiply_upper_triangular_inplace(Uinv, rhs); multiply_upper_triangular_inplace(Uinv, rhs, buffer);
return rhs; return rhs;
} }
}; };

View File

@ -24,8 +24,8 @@ limitations under the License.
*/ */
void runFunctions() { void runFunctions() {
constexpr size_t d = 30; constexpr size_t d = 8;
size_t bound = 8; size_t bound = 30;
// fsi::TemplateBoundedSumIterator<d> it(bound); // fsi::TemplateBoundedSumIterator<d> it(bound);
fsi::BoundedSumIterator it(d, bound); fsi::BoundedSumIterator it(d, bound);
std::vector<MonomialFunctions> phi(d); std::vector<MonomialFunctions> phi(d);
@ -53,6 +53,10 @@ void runFunctions() {
std::cout << "Time for apply(): " << timer.elapsed() << " s\n"; std::cout << "Time for apply(): " << timer.elapsed() << " s\n";
// std::cout << b << "\n"; // std::cout << b << "\n";
// timer.reset();
// fsi::cycle_vector_inplace(d, b);
// std::cout << "Time for cycling: " << timer.elapsed() << "s\n";
std::cout << "Number of points: " << it.numValues() << "\n"; std::cout << "Number of points: " << it.numValues() << "\n";
std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n"; std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n";

View File

@ -36,17 +36,17 @@ void measurePerformance() {
std::vector<size_t> dimensions = {2, 4, 8, 16, 32, 64}; std::vector<size_t> dimensions = {2, 4, 8, 16, 32, 64};
// size_t maxNumPoints = 20000000; // size_t maxNumPoints = 20000000;
size_t maxNumPoints = 30000000; size_t maxNumPoints = 100000000;
std::ostringstream stream; std::ostringstream stream;
// minimum quotient of number of points of current configuration to // minimum quotient of number of points of current configuration to
// previous configuration // previous configuration
double minQuotient = 4.0; double minQuotient = 2.0;
// maximum runtime in seconds // maximum runtime in seconds
// if exceeded, we do not try higher numbers of points // we try not to exceed this runtime (based on previous predictions)
double maxRuntime = 2.0; double maxRuntime = 10.0;
// small value for denominator of a fraction // small value for denominator of a fraction
double epsilon = 0.01; double epsilon = 0.01;