more fine-grained single multiplication functions, reuse of buffer -> faster implementation, configured for performance measurement

2019-04-13 14:42:55 +02:00 · 2019-04-13 14:42:55 +02:00 · d981a96048
parent ce033a24a3
commit d981a96048
3 changed files with 204 additions and 85 deletions
--- a/src/Interpolation.hpp
+++ b/src/Interpolation.hpp
@ -114,6 +114,16 @@ class MultiDimVector {

  It getJumpIterator() const { return it; }

+  void resetWithJumpIterator(It const &other_it) {
+    it = other_it;
+    size_t n_1 = it.firstIndexBound();
+    data.resize(n_1);
+    auto sizes = it.numValuesPerFirstIndex();
+    for (size_t i = 0; i < n_1; ++i) {
+      data[i].resize(sizes[i]);
+    }
+  }
+
  Iterator begin() { return Iterator(*this, it); }

  Iterator end() {
@ -165,99 +175,203 @@ double squared_l2_norm(MultiDimVector<It> const &v) {
  return sum;
 }

+template <typename It>
+void multiply_single_lower_triangular_inplace(boost::numeric::ublas::matrix<double> const &L,
+                                              MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = 0; j <= i; ++j) {
+        sum += L(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_upper_triangular_inplace(boost::numeric::ublas::matrix<double> const &U,
+                                              MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = i; j < last_dim_count; ++j) {
+        sum += U(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_arbitrary_inplace(boost::numeric::ublas::matrix<double> const &M,
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = 0; j < last_dim_count; ++j) {
+        sum += M(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_identity_inplace(MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      buffer.data[i][indexes[i]++] = (*(offset_data_pointer + i));
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  for (size_t i = 0; i < num_times; ++i) {
+    multiply_single_identity_inplace(v, buffer);
+  }
+}
+
+template <typename It>
+void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v) {
+  MultiDimVector<It> buffer(v.getJumpIterator());
+  for (size_t i = 0; i < num_times; ++i) {
+    multiply_single_identity_inplace(v, buffer);
+  }
+}
+
 template <typename It>
 void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L,
-                                       MultiDimVector<It> &v) {
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
  // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
  // the first index of w
  size_t d = L.size();
  It it = v.getJumpIterator();

  for (int k = d - 1; k >= 0; --k) {
-    MultiDimVector<It> w(it);
-    std::vector<size_t> indexes(w.data.size(), 0);
-
-    auto &Lk = L[k];
-    it.reset();
-
-    size_t first_v_index = 0;
-    size_t second_v_index = 0;
-
-    double *data_pointer = &v.data[0][0];
-    size_t data_size = v.data[0].size();
-
-    while (it.valid()) {
-      size_t last_dim_count = it.lastDimensionCount();
-      double *offset_data_pointer = data_pointer + second_v_index;
-      for (size_t i = 0; i < last_dim_count; ++i) {
-        double sum = 0.0;
-        for (size_t j = 0; j <= i; ++j) {
-          sum += Lk(i, j) * (*(offset_data_pointer + j));
-        }
-        w.data[i][indexes[i]++] = sum;
-      }
-      second_v_index += last_dim_count;
-      if (second_v_index >= data_size) {
-        second_v_index = 0;
-        first_v_index += 1;
-        data_pointer = &v.data[first_v_index][0];
-        data_size = v.data[first_v_index].size();
-      }
-
-      it.next();
-    }
-
-    v.swap(w);
-
-    it = it.cycle();
+    multiply_single_lower_triangular_inplace(L[k], v, buffer);
  }
 }

 template <typename It>
 void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U,
-                                       MultiDimVector<It> &v) {
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
  // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
  // the first index of w
  size_t d = U.size();
  It it = v.getJumpIterator();

  for (int k = d - 1; k >= 0; --k) {
-    MultiDimVector<It> w(it);
-    std::vector<size_t> indexes(w.data.size(), 0);
-
-    auto &Uk = U[k];
-    it.reset();
-
-    size_t first_v_index = 0;
-    size_t second_v_index = 0;
-
-    double *data_pointer = &v.data[0][0];
-    size_t data_size = v.data[0].size();
-
-    while (it.valid()) {
-      size_t last_dim_count = it.lastDimensionCount();
-      double *offset_data_pointer = data_pointer + second_v_index;
-      for (size_t i = 0; i < last_dim_count; ++i) {
-        double sum = 0.0;
-        for (size_t j = i; j < last_dim_count; ++j) {
-          sum += Uk(i, j) * (*(offset_data_pointer + j));
-        }
-        w.data[i][indexes[i]++] = sum;
-      }
-      second_v_index += last_dim_count;
-      if (second_v_index >= data_size) {
-        second_v_index = 0;
-        first_v_index += 1;
-        data_pointer = &v.data[first_v_index][0];
-        data_size = v.data[first_v_index].size();
-      }
-
-      it.next();
-    }
-
-    it = it.cycle();
-
-    v.swap(w);
+    multiply_single_upper_triangular_inplace(U[k], v, buffer);
  }
 }

@ -273,11 +387,12 @@ class SparseTPOperator {
  std::vector<boost::numeric::ublas::matrix<double>> U;
  std::vector<boost::numeric::ublas::matrix<double>> Linv;
  std::vector<boost::numeric::ublas::matrix<double>> Uinv;
+  MultiDimVector<It> buffer;
  size_t d;

 public:
  SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices)
-      : it(it), M(matrices), d(matrices.size()){};
+      : it(it), M(matrices), buffer(it), d(matrices.size()){};

  void prepareCommon() {
    if (LU.size() > 0) {
@ -354,15 +469,15 @@ class SparseTPOperator {

  MultiDimVector<It> apply(MultiDimVector<It> input) {
    prepareApply();
-    multiply_upper_triangular_inplace(U, input);
-    multiply_lower_triangular_inplace(L, input);
+    multiply_upper_triangular_inplace(U, input, buffer);
+    multiply_lower_triangular_inplace(L, input, buffer);
    return input;
  }

  MultiDimVector<It> solve(MultiDimVector<It> rhs) {
    prepareSolve();
-    multiply_lower_triangular_inplace(Linv, rhs);
-    multiply_upper_triangular_inplace(Uinv, rhs);
+    multiply_lower_triangular_inplace(Linv, rhs, buffer);
+    multiply_upper_triangular_inplace(Uinv, rhs, buffer);
    return rhs;
  }
 };
--- a/test/main.cpp
+++ b/test/main.cpp
@ -24,8 +24,8 @@ limitations under the License.
 */

 void runFunctions() {
-  constexpr size_t d = 30;
-  size_t bound = 8;
+  constexpr size_t d = 8;
+  size_t bound = 30;
  // fsi::TemplateBoundedSumIterator<d> it(bound);
  fsi::BoundedSumIterator it(d, bound);
  std::vector<MonomialFunctions> phi(d);
@ -53,6 +53,10 @@ void runFunctions() {
  std::cout << "Time for apply(): " << timer.elapsed() << " s\n";
  // std::cout << b << "\n";

+  // timer.reset();
+  // fsi::cycle_vector_inplace(d, b);
+  // std::cout << "Time for cycling: " << timer.elapsed() << "s\n";
+
  std::cout << "Number of points: " << it.numValues() << "\n";

  std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n";
--- a/test/performance.cpp
+++ b/test/performance.cpp
@ -36,17 +36,17 @@ void measurePerformance() {

  std::vector<size_t> dimensions = {2, 4, 8, 16, 32, 64};
  // size_t maxNumPoints = 20000000;
-  size_t maxNumPoints = 30000000;
+  size_t maxNumPoints = 100000000;

  std::ostringstream stream;

  // minimum quotient of number of points of current configuration to
  // previous configuration
-  double minQuotient = 4.0;
+  double minQuotient = 2.0;

  // maximum runtime in seconds
-  // if exceeded, we do not try higher numbers of points
-  double maxRuntime = 2.0;
+  // we try not to exceed this runtime (based on previous predictions)
+  double maxRuntime = 10.0;

  // small value for denominator of a fraction
  double epsilon = 0.01;