diff --git a/src/Interpolation.hpp b/src/Interpolation.hpp
index 6177f7b..36d8efb 100644
--- a/src/Interpolation.hpp
+++ b/src/Interpolation.hpp
@@ -114,6 +114,16 @@ class MultiDimVector {
   It getJumpIterator() const { return it; }
 
+  void resetWithJumpIterator(It const &other_it) {
+    it = other_it;
+    size_t n_1 = it.firstIndexBound();
+    data.resize(n_1);
+    auto sizes = it.numValuesPerFirstIndex();
+    for (size_t i = 0; i < n_1; ++i) {
+      data[i].resize(sizes[i]);
+    }
+  }
+
   Iterator begin() { return Iterator(*this, it); }
 
   Iterator end() {
@@ -165,99 +175,203 @@ double squared_l2_norm(MultiDimVector<It> const &v) {
   return sum;
 }
 
+template <typename It>
+void multiply_single_lower_triangular_inplace(boost::numeric::ublas::matrix<double> const &L,
+                                              MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = 0; j <= i; ++j) {
+        sum += L(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_upper_triangular_inplace(boost::numeric::ublas::matrix<double> const &U,
+                                              MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = i; j < last_dim_count; ++j) {
+        sum += U(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_arbitrary_inplace(boost::numeric::ublas::matrix<double> const &M,
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      double sum = 0.0;
+      for (size_t j = 0; j < last_dim_count; ++j) {
+        sum += M(i, j) * (*(offset_data_pointer + j));
+      }
+      buffer.data[i][indexes[i]++] = sum;
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void multiply_single_identity_inplace(MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  It it = v.getJumpIterator();
+  It cycled_it = it.cycle();
+  buffer.resetWithJumpIterator(cycled_it);
+
+  std::vector<size_t> indexes(buffer.data.size(), 0);
+
+  it.reset();
+
+  size_t first_v_index = 0;
+  size_t second_v_index = 0;
+
+  double *data_pointer = &v.data[0][0];
+  size_t data_size = v.data[0].size();
+
+  while (it.valid()) {
+    size_t last_dim_count = it.lastDimensionCount();
+    double *offset_data_pointer = data_pointer + second_v_index;
+    for (size_t i = 0; i < last_dim_count; ++i) {
+      buffer.data[i][indexes[i]++] = (*(offset_data_pointer + i));
+    }
+    second_v_index += last_dim_count;
+    if (second_v_index >= data_size) {
+      second_v_index = 0;
+      first_v_index += 1;
+      data_pointer = &v.data[first_v_index][0];
+      data_size = v.data[first_v_index].size();
+    }
+
+    it.next();
+  }
+
+  v.swap(buffer);
+}
+
+template <typename It>
+void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
+  for (size_t i = 0; i < num_times; ++i) {
+    multiply_single_identity_inplace(v, buffer);
+  }
+}
+
+template <typename It>
+void cycle_vector_inplace(size_t num_times, MultiDimVector<It> &v) {
+  MultiDimVector<It> buffer(v.getJumpIterator());
+  for (size_t i = 0; i < num_times; ++i) {
+    multiply_single_identity_inplace(v, buffer);
+  }
+}
+
 template <typename It>
 void multiply_lower_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> L,
-                                       MultiDimVector<It> &v) {
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
   // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
   // the first index of w
   size_t d = L.size();
   It it = v.getJumpIterator();
   for (int k = d - 1; k >= 0; --k) {
-    MultiDimVector<It> w(it);
-    std::vector<size_t> indexes(w.data.size(), 0);
-
-    auto &Lk = L[k];
-    it.reset();
-
-    size_t first_v_index = 0;
-    size_t second_v_index = 0;
-
-    double *data_pointer = &v.data[0][0];
-    size_t data_size = v.data[0].size();
-
-    while (it.valid()) {
-      size_t last_dim_count = it.lastDimensionCount();
-      double *offset_data_pointer = data_pointer + second_v_index;
-      for (size_t i = 0; i < last_dim_count; ++i) {
-        double sum = 0.0;
-        for (size_t j = 0; j <= i; ++j) {
-          sum += Lk(i, j) * (*(offset_data_pointer + j));
-        }
-        w.data[i][indexes[i]++] = sum;
-      }
-      second_v_index += last_dim_count;
-      if (second_v_index >= data_size) {
-        second_v_index = 0;
-        first_v_index += 1;
-        data_pointer = &v.data[first_v_index][0];
-        data_size = v.data[first_v_index].size();
-      }
-
-      it.next();
-    }
-
-    v.swap(w);
-
-    it = it.cycle();
+    multiply_single_lower_triangular_inplace(L[k], v, buffer);
   }
 }
 
 template <typename It>
 void multiply_upper_triangular_inplace(std::vector<boost::numeric::ublas::matrix<double>> U,
-                                       MultiDimVector<It> &v) {
+                                       MultiDimVector<It> &v, MultiDimVector<It> &buffer) {
   // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes
   // the first index of w
   size_t d = U.size();
   It it = v.getJumpIterator();
   for (int k = d - 1; k >= 0; --k) {
-    MultiDimVector<It> w(it);
-    std::vector<size_t> indexes(w.data.size(), 0);
-
-    auto &Uk = U[k];
-    it.reset();
-
-    size_t first_v_index = 0;
-    size_t second_v_index = 0;
-
-    double *data_pointer = &v.data[0][0];
-    size_t data_size = v.data[0].size();
-
-    while (it.valid()) {
-      size_t last_dim_count = it.lastDimensionCount();
-      double *offset_data_pointer = data_pointer + second_v_index;
-      for (size_t i = 0; i < last_dim_count; ++i) {
-        double sum = 0.0;
-        for (size_t j = i; j < last_dim_count; ++j) {
-          sum += Uk(i, j) * (*(offset_data_pointer + j));
-        }
-        w.data[i][indexes[i]++] = sum;
-      }
-      second_v_index += last_dim_count;
-      if (second_v_index >= data_size) {
-        second_v_index = 0;
-        first_v_index += 1;
-        data_pointer = &v.data[first_v_index][0];
-        data_size = v.data[first_v_index].size();
-      }
-
-      it.next();
-    }
-
-    it = it.cycle();
-
-    v.swap(w);
+    multiply_single_upper_triangular_inplace(U[k], v, buffer);
   }
 }
 
@@ -273,11 +387,12 @@ class SparseTPOperator {
   std::vector<boost::numeric::ublas::matrix<double>> U;
   std::vector<boost::numeric::ublas::matrix<double>> Linv;
   std::vector<boost::numeric::ublas::matrix<double>> Uinv;
+  MultiDimVector<It> buffer;
   size_t d;
 
  public:
   SparseTPOperator(It it, std::vector<boost::numeric::ublas::matrix<double>> matrices)
-      : it(it), M(matrices), d(matrices.size()){};
+      : it(it), M(matrices), buffer(it), d(matrices.size()){};
 
   void prepareCommon() {
     if (LU.size() > 0) {
@@ -354,15 +469,15 @@ class SparseTPOperator {
 
   MultiDimVector<It> apply(MultiDimVector<It> input) {
     prepareApply();
-    multiply_upper_triangular_inplace(U, input);
-    multiply_lower_triangular_inplace(L, input);
+    multiply_upper_triangular_inplace(U, input, buffer);
+    multiply_lower_triangular_inplace(L, input, buffer);
     return input;
   }
 
   MultiDimVector<It> solve(MultiDimVector<It> rhs) {
     prepareSolve();
-    multiply_lower_triangular_inplace(Linv, rhs);
-    multiply_upper_triangular_inplace(Uinv, rhs);
+    multiply_lower_triangular_inplace(Linv, rhs, buffer);
+    multiply_upper_triangular_inplace(Uinv, rhs, buffer);
     return rhs;
   }
 };
diff --git a/test/main.cpp b/test/main.cpp
index 46e8a9d..79ceea1 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -24,8 +24,8 @@ limitations under the License. */
 
 void runFunctions() {
-  constexpr size_t d = 30;
-  size_t bound = 8;
+  constexpr size_t d = 8;
+  size_t bound = 30;
   // fsi::TemplateBoundedSumIterator<d> it(bound);
   fsi::BoundedSumIterator it(d, bound);
   std::vector phi(d);
@@ -53,6 +53,10 @@ void runFunctions() {
   std::cout << "Time for apply(): " << timer.elapsed() << " s\n";
   // std::cout << b << "\n";
 
+  // timer.reset();
+  // fsi::cycle_vector_inplace(d, b);
+  // std::cout << "Time for cycling: " << timer.elapsed() << "s\n";
+
   std::cout << "Number of points: " << it.numValues() << "\n";
   std::cout << "Reconstruction error (L2 norm): " << sqrt(fsi::squared_l2_norm(b - rhs)) << "\n";
diff --git a/test/performance.cpp b/test/performance.cpp
index d00b986..c16b193 100644
--- a/test/performance.cpp
+++ b/test/performance.cpp
@@ -36,17 +36,17 @@ void measurePerformance() {
   std::vector dimensions = {2, 4, 8, 16, 32, 64};
 
   // size_t maxNumPoints = 20000000;
-  size_t maxNumPoints = 30000000;
+  size_t maxNumPoints = 100000000;
 
   std::ostringstream stream;
 
   // minimum quotient of number of points of current configuration to
   // previous configuration
-  double minQuotient = 4.0;
+  double minQuotient = 2.0;
 
   // maximum runtime in seconds
-  // if exceeded, we do not try higher numbers of points
-  double maxRuntime = 2.0;
+  // we try not to exceed this runtime (based on previous predictions)
+  double maxRuntime = 10.0;
 
   // small value for denominator of a fraction
   double epsilon = 0.01;