diff --git a/CMakeLists.txt b/CMakeLists.txt index b21f246..8e62184 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(fast_sparse_interpolation CXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -fmessage-length=0 -std=c++11 -D_GNU_SOURCE=1 -D_REENTRANT -Dlinux -D__linux__ -Dx86_64 -D__x86_64__") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g3 -O0") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") #-mtune=native -march=native +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -mtune=native -march=native") #-mtune=native -march=native set(CMAKE_LD_FLAGS "${CMAKE_LD_FLAGS} -L/usr/local/lib") link_directories(/usr/local/lib) diff --git a/src/Interpolation.hpp b/src/Interpolation.hpp index ae9de0c..fc90e74 100644 --- a/src/Interpolation.hpp +++ b/src/Interpolation.hpp @@ -12,6 +12,94 @@ namespace fsi { +template +void iterateRecursive(size_t remaining, std::function callback) { + for (size_t val = 0; val <= remaining; ++val) { + iterateRecursive(remaining - val, callback); + } +} + +template <> +void iterateRecursive<0>(size_t remaining, std::function callback) { + callback(remaining); +} + +template +class TemplateBoundedSumIterator { + size_t bound; + std::vector index_head; // contains all entries of the index except the last one + size_t index_head_sum; + bool is_done; + + public: + TemplateBoundedSumIterator(size_t bound) + : bound(bound), index_head(d - 1, 0), index_head_sum(0), is_done(false){}; + + void iterate(std::function callback, + std::function outerLoopCallback) { + for (size_t val = 0; val <= bound; ++val) { + outerLoopCallback(val); + iterateRecursive(bound - val, callback); + } + } + + /** + * At the current multi-index (i_1, ..., i_{d-1}, 0), return how many multi-indices starting with + * (i_1, ..., i_{d-1}) are contained in the multi-index set, then advance to the next multi-index + * that ends with a zero. + */ + size_t lastDimensionCount() { return bound - index_head_sum + 1; } + + void next() { + if (bound > index_head_sum) { + index_head_sum += 1; + index_head[d - 2] += 1; + } else { + int dim = d - 2; + + for (; dim >= 0 && index_head[dim] == 0; --dim) { + // reduce dimension until entry is nonzero + } + + if (dim > 0) { + index_head_sum -= (index_head[dim] - 1); + index_head[dim] = 0; + index_head[dim - 1] += 1; + } else if (dim == 0) { + index_head[dim] = 0; + index_head_sum = 0; + is_done = true; + } + } + } + + size_t firstIndex() { return index_head[0]; } + + size_t indexAt(size_t dim) { return index_head[dim]; } + + bool done() { return is_done; } + + void reset() { + index_head = std::vector(d - 1, 0); + index_head_sum = 0; + is_done = false; + } + + size_t dim() { return d; } + + std::vector indexBounds() { return std::vector(d, bound + 1); } + + /** + * Returns an iterator where the last index moves to the front. For an index set defined by a sum + * bound, nothing changes. + */ + TemplateBoundedSumIterator cycle() { + TemplateBoundedSumIterator it = *this; + it.reset(); + return it; + } +}; + class BoundedSumIterator { size_t d; size_t bound; @@ -68,6 +156,16 @@ class BoundedSumIterator { size_t dim() { return d; } std::vector indexBounds() { return std::vector(d, bound + 1); } + + /** + * Returns an iterator where the last index moves to the front. For an index set defined by a sum + * bound, nothing changes. + */ + BoundedSumIterator cycle() { + BoundedSumIterator it = *this; + it.reset(); + return it; + } }; class StandardBoundedSumIterator { @@ -166,6 +264,114 @@ inline std::ostream &operator<<(std::ostream &os, const std::vector &v) { return os; } +template +void multiply_lower_triangular_inplace(It const &it, + std::vector> L, + MultiDimVector &v) { + size_t d = L.size(); + auto n = it.indexBounds(); + + for (int k = d - 1; k >= 0; --k) { + MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + + auto &Lk = L[k]; + it.reset(); + + size_t first_v_index = 0; + size_t second_v_index = 0; + + double *data_pointer = &v.data[0][0]; + size_t data_size = v.data[0].size(); + + while (not it.done()) { + size_t last_dim_count = it.lastDimensionCount(); + double *offset_data_pointer = data_pointer + second_v_index; + for (size_t i = 0; i < last_dim_count; ++i) { + double sum = 0.0; + for (size_t j = 0; j <= i; ++j) { + sum += Lk(i, j) * (*(offset_data_pointer + j)); + } + w.data[i].push_back(sum); + } + second_v_index += last_dim_count; + if (second_v_index >= data_size) { + second_v_index = 0; + first_v_index += 1; + data_pointer = &v.data[first_v_index][0]; + data_size = v.data[first_v_index].size(); + } + + it.next(); + } + + v.swap(w); + + it = it.cycle(); + + // for (size_t i = 0; i < v.data.size(); ++i) { + // std::cout << v.data[i] << "\n\n"; + // } + } +} + +template +void multiply_upper_triangular_inplace(It const &it, + std::vector> U, + MultiDimVector &v) { + size_t d = U.size(); + auto n = it.indexBounds(); + + for (int k = d - 1; k >= 0; --k) { + MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + + auto &Uk = U[k]; + it.reset(); + + size_t first_v_index = 0; + size_t second_v_index = 0; + + double *data_pointer = &v.data[0][0]; + size_t data_size = v.data[0].size(); + + while (not it.done()) { + size_t last_dim_count = it.lastDimensionCount(); + double *offset_data_pointer = data_pointer + second_v_index; + for (size_t i = 0; i < last_dim_count; ++i) { + double sum = 0.0; + for (size_t j = i; j < last_dim_count; ++j) { + sum += Uk(i, j) * (*(offset_data_pointer + j)); + } + w.data[i].push_back(sum); + } + second_v_index += last_dim_count; + if (second_v_index >= data_size) { + second_v_index = 0; + first_v_index += 1; + data_pointer = &v.data[first_v_index][0]; + data_size = v.data[first_v_index].size(); + } + + it.next(); + } + + it = it.cycle(); + + v.swap(w); + + // for (size_t i = 0; i < v.data.size(); ++i) { + // std::cout << v.data[i] << "\n\n"; + // } + } +} + template MultiDimVector interpolate(Func f, It it, Phi phi, X x) { auto n = it.indexBounds(); @@ -241,6 +447,8 @@ MultiDimVector interpolate(Func f, It it, Phi phi, X x) { // std::cout << v.data[i] << "\n\n"; // } + size_t num_sum_op = 0; + std::cout << "First matrix multiplication\n"; // multiply by L^{-1} @@ -249,32 +457,46 @@ MultiDimVector interpolate(Func f, It it, Phi phi, X x) { for (int k = d - 1; k >= 0; --k) { MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + auto &Lkinv = Linv[k]; it.reset(); size_t first_v_index = 0; size_t second_v_index = 0; + double *data_pointer = &v.data[0][0]; + size_t data_size = v.data[0].size(); + while (not it.done()) { size_t last_dim_count = it.lastDimensionCount(); + double *offset_data_pointer = data_pointer + second_v_index; for (size_t i = 0; i < last_dim_count; ++i) { double sum = 0.0; for (size_t j = 0; j <= i; ++j) { - // std::cout << Lkinv(i, j) << " * " << v.data[first_v_index][second_v_index + j] << "\n"; - sum += Lkinv(i, j) * v.data[first_v_index][second_v_index + j]; + sum += Lkinv(i, j) * (*(offset_data_pointer + j)); + ++num_sum_op; } w.data[i].push_back(sum); } second_v_index += last_dim_count; - if (second_v_index >= v.data[first_v_index].size()) { + if (second_v_index >= data_size) { second_v_index = 0; first_v_index += 1; + data_pointer = &v.data[first_v_index][0]; + data_size = v.data[first_v_index].size(); } + it.next(); } v.swap(w); + it = it.cycle(); + // for (size_t i = 0; i < v.data.size(); ++i) { // std::cout << v.data[i] << "\n\n"; // } @@ -288,30 +510,44 @@ MultiDimVector interpolate(Func f, It it, Phi phi, X x) { for (int k = d - 1; k >= 0; --k) { MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + auto &Ukinv = Uinv[k]; it.reset(); size_t first_v_index = 0; size_t second_v_index = 0; + double *data_pointer = &v.data[0][0]; + size_t data_size = v.data[0].size(); + while (not it.done()) { size_t last_dim_count = it.lastDimensionCount(); + double *offset_data_pointer = data_pointer + second_v_index; for (size_t i = 0; i < last_dim_count; ++i) { double sum = 0.0; for (size_t j = i; j < last_dim_count; ++j) { - sum += Ukinv(i, j) * v.data[first_v_index][second_v_index + j]; + sum += Ukinv(i, j) * (*(offset_data_pointer + j)); + ++num_sum_op; } w.data[i].push_back(sum); } second_v_index += last_dim_count; - if (second_v_index >= v.data[first_v_index].size()) { + if (second_v_index >= data_size) { second_v_index = 0; first_v_index += 1; + data_pointer = &v.data[first_v_index][0]; + data_size = v.data[first_v_index].size(); } it.next(); } + it = it.cycle(); + v.swap(w); // for (size_t i = 0; i < v.data.size(); ++i) { @@ -319,6 +555,184 @@ MultiDimVector interpolate(Func f, It it, Phi phi, X x) { // } } + std::cout << "Total number of summation operations: " << num_sum_op << "\n"; + + return v; +} + +template +MultiDimVector interpolate2(Func f, It it, Phi phi, X x) { + auto n = it.indexBounds(); + size_t d = it.dim(); + + namespace ublas = boost::numeric::ublas; + typedef ublas::matrix Matrix; + + std::vector Linv, Uinv; + + // create matrices and inverted LU decompositions + for (size_t k = 0; k < d; ++k) { + // std::cout << "Matrix creation loop\n"; + Matrix Mk(n[k], n[k]); + for (size_t i = 0; i < n[k]; ++i) { + for (size_t j = 0; j < n[k]; ++j) { + Mk(i, j) = phi[k](j)(x[k](i)); + } + } + + // std::cout << "Matrices:\n"; + + // std::cout << Mk << "\n"; + + ublas::lu_factorize(Mk); + + // std::cout << Mk << "\n"; + + Matrix Lkinv = ublas::identity_matrix(n[k]); + Matrix Ukinv = ublas::identity_matrix(n[k]); + ublas::inplace_solve(Mk, Lkinv, ublas::unit_lower_tag()); + ublas::inplace_solve(Mk, Ukinv, ublas::upper_tag()); + + // std::cout << Lkinv << "\n"; + + // std::cout << Ukinv << "\n"; + + Linv.push_back(Lkinv); + Uinv.push_back(Ukinv); + } + + MultiDimVector v(n[0]); + + std::cout << "Compute function values\n"; + + // compute function values + it.reset(); + std::vector point(d); + while (not it.done()) { + size_t last_dim_count = it.lastDimensionCount(); + for (size_t dim = 0; dim < d - 1; ++dim) { + point[dim] = x[dim](it.indexAt(dim)); + } + + for (size_t last_dim_idx = 0; last_dim_idx < last_dim_count; ++last_dim_idx) { + point[d - 1] = x[d - 1](last_dim_idx); + + double function_value = f(point); + + v.data[it.firstIndex()].push_back(function_value); + } + + it.next(); + } + + size_t number = 0; + for (size_t dim = 0; dim < n[0]; ++dim) { + number += v.data[dim].size(); + } + std::cout << "number of points: " << number << "\n"; + + // for (size_t i = 0; i < v.data.size(); ++i) { + // std::cout << v.data[i] << "\n\n"; + // } + + size_t num_sum_op = 0; + + std::cout << "First matrix multiplication\n"; + + // multiply by L^{-1} + // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes + // the first index of w + + for (int k = d - 1; k >= 0; --k) { + MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + + auto &Lkinv = Linv[k]; + it.reset(); + + size_t second_v_index = 0; + + double *data_pointer = &v.data[0][0]; + + it.iterate( + [&](size_t last_dim_count) { + double *offset_data_pointer = data_pointer + second_v_index; + for (size_t i = 0; i < last_dim_count; ++i) { + double sum = 0.0; + for (size_t j = 0; j <= i; ++j) { + sum += Lkinv(i, j) * (*(offset_data_pointer + j)); + ++num_sum_op; + } + w.data[i].push_back(sum); + } + second_v_index += last_dim_count; + }, + [&](size_t first_dim_index) { + second_v_index = 0; + data_pointer = &v.data[first_dim_index][0]; + }); + + v.swap(w); + + it = it.cycle(); + + // for (size_t i = 0; i < v.data.size(); ++i) { + // std::cout << v.data[i] << "\n\n"; + // } + } + + std::cout << "Second matrix multiplication\n"; + + // multiply by U^{-1} + // the multiplication is based on a cyclic permutation of the indices: the last index of v becomes + // the first index of w + + for (int k = d - 1; k >= 0; --k) { + MultiDimVector w(n[k]); + for (size_t idx = 0; idx < n[k]; ++idx) { + // TODO: make compatible with other iterators + w.data[idx].reserve(v.data[idx].size()); + } + + auto &Ukinv = Uinv[k]; + it.reset(); + + size_t second_v_index = 0; + + double *data_pointer = &v.data[0][0]; + + it.iterate( + [&](size_t last_dim_count) { + double *offset_data_pointer = data_pointer + second_v_index; + for (size_t i = 0; i < last_dim_count; ++i) { + double sum = 0.0; + for (size_t j = i; j < last_dim_count; ++j) { + sum += Ukinv(i, j) * (*(offset_data_pointer + j)); + ++num_sum_op; + } + w.data[i].push_back(sum); + } + second_v_index += last_dim_count; + }, + [&](size_t first_dim_index) { + second_v_index = 0; + data_pointer = &v.data[first_dim_index][0]; + }); + + v.swap(w); + + it = it.cycle(); + + // for (size_t i = 0; i < v.data.size(); ++i) { + // std::cout << v.data[i] << "\n\n"; + // } + } + + std::cout << "Total number of summation operations: " << num_sum_op << "\n"; + return v; } diff --git a/test/main.cpp b/test/main.cpp index d6fddca..7e4b67a 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -52,11 +52,26 @@ inline std::ostream &operator<<(std::ostream &os, const std::vector &v) { return os; } +// TODO: refactoring: +/** + * - Interface for MultiDimVector + * - Separate Functions for L- and U-Multiplication, Creation of LU decomposition, computation of + * function values + * - Pass a callback function to iterator instead of calling it.next() - this might improve the + * vector functions (could be made recursive or even loops for fixed dimension implementation) + * - Typed interface? + * - Tests? + * - More point distributions / Basis functions? + * - Forward evaluation? + * - Computation of derivatives? + */ + // using namespace fsi; int main() { - size_t d = 12; - size_t bound = 16; - fsi::BoundedSumIterator it(d, bound); + constexpr size_t d = 30; + size_t bound = 8; + fsi::TemplateBoundedSumIterator it(bound); + // fsi::BoundedSumIterator it(d, bound); std::vector phi(d); std::vector x(d); auto start = std::chrono::high_resolution_clock::now();