diff --git a/CMakeLists.txt b/CMakeLists.txt index acd125e..4bfe877 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ if(MPI_FOUND) message("-- MPI include files found in " "${MPI_INCLUDE_PATH}") separate_arguments(MPIEXEC_PREFLAGS) # to support multi flags endif() + # OPENMP find_package(OpenMP) @@ -83,7 +84,10 @@ if(MPI_FOUND) target_include_directories(Htool PRIVATE ${MPI_INCLUDE_PATH} ${MPI4PY_INCLUDE_DIR}) target_compile_definitions(Htool PRIVATE HAVE_MPI) endif() -target_link_libraries(Htool PRIVATE ${MPI_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${ARPACK_LIBRARIES} ${OpenMP_CXX_LIBRARIES}) +target_link_libraries(Htool PRIVATE ${MPI_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${ARPACK_LIBRARIES}) +if(OpenMP_CXX_FOUND) + target_link_libraries(Htool PRIVATE OpenMP::OpenMP_CXX) +endif() if("${BLA_VENDOR}" STREQUAL "Intel10_32" OR "${BLA_VENDOR}" STREQUAL "Intel10_64lp" @@ -94,7 +98,7 @@ if("${BLA_VENDOR}" STREQUAL "Intel10_32" target_compile_definitions(Htool PRIVATE "-DHPDDM_MKL -DHTOOL_MKL") endif() -target_compile_definitions(Htool PRIVATE "-DHTOOL_WITH_PYTHON_INTERFACE" "-DHTOOL_WITH_HPDDM") +target_compile_definitions(Htool PRIVATE "-DHTOOL_WITH_HPDDM") if(CODE_COVERAGE) if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") diff --git a/lib/htool b/lib/htool index f0b1541..96800b1 160000 --- a/lib/htool +++ b/lib/htool @@ -1 +1 @@ -Subproject commit f0b154130a0f542b19ad6d3b4e6986539fb702b7 +Subproject commit 96800b14a86092fbd4b8593a5ecdc0f6ffa50ce3 diff --git a/lib/pybind11 b/lib/pybind11 index cc86e8b..678b673 160000 --- a/lib/pybind11 +++ b/lib/pybind11 @@ -1 +1 @@ -Subproject commit cc86e8b2a6c68580f23c433479e2815aefa4d880 +Subproject commit 678b6735e44fe9d7feac7ff31bc9c86d42c04613 diff --git a/src/htool/hmatrix/hmatrix.hpp b/src/htool/hmatrix/hmatrix.hpp index 27d7752..c47d409 100644 --- a/src/htool/hmatrix/hmatrix.hpp +++ b/src/htool/hmatrix/hmatrix.hpp @@ -56,10 +56,10 @@ void declare_HMatrix(py::module &m, const std::string &className) { py_class.def("get_source_cluster", &HMatrix::get_source_cluster, py::return_value_policy::reference_internal); py_class.def("lu_factorization", [](HMatrix &hmatrix) { - htool::lu_factorization(hmatrix); + htool::lu_factorization(exec_compat::par, hmatrix); }); py_class.def("cholesky_factorization", [](HMatrix &hmatrix, char UPLO) { - htool::cholesky_factorization(UPLO, hmatrix); + htool::cholesky_factorization(exec_compat::par, UPLO, hmatrix); }); py_class.def("lu_solve", [](const Class &self, char trans, const py::array_t &input) { std::vector shape; @@ -110,7 +110,7 @@ void declare_HMatrix(py::module &m, const std::string &className) { std::fill_n(result.mutable_data(), self.get_target_cluster().get_size(), CoefficientPrecision(0)); char trans = 'N'; - htool::add_hmatrix_vector_product(trans, CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data()); + htool::add_hmatrix_vector_product(exec_compat::par, trans, CoefficientPrecision(1), self, input.data(), CoefficientPrecision(0), result.mutable_data()); return result; }, @@ -131,7 +131,7 @@ void declare_HMatrix(py::module &m, const std::string &className) { htool::MatrixView output_view(input.shape()[0], input.shape()[1], result.mutable_data()); char transa = 'N'; char transb = 'N'; - htool::add_hmatrix_matrix_product(transa, transb, CoefficientPrecision(1), self, input_view, CoefficientPrecision(0), output_view); + htool::add_hmatrix_matrix_product(exec_compat::par, transa, transb, CoefficientPrecision(1), self, input_view, CoefficientPrecision(0), output_view); return result; }, diff --git a/src/htool/hmatrix/hmatrix_tree_builder.hpp b/src/htool/hmatrix/hmatrix_tree_builder.hpp index 217fe28..f31f055 100644 --- a/src/htool/hmatrix/hmatrix_tree_builder.hpp +++ b/src/htool/hmatrix/hmatrix_tree_builder.hpp @@ -33,7 +33,7 @@ void declare_hmatrix_builder(py::module &m, const std::string &className) { // LCOV_EXCL_STOP // Build - py_class.def("build", [](const Class &self, const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, int target_partition_number, int partition_number_for_symmetry) { return self.build(generator, target_cluster, source_cluster, target_partition_number, partition_number_for_symmetry); }, py::arg("generator"), py::arg("target_cluster"), py::arg("source_cluster"), py::arg("target_partition_number") = -1, py::arg("partition_number_for_symmetry") = -1); + py_class.def("build", [](const Class &self, const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, int target_partition_number, int partition_number_for_symmetry) { return self.build(exec_compat::par, generator, target_cluster, source_cluster, target_partition_number, partition_number_for_symmetry); }, py::arg("generator"), py::arg("target_cluster"), py::arg("source_cluster"), py::arg("target_partition_number") = -1, py::arg("partition_number_for_symmetry") = -1, py::call_guard()); // Setters py_class.def("set_minimal_source_depth", &Class::set_minimal_source_depth); diff --git a/src/htool/hmatrix/interfaces/virtual_generator.hpp b/src/htool/hmatrix/interfaces/virtual_generator.hpp index 532c160..011606b 100644 --- a/src/htool/hmatrix/interfaces/virtual_generator.hpp +++ b/src/htool/hmatrix/interfaces/virtual_generator.hpp @@ -10,11 +10,11 @@ template class VirtualGeneratorPython : public htool::VirtualGenerator { public: using VirtualGenerator::VirtualGenerator; - VirtualGeneratorPython(const py::array_t &target_permutation, const py::array_t &source_permutation) : VirtualGenerator() {} void copy_submatrix(int M, int N, const int *const rows, const int *const cols, CoefficientPrecision *ptr) const override { if (M * N > 0) { + py::gil_scoped_acquire acquire; py::array_t mat(std::array{M, N}, ptr, py::capsule(ptr)); py::array_t py_rows(std::array{M}, rows, py::capsule(rows)); @@ -47,14 +47,60 @@ class PyVirtualGenerator : public VirtualGeneratorPython { }; template -void declare_virtual_generator(py::module &m, const std::string &className, const std::string &base_class_name) { +class GeneratorFromCapsule : public htool::VirtualGenerator { + private: + using backend_callback_t = void (*)(int, int, const int *, const int *, double *, void *); + + struct Backend { + backend_callback_t callback; + void *ctx; + }; + + backend_callback_t callback = nullptr; + void *ctx = nullptr; + + py::capsule m_capsule; + + public: + using VirtualGenerator::VirtualGenerator; + GeneratorFromCapsule(py::object obj) : VirtualGenerator() { + if (!py::isinstance(obj)) { + throw std::runtime_error("Backend needs to be a Python capsule"); + } + + m_capsule = obj.cast(); + + auto *user_backend = static_cast( + PyCapsule_GetPointer(m_capsule.ptr(), "backend")); + + if (!user_backend || !user_backend->callback) { + throw std::runtime_error("Invalid backend capsule"); + } + + callback = user_backend->callback; + ctx = user_backend->ctx; + } + + void copy_submatrix(int M, int N, const int *const rows, const int *const cols, CoefficientPrecision *ptr) const override { + double *user_ptr = reinterpret_cast(ptr); + if (M * N > 0) { + callback(M, N, rows, cols, user_ptr, ctx); + } + } +}; + +template +void declare_virtual_generator(py::module &m, const std::string &prefix) { using BaseClass = VirtualGenerator; - py::class_(m, base_class_name.c_str()); + py::class_(m, (prefix + "IGenerator").c_str()); using Class = VirtualGeneratorPython; - py::class_> py_class(m, className.c_str()); + py::class_> py_class(m, (prefix + "VirtualGenerator").c_str()); py_class.def(py::init<>()); py_class.def("build_submatrix", &Class::build_submatrix); + + py::class_, BaseClass> py_class_2(m, (prefix + "GeneratorFromCapsule").c_str()); + py_class_2.def(py::init()); } #endif diff --git a/src/htool/hmatrix/interfaces/virtual_low_rank_generator.hpp b/src/htool/hmatrix/interfaces/virtual_low_rank_generator.hpp index d0f1a21..c47ae65 100644 --- a/src/htool/hmatrix/interfaces/virtual_low_rank_generator.hpp +++ b/src/htool/hmatrix/interfaces/virtual_low_rank_generator.hpp @@ -23,11 +23,11 @@ class VirtualLowRankGeneratorPython : public VirtualLowRankGenerator(), m_allow_copy(allow_copy) {} bool copy_low_rank_approximation(int M, int N, const int *const rows, const int *const cols, LowRankMatrix &lrmat) const override { + py::gil_scoped_acquire acquire; auto &U = lrmat.get_U(); auto &V = lrmat.get_V(); py::array_t py_rows(std::array{M}, rows, py::capsule(rows)); py::array_t py_cols(std::array{N}, cols, py::capsule(cols)); - bool success = build_low_rank_approximation(py_rows, py_cols, lrmat.get_epsilon()); if (success) { if (m_allow_copy) { diff --git a/src/htool/main.cpp b/src/htool/main.cpp index 548445d..c628eb3 100644 --- a/src/htool/main.cpp +++ b/src/htool/main.cpp @@ -58,7 +58,7 @@ PYBIND11_MODULE(Htool, m) { declare_cluster_builder(m, "ClusterTreeBuilder"); declare_cluster_utility(m); - declare_virtual_generator(m, "VirtualGenerator", "IGenerator"); + declare_virtual_generator(m, ""); declare_LowRankMatrix(m, "LowRankMatrix"); declare_HMatrix(m, "HMatrix"); declare_custom_VirtualLowRankGenerator(m, "VirtualLowRankGenerator"); @@ -89,7 +89,7 @@ PYBIND11_MODULE(Htool, m) { declare_virtual_partitioning>(m, "Complex"); declare_LowRankMatrix>(m, "ComplexLowRankMatrix"); declare_HMatrix, double>(m, "ComplexHMatrix"); - declare_virtual_generator>(m, "ComplexVirtualGenerator", "IComplexGenerator"); + declare_virtual_generator>(m, "Complex"); declare_custom_VirtualLowRankGenerator>(m, "VirtualComplexLowRankGenerator"); declare_custom_VirtualDenseBlocksGenerator>(m, "ComplexVirtualDenseBlocksGenerator"); declare_hmatrix_builder, double>(m, "ComplexHMatrixTreeBuilder"); diff --git a/src/htool/matplotlib/hmatrix.hpp b/src/htool/matplotlib/hmatrix.hpp index 75a42e9..8d7b773 100644 --- a/src/htool/matplotlib/hmatrix.hpp +++ b/src/htool/matplotlib/hmatrix.hpp @@ -24,31 +24,25 @@ void declare_matplotlib_hmatrix(py::module &m) { }); // Import - py::object plt = py::module::import("matplotlib.pyplot"); - py::object patches = py::module::import("matplotlib.patches"); - py::object colors = py::module::import("matplotlib.colors"); - py::object numpy = py::module::import("numpy"); + py::object plt = py::module::import("matplotlib.pyplot"); + py::object patches = py::module::import("matplotlib.patches"); + py::object collections = py::module::import("matplotlib.collections"); + py::object colors = py::module::import("matplotlib.colors"); + py::object numpy = py::module::import("numpy"); + + // Colormap + py::object cmap = plt.attr("get_cmap")("YlGn"); + py::object new_cmap = colors.attr("LinearSegmentedColormap").attr("from_list")("trunc(YlGn,0.4,1)", cmap(numpy.attr("linspace")(0.4, 1, 100))); + py::object norm = colors.attr("Normalize")("vmin"_a = 0, "vmax"_a = 10); // First Data int nr = hmatrix.get_target_cluster().get_size(); int nc = hmatrix.get_source_cluster().get_size(); - py::array_t matrix({nr, nc}); - py::array_t mask_matrix({nr, nc}); - matrix.attr("fill")(0); - mask_matrix.attr("fill")(false); - - // Figure - // py::tuple sublots_output = plt.attr("subplots")(1, 1); - // py::object fig = sublots_output[0]; - // py::object axes = sublots_output[1]; - // axes.attr() - // Issue: there a shift of one pixel along the y-axis... - // int shift = axes.transData.transform([(0,0), (1,1)]) - // shift = shift[1,1] - shift[0,1] # 1 unit in display coords - int shift = 0; + // Storage for rectangles and colors + py::list rects; + py::list facecolors; - int max_rank = 0; for (int p = 0; p < nb_leaves; p++) { int i_row = buf[5 * p]; int nb_row = buf[5 * p + 1]; @@ -56,36 +50,51 @@ void declare_matplotlib_hmatrix(py::module &m) { int nb_col = buf[5 * p + 3]; int rank = buf[5 * p + 4]; - if (rank > max_rank) { - max_rank = rank; - } - for (int i = 0; i < nb_row; i++) { - for (int j = 0; j < nb_col; j++) { - matrix.mutable_at(i_row + i, i_col + j) = rank; - if (rank == -1) { - mask_matrix.mutable_at(i_row + i, i_col + j) = true; - } - } + // Color selection + py::object facecolor; + if (rank == -1) { + facecolors.append(py::str("red")); // full blocks + } else { + facecolors.append(new_cmap(norm(rank))); } - py::object rect = patches.attr("Rectangle")(py::make_tuple(i_col - 0.5, i_row - 0.5 + shift), nb_col, nb_row, "linewidth"_a = 0.75, "edgecolor"_a = 'k', "facecolor"_a = "none"); - axes.attr("add_patch")(rect); + // Rectangle (no styling here!) + py::object rect = patches.attr("Rectangle")( + py::make_tuple(i_col, i_row), + nb_col, + nb_row); + rects.append(rect); - if (rank >= 0 && nb_col / double(nc) > 0.05 && nb_row / double(nc) > 0.05) { - axes.attr("annotate")(rank, py::make_tuple(i_col + nb_col / 2., i_row + nb_row / 2.), "color"_a = "white", "size"_a = 10, "va"_a = "center", "ha"_a = "center"); + // Optional: annotate only large blocks + if (rank >= 0 && nb_col > nc * 0.05 && nb_row > nr * 0.05) { + axes.attr("text")( + i_col + nb_col / 2.0, + i_row + nb_row / 2.0, + rank, + "color"_a = "white", + "ha"_a = "center", + "va"_a = "center", + "fontsize"_a = 8); } } - // Colormap - py::object cmap = plt.attr("get_cmap")("YlGn"); - py::object new_cmap = colors.attr("LinearSegmentedColormap").attr("from_list")("trunc(YlGn,0.4,1)", cmap(numpy.attr("linspace")(0.4, 1, 100))); + // Create PatchCollection + py::object collection = collections.attr("PatchCollection")( + rects, + "facecolor"_a = facecolors, + "edgecolor"_a = "black", // huge speedup + "linewidth"_a = 0.2); + + axes.attr("add_collection")(collection); - // Plot - py::object masked_matrix = numpy.attr("ma").attr("array")(matrix, "mask"_a = mask_matrix); - new_cmap.attr("set_bad")("color"_a = "red"); + // Axes formatting + axes.attr("set_xlim")(0, nc); + axes.attr("set_ylim")(nr, 0); // invert y-axis (matrix style) + axes.attr("set_aspect")("equal"); - axes.attr("imshow")(masked_matrix, "cmap"_a = new_cmap, "vmin"_a = 0, "vmax"_a = 10); - // plt.attr("draw")(); + // Optional: remove ticks for speed + axes.attr("set_xticks")(py::list()); + axes.attr("set_yticks")(py::list()); }); } diff --git a/tests/test_hmatrix.py b/tests/test_hmatrix.py index 7eed45a..2bd9f6b 100644 --- a/tests/test_hmatrix.py +++ b/tests/test_hmatrix.py @@ -5,23 +5,27 @@ import pytest import Htool -from example.advanced.define_custom_low_rank_generator import CustomSVD +from example.advanced.define_custom_low_rank_generator import ( + ComplexCustomSVD, + CustomSVD, +) from example.create_geometry import create_random_geometries -from example.define_generators import CustomGenerator +from example.define_generators import ComplexCustomGenerator, CustomGenerator @pytest.mark.parametrize( - "loglevel,symmetry", + "loglevel,symmetry,is_complex", [ - (logging.INFO, "N"), - (logging.DEBUG, "N"), - (logging.WARNING, "N"), - (logging.ERROR, "N"), - (logging.CRITICAL, "N"), - (logging.INFO, "S"), + (logging.INFO, "N", False), + (logging.DEBUG, "N", False), + (logging.WARNING, "N", False), + (logging.ERROR, "N", False), + (logging.CRITICAL, "N", False), + (logging.INFO, "S", False), + (logging.INFO, "S", True), ], ) -def test_hmatrix(loglevel, symmetry): +def test_hmatrix(loglevel, symmetry, is_complex): logging.basicConfig(level=loglevel) # Random geometry @@ -52,17 +56,30 @@ def test_hmatrix(loglevel, symmetry): source_cluster = target_cluster # Build generator - if symmetry == "N": - generator = CustomGenerator(target_points, source_points) + if is_complex is False: + if symmetry == "N": + generator = CustomGenerator(target_points, source_points) + else: + generator = CustomGenerator(target_points, target_points) else: - generator = CustomGenerator(target_points, target_points) - + if symmetry == "N": + generator = ComplexCustomGenerator(target_points, source_points) + else: + generator = ComplexCustomGenerator(target_points, target_points) # Custom low rank generator - low_rank_generator = CustomSVD(generator, False) + if is_complex is False: + low_rank_generator = CustomSVD(generator, False) + else: + low_rank_generator = ComplexCustomSVD(generator, False) # Build HMatrix - hmatrix_builder = Htool.HMatrixTreeBuilder(epsilon, eta, "N", "N") + if is_complex is False: + hmatrix_builder = Htool.HMatrixTreeBuilder(epsilon, eta, "N", "N") + else: + hmatrix_builder = Htool.ComplexHMatrixTreeBuilder(epsilon, eta, "N", "N") hmatrix_builder.set_low_rank_generator(low_rank_generator) + if symmetry == "S": + hmatrix_builder.set_block_tree_consistency(True) hmatrix = hmatrix_builder.build(generator, target_cluster, source_cluster) assert hmatrix.shape == (nb_rows, nb_cols) @@ -74,39 +91,46 @@ def test_hmatrix(loglevel, symmetry): dense_in_user_numbering = hmatrix.to_dense_in_user_numbering() # HMatrix vector product + dtype = np.float64 if is_complex is False else np.complex128 np.random.seed(0) - x = np.random.rand(nb_cols) + if is_complex is False: + x = np.random.rand(nb_cols) + else: + x = np.random.rand(nb_cols) + 1j * np.random.rand(nb_cols) y = hmatrix * x y_exact = generator.mat_vec(x) y_dense = dense_in_user_numbering.dot(x) y_copy = copy_hmatrix * x assert np.linalg.norm(y - y_exact) / np.linalg.norm(y_exact) < epsilon assert np.linalg.norm(y - y_dense) / np.linalg.norm(y_dense) < 1e-10 - assert np.linalg.norm(y - y_copy) < 1e-10 + assert np.linalg.norm(y - y_copy) / np.linalg.norm(y) < 1e-10 # HMatrix matrix product np.random.seed(0) - x = np.random.rand(nb_cols, 2) + if is_complex is False: + x = np.random.rand(nb_cols, 2) + else: + x = np.random.rand(nb_cols, 2) + 1j * np.random.rand(nb_cols, 2) y = hmatrix @ x y_exact = generator.mat_mat(x) y_dense = dense_in_user_numbering @ x y_copy = copy_hmatrix @ x assert np.linalg.norm(y - y_exact) / np.linalg.norm(y_exact) < epsilon assert np.linalg.norm(y - y_dense) / np.linalg.norm(y_dense) < 1e-10 - assert np.linalg.norm(y - y_copy) < 1e-10 + assert np.linalg.norm(y - y_copy) / np.linalg.norm(y) < 1e-10 if symmetry != "N": # HLU factorization copy_hmatrix.lu_factorization() # HLU solve vec - x_ref = np.ones(nb_cols) + x_ref = np.ones(nb_cols, dtype=dtype) y = hmatrix * x_ref x = copy_hmatrix.lu_solve("N", y) assert np.linalg.norm(x - x_ref) / np.linalg.norm(x_ref) < epsilon # HLU solve mat - x_ref = np.ones((nb_cols, 2)) + x_ref = np.ones((nb_cols, 2), dtype=dtype) y = hmatrix @ x_ref x = copy_hmatrix.lu_solve("N", y) assert np.linalg.norm(x - x_ref) / np.linalg.norm(x_ref) < epsilon @@ -116,13 +140,13 @@ def test_hmatrix(loglevel, symmetry): copy_hmatrix.cholesky_factorization("L") # Cholesky solve vec - x_ref = np.ones(nb_cols) + x_ref = np.ones(nb_cols, dtype=dtype) y = hmatrix * x_ref x = copy_hmatrix.cholesky_solve("L", y) assert np.linalg.norm(x - x_ref) / np.linalg.norm(x_ref) < epsilon # Cholesky solve mat - x_ref = np.ones((nb_cols, 2)) + x_ref = np.ones((nb_cols, 2), dtype=dtype) y = hmatrix @ x_ref x = copy_hmatrix.cholesky_solve("L", y) assert np.linalg.norm(x - x_ref) / np.linalg.norm(x_ref) < epsilon