diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9b3a9b9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,35 @@ +# CHANGELOG + +All notable changes to the `dae-cpp` project will be documented in this file. + +## [`develop` branch -- 2.3.0] + +### Added + +- Option `linear_system_scaling` (can be `true` or `false`) to rescale linear system matrix for better convergence (`false` by default). +- Linear system matrix (row) scaling. Matrix scaling can improve stability of the linear solver in some cases but comes with a slight performance penalty. +- Linear system matrix pruning if scaling is enabled. + +### Fixed + +- The solver cannot reach relative tolerance `rtol` in some cases and stops with the "solution diverged" error (fixed by updating internal tolerances). +- Incompatibility of `autodiff` library with the latest version of Eigen (fixed by altering `autodiff`). +- Typo in `daecpp::solver_command::stop_integration` enum (`stop_intergration` -> `stop_integration`). + +### Changed + +- Eigen to version 5.0.0. +- Renamed `TESTING` macro definition to `DAECPP_TESTING` to avoid potential clash. +- Linear system matrix pattern is now analysed only once at the first iteration. +- Pre-allocate vector of `dual` numbers in `JacobianMatrixShape` class to improve performance of the Jacobian computed from the user-defined shape. +- Updated internal tolerances used in the solver for the convergence check against relative tolerance `rtol`. + +### Removed + +- Conversion from Eigen to dae-cpp matrix format and back in automatic Jacobian class (an attempt to speed it up). + +## [2.2.0] + +Current stable version. 
+ +[CHANGELOG](https://dae-cpp.github.io/CHANGELOG.html) diff --git a/CMakeLists.txt b/CMakeLists.txt index b1ecf0d..0d21121 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,18 +7,16 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) ######## Build examples ######## -set(EXAMPLE_LIST "quick_start" "simple_dae" "perovskite_model" "flame_propagation" "jacobian_shape" "jacobian_compare") - include_directories(${PROJECT_SOURCE_DIR}) -foreach(EXAMPLE_NAME ${EXAMPLE_LIST}) - - FILE(GLOB SOURCES ${PROJECT_SOURCE_DIR}/examples/${EXAMPLE_NAME}/*.cpp) - - add_executable(${EXAMPLE_NAME} ${SOURCES}) - - install(TARGETS ${EXAMPLE_NAME} DESTINATION bin) +file(GLOB EXAMPLE_DIRS RELATIVE ${PROJECT_SOURCE_DIR}/examples ${PROJECT_SOURCE_DIR}/examples/*) +foreach(EXAMPLE_NAME ${EXAMPLE_DIRS}) + if(IS_DIRECTORY ${PROJECT_SOURCE_DIR}/examples/${EXAMPLE_NAME}) + file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/examples/${EXAMPLE_NAME}/*.cpp) + add_executable(${EXAMPLE_NAME} ${SOURCES}) + install(TARGETS ${EXAMPLE_NAME} DESTINATION bin) + endif() endforeach() ######## Build tests ######## @@ -28,7 +26,7 @@ set(PROJECT_TEST "dae-cpp-test") include(CTest) enable_testing() -FILE(GLOB SOURCES_TEST ${PROJECT_SOURCE_DIR}/tests/test_*.cpp) +file(GLOB SOURCES_TEST ${PROJECT_SOURCE_DIR}/tests/test_*.cpp) add_executable(${PROJECT_TEST} ${SOURCES_TEST}) @@ -44,7 +42,7 @@ include_directories(${PROJECT_SOURCE_DIR}/googletest/googletest/include) target_link_libraries(${PROJECT_TEST} gtest_main gtest) -target_compile_definitions(${PROJECT_TEST} PRIVATE "TESTING") +target_compile_definitions(${PROJECT_TEST} PRIVATE "DAECPP_TESTING") include(GoogleTest) gtest_discover_tests(${PROJECT_TEST}) diff --git a/Eigen/COPYING.APACHE b/Eigen/COPYING.APACHE deleted file mode 100644 index 61e948d..0000000 --- a/Eigen/COPYING.APACHE +++ /dev/null @@ -1,203 +0,0 @@ -/* - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ \ No newline at end of file diff --git a/Eigen/COPYING.BSD b/Eigen/COPYING.BSD deleted file mode 100644 index 8964ddf..0000000 --- a/Eigen/COPYING.BSD +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ diff --git a/Eigen/COPYING.MINPACK b/Eigen/COPYING.MINPACK deleted file mode 100644 index 132cc3f..0000000 --- a/Eigen/COPYING.MINPACK +++ /dev/null @@ -1,51 +0,0 @@ -Minpack Copyright Notice (1999) University of Chicago. All rights reserved - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. - -3. The end-user documentation included with the -redistribution, if any, must include the following -acknowledgment: - - "This product includes software developed by the - University of Chicago, as Operator of Argonne National - Laboratory. 
- -Alternately, this acknowledgment may appear in the software -itself, if and wherever such third-party acknowledgments -normally appear. - -4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" -WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE -UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND -THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE -OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY -OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR -USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF -THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) -DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION -UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL -BE CORRECTED. - -5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT -HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF -ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, -INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF -ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF -PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER -SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT -(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, -EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE -POSSIBILITY OF SUCH LOSS OR DAMAGES. diff --git a/Eigen/COPYING.README b/Eigen/COPYING.README deleted file mode 100644 index 11af93c..0000000 --- a/Eigen/COPYING.README +++ /dev/null @@ -1,6 +0,0 @@ -Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: - http://www.mozilla.org/MPL/2.0/ - http://www.mozilla.org/MPL/2.0/FAQ.html - -Some files contain third-party code under BSD or other MPL2-compatible licenses, -whence the other COPYING.* files here. 
\ No newline at end of file diff --git a/Eigen/Core b/Eigen/Core index f9d9974..4ce2ed9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -11,6 +11,9 @@ #ifndef EIGEN_CORE_MODULE_H #define EIGEN_CORE_MODULE_H +// Eigen version information. +#include "Version" + // first thing Eigen does: stop the compiler from reporting useless warnings. #include "src/Core/util/DisableStupidWarnings.h" @@ -92,11 +95,22 @@ #include #include +#include #include // for std::is_nothrow_move_assignable #include +// for std::this_thread::yield(). +#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)) +#include +#endif + +// for std::bit_cast() +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#include +#endif + // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN #include @@ -116,9 +130,8 @@ #undef isfinite #include #include -#include -#include #include +#include #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0 #define EIGEN_SYCL_LOCAL_THREAD_DIM0 16 #endif @@ -178,6 +191,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/RandomImpl.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" #include "src/Core/arch/Default/ConjHelper.h" @@ -187,38 +201,52 @@ using std::ptrdiff_t; #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h" #if defined EIGEN_VECTORIZE_AVX512 +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/AVX/PacketMath.h" +#include "src/Core/arch/AVX/Reductions.h" +#include "src/Core/arch/AVX512/PacketMath.h" +#include "src/Core/arch/AVX512/Reductions.h" #if defined EIGEN_VECTORIZE_AVX512FP16 #include "src/Core/arch/AVX512/PacketMathFP16.h" #endif -#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/TypeCasting.h" -#include "src/Core/arch/SSE/Complex.h" -#include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX/TypeCasting.h" -#include 
"src/Core/arch/AVX/Complex.h" -#include "src/Core/arch/AVX512/PacketMath.h" #include "src/Core/arch/AVX512/TypeCasting.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 +#include "src/Core/arch/AVX512/TypeCastingFP16.h" +#endif +#include "src/Core/arch/SSE/Complex.h" +#include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX512/Complex.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 +#include "src/Core/arch/AVX512/MathFunctionsFP16.h" +#endif #include "src/Core/arch/AVX512/TrsmKernel.h" #elif defined EIGEN_VECTORIZE_AVX - // Use AVX for floats and doubles, SSE for integers +// Use AVX for floats and doubles, SSE for integers #include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" #include "src/Core/arch/SSE/TypeCasting.h" #include "src/Core/arch/SSE/Complex.h" #include "src/Core/arch/AVX/PacketMath.h" +#include "src/Core/arch/AVX/Reductions.h" #include "src/Core/arch/AVX/TypeCasting.h" #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" #include "src/Core/arch/SSE/TypeCasting.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) +#endif + +#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" #include "src/Core/arch/AltiVec/TypeCasting.h" #include "src/Core/arch/AltiVec/MathFunctions.h" @@ -228,6 +256,11 @@ using std::ptrdiff_t; #include "src/Core/arch/NEON/TypeCasting.h" #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_LSX +#include "src/Core/arch/LSX/PacketMath.h" +#include 
"src/Core/arch/LSX/TypeCasting.h" +#include "src/Core/arch/LSX/MathFunctions.h" +#include "src/Core/arch/LSX/Complex.h" #elif defined EIGEN_VECTORIZE_SVE #include "src/Core/arch/SVE/PacketMath.h" #include "src/Core/arch/SVE/TypeCasting.h" @@ -294,11 +327,8 @@ using std::ptrdiff_t; #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" - -#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 - // at least confirmed with Doxygen 1.5.5 and 1.5.6 +#include "src/Core/RealView.h" #include "src/Core/Assign.h" -#endif #include "src/Core/ArrayBase.h" #include "src/Core/util/BlasUtil.h" @@ -312,12 +342,14 @@ using std::ptrdiff_t; #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/Fill.h" #include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" #include "src/Core/CwiseUnaryView.h" #include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/InnerProduct.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" #include "src/Core/Stride.h" @@ -335,6 +367,7 @@ using std::ptrdiff_t; #include "src/Core/SkewSymmetricMatrix3.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" +#include "src/Core/FindCoeff.h" #include "src/Core/Fuzzy.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" @@ -347,6 +380,7 @@ using std::ptrdiff_t; #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" +#include "src/Core/DeviceWrapper.h" #ifdef EIGEN_GEMM_THREADPOOL #include "ThreadPool" #endif @@ -372,6 +406,8 @@ using std::ptrdiff_t; #include "src/Core/arch/AltiVec/MatrixProduct.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/GeneralBlockPanelKernel.h" +#elif defined EIGEN_VECTORIZE_LSX +#include "src/Core/arch/LSX/GeneralBlockPanelKernel.h" 
#endif #if defined(EIGEN_VECTORIZE_AVX512) diff --git a/Eigen/Geometry b/Eigen/Geometry index 3334874..efe3e1f 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -22,13 +22,11 @@ * - fixed-size homogeneous transformations * - translation, scaling, 2D and 3D rotations * - \link Quaternion quaternions \endlink - * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) - * - orthognal vector generation (\ref MatrixBase::unitOrthogonal) - * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes - * \endlink + * - cross products (\ref MatrixBase::cross(), \ref MatrixBase::cross3()) + * - orthogonal vector generation (MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink * - \link AlignedBox axis aligned bounding boxes \endlink - * - \link umeyama least-square transformation fitting \endlink - * + * - \link umeyama() least-square transformation fitting \endlink * \code * #include * \endcode diff --git a/Eigen/COPYING.MPL2 b/Eigen/LICENSE similarity index 99% rename from Eigen/COPYING.MPL2 rename to Eigen/LICENSE index ee6256c..a612ad9 100644 --- a/Eigen/COPYING.MPL2 +++ b/Eigen/LICENSE @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0 means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" @@ -357,7 +357,7 @@ Exhibit A - Source Code Form License Notice This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at https://mozilla.org/MPL/2.0/. + file, You can obtain one at http://mozilla.org/MPL/2.0/. 
If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 921b8a0..0167419 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -54,7 +54,7 @@ * \note Some of these methods (like AMD or METIS), need the sparsity pattern * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. - * If your matrix is already symmetric (at leat in structure), you can avoid that + * If your matrix is already symmetric (at least in structure), you can avoid that * by calling the method with a SelfAdjointView type. * * \code diff --git a/Eigen/ThreadPool b/Eigen/ThreadPool index febb187..39e5d1e 100644 --- a/Eigen/ThreadPool +++ b/Eigen/ThreadPool @@ -71,6 +71,8 @@ #include "src/ThreadPool/ThreadEnvironment.h" #include "src/ThreadPool/Barrier.h" #include "src/ThreadPool/NonBlockingThreadPool.h" +#include "src/ThreadPool/CoreThreadPoolDevice.h" +#include "src/ThreadPool/ForkJoin.h" // IWYU pragma: end_exports #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/Version b/Eigen/Version new file mode 100644 index 0000000..91936c2 --- /dev/null +++ b/Eigen/Version @@ -0,0 +1,14 @@ +#ifndef EIGEN_VERSION_H +#define EIGEN_VERSION_H + +// The "WORLD" version will forever remain "3" for the "Eigen3" library. +#define EIGEN_WORLD_VERSION 3 +// As of Eigen3 5.0.0, we have moved to Semantic Versioning (semver.org). 
+#define EIGEN_MAJOR_VERSION 5 +#define EIGEN_MINOR_VERSION 0 +#define EIGEN_PATCH_VERSION 0 +#define EIGEN_PRERELEASE_VERSION "" +#define EIGEN_BUILD_VERSION "" +#define EIGEN_VERSION_STRING "5.0.0" + +#endif // EIGEN_VERSION_H diff --git a/Eigen/commit-d26e19714fca2faf544619c3604f88d980e5a207 b/Eigen/commit-549bf8c75b6aae071cde2f28aa48f16ee3ae60b0 similarity index 100% rename from Eigen/commit-d26e19714fca2faf544619c3604f88d980e5a207 rename to Eigen/commit-549bf8c75b6aae071cde2f28aa48f16ee3ae60b0 diff --git a/Eigen/src/AccelerateSupport/AccelerateSupport.h b/Eigen/src/AccelerateSupport/AccelerateSupport.h index 09967ff..13a26df 100644 --- a/Eigen/src/AccelerateSupport/AccelerateSupport.h +++ b/Eigen/src/AccelerateSupport/AccelerateSupport.h @@ -11,7 +11,7 @@ template using AccelerateLLT = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateLDLT + * \typedef AccelerateLDLT * \brief The default Cholesky (LDLT) factorization and solver based on Accelerate * * \warning Only single and double precision real scalar types are supported by Accelerate @@ -39,7 +39,7 @@ template using AccelerateLDLT = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateLDLTUnpivoted + * \typedef AccelerateLDLTUnpivoted * \brief A direct Cholesky-like LDL^T factorization and solver based on Accelerate with only 1x1 pivots and no pivoting * * \warning Only single and double precision real scalar types are supported by Accelerate @@ -53,7 +53,7 @@ template using AccelerateLDLTUnpivoted = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateLDLTSBK + * \typedef AccelerateLDLTSBK * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with Supernode Bunch-Kaufman and static * pivoting * @@ -68,7 +68,7 @@ template using AccelerateLDLTSBK = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateLDLTTPP + * \typedef AccelerateLDLTTPP * \brief A direct Cholesky (LDLT) factorization and 
solver based on Accelerate with full threshold partial pivoting * * \warning Only single and double precision real scalar types are supported by Accelerate @@ -82,7 +82,7 @@ template using AccelerateLDLTTPP = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateQR + * \typedef AccelerateQR * \brief A QR factorization and solver based on Accelerate * * \warning Only single and double precision real scalar types are supported by Accelerate @@ -95,7 +95,7 @@ template using AccelerateQR = AccelerateImpl; /** \ingroup AccelerateSupport_Module - * \class AccelerateCholeskyAtA + * \typedef AccelerateCholeskyAtA * \brief A QR factorization and solver based on Accelerate without storing Q (equivalent to A^TA = R^T R) * * \warning Only single and double precision real scalar types are supported by Accelerate diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 5d52ab2..b1d801d 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -230,8 +230,8 @@ class LDLT : public SolverBase > { */ const LDLT& adjoint() const { return *this; } - EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } /** \brief Reports whether previous computation was successful. 
* diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 01b4476..7fa4fa2 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -182,10 +182,10 @@ class LLT : public SolverBase > { * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: * \code x = decomposition.adjoint().solve(b) \endcode */ - const LLT& adjoint() const EIGEN_NOEXCEPT { return *this; } + const LLT& adjoint() const noexcept { return *this; } - inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } - inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); } + constexpr Index rows() const noexcept { return m_matrix.rows(); } + constexpr Index cols() const noexcept { return m_matrix.cols(); } template LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index e5b46c4..7e3c881 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -425,7 +425,7 @@ class CholmodBase : public SparseSolverBase { RealScalar logDet = 0; Scalar* x = static_cast(m_cholmodFactor->x); if (m_cholmodFactor->is_super) { - // Supernodal factorization stored as a packed list of dense column-major blocs, + // Supernodal factorization stored as a packed list of dense column-major blocks, // as described by the following structure: // super[k] == index of the first column of the j-th super node diff --git a/Eigen/src/Core/ArithmeticSequence.h b/Eigen/src/Core/ArithmeticSequence.h index ae3fac3..ae6373d 100644 --- a/Eigen/src/Core/ArithmeticSequence.h +++ b/Eigen/src/Core/ArithmeticSequence.h @@ -61,26 +61,28 @@ seqN(FirstType first, SizeType size, IncrType incr); template class ArithmeticSequence { public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType 
first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} + constexpr ArithmeticSequence() = default; + constexpr ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} + constexpr ArithmeticSequence(FirstType first, SizeType size, IncrType incr) + : m_first(first), m_size(size), m_incr(incr) {} enum { - SizeAtCompileTime = internal::get_fixed_value::value, + // SizeAtCompileTime = internal::get_fixed_value::value, IncrAtCompileTime = internal::get_fixed_value::value }; /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } + constexpr Index size() const { return m_size; } /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } + constexpr Index first() const { return m_first; } /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */ - Index operator[](Index i) const { return m_first + i * m_incr; } + constexpr Index operator[](Index i) const { return m_first + i * m_incr; } - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } + constexpr const FirstType& firstObject() const { return m_first; } + constexpr const SizeType& sizeObject() const { return m_size; } + constexpr const IncrType& incrObject() const { return m_incr; } protected: FirstType m_first; @@ -88,7 +90,7 @@ class ArithmeticSequence { IncrType m_incr; public: - auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) { + constexpr auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) { return seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr); } }; @@ -201,38 +203,6 @@ auto lastN(SizeType size) -> decltype(seqN(Eigen::placeholders::last + fix<1>() } // namespace placeholders -namespace internal { - -// Convert a 
symbolic span into a usable one (i.e., remove last/end "keywords") -template -struct make_size_type { - typedef std::conditional_t::value, Index, T> type; -}; - -template -struct IndexedViewCompatibleType, XprSize> { - typedef ArithmeticSequence::type, IncrType> type; -}; - -template -ArithmeticSequence::type, IncrType> makeIndexedViewCompatible( - const ArithmeticSequence& ids, Index size, SpecializedType) { - return ArithmeticSequence::type, IncrType>( - eval_expr_given_size(ids.firstObject(), size), eval_expr_given_size(ids.sizeObject(), size), ids.incrObject()); -} - -template -struct get_compile_time_incr > { - enum { value = get_fixed_value::value }; -}; - -template -constexpr Index get_runtime_incr(const ArithmeticSequence& x) EIGEN_NOEXCEPT { - return static_cast(x.incrObject()); -} - -} // end namespace internal - /** \namespace Eigen::indexing * \ingroup Core_Module * diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 29c9682..57f3186 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -102,8 +102,13 @@ class Array : public PlainObjectBase::value) - : Base(std::move(other)) { - } - EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { + /** \brief Move constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array&&) = default; + EIGEN_DEVICE_FUNC Array& operator=(Array&& other) noexcept(std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); return *this; } - /** \copydoc PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const - * ArgTypes&... args) + /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. + * + * \only_for_vectors + * + * This constructor is for 1D array or vectors with more than 4 coefficients. + * + * \warning To construct a column (resp. 
row) vector of fixed length, the number of values passed to this + * constructor must match the the fixed number of rows (resp. columns) of \c *this. + * * * Example: \include Array_variadic_ctor_cxx11.cpp * Output: \verbinclude Array_variadic_ctor_cxx11.out @@ -232,7 +239,7 @@ class Array : public PlainObjectBaseinnerSize(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); } #ifdef EIGEN_ARRAY_PLUGIN #include EIGEN_ARRAY_PLUGIN diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 6237df4..8465f54 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -81,9 +81,6 @@ class ArrayBase : public DenseBase { typedef typename Base::CoeffReturnType CoeffReturnType; -#endif // not EIGEN_PARSED_BY_DOXYGEN - -#ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Base::PlainObject PlainObject; /** \internal Represents a matrix with all coefficients equal to one another*/ @@ -118,19 +115,57 @@ class ArrayBase : public DenseBase { return derived(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const Scalar& scalar); - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const Scalar& scalar); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const Scalar& other) { + internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(), other), + internal::add_assign_op()); + return derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const Scalar& other) { + internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(), other), + internal::sub_assign_op()); + return derived(); + } + /** replaces \c *this by \c *this + \a other. 
+ * + * \returns a reference to \c *this + */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const ArrayBase& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); + } + + /** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const ArrayBase& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); + } + /** replaces \c *this by \c *this * \a other coefficient wise. + * + * \returns a reference to \c *this + */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const ArrayBase& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::mul_assign_op()); + return derived(); + } + /** replaces \c *this by \c *this / \a other coefficient wise. + * + * \returns a reference to \c *this + */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const ArrayBase& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::div_assign_op()); + return derived(); + } public: EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } @@ -173,50 +208,6 @@ class ArrayBase : public DenseBase { } }; -/** replaces \c *this by \c *this - \a other. - * - * \returns a reference to \c *this - */ -template -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); - return derived(); -} - -/** replaces \c *this by \c *this + \a other. 
- * - * \returns a reference to \c *this - */ -template -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); - return derived(); -} - -/** replaces \c *this by \c *this * \a other coefficient wise. - * - * \returns a reference to \c *this - */ -template -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator*=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::mul_assign_op()); - return derived(); -} - -/** replaces \c *this by \c *this / \a other coefficient wise. - * - * \returns a reference to \c *this - */ -template -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator/=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::div_assign_op()); - return derived(); -} - } // end namespace Eigen #endif // EIGEN_ARRAYBASE_H diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index b45395d..c9a194e 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -56,17 +56,13 @@ class ArrayWrapper : public ArrayBase > { EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { - return m_expression.outerStride(); - } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT { - return m_expression.innerStride(); - } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr 
Index outerStride() const noexcept { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.coeffRef(rowId, colId); @@ -135,17 +131,13 @@ class MatrixWrapper : public MatrixBase > { EIGEN_DEVICE_FUNC explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { - return m_expression.outerStride(); - } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT { - return m_expression.innerStride(); - } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC 
constexpr const Scalar* data() const { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.derived().coeffRef(rowId, colId); diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f7f0b23..36f0a9d 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -27,125 +27,116 @@ namespace internal { // copy_using_evaluator_traits is based on assign_traits -template +template struct copy_using_evaluator_traits { - typedef typename DstEvaluator::XprType Dst; - typedef typename Dst::Scalar DstScalar; + using Src = typename SrcEvaluator::XprType; + using Dst = typename DstEvaluator::XprType; + using DstScalar = typename Dst::Scalar; - enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags }; + static constexpr int DstFlags = DstEvaluator::Flags; + static constexpr int SrcFlags = SrcEvaluator::Flags; public: - enum { - DstAlignment = DstEvaluator::Alignment, - SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, - JointAlignment = plain_enum_min(DstAlignment, SrcAlignment) - }; - - private: - enum { - InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) - : int(DstFlags) & RowMajorBit ? int(Dst::ColsAtCompileTime) - : int(Dst::RowsAtCompileTime), - InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) - : int(DstFlags) & RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) - : int(Dst::MaxRowsAtCompileTime), - RestrictedInnerSize = min_size_prefer_fixed(InnerSize, MaxPacketSize), - RestrictedLinearSize = min_size_prefer_fixed(Dst::SizeAtCompileTime, MaxPacketSize), - OuterStride = int(outer_stride_at_compile_time::ret), - MaxSizeAtCompileTime = Dst::SizeAtCompileTime - }; + static constexpr int DstAlignment = DstEvaluator::Alignment; + static constexpr int SrcAlignment = SrcEvaluator::Alignment; + static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment); + static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit); + static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit); + static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit); + static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime; + static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime); + static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = + min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime); + static constexpr int MaxColsAtCompileTime = + min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime); + static constexpr int MaxSizeAtCompileTime = + min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime); + static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime + : DstIsRowMajor ? ColsAtCompileTime + : RowsAtCompileTime; + static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime + : DstIsRowMajor ? 
MaxColsAtCompileTime + : MaxRowsAtCompileTime; + static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize); + static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize); + static constexpr int OuterStride = outer_stride_at_compile_time::ret; // TODO distinguish between linear traversal and inner-traversals - typedef typename find_best_packet::type LinearPacketType; - typedef typename find_best_packet::type InnerPacketType; + using LinearPacketType = typename find_best_packet::type; + using InnerPacketType = typename find_best_packet::type; - enum { - LinearPacketSize = unpacket_traits::size, - InnerPacketSize = unpacket_traits::size - }; + static constexpr int LinearPacketSize = unpacket_traits::size; + static constexpr int InnerPacketSize = unpacket_traits::size; public: - enum { - LinearRequiredAlignment = unpacket_traits::alignment, - InnerRequiredAlignment = unpacket_traits::alignment - }; + static constexpr int LinearRequiredAlignment = unpacket_traits::alignment; + static constexpr int InnerRequiredAlignment = unpacket_traits::alignment; private: - enum { - DstIsRowMajor = DstFlags & RowMajorBit, - SrcIsRowMajor = SrcFlags & RowMajorBit, - StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), - MightVectorize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && - bool(functor_traits::PacketAccess), - MayInnerVectorize = MightVectorize && int(InnerSize) != Dynamic && int(InnerSize) % int(InnerPacketSize) == 0 && - int(OuterStride) != Dynamic && int(OuterStride) % int(InnerPacketSize) == 0 && - (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment) >= int(InnerRequiredAlignment)), - MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && - (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= 
int(LinearRequiredAlignment)) || - MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) && - (int(InnerMaxSize) == Dynamic || - int(InnerMaxSize) >= (EIGEN_UNALIGNED_VECTORIZE ? InnerPacketSize : (3 * InnerPacketSize))) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix - However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ - }; + static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor; + static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) && + bool(functor_traits::PacketAccess); + static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) && + (InnerSizeAtCompileTime % InnerPacketSize == 0) && + (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) && + (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment); + static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit); + static constexpr bool MayLinearVectorize = + MightVectorize && MayLinearize && DstHasDirectAccess && + (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) && + (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize); + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 
1 : 3) * InnerPacketSize; + static constexpr bool MaySliceVectorize = + MightVectorize && DstHasDirectAccess && + (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold); + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ public: - enum { - Traversal = int(Dst::SizeAtCompileTime) == 0 - ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time. - : (int(MayLinearVectorize) && (LinearPacketSize > InnerPacketSize)) ? int(LinearVectorizedTraversal) - : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal || - int(Traversal) == SliceVectorizedTraversal - }; - - typedef std::conditional_t PacketType; + static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal + : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize)) + ? LinearVectorizedTraversal + : MayInnerVectorize ? InnerVectorizedTraversal + : MayLinearVectorize ? LinearVectorizedTraversal + : MaySliceVectorize ? SliceVectorizedTraversal + : MayLinearize ? LinearTraversal + : DefaultTraversal; + static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal || + Traversal == SliceVectorizedTraversal; + + using PacketType = std::conditional_t; private: - enum { - ActualPacketSize = int(Traversal) == LinearVectorizedTraversal ? LinearPacketSize - : Vectorized ? 
InnerPacketSize - : 1, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, - MayUnrollCompletely = - int(Dst::SizeAtCompileTime) != Dynamic && - int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= - int(UnrollingLimit), - MayUnrollInner = - int(InnerSize) != Dynamic && - int(InnerSize) * (int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) - }; + static constexpr int ActualPacketSize = Vectorized ? unpacket_traits::size : 1; + static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize; + static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost); + static constexpr bool MayUnrollCompletely = + (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit); + static constexpr bool MayUnrollInner = + (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit); public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? (int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling)) - : int(Traversal) == int(LinearVectorizedTraversal) - ? (bool(MayUnrollCompletely) && - (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment) >= int(LinearRequiredAlignment))) - ? int(CompleteUnrolling) - : int(NoUnrolling)) - : int(Traversal) == int(LinearTraversal) - ? (bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling)) + static constexpr int Unrolling = + (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal) + ? (MayUnrollCompletely ? CompleteUnrolling + : MayUnrollInner ? InnerUnrolling + : NoUnrolling) + : Traversal == LinearVectorizedTraversal + ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment)) + ? 
CompleteUnrolling + : NoUnrolling) + : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling) #if EIGEN_UNALIGNED_VECTORIZE - : int(Traversal) == int(SliceVectorizedTraversal) - ? (bool(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling)) + : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling) #endif - : int(NoUnrolling) - }; + : NoUnrolling; + static constexpr bool UsePacketSegment = has_packet_segment::value; #ifdef EIGEN_DEBUG_ASSIGN static void debug() { @@ -162,8 +153,8 @@ struct copy_using_evaluator_traits { EIGEN_DEBUG_VAR(LinearRequiredAlignment) EIGEN_DEBUG_VAR(InnerRequiredAlignment) EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(InnerSizeAtCompileTime) + EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime) EIGEN_DEBUG_VAR(LinearPacketSize) EIGEN_DEBUG_VAR(InnerPacketSize) EIGEN_DEBUG_VAR(ActualPacketSize) @@ -196,28 +187,25 @@ struct copy_using_evaluator_traits { *** Default traversal *** ************************/ -template +template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - - enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime }; + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.assignCoeffByOuterInner(outer, inner); - copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + kernel.assignCoeffByOuterInner(Outer, Inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); } }; template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} }; template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) { kernel.assignCoeffByOuterInner(outer, Index_); copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); } @@ -225,62 +213,57 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} }; /*********************** *** Linear traversal *** ***********************/ -template +template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - 
kernel.assignCoeff(Index); - copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + kernel.assignCoeff(Index_); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); } }; template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} }; /************************** *** Inner vectorization *** **************************/ -template +template struct copy_using_evaluator_innervec_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? - typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - outer = Index / DstXprType::InnerSizeAtCompileTime, - inner = Index % DstXprType::InnerSizeAtCompileTime, - SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, - DstAlignment = Kernel::AssignmentTraits::DstAlignment - }; + using PacketType = typename Kernel::PacketType; + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int NextIndex = Index_ + unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.template assignPacketByOuterInner(outer, inner); - enum { NextIndex = Index + unpacket_traits::size }; + kernel.template assignPacketByOuterInner(Outer, Inner); copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } }; template struct copy_using_evaluator_innervec_CompleteUnrolling { 
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} }; template struct copy_using_evaluator_innervec_InnerUnrolling { - typedef typename Kernel::PacketType PacketType; + using PacketType = typename Kernel::PacketType; + static constexpr int NextIndex = Index_ + unpacket_traits::size; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { kernel.template assignPacketByOuterInner(outer, Index_); - enum { NextIndex = Index_ + unpacket_traits::size }; copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } @@ -288,7 +271,34 @@ struct copy_using_evaluator_innervec_InnerUnrolling { template struct copy_using_evaluator_innervec_InnerUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +template +struct copy_using_evaluator_innervec_segment { + using PacketType = typename Kernel::PacketType; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { + kernel.template assignPacketSegmentByOuterInner(outer, Start, 0, + Stop - Start); + } +}; + +template +struct copy_using_evaluator_innervec_segment + : copy_using_evaluator_DefaultTraversal_InnerUnrolling {}; + +template +struct copy_using_evaluator_innervec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +template +struct copy_using_evaluator_innervec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} }; /*************************************************************************** @@ -299,7 +309,21 @@ struct copy_using_evaluator_innervec_InnerUnrolling -struct dense_assignment_loop; +struct dense_assignment_loop_impl; + +template +struct dense_assignment_loop { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void 
run(Kernel& kernel) { +#ifdef __cpp_lib_is_constant_evaluated + if (internal::is_constant_evaluated()) + dense_assignment_loop_impl::run(kernel); + else +#endif + dense_assignment_loop_impl::run(kernel); + } +}; /************************ ***** Special Cases ***** @@ -307,10 +331,11 @@ struct dense_assignment_loop; // Zero-sized assignment is a no-op. template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE EIGEN_CONSTEXPR run(Kernel& /*kernel*/) { - EIGEN_STATIC_ASSERT(int(Kernel::DstEvaluatorType::XprType::SizeAtCompileTime) == 0, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) { + EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) } }; @@ -319,8 +344,8 @@ struct dense_assignment_loop { ************************/ template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& kernel) { +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) { for (Index outer = 0; outer < kernel.outerSize(); ++outer) { for (Index inner = 0; inner < kernel.innerSize(); ++inner) { kernel.assignCoeffByOuterInner(outer, inner); @@ -330,22 +355,22 @@ struct dense_assignment_loop { }; template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + 
copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); } }; template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; +struct dense_assignment_loop_impl { + static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { const Index outerSize = kernel.outerSize(); for (Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, - outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); } }; @@ -356,92 +381,134 @@ struct dense_assignment_loop { // The goal of unaligned_dense_assignment_loop is simply to factorize the handling // of the non vectorizable beginning and ending parts -template +template struct unaligned_dense_assignment_loop { - // if IsAligned = true, then do nothing + // if Skip == true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {} template - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&, Index, Index) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/, + Index /*innerStart*/, Index /*innerEnd*/) {} }; -template <> -struct unaligned_dense_assignment_loop { - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. 
- // FIXME check which version exhibits this issue -#if EIGEN_COMP_MSVC +template +struct unaligned_dense_assignment_loop { template - static EIGEN_DONT_INLINE void run(Kernel& kernel, Index start, Index end) -#else + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) { + Index count = end - start; + eigen_assert(count <= unpacket_traits::size); + if (count > 0) kernel.template assignPacketSegment(start, 0, count); + } template - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index start, Index end) -#endif - { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) { + Index count = end - start; + eigen_assert(count <= unpacket_traits::size); + if (count > 0) + kernel.template assignPacketSegmentByOuterInner(outer, start, 0, count); + } +}; + +template +struct unaligned_dense_assignment_loop { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) { for (Index index = start; index < end; ++index) kernel.assignCoeff(index); } + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart, + Index innerEnd) { + for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + } }; -template +template struct copy_using_evaluator_linearvec_CompleteUnrolling { - // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
- typedef typename Kernel::DstEvaluatorType DstEvaluatorType; - typedef typename DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment }; + using PacketType = typename Kernel::PacketType; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + static constexpr int NextIndex = Index_ + unpacket_traits::size; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.template assignPacket(Index); - enum { NextIndex = Index + unpacket_traits::size }; + kernel.template assignPacket(Index_); copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); } }; template struct copy_using_evaluator_linearvec_CompleteUnrolling { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_linearvec_segment { + using PacketType = typename Kernel::PacketType; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + kernel.template assignPacketSegment(Index_, 0, Stop - Index_); + } +}; + +template +struct copy_using_evaluator_linearvec_segment + : copy_using_evaluator_LinearTraversal_CompleteUnrolling {}; + +template +struct copy_using_evaluator_linearvec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_linearvec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} }; template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR 
void run(Kernel& kernel) { +struct dense_assignment_loop_impl { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar)); + static constexpr int RequestedAlignment = unpacket_traits::alignment; + static constexpr bool Alignable = + (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment; + static constexpr bool DstIsAligned = DstAlignment >= Alignment; + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using head_loop = + unaligned_dense_assignment_loop; + using tail_loop = unaligned_dense_assignment_loop; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { const Index size = kernel.size(); - typedef typename Kernel::Scalar Scalar; - typedef typename Kernel::PacketType PacketType; - enum { - requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, - packetSize = unpacket_traits::size, - dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), - dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) - : int(Kernel::AssignmentTraits::DstAlignment), - srcAlignment = Kernel::AssignmentTraits::JointAlignment - }; - const Index alignedStart = - dstIsAligned ? 0 : internal::first_aligned(kernel.dstDataPtr(), size); - const Index alignedEnd = alignedStart + ((size - alignedStart) / packetSize) * packetSize; + const Index alignedStart = DstIsAligned ? 
0 : first_aligned(kernel.dstDataPtr(), size); + const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize); - unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + head_loop::run(kernel, 0, alignedStart); - for (Index index = alignedStart; index < alignedEnd; index += packetSize) - kernel.template assignPacket(index); + for (Index index = alignedStart; index < alignedEnd; index += PacketSize) + kernel.template assignPacket(index); - unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + tail_loop::run(kernel, alignedEnd, size); } }; template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - size = DstXprType::SizeAtCompileTime, - packetSize = unpacket_traits::size, - alignedSize = (int(size) / packetSize) * packetSize - }; - - copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); - copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime; + static constexpr int AlignedSize = numext::round_down(Size, PacketSize); + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + copy_using_evaluator_linearvec_segment::run(kernel); } }; @@ -450,36 +517,41 @@ struct dense_assignment_loop -struct dense_assignment_loop { - typedef typename Kernel::PacketType PacketType; - enum { SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, DstAlignment = Kernel::AssignmentTraits::DstAlignment }; - EIGEN_DEVICE_FUNC 
static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); - const Index packetSize = unpacket_traits::size; for (Index outer = 0; outer < outerSize; ++outer) - for (Index inner = 0; inner < innerSize; inner += packetSize) + for (Index inner = 0; inner < innerSize; inner += PacketSize) kernel.template assignPacketByOuterInner(outer, inner); } }; template -struct dense_assignment_loop { +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } }; template -struct dense_assignment_loop { +struct dense_assignment_loop_impl { + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::AssignmentTraits Traits; const Index outerSize = kernel.outerSize(); for (Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + 
copy_using_evaluator_innervec_InnerUnrolling::run(kernel, + outer); } }; @@ -488,18 +560,18 @@ struct dense_assignment_loop { ***********************/ template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { const Index size = kernel.size(); for (Index i = 0; i < size; ++i) kernel.assignCoeff(i); } }; template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run( + kernel); } }; @@ -508,64 +580,62 @@ struct dense_assignment_loop { ***************************/ template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::Scalar Scalar; - typedef typename Kernel::PacketType PacketType; - enum { - packetSize = unpacket_traits::size, - requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), - alignable = - packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar), - dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment), - dstAlignment = alignable ? 
int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment) - }; +struct dense_assignment_loop_impl { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar)); + static constexpr int RequestedAlignment = unpacket_traits::alignment; + static constexpr bool Alignable = + (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment; + static constexpr bool DstIsAligned = DstAlignment >= Alignment; + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using head_loop = unaligned_dense_assignment_loop; + using tail_loop = unaligned_dense_assignment_loop; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { const Scalar* dst_ptr = kernel.dstDataPtr(); - if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) { - // the pointer is not aligned-on scalar, so alignment is not possible - return dense_assignment_loop::run(kernel); - } - const Index packetAlignedMask = packetSize - 1; const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); - const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = - ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0; + Index alignedStart = ((!Alignable) || DstIsAligned) ? 
0 : internal::first_aligned(dst_ptr, innerSize); for (Index outer = 0; outer < outerSize; ++outer) { - const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize); + + head_loop::run(kernel, outer, 0, alignedStart); // do the vectorizable part of the assignment - for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize) - kernel.template assignPacketByOuterInner(outer, inner); + for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize) + kernel.template assignPacketByOuterInner(outer, inner); - // do the non-vectorizable part of the assignment - for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + tail_loop::run(kernel, outer, alignedEnd, innerSize); - alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize); + alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize); } } }; #if EIGEN_UNALIGNED_VECTORIZE template -struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) { - typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - typedef typename Kernel::PacketType PacketType; - - enum { - innerSize = DstXprType::InnerSizeAtCompileTime, - packetSize = unpacket_traits::size, - vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize), - size = DstXprType::SizeAtCompileTime - }; - +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int VectorizableSize = 
numext::round_down(InnerSize, PacketSize); + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using packet_loop = copy_using_evaluator_innervec_InnerUnrolling; + using packet_segment_loop = copy_using_evaluator_innervec_segment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { for (Index outer = 0; outer < kernel.outerSize(); ++outer) { - copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); - copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + packet_loop::run(kernel, outer); + packet_segment_loop::run(kernel, outer); } } }; @@ -594,27 +664,28 @@ class generic_dense_assignment_kernel { typedef copy_using_evaluator_traits AssignmentTraits; typedef typename AssignmentTraits::PacketType PacketType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE generic_dense_assignment_kernel(DstEvaluatorType& dst, - const SrcEvaluatorType& src, - const Functor& func, DstXprType& dstExpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst, + const SrcEvaluatorType& src, + const Functor& func, + DstXprType& dstExpr) : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) { #ifdef EIGEN_DEBUG_ASSIGN AssignmentTraits::debug(); #endif } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return 
m_dstExpr.size(); } + EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); } - EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; } - EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; } + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col)); } @@ -624,7 +695,7 @@ class generic_dense_assignment_kernel { } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignCoeff(row, col); @@ -648,7 +719,28 @@ class generic_dense_assignment_kernel { assignPacket(row, col); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) { + m_functor.template assignPacketSegment( + &m_dst.coeffRef(row, col), m_src.template 
packetSegment(row, col, begin, count), begin, + count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) { + m_functor.template assignPacketSegment( + &m_dst.coeffRef(index), m_src.template packetSegment(index, begin, count), begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin, + Index count) { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacketSegment(row, col, begin, count); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner @@ -656,7 +748,7 @@ class generic_dense_assignment_kernel { : inner; } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? 
inner @@ -700,16 +792,16 @@ class restricted_packet_dense_assignment_kernel ***************************************************************************/ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src, - const Functor& /*func*/) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { EIGEN_ONLY_USED_FOR_DEBUG(dst); EIGEN_ONLY_USED_FOR_DEBUG(src); eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src, - const internal::assign_op& /*func*/) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols); @@ -717,9 +809,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, co } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst, - const SrcXprType& src, - const Functor& func) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, + const Functor& func) { typedef evaluator DstEvaluatorType; typedef evaluator SrcEvaluatorType; @@ -737,18 +828,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment dense_assignment_loop::run(kernel); } -// Specialization for filling the destination with a constant value. 
-#ifndef EIGEN_GPU_COMPILE_PHASE -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop( - DstXprType& dst, - const Eigen::CwiseNullaryOp, DstXprType>& src, - const internal::assign_op& func) { - resize_if_allowed(dst, src, func); - std::fill_n(dst.data(), dst.size(), src.functor()()); -} -#endif - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op()); @@ -790,7 +869,7 @@ struct Assignment; // not has to bother about these annoying details. template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) { call_assignment(dst, src, internal::assign_op()); } template @@ -800,14 +879,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const // Deal with "assume-aliasing" template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment( Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { typename plain_matrix_type::type tmp(src); call_assignment_no_alias(dst, tmp, func); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment( Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { call_assignment_no_alias(dst, src, func); } @@ -815,14 +894,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment( // by-pass "assume-aliasing" // When there is no aliasing, we require that 'dst' has been properly resized template class StorageBase, typename Src, typename Func> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment(NoAlias& dst, - const Src& src, const Func& func) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
constexpr void call_assignment(NoAlias& dst, const Src& src, + const Func& func) { call_assignment_no_alias(dst.expression(), src, func); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src, - const Func& func) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src, + const Func& func) { enum { NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) && @@ -861,14 +940,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_ } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) { call_assignment_no_alias(dst, src, internal::assign_op()); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst, - const Src& src, - const Func& func) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, + const Func& func) { // TODO check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src) @@ -877,8 +955,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_al Assignment::run(dst, src, func); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst, - const Src& src) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); } @@ -891,15 +968,44 @@ EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src); // both 
partial specialization+SFINAE without ambiguous specialization template struct Assignment { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func) { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src, + const Functor& func) { #ifndef EIGEN_NO_DEBUG - internal::check_for_aliasing(dst, src); + if (!internal::is_constant_evaluated()) { + internal::check_for_aliasing(dst, src); + } #endif call_dense_assignment_loop(dst, src, func); } }; +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_constant_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_fill_impl::run(dst, src); + } +}; + +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_zero_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_zero_impl::run(dst, src); + } +}; + // Generic assignment through evalTo. // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. 
// Note that the last template argument "Weak" is needed to make it possible to perform diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h index 5b566cd..ad11220 100644 --- a/Eigen/src/Core/Assign_MKL.h +++ b/Eigen/src/Core/Assign_MKL.h @@ -89,7 +89,7 @@ class vml_assign_traits { static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ - if (vml_assign_traits::Traversal == LinearTraversal) { \ + if (vml_assign_traits::Traversal == (int)LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \ (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \ } else { \ diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h index ca991ca..57b0322 100644 --- a/Eigen/src/Core/BandMatrix.h +++ b/Eigen/src/Core/BandMatrix.h @@ -200,16 +200,16 @@ class BandMatrix : public BandMatrixBase * Adding an offset to nullptr is undefined behavior, so we must avoid it. */ template - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE static Scalar* add_to_nullable_pointer(Scalar* base, - Index offset) { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE static Scalar* add_to_nullable_pointer(Scalar* base, Index offset) { return base != nullptr ? 
base + offset : nullptr; } @@ -378,30 +373,25 @@ class BlockImpl_dense init(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const - EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const noexcept { return m_xpr; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; } /** \sa MapBase::innerStride() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index innerStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index innerStride() const noexcept { return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.innerStride() : m_xpr.outerStride(); } /** \sa MapBase::outerStride() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.outerStride() : m_xpr.innerStride(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR StorageIndex startRow() const EIGEN_NOEXCEPT { - return m_startRow.value(); - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR StorageIndex startCol() const EIGEN_NOEXCEPT { - return m_startCol.value(); - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); } #ifndef __SUNPRO_CC // FIXME sunstudio is not friendly with the above friend... 
diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index c629123..c414117 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -92,7 +92,7 @@ struct CommaInitializer { EIGEN_DEVICE_FUNC inline ~CommaInitializer() #if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS - EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) + noexcept(false) // Eigen::eigen_assert_exception #endif { finished(); diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c620600..60857e2 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -124,8 +124,7 @@ struct evaluator_base { // noncopyable: // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator_base() = default; private: EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); @@ -143,23 +142,23 @@ struct evaluator_base { template class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) { #ifndef EIGEN_INTERNAL_DEBUGGING EIGEN_UNUSED_VARIABLE(outerStride); #endif eigen_internal_assert(outerStride == OuterStride); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return OuterStride; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { return OuterStride; } const Scalar* data; }; template class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outerStride() const { return m_outerStride; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const { return m_outerStride; } const Scalar* data; protected: @@ -167,7 +166,7 @@ class plainobjectbase_evaluator_data { }; template -struct evaluator > : evaluator_base { +struct evaluator> : evaluator_base { typedef PlainObjectBase PlainObjectType; typedef typename PlainObjectType::Scalar Scalar; typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; @@ -189,89 +188,106 @@ struct evaluator > : evaluator_base { : RowsAtCompileTime }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() : m_d(0, OuterStrideAtCompileTime) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const PlainObjectType& m) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const PlainObjectType& m) : m_d(m.data(), IsVectorAtCompileTime ? 
0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; - else - return m_d.data[row + col * m_d.outerStride()]; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { + return coeff(getIndex(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.data[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - if (IsRowMajor) - return const_cast(m_d.data)[row * m_d.outerStride() + col]; - else - return const_cast(m_d.data)[row + col * m_d.outerStride()]; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) { + return coeffRef(getIndex(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return const_cast(m_d.data)[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { + return const_cast(m_d.data)[index]; + } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - if (IsRowMajor) - return ploadt(m_d.data + row * m_d.outerStride() + col); - else - return ploadt(m_d.data + row + col * m_d.outerStride()); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return packet(getIndex(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return ploadt(m_d.data + index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { - if (IsRowMajor) - return pstoret(const_cast(m_d.data) + row * m_d.outerStride() + col, x); - else - return 
pstoret(const_cast(m_d.data) + row + col * m_d.outerStride(), x); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + writePacket(getIndex(row, col), x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + pstoret(const_cast(m_d.data) + index, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return packetSegment(getIndex(row, col), begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return ploadtSegment(m_d.data + index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + writePacketSegment(getIndex(row, col), x, begin, count); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret(const_cast(m_d.data) + index, x); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + pstoretSegment(const_cast(m_d.data) + index, x, begin, count); } protected: plainobjectbase_evaluator_data m_d; + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index constexpr getIndex(Index row, Index col) const { + return IsRowMajor ? 
row * m_d.outerStride() + col : row + col * m_d.outerStride(); + } }; template -struct evaluator > - : evaluator > > { +struct evaluator> + : evaluator>> { typedef Matrix XprType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) - : evaluator >(m) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} }; template -struct evaluator > - : evaluator > > { +struct evaluator> + : evaluator>> { typedef Array XprType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) - : evaluator >(m) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} }; // -------------------- Transpose -------------------- template -struct unary_evaluator, IndexBased> : evaluator_base > { +struct unary_evaluator, IndexBased> : evaluator_base> { typedef Transpose XprType; enum { @@ -298,25 +314,47 @@ struct unary_evaluator, IndexBased> : evaluator_base - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(col, row); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(col, row, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_argImpl.template packetSegment(col, row, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_argImpl.template packetSegment(index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(col, row, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(index, x, begin, count); + } + protected: evaluator m_argImpl; }; @@ -404,7 +442,7 @@ struct nullary_wrapper {}; #if 0 && EIGEN_COMP_MSVC > 0 // Disable this ugly workaround. This is now handled in traits::match, // but this piece of code might still become handly if some other weird compilation -// erros pop up again. +// errors pop up again. // MSVC exhibits a weird compilation error when // compiling: @@ -460,13 +498,13 @@ struct nullary_wrapper #endif // MSVC workaround template -struct evaluator > - : evaluator_base > { +struct evaluator> + : evaluator_base> { typedef CwiseNullaryOp XprType; - typedef internal::remove_all_t PlainObjectTypeCleaned; + typedef remove_all_t PlainObjectTypeCleaned; enum { - CoeffReadCost = internal::functor_traits::Cost, + CoeffReadCost = functor_traits::Cost, Flags = (evaluator::Flags & (HereditaryBits | (functor_has_linear_access::ret ? 
LinearAccessBit : 0) | @@ -492,24 +530,36 @@ struct evaluator > } template - EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const { return m_wrapper.template packetOp(m_functor, row, col); } template - EIGEN_STRONG_INLINE PacketType packet(IndexType index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType index) const { return m_wrapper.template packetOp(m_functor, index); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType row, IndexType col, Index /*begin*/, + Index /*count*/) const { + return packet(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType index, Index /*begin*/, + Index /*count*/) const { + return packet(index); + } + protected: const NullaryOp m_functor; - const internal::nullary_wrapper m_wrapper; + const nullary_wrapper m_wrapper; }; // -------------------- CwiseUnaryOp -------------------- template -struct unary_evaluator, IndexBased> : evaluator_base > { +struct unary_evaluator, IndexBased> : evaluator_base> { typedef CwiseUnaryOp XprType; enum { @@ -536,15 +586,25 @@ struct unary_evaluator, IndexBased> : evaluator_b } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.argImpl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.argImpl.template packet(index)); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.argImpl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.argImpl.template packetSegment(index, begin, count)); + } + protected: // this helper permits to completely eliminate the functor if it is empty struct Data { @@ -599,16 +659,11 @@ struct unary_evaluator, ArgType>, In template using SrcPacketArgs8 = std::enable_if_t<(unpacket_traits::size) == (8 * SrcPacketSize), bool>; - template = true> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index, Index col, Index packetSize) const { - return col + packetSize <= cols(); - } - template = true> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index, Index packetSize) const { - return row + packetSize <= rows(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index col, Index begin, Index count) const { + return IsRowMajor ? (col + count + begin <= cols()) : (row + count + begin <= rows()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index packetSize) const { - return index + packetSize <= size(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index begin, Index count) const { + return index + count + begin <= size(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index row, Index col, Index offset) const { @@ -629,49 +684,94 @@ struct unary_evaluator, ArgType>, In } template - EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const { constexpr int PacketSize = unpacket_traits::size; - Index actualRow = IsRowMajor ? row : row + (offset * PacketSize); - Index actualCol = IsRowMajor ? 
col + (offset * PacketSize) : col; - eigen_assert(check_array_bounds(actualRow, actualCol, PacketSize) && "Array index out of bounds"); + Index packetOffset = offset * PacketSize; + Index actualRow = IsRowMajor ? row : row + packetOffset; + Index actualCol = IsRowMajor ? col + packetOffset : col; + eigen_assert(check_array_bounds(actualRow, actualCol, 0, PacketSize) && "Array index out of bounds"); return m_argImpl.template packet(actualRow, actualCol); } template - EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const { constexpr int PacketSize = unpacket_traits::size; - Index actualIndex = index + (offset * PacketSize); - eigen_assert(check_array_bounds(actualIndex, PacketSize) && "Array index out of bounds"); + Index packetOffset = offset * PacketSize; + Index actualIndex = index + packetOffset; + eigen_assert(check_array_bounds(actualIndex, 0, PacketSize) && "Array index out of bounds"); return m_argImpl.template packet(actualIndex); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index row, Index col, Index begin, Index count, + Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualRow = IsRowMajor ? row : row + packetOffset; + Index actualCol = IsRowMajor ? 
col + packetOffset : col; + eigen_assert(check_array_bounds(actualRow, actualCol, begin, count) && "Array index out of bounds"); + return m_argImpl.template packetSegment(actualRow, actualCol, begin, count); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index index, Index begin, Index count, + Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualIndex = index + packetOffset; + eigen_assert(check_array_bounds(actualIndex, begin, count) && "Array index out of bounds"); + return m_argImpl.template packetSegment(actualIndex, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock srcPacketSegmentHelper(Index row, Index col, + Index begin, + Index count) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets; + for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType()); + Index offset = begin / SrcPacketSize; + Index actualBegin = begin % SrcPacketSize; + for (; offset < NumPackets; offset++) { + Index actualCount = numext::mini(SrcPacketSize - actualBegin, count); + packets.packet[offset] = srcPacketSegment(row, col, actualBegin, actualCount, offset); + if (count == actualCount) break; + actualBegin = 0; + count -= actualCount; + } + return packets; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock srcPacketSegmentHelper(Index index, + Index begin, + Index count) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets; + for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType()); + Index offset = begin / SrcPacketSize; + Index actualBegin = begin % SrcPacketSize; + for (; offset < NumPackets; offset++) { + Index actualCount = numext::mini(SrcPacketSize - actualBegin, count); + packets.packet[offset] = srcPacketSegment(index, actualBegin, actualCount, offset); + if (count == 
actualCount) break; + actualBegin = 0; + count -= actualCount; + } + return packets; + } // There is no source packet type with equal or fewer elements than DstPacketType. // This is problematic as the evaluation loop may attempt to access data outside the bounds of the array. // For example, consider the cast utilizing pcast with an array of size 4: {0.0f,1.0f,2.0f,3.0f}. - // The first iteration of the evaulation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which + // The first iteration of the evaluation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which // is acceptable. The second iteration will load 16 bytes: {2.0f,3.0f,?,?}, which is outside the bounds of the array. - - // Instead, perform runtime check to determine if the load would access data outside the bounds of the array. - // If not, perform full load. Otherwise, revert to a scalar loop to perform a partial load. - // In either case, perform a vectorized cast of the source packet. 
template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int DstPacketSize = unpacket_traits::size; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); - SrcPacketType src; - if (EIGEN_PREDICT_TRUE(check_array_bounds(row, col, SrcPacketSize))) { - src = srcPacket(row, col, 0); - } else { - Array srcArray; - for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(row, col, k); - for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0); - src = pload(srcArray.data()); - } - return pcast(src); + return pcast(srcPacketSegment(row, col, 0, DstPacketSize, 0)); } // Use the source packet type with the same size as DstPacketType, if it exists template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int DstPacketSize = unpacket_traits::size; using SizedSrcPacketType = typename find_packet_by_size::type; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); @@ -680,14 +780,14 @@ struct unary_evaluator, ArgType>, In } // unpacket_traits::size == 2 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(row, col, 0), srcPacket(row, col, 1)); } // unpacket_traits::size == 4 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return 
pcast(srcPacket(row, col, 0), srcPacket(row, col, 1), srcPacket(row, col, 2), @@ -695,7 +795,7 @@ struct unary_evaluator, ArgType>, In } // unpacket_traits::size == 8 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast( srcPacket(row, col, 0), srcPacket(row, col, 1), srcPacket(row, col, 2), @@ -703,25 +803,70 @@ struct unary_evaluator, ArgType>, In srcPacket(row, col, 6), srcPacket(row, col, 7)); } - // Analagous routines for linear access. + // packetSegment variants template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { constexpr int DstPacketSize = unpacket_traits::size; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); - SrcPacketType src; - if (EIGEN_PREDICT_TRUE(check_array_bounds(index, SrcPacketSize))) { - src = srcPacket(index, 0); - } else { - Array srcArray; - for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(index, k); - for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0); - src = pload(srcArray.data()); - } - return pcast(src); + return pcast(srcPacketSegment(row, col, begin, count, 0)); + } + // Use the source packet type with the same size as DstPacketType, if it exists + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = 
plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast( + srcPacketSegment(row, col, begin, count, 0)); + } + // unpacket_traits::size == 2 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 2; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return pcast(packets.packet[0], packets.packet[1]); + } + // unpacket_traits::size == 4 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 4; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3]); + } + // unpacket_traits::size == 8 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 8; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3], packets.packet[4], packets.packet[5], + packets.packet[6], packets.packet[7]); + } + + // Analogous routines for linear access. 
+ template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(index, 0, DstPacketSize, 0)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int DstPacketSize = unpacket_traits::size; using SizedSrcPacketType = typename find_packet_by_size::type; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); @@ -729,18 +874,18 @@ struct unary_evaluator, ArgType>, In return pcast(srcPacket(index, 0)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1), srcPacket(index, 2), srcPacket(index, 3)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1), srcPacket(index, 2), srcPacket(index, 3), @@ -748,6 +893,55 @@ struct unary_evaluator, ArgType>, In srcPacket(index, 6), srcPacket(index, 7)); } + // packetSegment variants + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index 
begin, Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(index, begin, count, 0)); + } + // Use the source packet type with the same size as DstPacketType, if it exists + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast( + srcPacketSegment(index, begin, count, 0)); + } + // unpacket_traits::size == 2 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 2; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1]); + } + // unpacket_traits::size == 4 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 4; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3]); + } + // unpacket_traits::size == 8 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 8; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + 
PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3], packets.packet[4], packets.packet[5], + packets.packet[6], packets.packet[7]); + } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows; } constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols; } constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_rows * m_cols; } @@ -762,17 +956,17 @@ struct unary_evaluator, ArgType>, In // this is a ternary expression template -struct evaluator > - : public ternary_evaluator > { +struct evaluator> + : public ternary_evaluator> { typedef CwiseTernaryOp XprType; - typedef ternary_evaluator > Base; + typedef ternary_evaluator> Base; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template struct ternary_evaluator, IndexBased, IndexBased> - : evaluator_base > { + : evaluator_base> { typedef CwiseTernaryOp XprType; enum { @@ -812,19 +1006,33 @@ struct ternary_evaluator, IndexBased } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.arg1Impl.template packet(row, col), m_d.arg2Impl.template packet(row, col), m_d.arg3Impl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.arg1Impl.template packet(index), m_d.arg2Impl.template packet(index), m_d.arg3Impl.template packet(index)); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.arg1Impl.template packetSegment(row, col, begin, count), + m_d.arg2Impl.template packetSegment(row, col, begin, count), + 
m_d.arg3Impl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.arg1Impl.template packetSegment(index, begin, count), + m_d.arg2Impl.template packetSegment(index, begin, count), + m_d.arg3Impl.template packetSegment(index, begin, count)); + } + protected: // this helper permits to completely eliminate the functor if it is empty struct Data { @@ -840,20 +1048,52 @@ struct ternary_evaluator, IndexBased Data m_d; }; +template +struct scalar_boolean_select_spec { + using DummyTernaryOp = scalar_boolean_select_op; + using DummyArg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using DummyXprType = CwiseTernaryOp; + + // only use the typed comparison if it is vectorized + static constexpr bool UseTyped = functor_traits>::PacketAccess; + using CondScalar = std::conditional_t; + + using TernaryOp = scalar_boolean_select_op; + using Arg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using XprType = CwiseTernaryOp; + + using Base = ternary_evaluator; +}; + +// specialization for expressions like (a < b).select(c, d) to enable full vectorization +template +struct evaluator, Arg1, Arg2, + CwiseBinaryOp, CmpLhsType, CmpRhsType>>> + : public scalar_boolean_select_spec::Base { + using Helper = scalar_boolean_select_spec; + using Base = typename Helper::Base; + using DummyXprType = typename Helper::DummyXprType; + using Arg3 = typename Helper::Arg3; + using XprType = typename Helper::XprType; + + EIGEN_DEVICE_FUNC explicit evaluator(const DummyXprType& xpr) + : Base(XprType(xpr.arg1(), xpr.arg2(), Arg3(xpr.arg3().lhs(), xpr.arg3().rhs()))) {} +}; + // -------------------- CwiseBinaryOp -------------------- // this is a binary expression template -struct evaluator > : public binary_evaluator > { +struct evaluator> : public binary_evaluator> { typedef CwiseBinaryOp XprType; - typedef binary_evaluator > Base; + typedef 
binary_evaluator> Base; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template struct binary_evaluator, IndexBased, IndexBased> - : evaluator_base > { + : evaluator_base> { typedef CwiseBinaryOp XprType; enum { @@ -889,17 +1129,29 @@ struct binary_evaluator, IndexBased, IndexBase } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.lhsImpl.template packet(row, col), m_d.rhsImpl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.lhsImpl.template packet(index), m_d.rhsImpl.template packet(index)); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.lhsImpl.template packetSegment(row, col, begin, count), + m_d.rhsImpl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.lhsImpl.template packetSegment(index, begin, count), + m_d.rhsImpl.template packetSegment(index, begin, count)); + } + protected: // this helper permits to completely eliminate the functor if it is empty struct Data { @@ -918,7 +1170,7 @@ struct binary_evaluator, IndexBased, IndexBase template struct unary_evaluator, IndexBased> - : evaluator_base > { + : evaluator_base> { typedef CwiseUnaryView XprType; enum { @@ -991,7 +1243,7 @@ struct mapbase_evaluator : evaluator_base { m_innerStride(map.innerStride()), m_outerStride(map.outerStride()) { EIGEN_STATIC_ASSERT(check_implication((evaluator::Flags & PacketAccessBit) != 0, - internal::inner_stride_at_compile_time::ret == 1), + 
inner_stride_at_compile_time::ret == 1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -1011,42 +1263,66 @@ struct mapbase_evaluator : evaluator_base { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_data[index * m_innerStride.value()]; } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { PointerType ptr = m_data + row * rowStride() + col * colStride(); - return internal::ploadt(ptr); + return ploadt(ptr); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return internal::ploadt(m_data + index * m_innerStride.value()); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return ploadt(m_data + index * m_innerStride.value()); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { PointerType ptr = m_data + row * rowStride() + col * colStride(); - return internal::pstoret(ptr, x); + pstoret(ptr, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - internal::pstoret(m_data + index * m_innerStride.value(), x); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + pstoret(m_data + index * m_innerStride.value(), x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return ploadtSegment(ptr, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return ploadtSegment(m_data + index * m_innerStride.value(), begin, count); + } + + template + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + pstoretSegment(ptr, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + pstoretSegment(m_data + index * m_innerStride.value(), x, begin, count); } protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowStride() const noexcept { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index colStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colStride() const noexcept { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } PointerType m_data; - const internal::variable_if_dynamic m_innerStride; - const internal::variable_if_dynamic m_outerStride; + const variable_if_dynamic m_innerStride; + const variable_if_dynamic m_outerStride; }; template -struct evaluator > +struct evaluator> : public mapbase_evaluator, PlainObjectType> { typedef Map XprType; typedef typename XprType::Scalar Scalar; @@ -1079,13 +1355,13 @@ struct evaluator > // -------------------- Ref -------------------- template -struct evaluator > +struct evaluator> : public mapbase_evaluator, PlainObjectType> { typedef Ref XprType; enum { - Flags = evaluator >::Flags, - Alignment = evaluator >::Alignment + Flags = evaluator>::Flags, + Alignment = evaluator>::Alignment }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& ref) @@ -1095,11 +1371,11 @@ struct evaluator > // -------------------- Block -------------------- template ::ret> + bool HasDirectAccess = has_direct_access::ret> struct block_evaluator; template -struct 
evaluator > +struct evaluator> : block_evaluator { typedef Block XprType; typedef typename XprType::Scalar Scalar; @@ -1150,7 +1426,7 @@ struct evaluator > // no direct-access => dispatch to a unary evaluator template struct block_evaluator - : unary_evaluator > { + : unary_evaluator> { typedef Block XprType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) @@ -1159,7 +1435,7 @@ struct block_evaluator struct unary_evaluator, IndexBased> - : evaluator_base > { + : evaluator_base> { typedef Block XprType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block) @@ -1198,12 +1474,12 @@ struct unary_evaluator, IndexBa } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { if (ForwardLinearAccess) return m_argImpl.template packet(m_linear_offset.value() + index); else @@ -1211,12 +1487,12 @@ struct unary_evaluator, IndexBa } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { if (ForwardLinearAccess) return m_argImpl.template writePacket(m_linear_offset.value() + index, x); else @@ -1224,6 +1500,39 @@ struct unary_evaluator, IndexBa x); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index 
count) const { + return m_argImpl.template packetSegment(m_startRow.value() + row, m_startCol.value() + col, + begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + if (ForwardLinearAccess) + return m_argImpl.template packetSegment(m_linear_offset.value() + index, begin, count); + else + return packetSegment(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, + begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + return m_argImpl.template writePacketSegment(m_startRow.value() + row, + m_startCol.value() + col, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + if (ForwardLinearAccess) + return m_argImpl.template writePacketSegment(m_linear_offset.value() + index, x, begin, + count); + else + return writePacketSegment(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? 
index : 0, x, begin, count); + } + protected: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const { @@ -1267,60 +1576,16 @@ struct block_evaluator -struct evaluator > - : evaluator_base > { - typedef Select XprType; - enum { - CoeffReadCost = evaluator::CoeffReadCost + - plain_enum_max(evaluator::CoeffReadCost, evaluator::CoeffReadCost), - - Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, - - Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& select) - : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - if (m_conditionImpl.coeff(row, col)) - return m_thenImpl.coeff(row, col); - else - return m_elseImpl.coeff(row, col); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - if (m_conditionImpl.coeff(index)) - return m_thenImpl.coeff(index); - else - return m_elseImpl.coeff(index); - } - - protected: - evaluator m_conditionImpl; - evaluator m_thenImpl; - evaluator m_elseImpl; -}; - // -------------------- Replicate -------------------- template -struct unary_evaluator > - : evaluator_base > { +struct unary_evaluator> + : evaluator_base> { typedef Replicate XprType; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? 
Dynamic : RowFactor * ColFactor }; - typedef typename internal::nested_eval::type ArgTypeNested; - typedef internal::remove_all_t ArgTypeNestedCleaned; + typedef typename nested_eval::type ArgTypeNested; + typedef remove_all_t ArgTypeNestedCleaned; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -1339,19 +1604,15 @@ struct unary_evaluator > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy - const Index actual_row = internal::traits::RowsAtCompileTime == 1 ? 0 - : RowFactor == 1 ? row - : row % m_rows.value(); - const Index actual_col = internal::traits::ColsAtCompileTime == 1 ? 0 - : ColFactor == 1 ? col - : col % m_cols.value(); + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); return m_argImpl.coeff(actual_row, actual_col); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { // try to avoid using modulo; this is a pure optimization strategy - const Index actual_index = internal::traits::RowsAtCompileTime == 1 + const Index actual_index = traits::RowsAtCompileTime == 1 ? (ColFactor == 1 ? index : index % m_cols.value()) : (RowFactor == 1 ? index : index % m_rows.value()); @@ -1359,26 +1620,39 @@ struct unary_evaluator > } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - const Index actual_row = internal::traits::RowsAtCompileTime == 1 ? 0 - : RowFactor == 1 ? row - : row % m_rows.value(); - const Index actual_col = internal::traits::ColsAtCompileTime == 1 ? 0 - : ColFactor == 1 ? col - : col % m_cols.value(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? 
row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); return m_argImpl.template packet(actual_row, actual_col); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { - const Index actual_index = internal::traits::RowsAtCompileTime == 1 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + const Index actual_index = traits::RowsAtCompileTime == 1 ? (ColFactor == 1 ? index : index % m_cols.value()) : (RowFactor == 1 ? index : index % m_rows.value()); return m_argImpl.template packet(actual_index); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); + + return m_argImpl.template packetSegment(actual_row, actual_col, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + const Index actual_index = traits::RowsAtCompileTime == 1 + ? (ColFactor == 1 ? index : index % m_cols.value()) + : (RowFactor == 1 ? 
index : index % m_rows.value()); + + return m_argImpl.template packetSegment(actual_index, begin, count); + } + protected: const ArgTypeNested m_arg; evaluator m_argImpl; @@ -1416,43 +1690,65 @@ struct evaluator_wrapper_base : evaluator_base { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(row, col); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(row, col, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_argImpl.template packetSegment(row, col, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_argImpl.template packetSegment(index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(row, col, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + m_argImpl.template 
writePacketSegment(index, x, begin, count); + } + protected: evaluator m_argImpl; }; template -struct unary_evaluator > : evaluator_wrapper_base > { +struct unary_evaluator> : evaluator_wrapper_base> { typedef MatrixWrapper XprType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) - : evaluator_wrapper_base >(wrapper.nestedExpression()) {} + : evaluator_wrapper_base>(wrapper.nestedExpression()) {} }; template -struct unary_evaluator > : evaluator_wrapper_base > { +struct unary_evaluator> : evaluator_wrapper_base> { typedef ArrayWrapper XprType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) - : evaluator_wrapper_base >(wrapper.nestedExpression()) {} + : evaluator_wrapper_base>(wrapper.nestedExpression()) {} }; // -------------------- Reverse -------------------- @@ -1462,7 +1758,7 @@ template struct reverse_packet_cond; template -struct unary_evaluator > : evaluator_base > { +struct unary_evaluator> : evaluator_base> { typedef Reverse XprType; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -1513,42 +1809,98 @@ struct unary_evaluator > : evaluator_base - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - enum { - PacketSize = unpacket_traits::size, - OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, - OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 - }; - typedef internal::reverse_packet_cond reverse_packet; - return reverse_packet::run(m_argImpl.template packet( - ReverseRow ? m_rows.value() - row - OffsetRow : row, ReverseCol ? m_cols.value() - col - OffsetCol : col)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? 
PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + + return reverse_packet::run(m_argImpl.template packet(actualRow, actualCol)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { - enum { PacketSize = unpacket_traits::size }; - return preverse( - m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + + return preverse(m_argImpl.template packet(actualIndex)); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { - // FIXME we could factorize some code with packet(i,j) - enum { - PacketSize = unpacket_traits::size, - OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, - OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 - }; - typedef internal::reverse_packet_cond reverse_packet; - m_argImpl.template writePacket(ReverseRow ? m_rows.value() - row - OffsetRow : row, - ReverseCol ? m_cols.value() - col - OffsetCol : col, - reverse_packet::run(x)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? 
m_cols.value() - col - OffsetCol : col; + + m_argImpl.template writePacket(actualRow, actualCol, reverse_packet::run(x)); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - enum { PacketSize = unpacket_traits::size }; - m_argImpl.template writePacket(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + + m_argImpl.template writePacket(actualIndex, preverse(x)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + Index actualBegin = ReversePacket ? 
(PacketSize - count - begin) : begin; + + return reverse_packet::run( + m_argImpl.template packetSegment(actualRow, actualCol, actualBegin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + Index actualBegin = PacketSize - count - begin; + + return preverse(m_argImpl.template packetSegment(actualIndex, actualBegin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + Index actualBegin = ReversePacket ? 
(PacketSize - count - begin) : begin; + + m_argImpl.template writePacketSegment(actualRow, actualCol, reverse_packet::run(x), actualBegin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + Index actualBegin = PacketSize - count - begin; + + m_argImpl.template writePacketSegment(actualIndex, preverse(x), actualBegin, count); } protected: @@ -1563,7 +1915,7 @@ struct unary_evaluator > : evaluator_base -struct evaluator > : evaluator_base > { +struct evaluator> : evaluator_base> { typedef Diagonal XprType; enum { @@ -1599,13 +1951,13 @@ struct evaluator > : evaluator_base m_argImpl; - const internal::variable_if_dynamicindex m_index; + const variable_if_dynamicindex m_index; private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowOffset() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index colOffset() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const { return m_index.value() > 0 ? 
m_index.value() : 0; } }; @@ -1622,10 +1974,10 @@ template class EvalToTemp; template -struct traits > : public traits {}; +struct traits> : public traits {}; template -class EvalToTemp : public dense_xpr_base >::type { +class EvalToTemp : public dense_xpr_base>::type { public: typedef typename dense_xpr_base::type Base; EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) @@ -1634,16 +1986,16 @@ class EvalToTemp : public dense_xpr_base >::type { const ArgType& arg() const { return m_arg; } - EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_arg.rows(); } + constexpr Index rows() const noexcept { return m_arg.rows(); } - EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_arg.cols(); } + constexpr Index cols() const noexcept { return m_arg.cols(); } private: const ArgType& m_arg; }; template -struct evaluator > : public evaluator { +struct evaluator> : public evaluator { typedef EvalToTemp XprType; typedef typename ArgType::PlainObject PlainObject; typedef evaluator Base; diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index aa79b60..e2b2da5 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -108,12 +108,12 @@ class CwiseBinaryOp : public CwiseBinaryOpImpl>::RowsAtCompileTime == Dynamic ? m_rhs.rows() : m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { // return the fixed size type if available to enable compile time optimizations return internal::traits>::ColsAtCompileTime == Dynamic ? 
m_rhs.cols() : m_lhs.cols(); diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 39c33cf..e4c5fed 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -71,9 +71,13 @@ class CwiseNullaryOp : public internal::dense_xpr_base= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } + EIGEN_DEVICE_FUNC CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp()) + : CwiseNullaryOp(RowsAtCompileTime == 1 ? 1 : size, RowsAtCompileTime == 1 ? size : 1, func) { + EIGEN_STATIC_ASSERT(CwiseNullaryOp::IsVectorAtCompileTime, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const { return m_rows.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const { return m_cols.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols.value(); } /** \returns the functor representing the nullary operation */ EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } @@ -231,8 +235,7 @@ DenseBase::Constant(const Scalar& value) { * \sa LinSpaced(Index,const Scalar&, const Scalar&), setLinSpaced(Index,const Scalar&,const Scalar&) */ template -EIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase< - Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return DenseBase::NullaryExpr(size, internal::linspaced_op(low, high, size)); @@ -243,8 +246,7 @@ DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const * \sa LinSpaced(const Scalar&, const 
Scalar&) */ template -EIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase< - Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -283,7 +285,7 @@ DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) } /** - * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&) + * \copydoc DenseBase::LinSpaced(Index, const DenseBase::Scalar&, const DenseBase::Scalar&) * Special version for fixed size types which does not require the size parameter. */ template @@ -343,7 +345,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) { - return derived() = Constant(rows(), cols(), val); + internal::eigen_fill_impl::run(derived(), val); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. @@ -479,9 +482,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpace * \sa Zero(), Zero(Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( Index rows, Index cols) { - return Constant(rows, cols, Scalar(0)); + return ZeroReturnType(rows, cols); } /** \returns an expression of a zero vector. 
@@ -501,9 +504,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::Constan * \sa Zero(), Zero(Index,Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero( +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( Index size) { - return Constant(size, Scalar(0)); + return ZeroReturnType(size); } /** \returns an expression of a fixed-size zero matrix or vector. @@ -517,8 +520,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::Constan * \sa Zero(Index), Zero(Index,Index) */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero() { - return Constant(Scalar(0)); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero() { + return ZeroReturnType(RowsAtCompileTime, ColsAtCompileTime); } /** \returns true if *this is approximately equal to the zero matrix, @@ -547,7 +550,8 @@ EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { - return setConstant(Scalar(0)); + internal::eigen_zero_impl::run(derived()); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to zero. @@ -562,7 +566,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index newSize) { resize(newSize); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, and sets all coefficients in this expression to zero. 
@@ -578,7 +582,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index rows, Index cols) { resize(rows, cols); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, changing only the number of columns, and sets all diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 42ed459..94ec1a0 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -60,8 +60,8 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeffRef(0)); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? int(StrideType::InnerStrideAtCompileTime) : derived().nestedExpression().innerStride() * sizeof(typename traits::Scalar) / sizeof(Scalar); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? int(StrideType::OuterStrideAtCompileTime) : derived().nestedExpression().outerStride() * sizeof(typename traits::Scalar) / sizeof(Scalar); @@ -145,8 +145,8 @@ class CwiseUnaryView : public internal::CwiseUnaryViewImplrows() : this->cols(); } @@ -217,7 +217,7 @@ class DenseBase * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a * column-major matrix, and the number of columns for a row-major matrix. */ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const { + EIGEN_DEVICE_FUNC constexpr Index innerSize() const { return IsVectorAtCompileTime ? this->size() : int(IsRowMajor) ? 
this->cols() : this->rows(); } @@ -243,6 +243,8 @@ class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp, PlainObject> ConstantReturnType; + /** \internal Represents a matrix with all coefficients equal to zero*/ + typedef CwiseNullaryOp, PlainObject> ZeroReturnType; /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ EIGEN_DEPRECATED typedef CwiseNullaryOp, PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ @@ -304,12 +306,12 @@ class DenseBase EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, Index size, - const Scalar& low, - const Scalar& high); - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, - const Scalar& low, - const Scalar& high); + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, + const Scalar& high); + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. 
Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, + const Scalar& high); EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, const Scalar& high); @@ -328,9 +330,9 @@ class DenseBase template EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); - EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); @@ -365,7 +367,12 @@ class DenseBase EIGEN_DEVICE_FUNC inline bool allFinite() const; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other); + template ::value, typename = std::enable_if_t> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other); + template ::value, typename = std::enable_if_t> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other); typedef internal::add_const_on_value_type_t::type> EvalReturnType; /** \returns the matrix or vector obtained by evaluating this expression. 
@@ -595,6 +602,13 @@ class DenseBase inline const_iterator end() const; inline const_iterator cend() const; + using RealViewReturnType = std::conditional_t::IsComplex, RealView, Derived&>; + using ConstRealViewReturnType = + std::conditional_t::IsComplex, RealView, const Derived&>; + + EIGEN_DEVICE_FUNC RealViewReturnType realView(); + EIGEN_DEVICE_FUNC ConstRealViewReturnType realView() const; + #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) @@ -621,17 +635,19 @@ class DenseBase protected: EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase) /** Default constructor. Do nothing. */ +#ifdef EIGEN_INTERNAL_DEBUGGING EIGEN_DEVICE_FUNC constexpr DenseBase() { /* Just checks for self-consistency of the flags. * Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down */ -#ifdef EIGEN_INTERNAL_DEBUGGING EIGEN_STATIC_ASSERT( (internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, int(IsRowMajor)) && internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, int(!IsRowMajor))), INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) -#endif } +#else + EIGEN_DEVICE_FUNC constexpr DenseBase() = default; +#endif private: EIGEN_DEVICE_FUNC explicit DenseBase(int); @@ -640,6 +656,18 @@ class DenseBase EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); }; +/** Free-function swap. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + // Use forwarding references to capture all combinations of cv-qualified l+r-value cases. 
+ std::enable_if_t>, std::decay_t>::value && + std::is_base_of>, std::decay_t>::value, + void> + swap(DerivedA&& a, DerivedB&& b) { + a.swap(b); +} + } // end namespace Eigen #endif // EIGEN_DENSEBASE_H diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 48c6d73..377df57 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -45,10 +45,16 @@ class DenseCoeffsBase : public EigenBase { // - This is the return type of the coeff() method. // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). + // - The DirectAccessBit means exactly that the underlying data of coefficients can be directly accessed as a plain + // strided array, which means exactly that the underlying data of coefficients does exist in memory, which means + // exactly that the coefficients is const-referencable, which means exactly that we can have coeff() return a const + // reference. For example, Map have DirectAccessBit but not LvalueBit, so that Map.coeff() + // does points to a const Scalar& which exists in memory, while does not allow coeffRef() as it would not provide a + // lvalue. Notice that DirectAccessBit and LvalueBit are mutually orthogonal. // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. 
- typedef std::conditional_t::Flags& LvalueBit), const Scalar&, + typedef std::conditional_t::Flags&(LvalueBit | DirectAccessBit)), const Scalar&, std::conditional_t::value, Scalar, const Scalar>> CoeffReturnType; @@ -89,12 +95,12 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeff(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); } @@ -102,7 +108,7 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index), operator[](Index) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeff(row, col); } @@ -122,7 +128,7 @@ class DenseCoeffsBase : public EigenBase { * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -137,7 +143,7 @@ class DenseCoeffsBase : public 
EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator[](Index index) const { EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); @@ -154,32 +160,32 @@ class DenseCoeffsBase : public EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); return coeff(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType y() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType z() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType w() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } @@ -297,7 +303,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeffRef(row, col); } @@ -311,7 +317,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } @@ -331,7 +337,7 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -345,7 +351,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); @@ -361,32 +367,32 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); return coeffRef(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& y() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& z() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& w() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } @@ -420,33 +426,29 @@ class DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase struct first_aligned_impl { - static EIGEN_CONSTEXPR inline Index run(const Derived&) EIGEN_NOEXCEPT { return 0; } + static constexpr Index run(const Derived&) noexcept { return 0; } }; template diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index f616939..d62586c 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -27,622 +27,550 @@ namespace Eigen { namespace internal { -struct constructor_without_unaligned_array_assert {}; +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) +#else +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) \ + eigen_assert((is_constant_evaluated() || (std::uintptr_t(array) % Alignment == 0)) && \ + "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif -template -EIGEN_DEVICE_FUNC constexpr void check_static_allocation_size() { -// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit #if EIGEN_STACK_ALLOCATION_LIMIT - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) \ + EIGEN_STATIC_ASSERT(X <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG) +#else +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) #endif -} /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. 
*/ + template ::value> struct plain_array { - T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { check_static_allocation_size(); } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) -#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) + EIGEN_ALIGN_TO_BOUNDARY(Alignment) T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; #else -#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((internal::is_constant_evaluated() || (std::uintptr_t(array) & (sizemask)) == 0) && \ - "this assertion is explained here: " \ - "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ - " **** READ THIS WEB PAGE !!! ****"); -#endif - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { + 
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) + EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } +#endif }; template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } +struct plain_array { + T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } +#endif }; template struct plain_array { T array[1]; - EIGEN_DEVICE_FUNC constexpr plain_array() {} - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; }; -struct plain_array_helper { - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void copy( - const plain_array& src, const Eigen::Index size, - plain_array& dst) { - smart_copy(src.array, src.array + size, dst.array); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void swap(plain_array& a, - const Eigen::Index a_size, - plain_array& b, - const Eigen::Index b_size) { - if (a_size < b_size) { - std::swap_ranges(b.array, b.array + a_size, a.array); - smart_move(b.array + a_size, b.array + b_size, a.array + a_size); - } else if (a_size > b_size) { - std::swap_ranges(a.array, a.array + b_size, b.array); - smart_move(a.array + b_size, a.array + a_size, b.array + b_size); - } else { - std::swap_ranges(a.array, a.array + a_size, b.array); - } - } -}; - -} // end namespace internal - -/** \internal - * - * \class DenseStorage - * \ingroup Core_Module - * - * \brief Stores the data of a matrix - * - * This class stores the data of fixed-size, 
dynamic-size or mixed matrices - * in a way as compact as possible. - * - * \sa Matrix - */ -template -class DenseStorage; +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap_plain_array(plain_array& a, + plain_array& b, + Index a_size, Index b_size) { + Index common_size = numext::mini(a_size, b_size); + std::swap_ranges(a.array, a.array + common_size, b.array); + if (a_size > b_size) + smart_copy(a.array + common_size, a.array + a_size, b.array + common_size); + else if (b_size > a_size) + smart_copy(b.array + common_size, b.array + b_size, a.array + common_size); +} -// purely fixed-size matrix -template -class DenseStorage { - internal::plain_array m_data; +template +class DenseStorage_impl { + plain_array m_data; public: - constexpr EIGEN_DEVICE_FUNC DenseStorage(){EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN( - Index size = - Size)} EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()) {} -#if defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN) - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(other.m_data){EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)} +#ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; #else - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + smart_copy(other.m_data.array, other.m_data.array + Size, m_data.array); + } #endif - EIGEN_DEVICE_FUNC constexpr DenseStorage - & - 
operator=(const DenseStorage&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows == Rows_ && cols == Cols_); - EIGEN_UNUSED_VARIABLE(size); - EIGEN_UNUSED_VARIABLE(rows); - EIGEN_UNUSED_VARIABLE(cols); - } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data, other.m_data); } - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } -}; - -// null matrix -template -class DenseStorage { - public: - static_assert(Rows_ * Cols_ == 0, "The fixed number of rows times columns must equal the storage size."); - EIGEN_DEVICE_FUNC constexpr DenseStorage() {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) { return *this; } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage&) {} - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {} - 
EIGEN_DEVICE_FUNC constexpr const T* data() const { return 0; } - EIGEN_DEVICE_FUNC constexpr T* data() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + numext::swap(m_data, other.m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -// more specializations for null matrices; these are necessary to resolve ambiguities -template -class DenseStorage { - Index m_rows; - Index m_cols; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows), m_cols(other.m_cols) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows) { + 
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_rows(rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_rows = other.m_rows; - m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) { - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); - } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - 
EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -template -class DenseStorage { - Index m_cols; +template +class DenseStorage_impl { + plain_array m_data; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_cols(other.m_cols) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) { - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -template -class DenseStorage { - Index m_rows; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : 
m_rows(other.m_rows) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_rows(rows), m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_rows = other.m_rows; + m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) { - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_rows, other.m_rows); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { m_rows = rows; - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + m_cols = cols; } - EIGEN_DEVICE_FUNC void 
resize(Index, Index rows, Index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { m_rows = rows; - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -// dynamic-size matrix with fixed-size storage -template -class DenseStorage { - internal::plain_array m_data; - Index m_rows; - Index m_cols; +// null matrix variants +template +class DenseStorage_impl { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl&) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } +}; +template +class DenseStorage_impl { + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) { - internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_rows = other.m_rows; - m_cols = other.m_cols; - internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); - } - return *this; - } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) + : m_rows(rows) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - 
EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// dynamic-size matrix with fixed-size storage and fixed width -template -class DenseStorage { - internal::plain_array m_data; - Index m_rows; +template +class DenseStorage_impl { + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_rows(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows) { - internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) + : m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_cols, other.m_cols); } - - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_rows = other.m_rows; - internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); - } - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index) : m_rows(rows) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_); - numext::swap(m_rows, other.m_rows); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// dynamic-size matrix with fixed-size storage and fixed height -template -class DenseStorage { - internal::plain_array m_data; - Index m_cols; +template +class DenseStorage_impl { + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) { - internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_cols = other.m_cols; - internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); - } - return *this; - } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, Rows_ * m_cols, other.m_data, Rows_ * other.m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) + : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_rows, other.m_rows); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC constexpr Index 
rows(void) const EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// purely dynamic matrix. 
-template -class DenseStorage { - T* m_data; - Index m_rows; - Index m_cols; +// fixed-size matrix with dynamic memory allocation not currently supported +template +class DenseStorage_impl {}; +// dynamic-sized variants +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), - m_rows(rows), - m_cols(cols) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows >= 0 && cols >= 0); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows * other.m_cols)), - m_rows(other.m_rows), - m_cols(other.m_cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows * m_cols) - internal::smart_copy(other.m_data, other.m_data + other.m_rows * other.m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return 
*this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_rows(std::move(other.m_rows)), - m_cols(std::move(other.m_cols)) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows) { other.m_data = nullptr; other.m_rows = 0; - other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, m_rows * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - void conservativeResize(Index size, Index rows, Index cols) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index /*cols*/) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_rows = rows; - m_cols = cols; 
} - EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) { - if (size != m_rows * m_cols) { - internal::conditional_aligned_delete_auto(m_data, m_rows * m_cols); - if (size > 0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index /*cols*/) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } }; - -// matrix with dynamic width and fixed height (so that matrix has dynamic size). 
-template -class DenseStorage { - T* m_data; - Index m_cols; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_cols(0) {} - explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows == Rows_ && cols >= 0); - EIGEN_UNUSED_VARIABLE(rows); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(Rows_ * other.m_cols)), - m_cols(other.m_cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols * Rows_) - internal::smart_copy(other.m_data, other.m_data + Rows_ * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return *this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_cols(std::move(other.m_cols)) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr 
DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_cols(other.m_cols) { other.m_data = nullptr; other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_cols, other.m_cols); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, Rows_ * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, Rows_ * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index /*rows*/, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { - if (size != Rows_ * m_cols) { - internal::conditional_aligned_delete_auto(m_data, Rows_ * m_cols); - if (size > 0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = 
internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index /*rows*/, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } }; - -// matrix with dynamic height and fixed width (so that matrix has dynamic size). 
-template -class DenseStorage { - T* m_data; - Index m_rows; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0) {} - explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows >= 0 && cols == Cols_); - EIGEN_UNUSED_VARIABLE(cols); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows * Cols_)), - m_rows(other.m_rows) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows * Cols_) - internal::smart_copy(other.m_data, other.m_data + other.m_rows * Cols_, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return *this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_rows(std::move(other.m_rows)) { + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) { other.m_data = nullptr; other.m_rows = 0; + other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_rows, other.m_rows); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, Cols_ * m_rows); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) { return Cols_; } - void conservativeResize(Index size, Index rows, Index) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows * Cols_); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_rows = rows; + m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { - if (size != m_rows * Cols_) { - internal::conditional_aligned_delete_auto(m_data, Cols_ * m_rows); - if (size > 0) // >0 and not 
simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } +}; +template +struct use_default_move { + static constexpr bool DynamicObject = Size == Dynamic; + static constexpr bool TrivialObject = + (!NumTraits::RequireInitialization) && (Rows >= 0) && (Cols >= 0) && (Size == Rows * Cols); + static constexpr bool value = DynamicObject || TrivialObject; +}; +} // end namespace internal + +/** \internal + * + * \class DenseStorage_impl + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. 
+ * + * \sa Matrix + */ +template ::value> +class DenseStorage : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage meets the requirements of use_default_move, then use the move construction and move assignment + // operation defined in DenseStorage_impl, or the compiler-generated version if none is defined + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&&) = default; +}; +template +class DenseStorage + : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage does not meet the requirements of use_default_move, then defer to the copy construction and copy + // assignment behavior + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&& other) + : DenseStorage(static_cast(other)) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&& other) { + *this = other; + return *this; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { 
return m_data; } }; } // end namespace Eigen diff --git a/Eigen/src/Core/DeviceWrapper.h b/Eigen/src/Core/DeviceWrapper.h new file mode 100644 index 0000000..012dce1 --- /dev/null +++ b/Eigen/src/Core/DeviceWrapper.h @@ -0,0 +1,153 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2023 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DEVICEWRAPPER_H +#define EIGEN_DEVICEWRAPPER_H + +namespace Eigen { +template +struct DeviceWrapper { + using Base = EigenBase>; + using Scalar = typename Derived::Scalar; + + EIGEN_DEVICE_FUNC DeviceWrapper(Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {} + EIGEN_DEVICE_FUNC DeviceWrapper(const Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase& other) { + using AssignOp = internal::assign_op; + internal::call_assignment(*this, other.derived(), AssignOp()); + return m_xpr; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const EigenBase& other) { + using AddAssignOp = internal::add_assign_op; + internal::call_assignment(*this, other.derived(), AddAssignOp()); + return m_xpr; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const EigenBase& other) { + using SubAssignOp = internal::sub_assign_op; + internal::call_assignment(*this, other.derived(), SubAssignOp()); + return m_xpr; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& derived() { return m_xpr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Device& device() { return m_device; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NoAlias noalias() { + return NoAlias(*this); + } + + Derived& m_xpr; + Device& m_device; +}; + +namespace internal { + +// this 
is where we differentiate between lazy assignment and specialized kernels (e.g. matrix products) +template ::Shape, + typename evaluator_traits::Shape>::Kind, + typename EnableIf = void> +struct AssignmentWithDevice; + +// unless otherwise specified, use the default product implementation +template +struct AssignmentWithDevice, Functor, Device, Dense2Dense, Weak> { + using SrcXprType = Product; + using Base = Assignment; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func, + Device&) { + Base::run(dst, src, func); + } +}; + +// specialization for coeffcient-wise assignment +template +struct AssignmentWithDevice { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func, + Device& device) { +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func, device); + } +}; + +// this allows us to use the default evaluation scheme if it is not specialized for the device +template +struct dense_assignment_loop_with_device { + using Base = dense_assignment_loop; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); } +}; + +// entry point for a generic expression with device +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(DeviceWrapper dst, + const Src& src, const Func& func) { + enum { + NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) || + (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) && + int(Dst::SizeAtCompileTime) != 1 + }; + + using ActualDstTypeCleaned = std::conditional_t, Dst>; + using ActualDstType = std::conditional_t, Dst&>; + ActualDstType actualDst(dst.derived()); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + 
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar); + + // this provides a mechanism for specializing simple assignments, matrix products, etc + AssignmentWithDevice::run(actualDst, src, func, dst.device()); +} + +// copy and pasted from AssignEvaluator except forward device to kernel +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, + const Functor& func, Device& device) { + using DstEvaluatorType = evaluator; + using SrcEvaluatorType = evaluator; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + + using Kernel = generic_dense_assignment_kernel; + + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop_with_device::run(kernel, device); +} + +} // namespace internal + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper EigenBase::device(Device& device) { + return DeviceWrapper(derived(), device); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper EigenBase::device( + Device& device) const { + return DeviceWrapper(derived(), device); +} +} // namespace Eigen +#endif diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index 8d27857..ff8611c 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -83,13 +83,11 @@ class Diagonal : public internal::dense_xpr_base(m_matrix.rows(), m_matrix.cols() - m_index.value()); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return 1; } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return 1; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index 
innerStride() const EIGEN_NOEXCEPT { - return m_matrix.outerStride() + 1; - } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_matrix.outerStride() + 1; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { return 0; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return 0; } typedef std::conditional_t::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; @@ -134,13 +132,13 @@ class Diagonal : public internal::dense_xpr_base 0 ? m_index.value() : -m_index.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowOffset() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const noexcept { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index colOffset() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const noexcept { return m_index.value() > 0 ? m_index.value() : 0; } // trigger a compile-time error if someone try to call packet diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index fd61bb7..52630d9 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -76,9 +76,9 @@ class DiagonalBase : public EigenBase { } /** \returns the number of rows. */ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return diagonal().size(); } /** \returns the number of columns. 
*/ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return diagonal().size(); } /** \returns the diagonal matrix product of \c *this by the dense matrix, \a matrix */ template @@ -256,10 +256,13 @@ class DiagonalMatrix : public DiagonalBase, DiagonalVectorType>> InitializeReturnType; + typedef DiagonalWrapper, DiagonalVectorType>> + ZeroInitializeReturnType; + /** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */ - EIGEN_DEVICE_FUNC static const InitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } /** Initializes a diagonal matrix of size dim with coefficients set to zero */ - EIGEN_DEVICE_FUNC static const InitializeReturnType Zero(Index size) { + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero(Index size) { return DiagonalVectorType::Zero(size).asDiagonal(); } /** Initializes a identity matrix of size SizeAtCompileTime */ @@ -386,8 +389,9 @@ struct AssignmentKind { // Diagonal matrix to Dense assignment template struct Assignment { - static void run(DstXprType& dst, const SrcXprType& src, - const internal::assign_op& /*func*/) { + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); @@ -396,13 +400,15 @@ struct Assignment { dst.diagonal() = src.diagonal(); } - static void run(DstXprType& dst, const SrcXprType& src, - const internal::add_assign_op& /*func*/) { + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op& /*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType& dst, const SrcXprType& src, - 
const internal::sub_assign_op& /*func*/) { + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op& /*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 82eb9c7..059527c 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -17,28 +17,18 @@ namespace Eigen { namespace internal { -// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot -// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE -// looking at the static assertions. Thus this is a trick to get better compile errors. -template -struct dot_nocheck { - typedef scalar_conj_product_op::Scalar, typename traits::Scalar> conj_prod; - typedef typename conj_prod::result_type ResScalar; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr(b).sum(); +template ::Scalar> +struct squared_norm_impl { + using Real = typename NumTraits::Real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) { + Scalar result = a.unaryExpr(squared_norm_functor()).sum(); + return numext::real(result) + numext::imag(result); } }; -template -struct dot_nocheck { - typedef scalar_conj_product_op::Scalar, typename traits::Scalar> conj_prod; - typedef typename conj_prod::result_type ResScalar; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr(b).sum(); - } +template +struct squared_norm_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); } }; } // end namespace internal @@ -60,18 +50,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar, typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { - 
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived) -#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) - EIGEN_CHECK_BINARY_COMPATIBILIY( - Eigen::internal::scalar_conj_product_op, Scalar, - typename OtherDerived::Scalar); -#endif - - eigen_assert(size() == other.size()); - - return internal::dot_nocheck::run(*this, other); + return internal::dot_impl::run(derived(), other.derived()); } //---------- implementation of L2 norm and related functions ---------- @@ -85,7 +64,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { - return numext::real((*this).cwiseAbs2().sum()); + return internal::squared_norm_impl::run(derived()); } /** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index f485016..c9a6e88 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -46,22 +46,22 @@ struct EigenBase { typedef typename internal::traits::StorageKind StorageKind; /** \returns a reference to the derived object */ - EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr Derived& derived() { return *static_cast(this); } /** \returns a const reference to the derived object */ - EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast(this); } - EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { + EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } /** \returns the number of rows. 
\sa cols(), RowsAtCompileTime */ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return derived().rows(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); } /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return derived().cols(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return derived().cols(); } /** \returns the number of coefficients, which is rows()*cols(). * \sa rows(), cols(), SizeAtCompileTime. */ - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index size() const EIGEN_NOEXCEPT { return rows() * cols(); } + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return rows() * cols(); } /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ template @@ -104,6 +104,11 @@ struct EigenBase { // derived class can reimplement it in a more optimized way. dst = this->derived() * dst; } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper device(Device& device); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper device(Device& device) const; }; /*************************************************************************** diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h new file mode 100644 index 0000000..f40d56d --- /dev/null +++ b/Eigen/src/Core/Fill.h @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_FILL_H +#define EIGEN_FILL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct eigen_fill_helper : std::false_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : eigen_fill_helper {}; + +template +struct eigen_fill_helper> + : std::integral_constant::value && + (Xpr::IsRowMajor ? (BlockRows == 1) : (BlockCols == 1))> {}; + +template +struct eigen_fill_helper>> : eigen_fill_helper {}; + +template +struct eigen_fill_helper>> + : std::integral_constant::value && + enum_eq_not_dynamic(OuterStride_, Xpr::InnerSizeAtCompileTime)> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + using Func = scalar_constant_op; + using PlainObject = typename Xpr::PlainObject; + using Constant = typename PlainObject::ConstantReturnType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) { + const Constant src(dst.rows(), dst.cols(), val); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); + } +}; + +#if EIGEN_COMP_MSVC || defined(EIGEN_GPU_COMPILE_PHASE) +template +struct eigen_fill_impl : eigen_fill_impl {}; +#else +template +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + const Scalar val_copy = val; + using std::fill_n; + fill_n(dst.data(), dst.size(), val_copy); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + 
resize_if_allowed(dst, src, assign_op()); + const Scalar& val = src.functor()(); + run(dst, val); + } +}; +#endif + +template +struct eigen_memset_helper { + static constexpr bool value = + std::is_trivially_copyable::value && eigen_fill_helper::value; +}; + +template +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + using PlainObject = typename Xpr::PlainObject; + using Zero = typename PlainObject::ZeroReturnType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) { + const Zero src(dst.rows(), dst.cols()); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); + } +}; + +template +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + static constexpr size_t max_bytes = (std::numeric_limits::max)(); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { + const size_t num_bytes = dst.size() * sizeof(Scalar); + if (num_bytes == 0) return; + void* dst_ptr = static_cast(dst.data()); +#ifndef EIGEN_NO_DEBUG + if (num_bytes > max_bytes) throw_std_bad_alloc(); + eigen_assert((dst_ptr != nullptr) && "null pointer dereference error!"); +#endif + EIGEN_USING_STD(memset); + memset(dst_ptr, 0, num_bytes); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + resize_if_allowed(dst, src, assign_op()); + run(dst); + } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_FILL_H diff --git a/Eigen/src/Core/FindCoeff.h b/Eigen/src/Core/FindCoeff.h new file mode 100644 index 0000000..0102e8a --- /dev/null +++ b/Eigen/src/Core/FindCoeff.h @@ -0,0 +1,464 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2025 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FIND_COEFF_H +#define EIGEN_FIND_COEFF_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template ::IsInteger> +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return candidate > incumbent; + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pcmp_lt(incumbent, candidate); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + return (candidate > incumbent) || ((candidate != candidate) && (incumbent == incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return (candidate > incumbent) || ((candidate == candidate) && (incumbent != incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(candidate)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template ::IsInteger> +struct min_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return candidate < 
incumbent; + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pcmp_lt(candidate, incumbent); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_coeff_functor { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + return (candidate < incumbent) || ((candidate != candidate) && (incumbent == incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return (candidate < incumbent) || ((candidate == candidate) && (incumbent != incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(candidate)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_max_traits { + static constexpr bool PacketAccess = packet_traits::Vectorizable; +}; +template +struct functor_traits> : min_max_traits {}; +template +struct functor_traits> : min_max_traits {}; + +template +struct find_coeff_loop; +template +struct find_coeff_loop { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& outer, Index& inner) { + Index outerSize = eval.outerSize(); + Index innerSize = eval.innerSize(); + + /* initialization performed in calling function */ + /* result = eval.coeff(0, 0); */ + /* outer = 
0; */ + /* inner = 0; */ + + for (Index j = 0; j < outerSize; j++) { + for (Index i = 0; i < innerSize; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(j, i); + bool newRes = func.compareCoeff(res, xprCoeff); + if (newRes) { + outer = j; + inner = i; + res = xprCoeff; + } + } + } + } +}; +template +struct find_coeff_loop { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& index) { + Index size = eval.size(); + + /* initialization performed in calling function */ + /* result = eval.coeff(0); */ + /* index = 0; */ + + for (Index k = 0; k < size; k++) { + Scalar xprCoeff = eval.coeff(k); + bool newRes = func.compareCoeff(res, xprCoeff); + if (newRes) { + index = k; + res = xprCoeff; + } + } + } +}; +template +struct find_coeff_loop { + using ScalarImpl = find_coeff_loop; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& outer, + Index& inner) { + Index outerSize = eval.outerSize(); + Index innerSize = eval.innerSize(); + Index packetEnd = numext::round_down(innerSize, PacketSize); + + /* initialization performed in calling function */ + /* result = eval.coeff(0, 0); */ + /* outer = 0; */ + /* inner = 0; */ + + bool checkPacket = false; + + for (Index j = 0; j < outerSize; j++) { + Packet resultPacket = pset1(result); + for (Index i = 0; i < packetEnd; i += PacketSize) { + Packet xprPacket = eval.template packetByOuterInner(j, i); + if (predux_any(func.comparePacket(resultPacket, xprPacket))) { + outer = j; + inner = i; + result = func.predux(xprPacket); + resultPacket = pset1(result); + checkPacket = true; + } + } + + for (Index i = packetEnd; i < innerSize; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(j, i); + if (func.compareCoeff(result, xprCoeff)) { + outer = j; + inner 
= i; + result = xprCoeff; + checkPacket = false; + } + } + } + + if (checkPacket) { + result = eval.coeffByOuterInner(outer, inner); + Index i_end = inner + PacketSize; + for (Index i = inner; i < i_end; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(outer, i); + if (func.compareCoeff(result, xprCoeff)) { + inner = i; + result = xprCoeff; + } + } + } + } +}; +template +struct find_coeff_loop { + using ScalarImpl = find_coeff_loop; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int Alignment = Evaluator::Alignment; + + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& index) { + Index size = eval.size(); + Index packetEnd = numext::round_down(size, PacketSize); + + /* initialization performed in calling function */ + /* result = eval.coeff(0); */ + /* index = 0; */ + + Packet resultPacket = pset1(result); + bool checkPacket = false; + + for (Index k = 0; k < packetEnd; k += PacketSize) { + Packet xprPacket = eval.template packet(k); + if (predux_any(func.comparePacket(resultPacket, xprPacket))) { + index = k; + result = func.predux(xprPacket); + resultPacket = pset1(result); + checkPacket = true; + } + } + + for (Index k = packetEnd; k < size; k++) { + Scalar xprCoeff = eval.coeff(k); + if (func.compareCoeff(result, xprCoeff)) { + index = k; + result = xprCoeff; + checkPacket = false; + } + } + + if (checkPacket) { + result = eval.coeff(index); + Index k_end = index + PacketSize; + for (Index k = index; k < k_end; k++) { + Scalar xprCoeff = eval.coeff(k); + if (func.compareCoeff(result, xprCoeff)) { + index = k; + result = xprCoeff; + } + } + } + } +}; + +template +struct find_coeff_evaluator : public evaluator { + using Base = evaluator; + using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr int Flags = Base::Flags; + static constexpr bool 
IsRowMajor = bool(Flags & RowMajorBit); + EIGEN_DEVICE_FUNC inline find_coeff_evaluator(const Derived& xpr) : Base(xpr), m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC inline Scalar coeffByOuterInner(Index outer, Index inner) const { + Index row = IsRowMajor ? outer : inner; + Index col = IsRowMajor ? inner : outer; + return Base::coeff(row, col); + } + template + EIGEN_DEVICE_FUNC inline PacketType packetByOuterInner(Index outer, Index inner) const { + Index row = IsRowMajor ? outer : inner; + Index col = IsRowMajor ? inner : outer; + return Base::template packet(row, col); + } + + EIGEN_DEVICE_FUNC inline Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC inline Index outerSize() const { return m_xpr.outerSize(); } + EIGEN_DEVICE_FUNC inline Index size() const { return m_xpr.size(); } + + const Derived& m_xpr; +}; + +template +struct find_coeff_impl { + using Evaluator = find_coeff_evaluator; + static constexpr int Flags = Evaluator::Flags; + static constexpr int Alignment = Evaluator::Alignment; + static constexpr bool IsRowMajor = Derived::IsRowMajor; + static constexpr int MaxInnerSizeAtCompileTime = + IsRowMajor ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime; + static constexpr int MaxSizeAtCompileTime = Derived::MaxSizeAtCompileTime; + + using Scalar = typename Derived::Scalar; + using Packet = typename Evaluator::Packet; + + static constexpr int PacketSize = unpacket_traits::size; + static constexpr bool Linearize = bool(Flags & LinearAccessBit); + static constexpr bool DontVectorize = + enum_lt_not_dynamic(Linearize ? 
MaxSizeAtCompileTime : MaxInnerSizeAtCompileTime, PacketSize); + static constexpr bool Vectorize = + !DontVectorize && bool(Flags & PacketAccessBit) && functor_traits::PacketAccess; + + using Loop = find_coeff_loop; + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer, + Index& inner) { + Evaluator eval(xpr); + Loop::run(eval, func, res, outer, inner); + } + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer, + Index& inner) { + // where possible, use the linear loop and back-calculate the outer and inner indices + Index index = 0; + run(xpr, func, res, index); + outer = index / xpr.innerSize(); + inner = index % xpr.innerSize(); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& index) { + Evaluator eval(xpr); + Loop::run(eval, func, res, index); + } +}; + +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar findCoeff(const DenseBase& mat, Func& func, + IndexType* rowPtr, IndexType* colPtr) { + eigen_assert(mat.rows() > 0 && mat.cols() > 0 && "you are using an empty matrix"); + using Scalar = typename DenseBase::Scalar; + using FindCoeffImpl = internal::find_coeff_impl; + Index outer = 0; + Index inner = 0; + Scalar res = mat.coeff(0, 0); + FindCoeffImpl::run(mat.derived(), func, res, outer, inner); + *rowPtr = internal::convert_index(Derived::IsRowMajor ? outer : inner); + if (colPtr) *colPtr = internal::convert_index(Derived::IsRowMajor ? 
inner : outer); + return res; +} + +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar findCoeff(const DenseBase& mat, Func& func, + IndexType* indexPtr) { + eigen_assert(mat.size() > 0 && "you are using an empty matrix"); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + using Scalar = typename DenseBase::Scalar; + using FindCoeffImpl = internal::find_coeff_impl; + Index index = 0; + Scalar res = mat.coeff(0); + FindCoeffImpl::run(mat.derived(), func, res, index); + *indexPtr = internal::convert_index(index); + return res; +} + +} // namespace internal + +/** \fn DenseBase::minCoeff(IndexType* rowId, IndexType* colId) const + * \returns the minimum of all coefficients of *this and puts in *row and *col its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. + * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::minCoeff(IndexType* rowPtr, + IndexType* colPtr) const { + using Func = internal::min_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, rowPtr, colPtr); +} + +/** \returns the minimum of all coefficients of *this and puts in *index its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
+ * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), + * DenseBase::minCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::minCoeff(IndexType* indexPtr) const { + using Func = internal::min_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, indexPtr); +} + +/** \fn DenseBase::maxCoeff(IndexType* rowId, IndexType* colId) const + * \returns the maximum of all coefficients of *this and puts in *row and *col its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. + * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::maxCoeff(IndexType* rowPtr, + IndexType* colPtr) const { + using Func = internal::max_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, rowPtr, colPtr); +} + +/** \returns the maximum of all coefficients of *this and puts in *index its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
+ * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), + * DenseBase::maxCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::maxCoeff(IndexType* indexPtr) const { + using Func = internal::max_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, indexPtr); +} + +} // namespace Eigen + +#endif // EIGEN_FIND_COEFF_H diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index a91b0da..55beab3 100644 --- a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -41,14 +41,10 @@ class ForceAlignedAccess : public internal::dense_xpr_base struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } @@ -237,23 +237,21 @@ struct gemv_static_vector_if { template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } }; template struct gemv_static_vector_if { #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 - internal::plain_array - m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + internal::plain_array m_data; + EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. 
- internal::plain_array< - Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> - m_data; - EIGEN_STRONG_INLINE Scalar* data() { - return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES); + internal::plain_array m_data; + EIGEN_STRONG_INLINE constexpr Scalar* data() { + return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + + EIGEN_MAX_ALIGN_BYTES); } #endif }; @@ -329,6 +327,7 @@ struct gemv_dense_selector { if (!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = Dest::SizeAtCompileTime; Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif @@ -393,6 +392,7 @@ struct gemv_dense_selector { if (!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 58a197f..64e1123 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -57,22 +57,22 @@ struct default_packet_traits { HasConj = 1, HasSetLinear = 1, HasSign = 1, + // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet + // types + HasRound = 1, HasArg = 0, HasAbsDiff = 0, HasBlend = 0, // This flag is used to indicate whether packet comparison is supported. - // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. + // pcmp_eq and pcmp_lt should be defined for it to be true. 
HasCmp = 0, - HasRound = 0, - HasRint = 0, - HasFloor = 0, - HasCeil = 0, HasDiv = 0, HasReciprocal = 0, HasSqrt = 0, HasRsqrt = 0, + HasCbrt = 0, HasExp = 0, HasExpm1 = 0, HasLog = 0, @@ -135,7 +135,14 @@ template struct unpacket_traits { typedef T type; typedef T half; - enum { size = 1, alignment = 1, vectorizable = false, masked_load_available = false, masked_store_available = false }; + typedef typename numext::get_integer_by_size::signed_type integer_packet; + enum { + size = 1, + alignment = alignof(T), + vectorizable = false, + masked_load_available = false, + masked_store_available = false + }; }; template @@ -188,7 +195,7 @@ struct is_half { static constexpr int Size = unpacket_traits::size; using DefaultPacket = typename packet_traits::type; static constexpr int DefaultSize = unpacket_traits::size; - static constexpr bool value = Size < DefaultSize; + static constexpr bool value = Size != 1 && Size < DefaultSize; }; template @@ -201,7 +208,7 @@ struct type_casting_traits { }; }; -// provides a succint template to define vectorized casting traits with respect to the largest accessible packet types +// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types template struct vectorized_type_casting_traits { enum : int { @@ -246,6 +253,12 @@ struct preinterpret_generic { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; } }; +template +struct preinterpret_generic::as_real, ComplexPacket, false> { + using RealPacket = typename unpacket_traits::as_real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; } +}; + /** \internal \returns reinterpret_cast(a) */ template EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) { @@ -335,12 +348,9 @@ EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { /** \internal \returns -a (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnegate(const 
Packet& a) { - return -a; -} - -template <> -EIGEN_DEVICE_FUNC inline bool pnegate(const bool& a) { - return !a; + EIGEN_STATIC_ASSERT((!is_same::type, bool>::value), + NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + return numext::negate(a); } /** \internal \returns conj(a) (coeff-wise) */ @@ -365,8 +375,13 @@ template EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) { return a / b; } +// Avoid compiler warning for boolean algebra. +template <> +EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) { + return a && b; +} -// In the generic case, memset to all one bits. +// In the generic packet case, memset to all one bits. template struct ptrue_impl { static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) { @@ -376,19 +391,16 @@ struct ptrue_impl { } }; +// Use a value of one for scalars. +template +struct ptrue_impl::value>> { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); } +}; + // For booleans, we can only directly set a valid `bool` value to avoid UB. template <> struct ptrue_impl { - static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; } -}; - -// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value). -// Although this is technically not a valid bitmask, the scalar path for pselect -// uses a comparison to zero, so this should still work in most cases. We don't -// have another option, since the scalar type requires initialization. -template -struct ptrue_impl::value && NumTraits::RequireInitialization>> { - static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(1); } + static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; } }; /** \internal \returns one bits. */ @@ -397,7 +409,7 @@ EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) { return ptrue_impl::run(a); } -// In the general case, memset to zero. +// In the general packet case, memset to zero. 
template struct pzero_impl { static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) { @@ -420,74 +432,44 @@ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { return pzero_impl::run(a); } -/** \internal \returns a <= b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { - return a <= b ? ptrue(a) : pzero(a); -} - -/** \internal \returns a < b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) { - return a < b ? ptrue(a) : pzero(a); -} - -/** \internal \returns a == b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) { - return a == b ? ptrue(a) : pzero(a); -} - -/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { - return a >= b ? pzero(a) : ptrue(a); -} - template struct bit_and { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; } }; template struct bit_or { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; } }; template struct bit_xor { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; } }; template struct bit_not { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } }; template <> struct bit_and { - EIGEN_DEVICE_FUNC 
EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { - return a && b; - } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; } }; template <> struct bit_or { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { - return a || b; - } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; } }; template <> struct bit_xor { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { - return a != b; - } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; } }; template <> struct bit_not { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } }; // Use operators &, |, ^, ~. @@ -576,14 +558,32 @@ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); } -/** \internal \returns isnan(a) */ +/** \internal \returns a < b as a bit mask */ template -EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) { - return pandnot(ptrue(a), pcmp_eq(a, a)); +EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) { + return a < b ? ptrue(a) : pzero(a); +} + +/** \internal \returns a == b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) { + return a == b ? 
ptrue(a) : pzero(a); +} + +/** \internal \returns a <= b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { + return por(pcmp_eq(a, b), pcmp_lt(a, b)); +} + +/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { + return a >= b ? pzero(a) : ptrue(a); } // In the general case, use bitwise select. -template +template ::value> struct pselect_impl { static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { return por(pand(a, mask), pandnot(b, mask)); @@ -592,9 +592,9 @@ struct pselect_impl { // For scalars, use ternary select. template -struct pselect_impl::value>> { +struct pselect_impl { static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { - return numext::equal_strict(mask, Packet(0)) ? b : a; + return numext::select(mask, a, b); } }; @@ -611,7 +611,7 @@ EIGEN_DEVICE_FUNC inline bool pselect(const bool& cond, const bool& a, con /** \internal \returns the min or of \a a and \a b (coeff-wise) If either \a a or \a b are NaN, the result is implementation defined. */ -template +template struct pminmax_impl { template static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { @@ -622,7 +622,7 @@ struct pminmax_impl { /** \internal \returns the min or max of \a a and \a b (coeff-wise) If either \a a or \a b are NaN, NaN is returned. */ template <> -struct pminmax_impl { +struct pminmax_impl { template static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { Packet not_nan_mask_a = pcmp_eq(a, a); @@ -635,7 +635,7 @@ struct pminmax_impl { If both \a a and \a b are NaN, NaN is returned. Equivalent to std::fmin(a, b). 
*/ template <> -struct pminmax_impl { +struct pminmax_impl { template static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { Packet not_nan_mask_a = pcmp_eq(a, a); @@ -644,7 +644,7 @@ struct pminmax_impl { } }; -#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& a, const Type& b) { return Func(a, b); } +#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); } /** \internal \returns the min of \a a and \a b (coeff-wise). If \a a or \b b is NaN, the return value is implementation defined. */ @@ -657,7 +657,8 @@ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { NaNPropagation determines the NaN propagation semantics. */ template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { - return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin))); + constexpr bool IsInteger = NumTraits::type>::IsInteger; + return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin))); } /** \internal \returns the max of \a a and \a b (coeff-wise) @@ -671,7 +672,8 @@ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { NaNPropagation determines the NaN propagation semantics. 
*/ template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { - return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax))); + constexpr bool IsInteger = NumTraits::type>::IsInteger; + return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax))); } /** \internal \returns the absolute value of \a a */ @@ -706,33 +708,21 @@ EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) { } /** \internal \returns \a a arithmetically shifted by N bits to the right */ -template -EIGEN_DEVICE_FUNC inline int parithmetic_shift_right(const int& a) { - return a >> N; -} -template -EIGEN_DEVICE_FUNC inline long int parithmetic_shift_right(const long int& a) { - return a >> N; +template +EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) { + return numext::arithmetic_shift_right(a, N); } /** \internal \returns \a a logically shifted by N bits to the right */ -template -EIGEN_DEVICE_FUNC inline int plogical_shift_right(const int& a) { - return static_cast(static_cast(a) >> N); -} -template -EIGEN_DEVICE_FUNC inline long int plogical_shift_right(const long int& a) { - return static_cast(static_cast(a) >> N); +template +EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) { + return numext::logical_shift_right(a, N); } /** \internal \returns \a a shifted by N bits to the left */ -template -EIGEN_DEVICE_FUNC inline int plogical_shift_left(const int& a) { - return a << N; -} -template -EIGEN_DEVICE_FUNC inline long int plogical_shift_left(const long int& a) { - return a << N; +template +EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) { + return numext::logical_shift_left(a, N); } /** \internal \returns the significant and exponent of the underlying floating point numbers @@ -888,17 +878,29 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_trait return a; } +template +struct peven_mask_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) { + 
typedef typename unpacket_traits::type Scalar; + const size_t n = unpacket_traits::size; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n]; + for (size_t i = 0; i < n; ++i) { + memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar)); + } + return ploadu(elements); + } +}; + +template +struct peven_mask_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); } +}; + /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0), where x is the value of all 1-bits. */ template -EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& /*a*/) { - typedef typename unpacket_traits::type Scalar; - const size_t n = unpacket_traits::size; - EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n]; - for (size_t i = 0; i < n; ++i) { - memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar)); - } - return ploadu(elements); +EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) { + return peven_mask_impl::run(a); } /** \internal copy the packet \a from to \a *to, \a to must be properly aligned */ @@ -1011,6 +1013,20 @@ EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { * Special math functions ***************************/ +/** \internal \returns isnan(a) */ +template +EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) { + return pandnot(ptrue(a), pcmp_eq(a, a)); +} + +/** \internal \returns isinf(a) */ +template +EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) { + using Scalar = typename unpacket_traits::type; + constexpr Scalar inf = NumTraits::infinity(); + return pcmp_eq(pabs(a), pset1(inf)); +} + /** \internal \returns the sine of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) { @@ -1084,8 +1100,13 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& /** \internal \returns the exp of \a a (coeff-wise) */ template 
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { - EIGEN_USING_STD(exp); - return exp(a); + return numext::exp(a); +} + +/** \internal \returns the exp2 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) { + return numext::exp2(a); } /** \internal \returns the expm1 of \a a (coeff-wise) */ @@ -1114,11 +1135,12 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& return log10(a); } -/** \internal \returns the log10 of \a a (coeff-wise) */ +/** \internal \returns the log2 of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { - typedef typename internal::unpacket_traits::type Scalar; - return pmul(pset1(Scalar(EIGEN_LOG2E)), plog(a)); + using Scalar = typename internal::unpacket_traits::type; + using RealScalar = typename NumTraits::Real; + return pmul(pset1(Scalar(RealScalar(EIGEN_LOG2E))), plog(a)); } /** \internal \returns the square-root of \a a (coeff-wise) */ @@ -1133,33 +1155,45 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& return numext::cbrt(a); } +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); } +}; + /** \internal \returns the rounded value of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet 
pround(const Packet& a) { - using numext::round; - return round(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) { + return nearest_integer_packetop_impl::run_round(a); } /** \internal \returns the floor of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet& a) { - using numext::floor; - return floor(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) { + return nearest_integer_packetop_impl::run_floor(a); } /** \internal \returns the rounded value of \a a (coeff-wise) with current * rounding mode */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet print(const Packet& a) { - using numext::rint; - return rint(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) { + return nearest_integer_packetop_impl::run_rint(a); } /** \internal \returns the ceil of \a a (coeff-wise) */ template -EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { - using numext::ceil; - return ceil(a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) { + return nearest_integer_packetop_impl::run_ceil(a); +} + +/** \internal \returns the truncation of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) { + return nearest_integer_packetop_impl::run_trunc(a); } template @@ -1227,26 +1261,46 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { typedef typename unpacket_traits::type Scalar; - return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); -} - -template -EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { - typedef typename unpacket_traits::type Scalar; - return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); + return predux_helper(a, 
EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); } -/** \internal \returns the min of the elements of \a a */ +/** \internal \returns the max of the elements of \a a */ template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { typedef typename unpacket_traits::type Scalar; - return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); +} + +template +struct predux_min_max_helper_impl { + using Scalar = typename unpacket_traits::type; + static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits::IsInteger; + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) { + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) { + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) { + return predux_min(a); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) { + return predux_max(a); + } +}; + +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { + return predux_min_max_helper_impl::run_min(a); } template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { - typedef typename unpacket_traits::type Scalar; - return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); + return predux_min_max_helper_impl::run_max(a); } #undef EIGEN_BINARY_OP_NAN_PROPAGATION @@ -1277,29 +1331,61 @@ EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) { * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ -// FMA instructions. 
+template +struct pmadd_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { + return padd(pmul(a, b), c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) { + return psub(pmul(a, b), c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { + return psub(c, pmul(a, b)); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { + return pnegate(pmadd(a, b, c)); + } +}; + +template +struct pmadd_impl::value && NumTraits::IsSigned>> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(a, b, c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(a, b, Scalar(-c)); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(Scalar(-a), b, c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) { + return -Scalar(numext::madd(a, b, c)); + } +}; + +// Multiply-add instructions. 
/** \internal \returns a * b + c (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { - return padd(pmul(a, b), c); + return pmadd_impl::pmadd(a, b, c); } /** \internal \returns a * b - c (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) { - return psub(pmul(a, b), c); + return pmadd_impl::pmsub(a, b, c); } /** \internal \returns -(a * b) + c (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { - return padd(pnegate(pmul(a, b)), c); + return pmadd_impl::pnmadd(a, b, c); } -/** \internal \returns -(a * b) - c (coeff-wise) */ +/** \internal \returns -(a * b + c) (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { - return psub(pnegate(pmul(a, b)), c); + return pmadd_impl::pnmsub(a, b, c); } /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned @@ -1508,6 +1594,107 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) { return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ... } +/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements + * outside this range are not defined.
\a *from does not need to be aligned, and can be null if \a count is zero.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + using Scalar = typename unpacket_traits::type; + constexpr Index PacketSize = unpacket_traits::size; + eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); + Scalar aux[PacketSize] = {}; + for (Index k = begin; k < begin + count; k++) { + aux[k] = from[k]; + } + return ploadu(aux); +} + +/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements + * outside this range are not defined. \a *from must be aligned, and cannot be null.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + return ploaduSegment(from, begin, count); +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be +null if \a count is zero.*/ +template +EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) { + constexpr Index PacketSize = unpacket_traits::size; + eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); + Scalar aux[PacketSize]; + pstoreu(aux, from); + for (Index k = begin; k < begin + count; k++) { + to[k] = aux[k]; + } +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be +null.*/ +template +EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) { + return pstoreuSegment(to, from, begin, count); +} + +/** \internal \returns a packet populated with values in the range [begin, begin + count). 
Elements + * outside this range are not defined.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + constexpr int RequiredAlignment = unpacket_traits::alignment; + if (Alignment >= RequiredAlignment) { + return ploadSegment(from, begin, count); + } else { + return ploaduSegment(from, begin, count); + } +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined.*/ +template +EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) { + constexpr int RequiredAlignment = unpacket_traits::alignment; + if (Alignment >= RequiredAlignment) { + pstoreSegment(to, from, begin, count); + } else { + pstoreuSegment(to, from, begin, count); + } +} + +#ifndef EIGEN_NO_IO + +template +class StreamablePacket { + public: + using Scalar = typename unpacket_traits::type; + StreamablePacket(const Packet& packet) { pstoreu(v_, packet); } + + friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) { + os << "{" << packet.v_[0]; + for (int i = 1; i < unpacket_traits::size; ++i) { + os << "," << packet.v_[i]; + } + os << "}"; + return os; + } + + private: + Scalar v_[unpacket_traits::size]; +}; + +/** + * \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging. 
+ */ +template +StreamablePacket postream(const Packet& packet) { + return StreamablePacket(packet); +} + +#endif // EIGEN_NO_IO + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index f0ae5a8..df1098e 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -76,6 +76,7 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf, scalar_erf_op, error function,\sa ArrayBas EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc, scalar_erfc_op, complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri, scalar_ndtri_op, inverse normal distribution function,\sa ArrayBase::ndtri) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp, scalar_exp_op, exponential,\sa ArrayBase::exp) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp2, scalar_exp2_op, exponential,\sa ArrayBase::exp2) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1, scalar_expm1_op, exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log, scalar_log_op, natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p, scalar_log1p_op, natural logarithm of 1 plus the value,\sa ArrayBase::log1p) @@ -98,9 +99,12 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint, scalar_rint_op, EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round, scalar_round_op, nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - floor, scalar_floor_op, nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + floor, scalar_floor_op, nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( - ceil, scalar_ceil_op, nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + ceil, scalar_ceil_op, nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(trunc, 
scalar_trunc_op, + nearest integer not greater in magnitude than the given value,\sa Eigen::trunc DOXCOMMA + ArrayBase::trunc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( isnan, scalar_isnan_op, not -a - number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index ca5f247..0a1b583 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -68,8 +68,8 @@ struct IOFormat { // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline // don't add rowSpacer if columns are not to be aligned if ((flags & DontAlignCols)) return; - int i = int(matSuffix.length()) - 1; - while (i >= 0 && matSuffix[i] != '\n') { + int i = int(matPrefix.length()) - 1; + while (i >= 0 && matPrefix[i] != '\n') { rowSpacer += ' '; i--; } diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h index b90ecb1..10562c1 100644 --- a/Eigen/src/Core/IndexedView.h +++ b/Eigen/src/Core/IndexedView.h @@ -20,8 +20,8 @@ namespace internal { template struct traits> : traits { enum { - RowsAtCompileTime = int(array_size::value), - ColsAtCompileTime = int(array_size::value), + RowsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), + ColsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, @@ -30,8 +30,8 @@ struct traits> : traits { : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 : XprTypeIsRowMajor, - RowIncr = int(get_compile_time_incr::value), - ColIncr = int(get_compile_time_incr::value), + RowIncr = int(IndexedViewHelper::IncrAtCompileTime), + ColIncr = int(IndexedViewHelper::IncrAtCompileTime), InnerIncr = IsRowMajor ? ColIncr : RowIncr, OuterIncr = IsRowMajor ? 
RowIncr : ColIncr, @@ -47,24 +47,23 @@ struct traits> : traits { is_same, std::conditional_t>::value, InnerStrideAtCompileTime = - InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == UndefinedIncr + InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined ? Dynamic : XprInnerStride * InnerIncr, OuterStrideAtCompileTime = - OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == UndefinedIncr + OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined ? Dynamic : XprOuterstride * OuterIncr, - ReturnAsScalar = is_same::value && is_same::value, + ReturnAsScalar = is_single_range::value && is_single_range::value, ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, // but this is too strict regarding negative strides... - DirectAccessMask = - (int(InnerIncr) != UndefinedIncr && int(OuterIncr) != UndefinedIncr && InnerIncr >= 0 && OuterIncr >= 0) - ? DirectAccessBit - : 0, + DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0) + ? DirectAccessBit + : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? 
LinearAccessBit : 0, @@ -153,10 +152,10 @@ class IndexedViewImpl : public internal::generic_xpr_base::size(m_rowIndices); } /** \returns number of columns */ - Index cols() const { return internal::index_list_size(m_colIndices); } + Index cols() const { return IndexedViewHelper::size(m_colIndices); } /** \returns the nested expression */ const internal::remove_all_t& nestedExpression() const { return m_xpr; } @@ -198,16 +197,16 @@ class IndexedViewImpl IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {} Index rowIncrement() const { - if (traits::RowIncr != DynamicIndex && traits::RowIncr != UndefinedIncr) { + if (traits::RowIncr != DynamicIndex && traits::RowIncr != Undefined) { return traits::RowIncr; } - return get_runtime_incr(this->rowIndices()); + return IndexedViewHelper::incr(this->rowIndices()); } Index colIncrement() const { - if (traits::ColIncr != DynamicIndex && traits::ColIncr != UndefinedIncr) { + if (traits::ColIncr != DynamicIndex && traits::ColIncr != Undefined) { return traits::ColIncr; } - return get_runtime_incr(this->colIndices()); + return IndexedViewHelper::incr(this->colIndices()); } Index innerIncrement() const { return traits::IsRowMajor ? 
colIncrement() : rowIncrement(); } @@ -226,14 +225,14 @@ class IndexedViewImpl return this->nestedExpression().data() + row_offset + col_offset; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { if (traits::InnerStrideAtCompileTime != Dynamic) { return traits::InnerStrideAtCompileTime; } return innerIncrement() * this->nestedExpression().innerStride(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { if (traits::OuterStrideAtCompileTime != Dynamic) { return traits::OuterStrideAtCompileTime; } @@ -309,6 +308,12 @@ struct unary_evaluator, IndexBased> const XprType& m_xpr; }; +// Catch assignments to an IndexedView. +template +struct evaluator_assume_aliasing> { + static const bool value = true; +}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/InnerProduct.h b/Eigen/src/Core/InnerProduct.h new file mode 100644 index 0000000..9849d9b --- /dev/null +++ b/Eigen/src/Core/InnerProduct.h @@ -0,0 +1,254 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_INNER_PRODUCT_EVAL_H +#define EIGEN_INNER_PRODUCT_EVAL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// recursively searches for the largest simd type that does not exceed Size, or the smallest if no such type exists +template ::type, + bool Stop = + (unpacket_traits::size <= Size) || is_same::half>::value> +struct find_inner_product_packet_helper; + +template +struct find_inner_product_packet_helper { + using type = typename find_inner_product_packet_helper::half>::type; +}; + +template +struct find_inner_product_packet_helper { + using type = Packet; +}; + +template +struct find_inner_product_packet : find_inner_product_packet_helper {}; + +template +struct find_inner_product_packet { + using type = typename packet_traits::type; +}; + +template +struct inner_product_assert { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs) +#ifndef EIGEN_NO_DEBUG + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) { + eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size"); + } +#else + static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {} +#endif +}; + +template +struct inner_product_evaluator { + static constexpr int LhsFlags = evaluator::Flags; + static constexpr int RhsFlags = evaluator::Flags; + static constexpr int SizeAtCompileTime = size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime); + static constexpr int MaxSizeAtCompileTime = + min_size_prefer_fixed(Lhs::MaxSizeAtCompileTime, Rhs::MaxSizeAtCompileTime); + static constexpr int LhsAlignment = evaluator::Alignment; + static constexpr int RhsAlignment = evaluator::Alignment; + + using Scalar = typename Func::result_type; + using Packet = typename find_inner_product_packet::type; + + static constexpr bool Vectorize = + bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess 
&& + ((MaxSizeAtCompileTime == Dynamic) || (unpacket_traits::size <= MaxSizeAtCompileTime)); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs, + Func func = Func()) + : m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) { + inner_product_assert::run(lhs, rhs); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const { + return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_func.packet(m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const { + return m_func.packet(value, m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + const Func m_func; + const evaluator m_lhs; + const evaluator m_rhs; + const variable_if_dynamic m_size; +}; + +template +struct inner_product_impl; + +// scalar loop +template +struct inner_product_impl { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) { + const Index size = eval.size(); + if (size == 0) return Scalar(0); + + Scalar result = eval.coeff(0); + for (Index k = 1; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +// vector loop +template +struct inner_product_impl { + using UnsignedIndex = std::make_unsigned_t; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& 
eval) { + const UnsignedIndex size = static_cast(eval.size()); + if (size < PacketSize) return inner_product_impl::run(eval); + + const UnsignedIndex packetEnd = numext::round_down(size, PacketSize); + const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize); + const UnsignedIndex numPackets = size / PacketSize; + const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize; + + Packet presult0, presult1, presult2, presult3; + + presult0 = eval.template packet(0 * PacketSize); + if (numPackets >= 2) presult1 = eval.template packet(1 * PacketSize); + if (numPackets >= 3) presult2 = eval.template packet(2 * PacketSize); + if (numPackets >= 4) { + presult3 = eval.template packet(3 * PacketSize); + + for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) { + presult0 = eval.packet(presult0, k + 0 * PacketSize); + presult1 = eval.packet(presult1, k + 1 * PacketSize); + presult2 = eval.packet(presult2, k + 2 * PacketSize); + presult3 = eval.packet(presult3, k + 3 * PacketSize); + } + + if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize); + if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize); + if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize); + + presult2 = padd(presult2, presult3); + } + + if (numPackets >= 3) presult1 = padd(presult1, presult2); + if (numPackets >= 2) presult0 = padd(presult0, presult1); + + Scalar result = predux(presult0); + for (UnsignedIndex k = packetEnd; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +template +struct conditional_conj; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return pconj(a); + } +}; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return a; + } +}; + +template +struct scalar_inner_product_op { + using result_type = typename ScalarBinaryOpTraits::ReturnType; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const { + return (conj_helper::coeff(a) * b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a, + const RhsScalar& b) const { + return (conj_helper::coeff(a) * b) + accum; + } + static constexpr bool PacketAccess = false; +}; + +template +struct scalar_inner_product_op { + using result_type = Scalar; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const { + return pmul(conj_helper::coeff(a), b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const { + return pmadd(conj_helper::coeff(a), b, accum); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const { + return pmul(conj_helper::packet(a), b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const { + return pmadd(conj_helper::packet(a), b, accum); + } + static constexpr bool PacketAccess = packet_traits::HasMul && packet_traits::HasAdd; +}; + +template +struct default_inner_product_impl { + using LhsScalar = typename traits::Scalar; + using RhsScalar = typename traits::Scalar; + using Op = scalar_inner_product_op; + using Evaluator = inner_product_evaluator; + using result_type = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase& a, const MatrixBase& b) { + Evaluator eval(a.derived(), b.derived(), Op()); + return 
inner_product_impl::run(eval); + } +}; + +template +struct dot_impl : default_inner_product_impl {}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_INNER_PRODUCT_EVAL_H diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index cfb3b20..79fc3ab 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -51,8 +51,8 @@ class Inverse : public InverseImpl:: explicit EIGEN_DEVICE_FUNC Inverse(const XprType& xpr) : m_xpr(xpr) {} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_xpr.cols(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.rows(); } EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } @@ -92,7 +92,7 @@ struct unary_evaluator > : public evaluator(this, m_result); internal::call_assignment_no_alias(m_result, inv_xpr); } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index df7b7ca..c740da7 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -102,11 +102,11 @@ class Map : public MapBase > { typedef PointerType PointerArgType; EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : internal::traits::OuterStrideAtCompileTime != Dynamic ? 
Index(internal::traits::OuterStrideAtCompileTime) diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index da95b5c..5e3d746 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -84,9 +84,9 @@ class MapBase : public internal::dense_xpr_base : public internal::dense_xpr_base : public MapBase::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 3f28068..155fdad 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -170,8 +170,8 @@ struct imag_ref_default_impl { template struct imag_ref_default_impl { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Scalar run(Scalar&) { return Scalar(0); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline const Scalar run(const Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC constexpr static Scalar run(Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC constexpr static const Scalar run(const Scalar&) { return Scalar(0); } }; template @@ -182,6 +182,40 @@ struct imag_ref_retval { typedef typename NumTraits::Real& type; }; +} // namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t real_ref( + const Scalar& x) { + return internal::real_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { + return EIGEN_MATHFUNC_IMPL(real_ref, 
Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar select(const Scalar& mask, const Scalar& a, const Scalar& b) { + return numext::is_exactly_zero(mask) ? b : a; +} + +} // namespace numext + +namespace internal { + /**************************************************************************** * Implementation of conj * ****************************************************************************/ @@ -221,7 +255,9 @@ template struct abs2_impl_default // IsComplex { typedef typename NumTraits::Real RealScalar; - EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x.real() * x.real() + x.imag() * x.imag(); } + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return numext::real(x) * numext::real(x) + numext::imag(x) * numext::imag(x); + } }; template @@ -250,16 +286,14 @@ struct sqrt_impl { }; // Complex sqrt defined in MathFunctionsImpl.h. -template -EIGEN_DEVICE_FUNC std::complex complex_sqrt(const std::complex& a_x); +template +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x); // Custom implementation is faster than `std::sqrt`, works on // GPU, and correctly handles special cases (unlike MSVC). template struct sqrt_impl> { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex run(const std::complex& x) { - return complex_sqrt(x); - } + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex run(const std::complex& x) { return complex_sqrt(x); } }; template @@ -272,13 +306,13 @@ template struct rsqrt_impl; // Complex rsqrt defined in MathFunctionsImpl.h. 
-template -EIGEN_DEVICE_FUNC std::complex complex_rsqrt(const std::complex& a_x); +template +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x); template struct rsqrt_impl> { EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex run(const std::complex& x) { - return complex_rsqrt(x); + return complex_rsqrt(x); } }; @@ -299,7 +333,7 @@ struct norm1_default_impl { typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { EIGEN_USING_STD(abs); - return abs(x.real()) + abs(x.imag()); + return abs(numext::real(x)) + abs(numext::imag(x)); } }; @@ -469,8 +503,8 @@ struct expm1_retval { ****************************************************************************/ // Complex log defined in MathFunctionsImpl.h. -template -EIGEN_DEVICE_FUNC std::complex complex_log(const std::complex& z); +template +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z); template struct log_impl { @@ -604,7 +638,6 @@ template struct count_bits_impl { static_assert(std::is_integral::value && std::is_unsigned::value, "BitsType must be an unsigned integer"); - static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { int n = CHAR_BIT * sizeof(BitsType); int shift = n / 2; @@ -655,9 +688,9 @@ EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { #if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG template -struct count_bits_impl> { +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned int)>> { static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); - static_assert(std::is_integral::value, "BitsType must be a built-in integer"); static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT; return bits == 0 ? 
kNumBits : __builtin_clz(static_cast(bits)) - kLeadingBitsOffset; @@ -669,10 +702,10 @@ struct count_bits_impl -struct count_bits_impl< - BitsType, std::enable_if_t> { +struct count_bits_impl::value && sizeof(unsigned int) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long)>> { static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); - static_assert(std::is_integral::value, "BitsType must be a built-in integer"); static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT; return bits == 0 ? kNumBits : __builtin_clzl(static_cast(bits)) - kLeadingBitsOffset; @@ -684,10 +717,10 @@ struct count_bits_impl< }; template -struct count_bits_impl> { +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long long)>> { static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); - static_assert(std::is_integral::value, "BitsType must be a built-in integer"); static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT; return bits == 0 ? 
kNumBits : __builtin_clzll(static_cast(bits)) - kLeadingBitsOffset; @@ -701,9 +734,9 @@ struct count_bits_impl -struct count_bits_impl> { +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned long)>> { static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); - static_assert(std::is_integral::value, "BitsType must be a built-in integer"); static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { unsigned long out; _BitScanReverse(&out, static_cast(bits)); @@ -720,10 +753,10 @@ struct count_bits_impl -struct count_bits_impl< - BitsType, std::enable_if_t> { +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(__int64)>> { static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); - static_assert(std::is_integral::value, "BitsType must be a built-in integer"); static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { unsigned long out; _BitScanReverse64(&out, static_cast(bits)); @@ -742,192 +775,27 @@ struct count_bits_impl< #endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG template -int log2_ceil(BitsType x) { - int n = CHAR_BIT * sizeof(BitsType) - clz(x); - bool powerOfTwo = (x & (x - 1)) == 0; - return x == 0 ? 0 : powerOfTwo ? n - 1 : n; -} - -template -int log2_floor(BitsType x) { - int n = CHAR_BIT * sizeof(BitsType) - clz(x); - return x == 0 ? 
0 : n - 1; -} - -/**************************************************************************** - * Implementation of random * - ****************************************************************************/ - -// return a Scalar filled with numRandomBits beginning from the least significant bit -template -Scalar getRandomBits(int numRandomBits) { - using BitsType = typename numext::get_integer_by_size::unsigned_type; - enum : int { - StdRandBits = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value, - ScalarBits = sizeof(Scalar) * CHAR_BIT - }; - eigen_assert((numRandomBits >= 0) && (numRandomBits <= ScalarBits)); - const BitsType mask = BitsType(-1) >> ((ScalarBits - numRandomBits) & (ScalarBits - 1)); - BitsType randomBits = BitsType(0); - for (int shift = 0; shift < numRandomBits; shift += StdRandBits) { - int r = std::rand(); - randomBits |= static_cast(r) << shift; +struct log_2_impl { + static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) { + const int n = kTotalBits - clz(x); + bool power_of_two = (x & (x - 1)) == 0; + return x == 0 ? 0 : power_of_two ? 
(n - 1) : n; } - // clear the excess bits - randomBits &= mask; - return numext::bit_cast(randomBits); -} - -template -struct random_default_impl {}; - -template -struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; - -template -struct random_retval { - typedef Scalar type; -}; - -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); - -template -struct random_default_impl { - using BitsType = typename numext::get_integer_by_size::unsigned_type; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { - Scalar half_x = Scalar(0.5) * x; - Scalar half_y = Scalar(0.5) * y; - Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits); - // result is in the half-open interval [x, y) -- provided that x < y - return result; - } - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { - const int mantissa_bits = NumTraits::digits() - 1; - return run(x, y, mantissa_bits); - } - static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { - const int mantissa_bits = NumTraits::digits() - 1; - eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissa_bits); - BitsType randomBits = getRandomBits(numRandomBits); - // if fewer than MantissaBits is requested, shift them to the left - randomBits <<= (mantissa_bits - numRandomBits); - // randomBits is in the half-open interval [2,4) - randomBits |= numext::bit_cast(Scalar(2)); - // result is in the half-open interval [-1,1) - Scalar result = numext::bit_cast(randomBits) - Scalar(3); - return result; - } - static EIGEN_DEVICE_FUNC inline Scalar run() { - const int mantissa_bits = NumTraits::digits() - 1; - return run(mantissa_bits); - } -}; - -// TODO: fix this for PPC -template -struct random_longdouble_impl { - enum : int { - Size = sizeof(long double), - MantissaBits = NumTraits::digits() - 1, - LowBits = 
MantissaBits > 64 ? 64 : MantissaBits, - HighBits = MantissaBits > 64 ? MantissaBits - 64 : 0 - }; - static EIGEN_DEVICE_FUNC inline long double run() { - EIGEN_USING_STD(memcpy) - uint64_t randomBits[2]; - long double result = 2.0L; - memcpy(&randomBits, &result, Size); - randomBits[0] |= getRandomBits(LowBits); - randomBits[1] |= getRandomBits(HighBits); - memcpy(&result, &randomBits, Size); - result -= 3.0L; - return result; - } -}; - -// GPUs treat long double as double. -#ifndef EIGEN_GPU_COMPILE_PHASE -template <> -struct random_longdouble_impl { - using Impl = random_impl; - static EIGEN_DEVICE_FUNC inline long double run() { return static_cast(Impl::run()); } -}; - -template <> -struct random_impl { - static EIGEN_DEVICE_FUNC inline long double run(const long double& x, const long double& y) { - long double half_x = 0.5L * x; - long double half_y = 0.5L * y; - long double result = (half_x + half_y) + (half_y - half_x) * run(); - return result; - } - static EIGEN_DEVICE_FUNC inline long double run() { return random_longdouble_impl<>::run(); } -}; -#endif - -template -struct random_default_impl { - using BitsType = typename numext::get_integer_by_size::unsigned_type; - enum : int { ScalarBits = sizeof(Scalar) * CHAR_BIT }; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { - if (y <= x) return x; - const BitsType range = static_cast(y) - static_cast(x) + 1; - // handle edge case where [x,y] spans the entire range of Scalar - if (range == 0) return getRandomBits(ScalarBits); - // calculate the number of random bits needed to fill range - const int numRandomBits = log2_ceil(range); - BitsType randomBits; - do { - randomBits = getRandomBits(numRandomBits); - // if the random draw is outside [0, range), try again (rejection sampling) - // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50% - } while (randomBits >= range); - // Avoid overflow in the case where `x` is negative and there is a large 
range so - // `randomBits` would also be negative if cast to `Scalar` first. - Scalar result = static_cast(static_cast(x) + randomBits); - return result; - } - - static EIGEN_DEVICE_FUNC inline Scalar run() { -#ifdef EIGEN_MAKING_DOCS - return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); -#else - return getRandomBits(ScalarBits); -#endif + static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) { + const int n = kTotalBits - clz(x); + return x == 0 ? 0 : n - 1; } }; -template <> -struct random_impl { - static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) { - if (y <= x) return x; - return run(); - } - static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits(1) ? true : false; } -}; - -template -struct random_default_impl { - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { - return Scalar(random(x.real(), y.real()), random(x.imag(), y.imag())); - } - static EIGEN_DEVICE_FUNC inline Scalar run() { - typedef typename NumTraits::Real RealScalar; - return Scalar(random(), random()); - } -}; - -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) { - return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +template +int log2_ceil(const BitsType& x) { + return log_2_impl::run_ceil(x); } -template -inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() { - return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +template +int log2_floor(const BitsType& x) { + return log_2_impl::run_floor(x); } // Implementation of is* functions @@ -964,8 +832,8 @@ EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits::has_infinity && !Num template EIGEN_DEVICE_FUNC - std::enable_if_t::has_quiet_NaN || std::numeric_limits::has_signaling_NaN), bool> - isnan_impl(const T&) { +std::enable_if_t::has_quiet_NaN || std::numeric_limits::has_signaling_NaN), bool> +isnan_impl(const T&) { return false; } @@ -1012,7 +880,7 @@ struct sign_impl { real_type aa = abs(a); if (aa == 
real_type(0)) return Scalar(0); aa = real_type(1) / aa; - return Scalar(a.real() * aa, a.imag() * aa); + return Scalar(numext::real(a) * aa, numext::imag(a) * aa); } }; @@ -1027,6 +895,25 @@ struct sign_retval { typedef Scalar type; }; +// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC +// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero + +template ::IsInteger> +struct negate_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; } +}; + +template +struct negate_impl { + EIGEN_STATIC_ASSERT((!is_same::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; } +}; + +template +struct negate_retval { + typedef Scalar type; +}; + template ::type>::IsInteger> struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { @@ -1041,6 +928,9 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { EIGEN_USING_STD(round) return round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { + EIGEN_USING_STD(trunc) return trunc(x); + } }; template struct nearest_integer_impl { @@ -1048,8 +938,82 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { return x; } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; } }; +// Extra namespace to prevent leaking std::fma into Eigen::internal. 
+namespace has_fma_detail { + +template +struct has_fma_impl : public std::false_type {}; + +using std::fma; + +template +struct has_fma_impl< + T, std::enable_if_t(), std::declval(), std::declval()))>::value>> + : public std::true_type {}; + +} // namespace has_fma_detail + +template +struct has_fma : public has_fma_detail::has_fma_impl {}; + +// Default implementation. +template +struct fma_impl { + static_assert(has_fma::value, "No function fma(...) for type. Please provide an implementation."); +}; + +// STD or ADL version if it exists. +template +struct fma_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T run(const T& a, const T& b, const T& c) { + using std::fma; + return fma(a, b, c); + } +}; + +#if defined(EIGEN_GPUCC) +template <> +struct has_fma : public true_type {}; + +template <> +struct fma_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float run(const float& a, const float& b, const float& c) { + return ::fmaf(a, b, c); + } +}; + +template <> +struct has_fma : public true_type {}; + +template <> +struct fma_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double run(const double& a, const double& b, const double& c) { + return ::fma(a, b, c); + } +}; +#endif + +// Basic multiply-add. +template +struct madd_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return x * y + z; + } +}; + +// Use FMA if there is a single CPU instruction. 
+#ifdef EIGEN_VECTORIZE_FMA +template +struct madd_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return fma_impl::run(x, y, z); + } +}; +#endif + } // end namespace internal /**************************************************************************** @@ -1185,27 +1149,6 @@ SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax) #endif -template -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); -} - -template -EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t real_ref( - const Scalar& x) { - return internal::real_ref_impl::run(x); -} - -template -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { - return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); -} - -template -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); -} - template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); @@ -1232,6 +1175,11 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); @@ -1334,17 +1282,26 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar round(const Scalar& x) { return internal::nearest_integer_impl::run_round(x); } -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) -#endif - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(floor)(const Scalar& x) { return 
internal::nearest_integer_impl::run_floor(x); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { + return internal::nearest_integer_impl::run_ceil(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(trunc)(const Scalar& x) { + return internal::nearest_integer_impl::run_trunc(x); +} + #if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(trunc, trunc) #endif #if defined(EIGEN_GPUCC) @@ -1352,52 +1309,61 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float& x) { return ::floorf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double floor(const double& x) { return ::floor(x); } -#endif - -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { - return internal::nearest_integer_impl::run_ceil(x); -} - -#if defined(SYCL_DEVICE_ONLY) -SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) -#endif - -#if defined(EIGEN_GPUCC) template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float& x) { return ::ceilf(x); } - template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double ceil(const double& x) { return ::ceil(x); } +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float trunc(const float& x) { + return ::truncf(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) { + return ::trunc(x); +} #endif // Integer division with rounding up. 
// T is assumed to be an integer type with a>=0, and b>0 template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T div_ceil(T a, T b) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T div_ceil(T a, T b) { + using UnsignedT = typename internal::make_unsigned::type; EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) - eigen_assert(a >= 0); - eigen_assert(b > 0); + // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations + const UnsignedT ua = UnsignedT(a); + const UnsignedT ub = UnsignedT(b); // Note: This form is used because it cannot overflow. - return a == 0 ? 0 : (a - 1) / b + 1; + return ua == 0 ? 0 : (ua - 1) / ub + 1; +} + +// Integer round down to nearest power of b +// T is assumed to be an integer type with a>=0, and b>0 +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) { + using UnsignedT = typename internal::make_unsigned::type; + using UnsignedU = typename internal::make_unsigned::type; + EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations + const UnsignedT ua = UnsignedT(a); + const UnsignedU ub = UnsignedU(b); + return ub * (ua / ub); } /** Log base 2 for 32 bits positive integers. * Conveniently returns 0 for x==0. 
*/ -inline int log2(int x) { - eigen_assert(x >= 0); +constexpr int log2(int x) { unsigned int v(x); - static const int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + constexpr int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; v |= v >> 1; v |= v >> 2; v |= v >> 4; @@ -1432,11 +1398,17 @@ SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt) /** \returns the cube root of \a x. **/ template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cbrt(const T& x) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t::IsComplex, T> cbrt(const T& x) { EIGEN_USING_STD(cbrt); return static_cast(cbrt(x)); } +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t::IsComplex, T> cbrt(const T& x) { + EIGEN_USING_STD(pow); + return pow(x, typename NumTraits::Real(1.0 / 3.0)); +} + /** \returns the reciprocal square root of \a x. **/ template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T rsqrt(const T& x) { @@ -1465,17 +1437,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log(const double& x) { #endif template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - std::enable_if_t::IsSigned || NumTraits::IsComplex, typename NumTraits::Real> - abs(const T& x) { +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE std::enable_if_t::IsSigned || NumTraits::IsComplex, typename NumTraits::Real> +abs(const T& x) { EIGEN_USING_STD(abs); return abs(x); } template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - std::enable_if_t::IsSigned || NumTraits::IsComplex), typename NumTraits::Real> - abs(const T& x) { +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE std::enable_if_t::IsSigned || NumTraits::IsComplex), typename NumTraits::Real> +abs(const T& x) { return x; } @@ -1592,6 +1564,63 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::comple } #endif +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp2(const T& x) { + EIGEN_USING_STD(exp2); + 
return exp2(x); +} + +// MSVC screws up some edge-cases for std::exp2(complex). +#ifdef EIGEN_COMP_MSVC +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + EIGEN_USING_STD(exp); + // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised. + // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised. + if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) { + return std::complex(NumTraits::quiet_NaN(), NumTraits::quiet_NaN()); + } + // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified) + // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified) + if ((real_ref(x) == NumTraits::infinity() && !(isfinite)(imag_ref(x)))) { + return std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()); + } + return exp2(x); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp2, exp2) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp2(const float& x) { + return ::exp2f(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp2(const double& x) { + return ::exp2(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + float com = ::exp2f(x.real()); + float res_real = com * ::cosf(static_cast(EIGEN_LN2) * x.imag()); + float res_imag = com * ::sinf(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + double com = ::exp2(x.real()); + double res_real = com * ::cos(static_cast(EIGEN_LN2) * x.imag()); + double res_imag = com * ::sin(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} +#endif + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) { return 
EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); @@ -1881,6 +1910,35 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double& #undef SYCL_SPECIALIZE_BINARY_FUNC #endif +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) { + return a << n; +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) { + using UnsignedScalar = typename numext::get_integer_by_size::unsigned_type; + return bit_cast(bit_cast(a) >> n); +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) { + using SignedScalar = typename numext::get_integer_by_size::signed_type; + return bit_cast(bit_cast(a) >> n); +} + +// Otherwise, rely on template implementation. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) { + return internal::fma_impl::run(x, y, z); +} + +// Multiply-add. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar madd(const Scalar& x, const Scalar& y, const Scalar& z) { + return internal::madd_impl::run(x, y, z); +} + } // end namespace numext namespace internal { diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index 689c6d8..c4b5da3 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -23,12 +23,12 @@ namespace internal { Preconditions: 1. The starting guess provided in approx_a_recip must have at least half the leading mantissa bits in the correct result, such that a single - Newton-Raphson step is sufficient to get within 1-2 ulps of the currect + Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result. 2. If a is zero, approx_a_recip must be infinite with the same sign as a. 3. If a is infinite, approx_a_recip must be zero with the same sign as a. 
- If the preconditions are satisfied, which they are for for the _*_rcp_ps + If the preconditions are satisfied, which they are for the _*_rcp_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles reciprocals of zero, infinity, and NaN. */ @@ -61,12 +61,12 @@ struct generic_reciprocal_newton_step { Preconditions: 1. The starting guess provided in approx_a_recip must have at least half the leading mantissa bits in the correct result, such that a single - Newton-Raphson step is sufficient to get within 1-2 ulps of the currect + Newton-Raphson step is sufficient to get within 1-2 ulps of the correct result. 2. If a is zero, approx_a_recip must be infinite with the same sign as a. 3. If a is infinite, approx_a_recip must be zero with the same sign as a. - If the preconditions are satisfied, which they are for for the _*_rcp_ps + If the preconditions are satisfied, which they are for the _*_rcp_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero, infinity, and NaN. Positive denormals are treated as zero. @@ -76,7 +76,7 @@ struct generic_rsqrt_newton_step { static_assert(Steps > 0, "Steps must be at least 1."); using Scalar = typename unpacket_traits::type; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) { - constexpr Scalar kMinusHalf = Scalar(-1) / Scalar(2); + const Scalar kMinusHalf = Scalar(-1) / Scalar(2); const Packet cst_minus_half = pset1(kMinusHalf); const Packet cst_minus_one = pset1(Scalar(-1)); @@ -112,11 +112,11 @@ struct generic_rsqrt_newton_step { 1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must have at least half the leading mantissa bits in the correct result, such that a single Newton-Raphson step is sufficient to get within 1-2 ulps of - the currect result. + the correct result. 2. If a is zero, approx_rsqrt must be infinite. 3. If a is infinite, approx_rsqrt must be zero. 
- If the preconditions are satisfied, which they are for for the _*_rsqrt_ps + If the preconditions are satisfied, which they are for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero and infinity, and NaN. Positive denormal inputs are treated as zero. @@ -171,8 +171,8 @@ struct hypot_impl { // Generic complex sqrt implementation that correctly handles corner cases // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt -template -EIGEN_DEVICE_FUNC std::complex complex_sqrt(const std::complex& z) { +template +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) { // Computes the principal sqrt of the input. // // For a complex square root of the number x + i*y. We want to find real @@ -194,21 +194,21 @@ EIGEN_DEVICE_FUNC std::complex complex_sqrt(const std::complex& z) { // if x == 0: u = w, v = sign(y) * w // if x > 0: u = w, v = y / (2 * w) // if x < 0: u = |y| / (2 * w), v = sign(y) * w - + using T = typename NumTraits::Real; const T x = numext::real(z); const T y = numext::imag(z); const T zero = T(0); const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y))); - return (numext::isinf)(y) ? std::complex(NumTraits::infinity(), y) - : numext::is_exactly_zero(x) ? std::complex(w, y < zero ? -w : w) - : x > zero ? std::complex(w, y / (2 * w)) - : std::complex(numext::abs(y) / (2 * w), y < zero ? -w : w); + return (numext::isinf)(y) ? ComplexT(NumTraits::infinity(), y) + : numext::is_exactly_zero(x) ? ComplexT(w, y < zero ? -w : w) + : x > zero ? ComplexT(w, y / (2 * w)) + : ComplexT(numext::abs(y) / (2 * w), y < zero ? -w : w); } // Generic complex rsqrt implementation. -template -EIGEN_DEVICE_FUNC std::complex complex_rsqrt(const std::complex& z) { +template +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) { // Computes the principal reciprocal sqrt of the input. // // For a complex reciprocal square root of the number z = x + i*y. 
We want to @@ -230,7 +230,7 @@ EIGEN_DEVICE_FUNC std::complex complex_rsqrt(const std::complex& z) { // if x == 0: u = w / |z|, v = -sign(y) * w / |z| // if x > 0: u = w / |z|, v = -y / (2 * w * |z|) // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z| - + using T = typename NumTraits::Real; const T x = numext::real(z); const T y = numext::imag(z); const T zero = T(0); @@ -239,20 +239,21 @@ EIGEN_DEVICE_FUNC std::complex complex_rsqrt(const std::complex& z) { const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z)); const T woz = w / abs_z; // Corner cases consistent with 1/sqrt(z) on gcc/clang. - return numext::is_exactly_zero(abs_z) ? std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()) - : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex(zero, zero) - : numext::is_exactly_zero(x) ? std::complex(woz, y < zero ? woz : -woz) - : x > zero ? std::complex(woz, -y / (2 * w * abs_z)) - : std::complex(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz); + return numext::is_exactly_zero(abs_z) ? ComplexT(NumTraits::infinity(), NumTraits::quiet_NaN()) + : ((numext::isinf)(x) || (numext::isinf)(y)) ? ComplexT(zero, zero) + : numext::is_exactly_zero(x) ? ComplexT(woz, y < zero ? woz : -woz) + : x > zero ? ComplexT(woz, -y / (2 * w * abs_z)) + : ComplexT(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz); } -template -EIGEN_DEVICE_FUNC std::complex complex_log(const std::complex& z) { +template +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) { // Computes complex log. + using T = typename NumTraits::Real; T a = numext::abs(z); EIGEN_USING_STD(atan2); T b = atan2(z.imag(), z.real()); - return std::complex(numext::log(a), b); + return ComplexT(numext::log(a), b); } } // end namespace internal diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index af6afaf..a2c8eba 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -125,7 +125,7 @@ struct traits> { * coefficients. * *
\anchor fixedsize Fixed-size versus dynamic-size:
- *
Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates + *
Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates * the array of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, * typically up to 4x4, sometimes up to 16x16. Larger matrices should be declared as dynamic-size even if one happens to * know their size at compile-time. @@ -139,7 +139,7 @@ struct traits> { *
\anchor maxrows MaxRows_ and MaxCols_:
*
In most cases, one just leaves these parameters to the default values. * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases - * when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they + * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they * cannot exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case * MaxRows_ and MaxCols_ are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.
* @@ -224,8 +224,6 @@ class Matrix : public PlainObjectBase &other) @@ -250,24 +248,31 @@ class Matrix : public PlainObjectBase::value) - : Base(std::move(other)) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) - EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { +#if defined(EIGEN_INITIALIZE_COEFFS) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default; +#endif + /** \brief Move constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default; + /** \brief Moves the matrix into the other one. + * + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) noexcept( + std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); return *this; } - /** \copydoc PlainObjectBase(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&... args) + /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. + * + * \only_for_vectors + * + * This constructor is for 1D array or vectors with more than 4 coefficients. + * + * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this + * constructor must match the the fixed number of rows (resp. columns) of \c *this. 
+ * * * Example: \include Matrix_variadic_ctor_cxx11.cpp * Output: \verbinclude Matrix_variadic_ctor_cxx11.out @@ -281,6 +286,7 @@ class Matrix : public PlainObjectBase&) @@ -387,8 +393,8 @@ class Matrix : public PlainObjectBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase& other) : Base(other.derived()) {} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const EIGEN_NOEXCEPT { return 1; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const EIGEN_NOEXCEPT { return this->innerSize(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); } /////////// Geometry module /////////// diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 81d5a97..045993d 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -112,7 +112,7 @@ class MatrixBase : public DenseBase { ConstTransposeReturnType> AdjointReturnType; /** \internal Return type of eigenvalues() */ - typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> + typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ typedef CwiseNullaryOp, PlainObject> IdentityReturnType; @@ -280,7 +280,7 @@ class MatrixBase : public DenseBase { * \sa isApprox(), operator!= */ template EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase& other) const { - return cwiseEqual(other).all(); + return (this->rows() == other.rows()) && (this->cols() == other.cols()) && cwiseEqual(other).all(); } /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. 
@@ -289,7 +289,7 @@ class MatrixBase : public DenseBase { * \sa isApprox(), operator== */ template EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase& other) const { - return cwiseNotEqual(other).any(); + return !(*this == other); } NoAlias EIGEN_DEVICE_FUNC noalias(); @@ -373,12 +373,14 @@ class MatrixBase : public DenseBase { template inline JacobiSVD jacobiSvd() const; template - EIGEN_DEPRECATED inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") + inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; template inline BDCSVD bdcSvd() const; template - EIGEN_DEPRECATED inline BDCSVD bdcSvd(unsigned int computationOptions) const; + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") + inline BDCSVD bdcSvd(unsigned int computationOptions) const; /////////// Geometry module /////////// @@ -391,7 +393,8 @@ class MatrixBase : public DenseBase { EIGEN_DEVICE_FUNC inline PlainObject unitOrthogonal(void) const; - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + EIGEN_DEPRECATED_WITH_REASON("Use .canonicalEulerAngles() instead.") + EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; EIGEN_DEVICE_FUNC inline Matrix canonicalEulerAngles(Index a0, Index a1, Index a2) const; @@ -468,7 +471,7 @@ class MatrixBase : public DenseBase { EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm) EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p) - EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex& p) + EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const internal::make_complex_t& p) protected: EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase) diff --git 
a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index ec360eb..2ce83a8 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -45,8 +45,8 @@ class NestByValue : public internal::dense_xpr_base EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_expression.rows(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 2848b78..bf41c3b 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -22,13 +22,13 @@ namespace internal { template ::is_specialized, bool is_integer = NumTraits::IsInteger> struct default_digits_impl { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return std::numeric_limits::digits; } + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::digits; } }; template struct default_digits_impl // Floating point { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { + EIGEN_DEVICE_FUNC constexpr static int run() { using std::ceil; using std::log2; typedef typename NumTraits::Real Real; @@ -39,7 +39,7 @@ struct default_digits_impl // Floating point template struct default_digits_impl // Integer { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return 0; } + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } }; // default implementation of digits10(), based on numeric_limits if specialized, @@ -47,13 +47,13 @@ struct default_digits_impl // Integer template ::is_specialized, bool is_integer = NumTraits::IsInteger> struct 
default_digits10_impl { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return std::numeric_limits::digits10; } + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::digits10; } }; template struct default_digits10_impl // Floating point { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { + EIGEN_DEVICE_FUNC constexpr static int run() { using std::floor; using std::log10; typedef typename NumTraits::Real Real; @@ -64,7 +64,7 @@ struct default_digits10_impl // Floating point template struct default_digits10_impl // Integer { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return 0; } + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } }; // default implementation of max_digits10(), based on numeric_limits if specialized, @@ -72,13 +72,13 @@ struct default_digits10_impl // Integer template ::is_specialized, bool is_integer = NumTraits::IsInteger> struct default_max_digits10_impl { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return std::numeric_limits::max_digits10; } + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::max_digits10; } }; template struct default_max_digits10_impl // Floating point { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { + EIGEN_DEVICE_FUNC constexpr static int run() { using std::ceil; using std::log10; typedef typename NumTraits::Real Real; @@ -89,22 +89,35 @@ struct default_max_digits10_impl // Floating point template struct default_max_digits10_impl // Integer { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int run() { return 0; } + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } }; } // end namespace internal namespace numext { -/** \internal bit-wise cast without changing the underlying bit representation. */ -// TODO: Replace by std::bit_cast (available in C++20) +/** \internal bit-wise cast without changing the underlying bit representation. 
*/ +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + return std::bit_cast(src); +} +#elif EIGEN_HAS_BUILTIN(__builtin_bit_cast) +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) + return __builtin_bit_cast(Tgt, src); +} +#else template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { // The behaviour of memcpy is not specified for non-trivially copyable types - EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED); + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value && std::is_default_constructible::value, - THIS_TYPE_IS_NOT_SUPPORTED); - EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED); + THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) Tgt tgt; // Load src into registers first. This allows the memcpy to be elided by CUDA. @@ -113,8 +126,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { memcpy(static_cast(&tgt), static_cast(&staged), sizeof(Tgt)); return tgt; } +#endif } // namespace numext +// clang-format off /** \class NumTraits * \ingroup Core_Module * @@ -126,48 +141,50 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { * * The provided data consists of: * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, - * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real + * then \c Real is just a typedef to \a T. 
If \a T is `std::complex` then \c Real * is a typedef to \a U. * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. - * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c - * std::complex, Literal is defined as \c U. Of course, this type must be fully compatible with \a T. In doubt, just - * use \a T here. \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you - * don't know what this means, just use \a T here. \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c - * std::complex type, and to 0 otherwise. \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type - * such as \c int, and to \c 0 otherwise. \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of - * the number of CPU cycles needed to by move / add / mul instructions respectively, assuming the data is already stored - * in CPU registers. Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just - * use \c Eigen::HugeCost. \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T - * is unsigned. \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type - * \a T must be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 - * otherwise. \li An epsilon() function which, unlike std::numeric_limits::epsilon(), it returns a - * \a Real instead of a \a T. 
\li A dummy_precision() function returning a weak epsilon value. It is mainly used as a - * default value by the fuzzy comparison operators. \li highest() and lowest() functions returning the highest and - * lowest possible values respectively. \li digits() function returning the number of radix digits (non-sign digits for - * integers, mantissa for floating-point). This is the analogue of std::numeric_limits::digits which is used - * as the default implementation if specialized. \li digits10() function returning the number of decimal digits that can - * be represented without change. This is the analogue of std::numeric_limits::digits10 which is - * used as the default implementation if specialized. \li max_digits10() function returning the number of decimal digits - * required to uniquely represent all distinct values of the type. This is the analogue of std::numeric_limits::max_digits10 + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for + * `std::complex`, Literal is defined as \a U. Of course, this type must be fully compatible with \a T. In doubt, + * just use \a T here. + * \li A typedef \c Nested giving the type to use to nest a value inside of the expression tree. If you don't know what + * this means, just use \a T here. + * \li An enum value \c IsComplex. It is equal to 1 if \a T is a \c std::complex type, and to 0 otherwise. + * \li An enum value \c IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int, and to \c 0 otherwise. + * \li Enum values \c ReadCost, \c AddCost and \c MulCost representing a rough estimate of the number of CPU cycles needed to by + * move / add / mul instructions respectively, assuming the data is already stored in CPU registers. Stay vague here. + * No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * \li An enum value \c IsSigned. 
It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. + * \li An enum value \c RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must be + * called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. + * \li An epsilon() function which, unlike + * `std::numeric_limits::epsilon()`, it returns a \c Real instead of a \a T. + * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default value by the fuzzy + * comparison operators. + * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits() function returning the number of radix digits (non-sign digits for integers, mantissa for floating-point). + * This is the analogue of + * `std::numeric_limits::digits` which is used as the default implementation if specialized. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is the + * analogue of + * `std::numeric_limits::digits10` which is used as the default implementation if specialized. + * \li max_digits10() function returning the number of decimal digits required to uniquely represent all distinct values + * of the type. This is the analogue of `std::numeric_limits::max_digits10` * which is used as the default implementation if specialized. * \li min_exponent() and max_exponent() functions returning the highest and lowest possible values, respectively, * such that the radix raised to the power exponent-1 is a normalized floating-point number. These are equivalent - * to std::numeric_limits::min_exponent/ - * std::numeric_limits::max_exponent. + * to + * `std::numeric_limits::min_exponent`/`std::numeric_limits::max_exponent`. * \li infinity() function returning a representation of positive infinity, if available. - * \li quiet_NaN function returning a non-signaling "not-a-number", if available. 
+ * \li quiet_NaN() function returning a non-signaling "not-a-number", if available. */ - +// clang-format on template struct GenericNumTraits { enum { @@ -185,32 +202,30 @@ struct GenericNumTraits { typedef T Nested; typedef T Literal; - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Real epsilon() { return numext::numeric_limits::epsilon(); } + EIGEN_DEVICE_FUNC constexpr static Real epsilon() { return numext::numeric_limits::epsilon(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int digits10() { return internal::default_digits10_impl::run(); } + EIGEN_DEVICE_FUNC constexpr static int digits10() { return internal::default_digits10_impl::run(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int max_digits10() { - return internal::default_max_digits10_impl::run(); - } + EIGEN_DEVICE_FUNC constexpr static int max_digits10() { return internal::default_max_digits10_impl::run(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int digits() { return internal::default_digits_impl::run(); } + EIGEN_DEVICE_FUNC constexpr static int digits() { return internal::default_digits_impl::run(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int min_exponent() { return numext::numeric_limits::min_exponent; } + EIGEN_DEVICE_FUNC constexpr static int min_exponent() { return numext::numeric_limits::min_exponent; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int max_exponent() { return numext::numeric_limits::max_exponent; } + EIGEN_DEVICE_FUNC constexpr static int max_exponent() { return numext::numeric_limits::max_exponent; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Real dummy_precision() { + EIGEN_DEVICE_FUNC constexpr static Real dummy_precision() { // make sure to override this for floating-point types return Real(0); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T highest() { return (numext::numeric_limits::max)(); } + EIGEN_DEVICE_FUNC constexpr static T highest() { return (numext::numeric_limits::max)(); } - EIGEN_DEVICE_FUNC 
EIGEN_CONSTEXPR static inline T lowest() { return (numext::numeric_limits::lowest)(); } + EIGEN_DEVICE_FUNC constexpr static T lowest() { return (numext::numeric_limits::lowest)(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T infinity() { return numext::numeric_limits::infinity(); } + EIGEN_DEVICE_FUNC constexpr static T infinity() { return numext::numeric_limits::infinity(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline T quiet_NaN() { return numext::numeric_limits::quiet_NaN(); } + EIGEN_DEVICE_FUNC constexpr static T quiet_NaN() { return numext::numeric_limits::quiet_NaN(); } }; template @@ -218,25 +233,23 @@ struct NumTraits : GenericNumTraits {}; template <> struct NumTraits : GenericNumTraits { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline float dummy_precision() { return 1e-5f; } + EIGEN_DEVICE_FUNC constexpr static float dummy_precision() { return 1e-5f; } }; template <> struct NumTraits : GenericNumTraits { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline double dummy_precision() { return 1e-12; } + EIGEN_DEVICE_FUNC constexpr static double dummy_precision() { return 1e-12; } }; // GPU devices treat `long double` as `double`. 
#ifndef EIGEN_GPU_COMPILE_PHASE template <> struct NumTraits : GenericNumTraits { - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline long double dummy_precision() { - return static_cast(1e-15l); - } + EIGEN_DEVICE_FUNC constexpr static long double dummy_precision() { return static_cast(1e-15l); } #if defined(EIGEN_ARCH_PPC) && (__LDBL_MANT_DIG__ == 106) // PowerPC double double causes issues with some values - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline long double epsilon() { + EIGEN_DEVICE_FUNC constexpr static long double epsilon() { // 2^(-(__LDBL_MANT_DIG__)+1) return static_cast(2.4651903288156618919116517665087e-32l); } @@ -250,16 +263,17 @@ struct NumTraits > : GenericNumTraits > typedef typename NumTraits::Literal Literal; enum { IsComplex = 1, + IsSigned = NumTraits::IsSigned, RequireInitialization = NumTraits::RequireInitialization, ReadCost = 2 * NumTraits::ReadCost, AddCost = 2 * NumTraits::AddCost, MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost }; - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Real epsilon() { return NumTraits::epsilon(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Real dummy_precision() { return NumTraits::dummy_precision(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int digits10() { return NumTraits::digits10(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline int max_digits10() { return NumTraits::max_digits10(); } + EIGEN_DEVICE_FUNC constexpr static Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC constexpr static Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC constexpr static int digits10() { return NumTraits::digits10(); } + EIGEN_DEVICE_FUNC constexpr static int max_digits10() { return NumTraits::max_digits10(); } }; template @@ -286,25 +300,19 @@ struct NumTraits > { : ArrayType::SizeAtCompileTime * int(NumTraits::MulCost) }; - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline RealScalar epsilon() { return NumTraits::epsilon(); } - 
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline RealScalar dummy_precision() { - return NumTraits::dummy_precision(); - } + EIGEN_DEVICE_FUNC constexpr static RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC constexpr static RealScalar dummy_precision() { return NumTraits::dummy_precision(); } - EIGEN_CONSTEXPR - static inline int digits10() { return NumTraits::digits10(); } - EIGEN_CONSTEXPR - static inline int max_digits10() { return NumTraits::max_digits10(); } + constexpr static int digits10() { return NumTraits::digits10(); } + constexpr static int max_digits10() { return NumTraits::max_digits10(); } }; template <> struct NumTraits : GenericNumTraits { enum { RequireInitialization = 1, ReadCost = HugeCost, AddCost = HugeCost, MulCost = HugeCost }; - EIGEN_CONSTEXPR - static inline int digits10() { return 0; } - EIGEN_CONSTEXPR - static inline int max_digits10() { return 0; } + constexpr static int digits10() { return 0; } + constexpr static int max_digits10() { return 0; } private: static inline std::string epsilon(); diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h index 7b2c8dc..1f638f9 100644 --- a/Eigen/src/Core/PartialReduxEvaluator.h +++ b/Eigen/src/Core/PartialReduxEvaluator.h @@ -103,19 +103,36 @@ struct packetwise_redux_impl { EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) { if (size == 0) return packetwise_redux_empty_value(func); - const Index size4 = (size - 1) & (~3); + const Index size4 = 1 + numext::round_down(size - 1, 4); PacketType p = eval.template packetByOuterInner(0, 0); - Index i = 1; // This loop is optimized for instruction pipelining: // - each iteration generates two independent instructions // - thanks to branch prediction and out-of-order execution we have independent instructions across loops - for (; i < size4; i += 4) + for (Index i = 1; i < size4; i += 4) p = func.packetOp( p, 
func.packetOp(func.packetOp(eval.template packetByOuterInner(i + 0, 0), eval.template packetByOuterInner(i + 1, 0)), func.packetOp(eval.template packetByOuterInner(i + 2, 0), eval.template packetByOuterInner(i + 3, 0)))); - for (; i < size; ++i) p = func.packetOp(p, eval.template packetByOuterInner(i, 0)); + for (Index i = size4; i < size; ++i) + p = func.packetOp(p, eval.template packetByOuterInner(i, 0)); + return p; + } +}; + +template +struct packetwise_segment_redux_impl { + typedef typename Evaluator::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + template + EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size, Index begin, + Index count) { + if (size == 0) return packetwise_redux_empty_value(func); + + PacketType p = eval.template packetSegmentByOuterInner(0, 0, begin, count); + for (Index i = 1; i < size; ++i) + p = func.packetOp(p, eval.template packetSegmentByOuterInner(i, 0, begin, count)); return p; } }; @@ -174,14 +191,13 @@ struct evaluator > template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packet(Index idx) const { - enum { PacketSize = internal::unpacket_traits::size }; - typedef Block - PanelType; - - PanelType panel(m_arg, Direction == Vertical ? 0 : idx, Direction == Vertical ? idx : 0, - Direction == Vertical ? m_arg.rows() : Index(PacketSize), - Direction == Vertical ? Index(PacketSize) : m_arg.cols()); + static constexpr int PacketSize = internal::unpacket_traits::size; + static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : PacketSize; + static constexpr int PanelCols = Direction == Vertical ? PacketSize : ArgType::ColsAtCompileTime; + using PanelType = Block; + using PanelEvaluator = typename internal::redux_evaluator; + using BinaryOp = typename MemberOp::BinaryOp; + using Impl = internal::packetwise_redux_impl; // FIXME // See bug 1612, currently if PacketSize==1 (i.e. 
complex with 128bits registers) then the storage-order of @@ -189,11 +205,39 @@ struct evaluator > // by pass "vectorization" in this case: if (PacketSize == 1) return internal::pset1(coeff(idx)); - typedef typename internal::redux_evaluator PanelEvaluator; + Index startRow = Direction == Vertical ? 0 : idx; + Index startCol = Direction == Vertical ? idx : 0; + Index numRows = Direction == Vertical ? m_arg.rows() : PacketSize; + Index numCols = Direction == Vertical ? PacketSize : m_arg.cols(); + + PanelType panel(m_arg, startRow, startCol, numRows, numCols); + PanelEvaluator panel_eval(panel); + PacketType p = Impl::template run(panel_eval, m_functor.binaryFunc(), m_arg.outerSize()); + return p; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index i, Index j, Index begin, Index count) const { + return packetSegment(Direction == Vertical ? j : i, begin, count); + } + + template + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packetSegment(Index idx, Index begin, Index count) const { + static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : Dynamic; + static constexpr int PanelCols = Direction == Vertical ? Dynamic : ArgType::ColsAtCompileTime; + using PanelType = Block; + using PanelEvaluator = typename internal::redux_evaluator; + using BinaryOp = typename MemberOp::BinaryOp; + using Impl = internal::packetwise_segment_redux_impl; + + Index startRow = Direction == Vertical ? 0 : idx; + Index startCol = Direction == Vertical ? idx : 0; + Index numRows = Direction == Vertical ? m_arg.rows() : begin + count; + Index numCols = Direction == Vertical ? 
begin + count : m_arg.cols(); + + PanelType panel(m_arg, startRow, startCol, numRows, numCols); PanelEvaluator panel_eval(panel); - typedef typename MemberOp::BinaryOp BinaryOp; - PacketType p = internal::packetwise_redux_impl::template run( - panel_eval, m_functor.binaryFunc(), m_arg.outerSize()); + PacketType p = Impl::template run(panel_eval, m_functor.binaryFunc(), m_arg.outerSize(), begin, count); return p; } diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 6945964..eb8e797 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -170,7 +170,7 @@ class PermutationBase : public EigenBase { * \note \blank \note_try_to_help_rvo */ inline InverseReturnType inverse() const { return InverseReturnType(derived()); } - /** \returns the tranpose permutation matrix. + /** \returns the transpose permutation matrix. * * \note \blank \note_try_to_help_rvo */ @@ -468,17 +468,17 @@ class PermutationWrapper : public PermutationBase -EIGEN_DEVICE_FUNC const Product operator*( +EIGEN_DEVICE_FUNC const Product operator*( const MatrixBase& matrix, const PermutationBase& permutation) { - return Product(matrix.derived(), permutation.derived()); + return Product(matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ template -EIGEN_DEVICE_FUNC const Product operator*( +EIGEN_DEVICE_FUNC const Product operator*( const PermutationBase& permutation, const MatrixBase& matrix) { - return Product(permutation.derived(), matrix.derived()); + return Product(permutation.derived(), matrix.derived()); } template @@ -520,16 +520,16 @@ class InverseImpl : public EigenBase - friend const Product operator*(const MatrixBase& matrix, - const InverseType& trPerm) { - return Product(matrix.derived(), trPerm.derived()); + friend const Product operator*(const MatrixBase& matrix, + const InverseType& trPerm) { + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - const Product operator*(const MatrixBase& matrix) const { - return Product(derived(), matrix.derived()); + const Product operator*(const MatrixBase& matrix) const { + return Product(derived(), matrix.derived()); } }; diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 5f846a0..a78305e 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -80,27 +80,6 @@ struct matrix_swap_impl; } // end namespace internal -#ifdef EIGEN_PARSED_BY_DOXYGEN -namespace doxygen { - -// This is a workaround to doxygen not being able to understand the inheritance logic -// when it is hidden by the dense_xpr_base helper struct. -// Moreover, doxygen fails to include members that are not documented in the declaration body of -// MatrixBase if we inherits MatrixBase >, -// this is why we simply inherits MatrixBase, though this does not make sense. - -/** This class is just a workaround for Doxygen and it does not not actually exist. */ -template -struct dense_xpr_base_dispatcher; -/** This class is just a workaround for Doxygen and it does not not actually exist. 
*/ -template -struct dense_xpr_base_dispatcher> : public MatrixBase {}; -/** This class is just a workaround for Doxygen and it does not not actually exist. */ -template -struct dense_xpr_base_dispatcher> : public ArrayBase {}; - -} // namespace doxygen - /** \class PlainObjectBase * \ingroup Core_Module * \brief %Dense storage base class for matrices and arrays. @@ -113,12 +92,7 @@ struct dense_xpr_base_dispatcher -class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher -#else -template -class PlainObjectBase : public internal::dense_xpr_base::type -#endif -{ +class PlainObjectBase : public internal::dense_xpr_base::type { public: enum { Options = internal::traits::Options }; typedef typename internal::dense_xpr_base::type Base; @@ -188,8 +162,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_storage.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_storage.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_storage.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_storage.cols(); } /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. 
@@ -270,10 +244,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar* data() const { return m_storage.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. * @@ -324,7 +298,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_DEVICE_FUNC inline constexpr void resize(Index size) { + EIGEN_DEVICE_FUNC constexpr void resize(Index size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime == Dynamic || size <= MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && @@ -349,7 +323,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - EIGEN_DEVICE_FUNC inline constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); } + EIGEN_DEVICE_FUNC constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); } /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special * value \c NoChange as in the example below. @@ -359,7 +333,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - EIGEN_DEVICE_FUNC inline constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); } + EIGEN_DEVICE_FUNC constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); } /** Resizes \c *this to have the same dimensions as \a other. * Takes care of doing all the checking that's needed. 
@@ -472,34 +446,19 @@ class PlainObjectBase : public internal::dense_xpr_base::type // Prevent user from trying to instantiate PlainObjectBase objects // by making all its constructor protected. See bug 1074. protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - // FIXME is it still needed ? - /** \internal */ - EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) - : m_storage(internal::constructor_without_unaligned_array_assert()) { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } -#endif - - EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT - : m_storage(std::move(other.m_storage)) {} - - EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() = default; + /** \brief Move constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&&) = default; + /** \brief Move assignment operator */ + EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) noexcept { m_storage = std::move(other.m_storage); return *this; } /** Copy constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other) - : Base(), m_storage(other.m_storage) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase&) = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) - : m_storage(size, rows, cols) { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } + : m_storage(size, rows, cols) {} /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. 
* @@ -540,7 +499,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type eigen_assert(list_size == static_cast(RowsAtCompileTime) || RowsAtCompileTime == Dynamic); resize(list_size, ColsAtCompileTime); if (list.begin()->begin() != nullptr) { - std::copy(list.begin()->begin(), list.begin()->end(), m_storage.data()); + Index index = 0; + for (const Scalar& e : *list.begin()) { + coeffRef(index++) = e; + } } } else { eigen_assert(list.size() == static_cast(RowsAtCompileTime) || RowsAtCompileTime == Dynamic); diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 6bad832..e16c7cc 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -21,7 +21,7 @@ class ProductImpl; namespace internal { template -struct traits > { +struct traits> { typedef remove_all_t LhsCleaned; typedef remove_all_t RhsCleaned; typedef traits LhsTraits; @@ -55,6 +55,129 @@ struct traits > { }; }; +struct TransposeProductEnum { + // convenience enumerations to specialize transposed products + enum : int { + Default = 0x00, + Matrix = 0x01, + Permutation = 0x02, + MatrixMatrix = (Matrix << 8) | Matrix, + MatrixPermutation = (Matrix << 8) | Permutation, + PermutationMatrix = (Permutation << 8) | Matrix + }; +}; +template +struct TransposeKind { + static constexpr int Kind = is_matrix_base_xpr::value ? TransposeProductEnum::Matrix + : is_permutation_base_xpr::value ? 
TransposeProductEnum::Permutation + : TransposeProductEnum::Default; +}; + +template +struct TransposeProductKind { + static constexpr int Kind = (TransposeKind::Kind << 8) | TransposeKind::Kind; +}; + +template ::Kind> +struct product_transpose_helper { + // by default, don't optimize the transposed product + using Derived = Product; + using Scalar = typename Derived::Scalar; + using TransposeType = Transpose; + using ConjugateTransposeType = CwiseUnaryOp, TransposeType>; + using AdjointType = std::conditional_t::IsComplex, ConjugateTransposeType, TransposeType>; + + // return (lhs * rhs)^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(derived); + } + // return (lhs * rhs)^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(TransposeType(derived)); + } +}; + +template +struct product_transpose_helper { + // expand the transposed matrix-matrix product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^H * lhs^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType 
run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), + LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; +template +struct product_transpose_helper { + // expand the transposed permutation-matrix product + using Derived = Product; + + using LhsInverseType = typename PermutationBase::InverseReturnType; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsInverseType(derived.lhs())); + } + // return rhs^H * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), LhsInverseType(derived.lhs())); + } +}; +template +struct product_transpose_helper { + // expand the transposed matrix-permutation product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsInverseType = typename PermutationBase::InverseReturnType; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^-1 * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsInverseType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^-1 * lhs^H + static 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsInverseType(derived.rhs()), LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; + } // end namespace internal /** \class Product @@ -93,17 +216,27 @@ class Product typedef internal::remove_all_t LhsNestedCleaned; typedef internal::remove_all_t RhsNestedCleaned; + using TransposeReturnType = typename internal::product_transpose_helper::TransposeType; + using AdjointReturnType = typename internal::product_transpose_helper::AdjointType; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_rhs.cols(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeReturnType transpose() const { + return internal::product_transpose_helper::run_transpose(*this); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointReturnType adjoint() const { + return internal::product_transpose_helper::run_adjoint(*this); + } + protected: LhsNested m_lhs; RhsNested m_rhs; @@ -112,12 +245,12 @@ class Product namespace internal { template ::ret> -class dense_product_base : public internal::dense_xpr_base >::type {}; +class dense_product_base : public internal::dense_xpr_base>::type {}; 
/** Conversion to scalar for inner-products */ template class dense_product_base - : public internal::dense_xpr_base >::type { + : public internal::dense_xpr_base>::type { typedef Product ProductXpr; typedef typename internal::dense_xpr_base::type Base; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 19c2560..be55be5 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -235,19 +235,20 @@ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assig template struct generic_product_impl { + using impl = default_inner_product_impl; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) = impl::run(lhs, rhs); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) += impl::run(lhs, rhs); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) -= impl::run(lhs, rhs); } }; @@ -282,7 +283,7 @@ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, cons template struct generic_product_impl { template - struct is_row_major : std::conditional_t<(int(T::Flags) & RowMajorBit), internal::true_type, internal::false_type> {}; + struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {}; typedef typename Product::Scalar Scalar; // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose @@ -293,6 +294,7 @@ struct generic_product_impl { } }; struct add { + /** Add to dst. 
*/ template EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; @@ -304,9 +306,12 @@ struct generic_product_impl { dst.const_cast_derived() -= src; } }; + /** Scaled add. */ struct adds { Scalar m_scale; + /** Constructor */ explicit adds(const Scalar& s) : m_scale(s) {} + /** Scaled add to dst. */ template void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += m_scale * src; @@ -440,7 +445,7 @@ struct generic_product_impl::extract(lhs).template conjugateIf(), blas_traits::extract(rhs).template conjugateIf(), func, actualAlpha, - std::conditional_t()); + bool_constant()); } protected: @@ -630,6 +635,24 @@ struct product_evaluator, ProductTag, DenseShape, return packet(row, col); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + PacketType res; + typedef etor_product_packet_impl + PacketImpl; + PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count); + return res; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 
index : 0; + return packetSegment(row, col, begin, count); + } + protected: add_const_on_value_type_t m_lhs; add_const_on_value_type_t m_rhs; @@ -665,6 +688,13 @@ struct etor_product_packet_impl(lhs.coeff(row, Index(UnrollingIndex - 1))), rhs.template packet(Index(UnrollingIndex - 1), col), res); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + etor_product_packet_impl::run_segment( + row, col, lhs, rhs, innerDim, res, begin, count); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex - 1))), + rhs.template packetSegment(Index(UnrollingIndex - 1), col, begin, count), res); + } }; template @@ -676,6 +706,13 @@ struct etor_product_packet_impl(row, Index(UnrollingIndex - 1)), pset1(rhs.coeff(Index(UnrollingIndex - 1), col)), res); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + etor_product_packet_impl::run_segment( + row, col, lhs, rhs, innerDim, res, begin, count); + res = pmadd(lhs.template packetSegment(row, Index(UnrollingIndex - 1), begin, count), + pset1(rhs.coeff(Index(UnrollingIndex - 1), col)), res); + } }; template @@ -684,6 +721,12 @@ struct etor_product_packet_impl { Index /*innerDim*/, Packet& res) { res = pmul(pset1(lhs.coeff(row, Index(0))), rhs.template packet(Index(0), col)); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res, Index begin, + Index count) { + res = pmul(pset1(lhs.coeff(row, Index(0))), + rhs.template packetSegment(Index(0), col, begin, count)); + } }; template @@ -692,6 +735,12 @@ struct etor_product_packet_impl { Index /*innerDim*/, Packet& res) { res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); } + static EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res, Index begin, + Index count) { + res = pmul(lhs.template packetSegment(row, Index(0), begin, count), + pset1(rhs.coeff(Index(0), col))); + } }; template @@ -700,6 +749,11 @@ struct etor_product_packet_impl { const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) { res = pset1(typename unpacket_traits::type(0)); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res, + Index /*begin*/, Index /*count*/) { + res = pset1(typename unpacket_traits::type(0)); + } }; template @@ -708,6 +762,11 @@ struct etor_product_packet_impl { const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) { res = pset1(typename unpacket_traits::type(0)); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res, + Index /*begin*/, Index /*count*/) { + res = pset1(typename unpacket_traits::type(0)); + } }; template @@ -718,6 +777,13 @@ struct etor_product_packet_impl { for (Index i = 0; i < innerDim; ++i) res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + res = pset1(typename unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packetSegment(i, col, begin, count), + res); + } }; template @@ -728,6 +794,13 @@ struct etor_product_packet_impl { for (Index i = 0; i < innerDim; ++i) res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, 
Packet& res, Index begin, Index count) { + res = pset1(typename unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packetSegment(row, i, begin, count), pset1(rhs.coeff(i, col)), + res); + } }; /*************************************************************************** @@ -773,7 +846,7 @@ struct generic_product_impl template static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - selfadjoint_product_impl::run( + selfadjoint_product_impl::run( dst, lhs.nestedExpression(), rhs, alpha); } }; @@ -785,7 +858,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - selfadjoint_product_impl::run( + selfadjoint_product_impl::run( dst, lhs, rhs.nestedExpression(), alpha); } }; @@ -810,7 +883,7 @@ struct diagonal_product_evaluator_base : evaluator_base { : (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor : MatrixFlags & RowMajorBit ? RowMajor : ColMajor, - SameStorageOrder_ = StorageOrder_ == (MatrixFlags & RowMajorBit ? RowMajor : ColMajor), + SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? 
RowMajor : ColMajor), ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) || (int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)), @@ -866,6 +939,26 @@ struct diagonal_product_evaluator_base : evaluator_base { m_diagImpl.template packet(id)); } + template + EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count, + internal::true_type) const { + return internal::pmul(m_matImpl.template packetSegment(row, col, begin, count), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count, + internal::false_type) const { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = plain_enum_min( + LoadMode, + ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + }; + return internal::pmul(m_matImpl.template packetSegment(row, col, begin, count), + m_diagImpl.template packetSegment(id, begin, count)); + } + evaluator m_diagImpl; evaluator m_matImpl; }; @@ -887,7 +980,8 @@ struct product_evaluator, ProductTag, DiagonalSha typedef typename XprType::PlainObject PlainObject; typedef typename Lhs::DiagonalVectorType DiagonalType; - enum { StorageOrder = Base::StorageOrder_ }; + static constexpr int StorageOrder = Base::StorageOrder_; + using IsRowMajor_t = bool_constant; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} @@ -900,8 +994,7 @@ struct product_evaluator, ProductTag, DiagonalSha EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. // See also similar calls below. 
- return this->template packet_impl( - row, col, row, std::conditional_t()); + return this->template packet_impl(row, col, row, IsRowMajor_t()); } template @@ -909,6 +1002,19 @@ struct product_evaluator, ProductTag, DiagonalSha return packet(int(StorageOrder) == ColMajor ? idx : 0, int(StorageOrder) == ColMajor ? 0 : idx); } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_segment_impl(row, col, row, begin, count, IsRowMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const { + return packetSegment(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx, + begin, count); + } #endif }; @@ -928,7 +1034,8 @@ struct product_evaluator, ProductTag, DenseShape, typedef Product XprType; typedef typename XprType::PlainObject PlainObject; - enum { StorageOrder = Base::StorageOrder_ }; + static constexpr int StorageOrder = Base::StorageOrder_; + using IsColMajor_t = bool_constant; EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {} @@ -939,14 +1046,23 @@ struct product_evaluator, ProductTag, DenseShape, #ifndef EIGEN_GPUCC template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return this->template packet_impl( - row, col, col, std::conditional_t()); + return this->template packet_impl(row, col, col, IsColMajor_t()); } template EIGEN_STRONG_INLINE PacketType packet(Index idx) const { - return packet(int(StorageOrder) == ColMajor ? idx : 0, - int(StorageOrder) == ColMajor ? 0 : idx); + return packet(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 
0 : idx); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return this->template packet_segment_impl(row, col, col, begin, count, IsColMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const { + return packetSegment(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx, + begin, count); } #endif }; @@ -1148,6 +1264,22 @@ struct generic_product_impl +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/RandomImpl.h b/Eigen/src/Core/RandomImpl.h new file mode 100644 index 0000000..1a82e62 --- /dev/null +++ b/Eigen/src/Core/RandomImpl.h @@ -0,0 +1,262 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_RANDOM_IMPL_H +#define EIGEN_RANDOM_IMPL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/**************************************************************************** + * Implementation of random * + ****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval { + typedef Scalar type; +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +// TODO: replace or provide alternatives to this, e.g. std::random_device +struct eigen_random_device { + using ReturnType = int; + static constexpr int Entropy = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value; + static constexpr ReturnType Highest = RAND_MAX; + static EIGEN_DEVICE_FUNC inline ReturnType run() { return std::rand(); } +}; + +// Fill a built-in unsigned integer with numRandomBits beginning with the least significant bit +template +struct random_bits_impl { + EIGEN_STATIC_ASSERT(std::is_unsigned::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER) + using RandomDevice = eigen_random_device; + using RandomReturnType = typename RandomDevice::ReturnType; + static constexpr int kEntropy = RandomDevice::Entropy; + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + // return a Scalar filled with numRandomBits beginning from the least significant bit + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits)); + const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1)); + Scalar randomBits = 0; + for (int 
shift = 0; shift < numRandomBits; shift += kEntropy) { + RandomReturnType r = RandomDevice::run(); + randomBits |= static_cast(r) << shift; + } + // clear the excess bits + randomBits &= mask; + return randomBits; + } +}; + +template +EIGEN_DEVICE_FUNC inline BitsType getRandomBits(int numRandomBits) { + return random_bits_impl::run(numRandomBits); +} + +// random implementation for a built-in floating point type +template ::value> +struct random_float_impl { + using BitsType = typename numext::get_integer_by_size::unsigned_type; + static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + return digits - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + BitsType randomBits = getRandomBits(numRandomBits); + // if fewer than MantissaBits is requested, shift them to the left + randomBits <<= (mantissaBits() - numRandomBits); + // randomBits is in the half-open interval [2,4) + randomBits |= numext::bit_cast(Scalar(2)); + // result is in the half-open interval [-1,1) + Scalar result = numext::bit_cast(randomBits) - Scalar(3); + return result; + } +}; +// random implementation for a custom floating point type +// uses double as the implementation with a mantissa with a size equal to either the target scalar's mantissa or that of +// double, whichever is smaller +template +struct random_float_impl { + static EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + constexpr int kDoubleDigits = NumTraits::digits(); + return numext::mini(digits, kDoubleDigits) - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + Scalar result = static_cast(random_float_impl::run(numRandomBits)); + return result; + } +}; + +#if !EIGEN_COMP_NVCC +// random implementation for long double +// this specialization is not 
compatible with double-double scalars +template ::digits != (2 * std::numeric_limits::digits)))> +struct random_longdouble_impl { + static constexpr int Size = sizeof(long double); + static constexpr EIGEN_DEVICE_FUNC int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + EIGEN_USING_STD(memcpy); + int numLowBits = numext::mini(numRandomBits, 64); + int numHighBits = numext::maxi(numRandomBits - 64, 0); + uint64_t randomBits[2]; + long double result = 2.0L; + memcpy(&randomBits, &result, Size); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + randomBits[0] |= getRandomBits(numLowBits); + randomBits[1] |= getRandomBits(numHighBits); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + randomBits[0] |= getRandomBits(numHighBits); + randomBits[1] |= getRandomBits(numLowBits); +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif + memcpy(&result, &randomBits, Size); + result -= 3.0L; + return result; + } +}; +template <> +struct random_longdouble_impl { + static constexpr EIGEN_DEVICE_FUNC int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + return static_cast(random_float_impl::run(numRandomBits)); + } +}; +template <> +struct random_float_impl : random_longdouble_impl<> {}; +#endif + +template +struct random_default_impl { + using Impl = random_float_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + Scalar half_x = Scalar(0.5) * x; + Scalar half_y = Scalar(0.5) * y; + Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits); + // result is in the half-open interval [x, y) -- provided that x < y + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return run(x, y, Impl::mantissaBits()); + } + static EIGEN_DEVICE_FUNC inline 
Scalar run(int numRandomBits) { return Impl::run(numRandomBits); } + static EIGEN_DEVICE_FUNC inline Scalar run() { return run(Impl::mantissaBits()); } +}; + +template ::IsSigned, bool BuiltIn = std::is_integral::value> +struct random_int_impl; + +// random implementation for a built-in unsigned integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + Scalar range = y - x; + // handle edge case where [x,y] spans the entire range of Scalar + if (range == NumTraits::highest()) return run(); + Scalar count = range + 1; + // calculate the number of random bits needed to fill range + int numRandomBits = log2_ceil(count); + Scalar randomBits; + do { + randomBits = getRandomBits(numRandomBits); + // if the random draw is outside [0, count), try again (rejection sampling) + // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50% + } while (randomBits >= count); + Scalar result = x + randomBits; + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return getRandomBits(kTotalBits); } +}; + +// random implementation for a built-in signed integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + using BitsType = typename make_unsigned::type; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + // Avoid overflow by representing `range` as an unsigned type + BitsType range = static_cast(y) - static_cast(x); + BitsType randomBits = random_int_impl::run(0, range); + // Avoid overflow in the case where `x` is negative and there is a large range so + // `randomBits` would also be negative if cast to `Scalar` first. 
+ Scalar result = static_cast(static_cast(x) + randomBits); + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return static_cast(getRandomBits(kTotalBits)); } +}; + +// todo: custom integers +template +struct random_int_impl { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&, const Scalar&) { return run(); } + static EIGEN_DEVICE_FUNC inline Scalar run() { + eigen_assert(std::false_type::value && "RANDOM FOR CUSTOM INTEGERS NOT YET SUPPORTED"); + return Scalar(0); + } +}; + +template +struct random_default_impl : random_int_impl {}; + +template <> +struct random_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) { + if (y <= x) return x; + return run(); + } + static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits(1) ? true : false; } +}; + +template +struct random_default_impl { + typedef typename NumTraits::Real RealScalar; + using Impl = random_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + return Scalar(Impl::run(x.real(), y.real(), numRandomBits), Impl::run(x.imag(), y.imag(), numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return Scalar(Impl::run(x.real(), y.real()), Impl::run(x.imag(), y.imag())); + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + return Scalar(Impl::run(numRandomBits), Impl::run(numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return Scalar(Impl::run(), Impl::run()); } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_RANDOM_IMPL_H diff --git a/Eigen/src/Core/RealView.h b/Eigen/src/Core/RealView.h new file mode 100644 index 0000000..7ba42f9 --- /dev/null +++ b/Eigen/src/Core/RealView.h @@ -0,0 +1,250 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2025 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALVIEW_H +#define EIGEN_REALVIEW_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Vectorized assignment to RealView requires array-oriented access to the real and imaginary components. +// From https://en.cppreference.com/w/cpp/numeric/complex.html: +// For any pointer to an element of an array of std::complex named p and any valid array index i, +// reinterpret_cast(p)[2 * i] is the real part of the complex number p[i], and +// reinterpret_cast(p)[2 * i + 1] is the imaginary part of the complex number p[i]. + +template +struct complex_array_access : std::false_type {}; +template <> +struct complex_array_access> : std::true_type {}; +template <> +struct complex_array_access> : std::true_type {}; +template <> +struct complex_array_access> : std::true_type {}; + +template +struct traits> : public traits { + template + static constexpr int double_size(T size, bool times_two) { + int size_as_int = int(size); + if (size_as_int == Dynamic) return Dynamic; + return times_two ? (2 * size_as_int) : size_as_int; + } + using Base = traits; + using ComplexScalar = typename Base::Scalar; + using Scalar = typename NumTraits::Real; + static constexpr int ActualDirectAccessBit = complex_array_access::value ? DirectAccessBit : 0; + static constexpr int ActualPacketAccessBit = packet_traits::Vectorizable ? 
PacketAccessBit : 0; + static constexpr int FlagMask = + ActualDirectAccessBit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit | LvalueBit; + static constexpr int BaseFlags = int(evaluator::Flags) | int(Base::Flags); + static constexpr int Flags = BaseFlags & FlagMask; + static constexpr bool IsRowMajor = Flags & RowMajorBit; + static constexpr int RowsAtCompileTime = double_size(Base::RowsAtCompileTime, !IsRowMajor); + static constexpr int ColsAtCompileTime = double_size(Base::ColsAtCompileTime, IsRowMajor); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = double_size(Base::MaxRowsAtCompileTime, !IsRowMajor); + static constexpr int MaxColsAtCompileTime = double_size(Base::MaxColsAtCompileTime, IsRowMajor); + static constexpr int MaxSizeAtCompileTime = size_at_compile_time(MaxRowsAtCompileTime, MaxColsAtCompileTime); + static constexpr int OuterStrideAtCompileTime = double_size(outer_stride_at_compile_time::ret, true); + static constexpr int InnerStrideAtCompileTime = inner_stride_at_compile_time::ret; +}; + +template +struct evaluator> : private evaluator { + using BaseEvaluator = evaluator; + using XprType = RealView; + using ExpressionTraits = traits; + using ComplexScalar = typename ExpressionTraits::ComplexScalar; + using ComplexCoeffReturnType = typename BaseEvaluator::CoeffReturnType; + using Scalar = typename ExpressionTraits::Scalar; + + static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor; + static constexpr int Flags = ExpressionTraits::Flags; + static constexpr int CoeffReadCost = BaseEvaluator::CoeffReadCost; + static constexpr int Alignment = BaseEvaluator::Alignment; + + EIGEN_DEVICE_FUNC explicit evaluator(XprType realView) : BaseEvaluator(realView.m_xpr) {} + + template ::value, typename = std::enable_if_t> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { + ComplexCoeffReturnType 
cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col); + Index p = (IsRowMajor ? col : row) & 1; + return p ? numext::real(cscalar) : numext::imag(cscalar); + } + + template ::value, typename = std::enable_if_t> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index row, Index col) const { + ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col); + Index p = (IsRowMajor ? col : row) & 1; + return reinterpret_cast(cscalar)[p]; + } + + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + ComplexScalar& cscalar = BaseEvaluator::coeffRef(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col); + Index p = (IsRowMajor ? col : row) & 1; + return reinterpret_cast(cscalar)[p]; + } + + template ::value, typename = std::enable_if_t> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2); + Index p = index & 1; + return p ? numext::real(cscalar) : numext::imag(cscalar); + } + + template ::value, typename = std::enable_if_t> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { + ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2); + Index p = index & 1; + return reinterpret_cast(cscalar)[p]; + } + + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + ComplexScalar& cscalar = BaseEvaluator::coeffRef(index / 2); + Index p = index & 1; + return reinterpret_cast(cscalar)[p]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + eigen_assert(((IsRowMajor ? 
col : row) % 2 == 0) && "the inner index must be even"); + + Index crow = IsRowMajor ? row : row / 2; + Index ccol = IsRowMajor ? col / 2 : col; + ComplexPacket cpacket = BaseEvaluator::template packet(crow, ccol); + return preinterpret(cpacket); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + eigen_assert((index % 2 == 0) && "the index must be even"); + + Index cindex = index / 2; + ComplexPacket cpacket = BaseEvaluator::template packet(cindex); + return preinterpret(cpacket); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + eigen_assert(((IsRowMajor ? col : row) % 2 == 0) && "the inner index must be even"); + eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even"); + + Index crow = IsRowMajor ? row : row / 2; + Index ccol = IsRowMajor ? 
col / 2 : col; + Index cbegin = begin / 2; + Index ccount = count / 2; + ComplexPacket cpacket = BaseEvaluator::template packetSegment(crow, ccol, cbegin, ccount); + return preinterpret(cpacket); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + eigen_assert((index % 2 == 0) && "the index must be even"); + eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even"); + + Index cindex = index / 2; + Index cbegin = begin / 2; + Index ccount = count / 2; + ComplexPacket cpacket = BaseEvaluator::template packetSegment(cindex, cbegin, ccount); + return preinterpret(cpacket); + } +}; + +} // namespace internal + +template +class RealView : public internal::dense_xpr_base>::type { + using ExpressionTraits = internal::traits; + EIGEN_STATIC_ASSERT(NumTraits::IsComplex, SCALAR MUST BE COMPLEX) + public: + using Scalar = typename ExpressionTraits::Scalar; + using Nested = RealView; + + EIGEN_DEVICE_FUNC explicit RealView(Xpr& xpr) : m_xpr(xpr) {} + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return Xpr::IsRowMajor ? m_xpr.rows() : 2 * m_xpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return Xpr::IsRowMajor ? 2 * m_xpr.cols() : m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return 2 * m_xpr.size(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_xpr.innerStride(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return 2 * m_xpr.outerStride(); } + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { + m_xpr.resize(Xpr::IsRowMajor ? rows : rows / 2, Xpr::IsRowMajor ? 
cols / 2 : cols); + } + EIGEN_DEVICE_FUNC void resize(Index size) { m_xpr.resize(size / 2); } + EIGEN_DEVICE_FUNC Scalar* data() { return reinterpret_cast(m_xpr.data()); } + EIGEN_DEVICE_FUNC const Scalar* data() const { return reinterpret_cast(m_xpr.data()); } + + EIGEN_DEVICE_FUNC RealView(const RealView&) = default; + + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const DenseBase& other); + + protected: + friend struct internal::evaluator>; + Xpr& m_xpr; +}; + +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const DenseBase& other) { + internal::call_assignment(*this, other.derived()); + return *this; +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::RealViewReturnType DenseBase::realView() { + return RealViewReturnType(derived()); +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::ConstRealViewReturnType DenseBase::realView() const { + return ConstRealViewReturnType(derived()); +} + +} // namespace Eigen + +#endif // EIGEN_REALVIEW_H diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 0c5f2d9..4e9ab0e 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -414,6 +414,13 @@ class redux_evaluator : public internal::evaluator { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetByOuterInner(Index outer, Index inner) const { return Base::template packet(IsRowMajor ? outer : inner, IsRowMajor ? 
inner : outer); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegmentByOuterInner(Index outer, Index inner, Index begin, + Index count) const { + return Base::template packetSegment(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer, + begin, count); + } }; } // end namespace internal diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 129bc85..30ec277 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -73,11 +73,11 @@ class RefBase : public MapBase { typedef MapBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index innerStride() const { + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outerStride() const { + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() : int(Flags) & RowMajorBit ? this->cols() @@ -97,11 +97,11 @@ class RefBase : public MapBase { typedef Stride StrideBase; // Resolves inner stride if default 0. - static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveInnerStride(Index inner) { return inner == 0 ? 1 : inner; } + static EIGEN_DEVICE_FUNC constexpr Index resolveInnerStride(Index inner) { return inner == 0 ? 1 : inner; } // Resolves outer stride if default 0. - static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index resolveOuterStride(Index inner, Index outer, Index rows, Index cols, - bool isVectorAtCompileTime, bool isRowMajor) { + static EIGEN_DEVICE_FUNC constexpr Index resolveOuterStride(Index inner, Index outer, Index rows, Index cols, + bool isVectorAtCompileTime, bool isRowMajor) { return outer == 0 ? isVectorAtCompileTime ? inner * rows * cols : isRowMajor ? 
inner * cols : inner * rows : outer; } diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index c01c627..3415045 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -80,16 +80,13 @@ class Replicate : public internal::dense_xpr_base EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) - : m_matrix(matrix), - m_rowFactor(rowFactor), - m_colFactor(colFactor){ - EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)} - - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const { - return m_matrix.rows() * m_rowFactor.value(); + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { + EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_matrix.cols() * m_colFactor.value(); } EIGEN_DEVICE_FUNC const MatrixTypeNested_& nestedExpression() const { return m_matrix; } diff --git a/Eigen/src/Core/Reshaped.h b/Eigen/src/Core/Reshaped.h index b881dd6..22acdc0 100644 --- a/Eigen/src/Core/Reshaped.h +++ b/Eigen/src/Core/Reshaped.h @@ -173,7 +173,7 @@ class ReshapedImpl_dense #ifdef EIGEN_PARSED_BY_DOXYGEN /** \sa MapBase::data() */ - EIGEN_DEVICE_FUNC inline const Scalar* data() const; + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const; EIGEN_DEVICE_FUNC inline Index innerStride() const; EIGEN_DEVICE_FUNC inline Index outerStride() const; #endif @@ -215,10 +215,10 @@ class ReshapedImpl_dense : public MapBasecols() : this->rows()) * m_xpr.innerStride(); } diff --git a/Eigen/src/Core/ReturnByValue.h 
b/Eigen/src/Core/ReturnByValue.h index 3b5e470..892c193 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -58,12 +58,8 @@ class ReturnByValue : public internal::dense_xpr_base >:: EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { static_cast(this)->evalTo(dst); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { - return static_cast(this)->rows(); - } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { - return static_cast(this)->cols(); - } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return static_cast(this)->cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN #define Unusable \ diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 66116aa..d11ba16 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -87,8 +87,8 @@ class Reverse : public internal::dense_xpr_base > EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } @@ -127,19 +127,25 @@ EIGEN_DEVICE_FUNC inline typename DenseBase::ReverseReturnType DenseBas * \sa VectorwiseOp::reverseInPlace(), reverse() */ template EIGEN_DEVICE_FUNC inline void DenseBase::reverseInPlace() { + constexpr int HalfRowsAtCompileTime = RowsAtCompileTime == Dynamic ? Dynamic : RowsAtCompileTime / 2; + constexpr int HalfColsAtCompileTime = ColsAtCompileTime == Dynamic ? 
Dynamic : ColsAtCompileTime / 2; if (cols() > rows()) { Index half = cols() / 2; - leftCols(half).swap(rightCols(half).reverse()); + this->template leftCols(half).swap( + this->template rightCols(half).reverse()); if ((cols() % 2) == 1) { Index half2 = rows() / 2; - col(half).head(half2).swap(col(half).tail(half2).reverse()); + col(half).template head(half2).swap( + col(half).template tail(half2).reverse()); } } else { Index half = rows() / 2; - topRows(half).swap(bottomRows(half).reverse()); + this->template topRows(half).swap( + this->template bottomRows(half).reverse()); if ((rows() % 2) == 1) { Index half2 = cols() / 2; - row(half).head(half2).swap(row(half).tail(half2).reverse()); + row(half).template head(half2).swap( + row(half).template tail(half2).reverse()); } } } diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index 9f46120..61a67c2 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -15,7 +15,7 @@ namespace Eigen { -/** \class Select +/** \typedef Select * \ingroup Core_Module * * \brief Expression of a coefficient wise version of the C++ ternary operator ?: @@ -24,73 +24,16 @@ namespace Eigen { * \tparam ThenMatrixType the type of the \em then expression * \tparam ElseMatrixType the type of the \em else expression * - * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * This type represents an expression of a coefficient wise version of the C++ ternary operator ?:. * It is the return type of DenseBase::select() and most of the time this is the only way it is used. 
* * \sa DenseBase::select(const DenseBase&, const DenseBase&) const */ - -namespace internal { -template -struct traits > : traits { - typedef typename traits::Scalar Scalar; - typedef Dense StorageKind; - typedef typename traits::XprKind XprKind; - typedef typename ConditionMatrixType::Nested ConditionMatrixNested; - typedef typename ThenMatrixType::Nested ThenMatrixNested; - typedef typename ElseMatrixType::Nested ElseMatrixNested; - enum { - RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime, - ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit - }; -}; -} // namespace internal - template -class Select : public internal::dense_xpr_base >::type, - internal::no_assignment_operator { - public: - typedef typename internal::dense_xpr_base