fast_math.hpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_CORE_FAST_MATH_HPP
  45. #define OPENCV_CORE_FAST_MATH_HPP
  46. #include "opencv2/core/cvdef.h"
  47. #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
  48. && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
  49. #include <emmintrin.h>
  50. #endif
  51. //! @addtogroup core_utils
  52. //! @{
  53. /****************************************************************************************\
  54. * fast math *
  55. \****************************************************************************************/
  56. #ifdef __cplusplus
  57. # include <cmath>
  58. #else
  59. # ifdef __BORLANDC__
  60. # include <fastmath.h>
  61. # else
  62. # include <math.h>
  63. # endif
  64. #endif
  65. #ifdef HAVE_TEGRA_OPTIMIZATION
  66. # include "tegra_round.hpp"
  67. #endif
  #if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
  /* Rounding helpers built on GCC-style inline assembly for ARM targets.
   *
   * ARM_ROUND expands to a sequence of declarations followed by a `return`,
   * i.e. it is meant to be used as the whole body of a function returning int.
   *   _src        - floating-point expression to convert
   *   _asm_string - instruction text; it addresses the operands through the
   *                 symbolic names [res] (int output), [temp] (float scratch
   *                 output) and [value] (input).
   */
  #define ARM_ROUND(_src, _asm_string) \
      int cvArmRes; \
      float cvArmTmp; \
      (void)cvArmTmp; \
      __asm__(_asm_string : [res] "=r" (cvArmRes), [temp] "=w" (cvArmTmp) : [value] "w" (_src)); \
      return cvArmRes
  /* Double-precision flavour. The non-clang spelling applies the `P` operand
   * modifier to [value]; the clang spelling omits it. */
  #ifdef __clang__
  #define ARM_ROUND_DBL(x) ARM_ROUND(x, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
  #else
  #define ARM_ROUND_DBL(x) ARM_ROUND(x, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
  #endif
  /* Single-precision flavour. */
  #define ARM_ROUND_FLT(x) ARM_ROUND(x, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  #endif
  /** @brief Rounds a floating-point number to the nearest integer.

  The conversion mechanism is selected at compile time: an SSE2 scalar conversion,
  an x87 load/store pair on 32-bit MSVC, the Tegra or ARM helper macros, `lrint()`,
  or - when none of those is available - adding +/-0.5 and truncating.

  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
  @return value converted to the nearest int. NOTE(review): exact halfway cases may resolve
  differently between the branches (the last-resort branch moves them away from zero).
   */
  CV_INLINE int cvRound( double value )
  {
  #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
      && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
      /* SSE2: place the scalar in a vector register and convert it to int32. */
      return _mm_cvtsd_si32( _mm_set_sd( value ) );
  #elif defined _MSC_VER && defined _M_IX86
      /* 32-bit MSVC: x87 load followed by store-as-integer. */
      int ival;
      __asm
      {
          fld value;
          fistp ival;
      }
      return ival;
  #elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
      defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
      TEGRA_ROUND_DBL(value);
  #elif defined CV_ICC || defined __GNUC__
  # if defined ARM_ROUND_DBL
      /* The macro supplies the rest of the body, including the return. */
      ARM_ROUND_DBL(value);
  # else
      return (int)lrint(value);
  # endif
  #else
      /* It is acceptable for this branch not to follow IEEE754 rounding;
         tests of functions that rely on it should tolerate a +/-1 difference. */
      const double half = value >= 0 ? 0.5 : -0.5;
      return (int)(value + half);
  #endif
  }
  /** @brief Rounds a floating-point number down to the nearest integer not larger than it.

  The returned integer i satisfies:
  \f[i \le \texttt{value} < i+1\f]
  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
  @return The largest int that does not exceed value.
   */
  CV_INLINE int cvFloor( double value )
  {
      /* The cast truncates toward zero, which overshoots for negative non-integers. */
      const int truncated = (int)value;
      return (truncated > value) ? truncated - 1 : truncated;
  }
  /** @brief Rounds floating-point number to the nearest integer not smaller than the original.

  The function computes an integer i such that:
  \f[i-1 < \texttt{value} \le i\f]
  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
  @return The smallest int that is not less than value.
   */
  CV_INLINE int cvCeil( double value )
  {
      /* The cast truncates toward zero, which undershoots for positive non-integers;
         the comparison adds the missing 1 in exactly that case. */
      int i = (int)value;
      return i + (i < value);
  }
  /** @brief Tests whether the argument is Not A Number.

  The value is reinterpreted as a 64-bit pattern. With the sign bit masked off, the upper
  32 bits are compared against 0x7ff00000; a non-zero lower word contributes 1 to that
  comparison so that a pattern equal to 0x7ff00000 in the upper word also counts when any
  low bit is set.

  @param value The input floating-point value
  @return 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 otherwise.
   */
  CV_INLINE int cvIsNaN( double value )
  {
      Cv64suf bits;
      unsigned hi_magnitude;
      unsigned lo_nonzero;

      bits.f = value;
      hi_magnitude = (unsigned)(bits.u >> 32) & 0x7fffffff; /* upper word without the sign */
      lo_nonzero = ((unsigned)bits.u != 0);                 /* 1 when any low bit is set */
      return (hi_magnitude + lo_nonzero) > 0x7ff00000;
  }
  /** @brief Tests whether the argument is an infinity.

  The value is reinterpreted as a 64-bit pattern; it is reported as infinite when the upper
  32 bits, with the sign bit masked off, equal 0x7ff00000 and the lower 32 bits are all zero.

  @param value The input floating-point value
  @return 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
  and 0 otherwise.
   */
  CV_INLINE int cvIsInf( double value )
  {
      Cv64suf bits;
      unsigned hi_magnitude;
      unsigned lo_word;

      bits.f = value;
      hi_magnitude = (unsigned)(bits.u >> 32) & 0x7fffffff; /* upper word without the sign */
      lo_word = (unsigned)bits.u;
      if( hi_magnitude != 0x7ff00000 )
          return 0;
      return lo_word == 0;
  }
  163. #ifdef __cplusplus
  /** @overload
  Single-precision variant: rounds a float to the nearest integer using the same
  compile-time selection of conversion mechanisms as the double version
  (SSE scalar conversion, x87, Tegra/ARM helper macros, `lrintf()`, or +/-0.5 and truncate).
   */
  CV_INLINE int cvRound(float value)
  {
  #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
      && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
      /* SSE: place the scalar in a vector register and convert it to int32. */
      return _mm_cvtss_si32( _mm_set_ss( value ) );
  #elif defined _MSC_VER && defined _M_IX86
      /* 32-bit MSVC: x87 load followed by store-as-integer. */
      int ival;
      __asm
      {
          fld value;
          fistp ival;
      }
      return ival;
  #elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
      defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
      TEGRA_ROUND_FLT(value);
  #elif defined CV_ICC || defined __GNUC__
  # if defined ARM_ROUND_FLT
      /* The macro supplies the rest of the body, including the return. */
      ARM_ROUND_FLT(value);
  # else
      return (int)lrintf(value);
  # endif
  #else
      /* It is acceptable for this branch not to follow IEEE754 rounding;
         tests of functions that rely on it should tolerate a +/-1 difference. */
      const float half = value >= 0 ? 0.5f : -0.5f;
      return (int)(value + half);
  #endif
  }
  /** @overload
  Integer variant: the argument is already integral, so it is returned unchanged.
   */
  CV_INLINE int cvRound( int value )
  {
      return value;
  }
  /** @overload
  Single-precision variant: returns the largest int that does not exceed value.
   */
  CV_INLINE int cvFloor( float value )
  {
      /* The cast truncates toward zero, which overshoots for negative non-integers. */
      const int truncated = (int)value;
      return (truncated > value) ? truncated - 1 : truncated;
  }
  /** @overload
  Integer variant: the argument is already integral, so it is returned unchanged.
   */
  CV_INLINE int cvFloor( int value )
  {
      return value;
  }
  /** @overload
  Single-precision variant: returns the smallest int that is not less than value.
   */
  CV_INLINE int cvCeil( float value )
  {
      /* The cast truncates toward zero, which undershoots for positive non-integers. */
      const int truncated = (int)value;
      return (truncated < value) ? truncated + 1 : truncated;
  }
  /** @overload
  Integer variant: the argument is already integral, so it is returned unchanged.
   */
  CV_INLINE int cvCeil( int value )
  {
      return value;
  }
  /** @overload
  Single-precision variant: the value is reinterpreted as a 32-bit pattern and reported
  as NaN (return value 1) when, with the sign bit masked off, it is greater than
  0x7f800000; otherwise 0 is returned.
   */
  CV_INLINE int cvIsNaN( float value )
  {
      Cv32suf bits;
      unsigned magnitude;

      bits.f = value;
      magnitude = bits.u & 0x7fffffff; /* bit pattern without the sign */
      return magnitude > 0x7f800000;
  }
  /** @overload
  Single-precision variant: the value is reinterpreted as a 32-bit pattern and reported
  as infinite (return value 1) when, with the sign bit masked off, it equals 0x7f800000;
  otherwise 0 is returned.
   */
  CV_INLINE int cvIsInf( float value )
  {
      Cv32suf bits;
      unsigned magnitude;

      bits.f = value;
      magnitude = bits.u & 0x7fffffff; /* bit pattern without the sign */
      return magnitude == 0x7f800000;
  }
  235. #endif // __cplusplus
  236. //! @} core_utils
  237. #endif