intrin.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_HAL_INTRIN_HPP
  45. #define OPENCV_HAL_INTRIN_HPP
  46. #include <cmath>
  47. #include <float.h>
  48. #include <stdlib.h>
  49. #include "opencv2/core/cvdef.h"
  // Small expression helpers; every expansion is fully parenthesized.
  // OPENCV_HAL_ADD(a, b): expands to the sum of both arguments.
  50. #define OPENCV_HAL_ADD(a, b) ((a) + (b))
  // OPENCV_HAL_AND(a, b): expands to the bitwise AND of both arguments.
  51. #define OPENCV_HAL_AND(a, b) ((a) & (b))
  // OPENCV_HAL_NOP(a): identity, expands to the argument itself.
  52. #define OPENCV_HAL_NOP(a) (a)
  // OPENCV_HAL_1ST(a, b): expands to the first argument; the second one is dropped.
  53. #define OPENCV_HAL_1ST(a, b) (a)
  54. // unlike the HAL API, which lives in cv::hal,
  55. // the intrinsics are put into the cv namespace so that
  56. // they are easier to access from within OpenCV code
  57. namespace cv {
  // Everything below (outside of Doxygen runs) lives in a nested namespace whose
  // name depends on the build mode, and that namespace is pulled into cv via a
  // using-directive.
  58. #ifndef CV_DOXYGEN
  59. #ifdef CV_CPU_DISPATCH_MODE
  // Dispatch build: the namespace name is produced by __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  // (__CV_CAT is defined elsewhere; presumably it pastes the two tokens together).
  60. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
  61. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
  62. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  63. #else
  // No dispatch mode defined: use the fixed namespace hal_baseline.
  64. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
  65. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
  66. #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
  67. #endif
  // Open and immediately close the namespace so that it is declared before the
  // using-directive below names it.
  68. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  69. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  70. using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
  // Re-open the namespace for the declarations that follow; the matching
  // CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END appears further down in this file.
  71. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  72. #endif
  73. //! @addtogroup core_hal_intrin
  74. //! @{
  75. //! @cond IGNORED
  // Fallback element-type traits, used for any _Tp without a dedicated
  // specialization. Every associated type is _Tp itself, delta and shift are 0,
  // and all conversion helpers return their argument unchanged.
  template<typename _Tp> struct V_TypeTraits
  {
      typedef _Tp int_type;
      typedef _Tp uint_type;
      typedef _Tp abs_type;
      typedef _Tp sum_type;

      enum { delta = 0, shift = 0 };

      // Returns x as int_type (identity here, since int_type is _Tp).
      static int_type reinterpret_int(_Tp x) { return x; }

      // Returns x as uint_type (identity here, since uint_type is _Tp).
      // Spelled the same way as in the integer specializations of this template.
      static uint_type reinterpret_uint(_Tp x) { return x; }

      // Historical misspelling of reinterpret_uint(); kept as a forwarding alias
      // so that existing callers keep compiling.
      static uint_type reinterpet_uint(_Tp x) { return reinterpret_uint(x); }

      // Returns x converted back to _Tp.
      static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
  };
  87. template<> struct V_TypeTraits<uchar>
  88. {
  89. typedef uchar value_type;
  90. typedef schar int_type;
  91. typedef uchar uint_type;
  92. typedef uchar abs_type;
  93. typedef int sum_type;
  94. typedef ushort w_type;
  95. typedef unsigned q_type;
  96. enum { delta = 128, shift = 8 };
  97. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  98. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  99. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  100. };
  101. template<> struct V_TypeTraits<schar>
  102. {
  103. typedef schar value_type;
  104. typedef schar int_type;
  105. typedef uchar uint_type;
  106. typedef uchar abs_type;
  107. typedef int sum_type;
  108. typedef short w_type;
  109. typedef int q_type;
  110. enum { delta = 128, shift = 8 };
  111. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  112. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  113. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  114. };
  115. template<> struct V_TypeTraits<ushort>
  116. {
  117. typedef ushort value_type;
  118. typedef short int_type;
  119. typedef ushort uint_type;
  120. typedef ushort abs_type;
  121. typedef int sum_type;
  122. typedef unsigned w_type;
  123. typedef uchar nu_type;
  124. enum { delta = 32768, shift = 16 };
  125. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  126. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  127. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  128. };
  129. template<> struct V_TypeTraits<short>
  130. {
  131. typedef short value_type;
  132. typedef short int_type;
  133. typedef ushort uint_type;
  134. typedef ushort abs_type;
  135. typedef int sum_type;
  136. typedef int w_type;
  137. typedef uchar nu_type;
  138. typedef schar n_type;
  139. enum { delta = 128, shift = 8 };
  140. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  141. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  142. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  143. };
  144. template<> struct V_TypeTraits<unsigned>
  145. {
  146. typedef unsigned value_type;
  147. typedef int int_type;
  148. typedef unsigned uint_type;
  149. typedef unsigned abs_type;
  150. typedef unsigned sum_type;
  151. typedef uint64 w_type;
  152. typedef ushort nu_type;
  153. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  154. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  155. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  156. };
  157. template<> struct V_TypeTraits<int>
  158. {
  159. typedef int value_type;
  160. typedef int int_type;
  161. typedef unsigned uint_type;
  162. typedef unsigned abs_type;
  163. typedef int sum_type;
  164. typedef int64 w_type;
  165. typedef short n_type;
  166. typedef ushort nu_type;
  167. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  168. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  169. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  170. };
  171. template<> struct V_TypeTraits<uint64>
  172. {
  173. typedef uint64 value_type;
  174. typedef int64 int_type;
  175. typedef uint64 uint_type;
  176. typedef uint64 abs_type;
  177. typedef uint64 sum_type;
  178. typedef unsigned nu_type;
  179. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  180. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  181. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  182. };
  183. template<> struct V_TypeTraits<int64>
  184. {
  185. typedef int64 value_type;
  186. typedef int64 int_type;
  187. typedef uint64 uint_type;
  188. typedef uint64 abs_type;
  189. typedef int64 sum_type;
  190. typedef int nu_type;
  191. static int_type reinterpret_int(value_type x) { return (int_type)x; }
  192. static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
  193. static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
  194. };
  195. template<> struct V_TypeTraits<float>
  196. {
  197. typedef float value_type;
  198. typedef int int_type;
  199. typedef unsigned uint_type;
  200. typedef float abs_type;
  201. typedef float sum_type;
  202. typedef double w_type;
  203. static int_type reinterpret_int(value_type x)
  204. {
  205. Cv32suf u;
  206. u.f = x;
  207. return u.i;
  208. }
  209. static uint_type reinterpet_uint(value_type x)
  210. {
  211. Cv32suf u;
  212. u.f = x;
  213. return u.u;
  214. }
  215. static value_type reinterpret_from_int(int_type x)
  216. {
  217. Cv32suf u;
  218. u.i = x;
  219. return u.f;
  220. }
  221. };
  222. template<> struct V_TypeTraits<double>
  223. {
  224. typedef double value_type;
  225. typedef int64 int_type;
  226. typedef uint64 uint_type;
  227. typedef double abs_type;
  228. typedef double sum_type;
  229. static int_type reinterpret_int(value_type x)
  230. {
  231. Cv64suf u;
  232. u.f = x;
  233. return u.i;
  234. }
  235. static uint_type reinterpet_uint(value_type x)
  236. {
  237. Cv64suf u;
  238. u.f = x;
  239. return u.u;
  240. }
  241. static value_type reinterpret_from_int(int_type x)
  242. {
  243. Cv64suf u;
  244. u.i = x;
  245. return u.f;
  246. }
  247. };
  // Lane count for elements of type _Tp: nlanes is 16 divided by sizeof(_Tp),
  // i.e. the number of such elements that fit into 16 bytes.
  template <typename _Tp>
  struct V_SIMD128Traits
  {
      enum
      {
          nlanes = 16 / sizeof(_Tp)
      };
  };
  252. //! @endcond
  253. //! @}
  254. #ifndef CV_DOXYGEN
  255. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  256. #endif
  257. }
  258. #ifdef CV_DOXYGEN
  259. # undef CV_SSE2
  260. # undef CV_NEON
  261. # undef CV_VSX
  262. #endif
  263. #if CV_SSE2
  264. #include "opencv2/core/hal/intrin_sse.hpp"
  265. #elif CV_NEON
  266. #include "opencv2/core/hal/intrin_neon.hpp"
  267. #elif CV_VSX
  268. #include "opencv2/core/hal/intrin_vsx.hpp"
  269. #else
  270. #include "opencv2/core/hal/intrin_cpp.hpp"
  271. #endif
  272. //! @addtogroup core_hal_intrin
  273. //! @{
  // Fallback definitions: both feature macros default to 0 when nothing
  // included above has defined them.
  274. #ifndef CV_SIMD128
  275. //! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled); defaults to 0 otherwise
  276. #define CV_SIMD128 0
  277. #endif
  278. #ifndef CV_SIMD128_64F
  279. //! Set to 1 if current intrinsics implementation supports 64-bit float vectors; defaults to 0 otherwise
  280. #define CV_SIMD128_64F 0
  281. #endif
  282. //! @}
  283. //==================================================================================================
  284. //! @cond IGNORED
  285. namespace cv {
  286. #ifndef CV_DOXYGEN
  287. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
  288. #endif
  // Maps an element type R to its vector register types and constructors.
  // Only declared here: there is no generic definition, each supported element
  // type has an explicit specialization below.
  289. template <typename R> struct V_RegTrait128;
  290. template <> struct V_RegTrait128<uchar> {
  291. typedef v_uint8x16 reg;
  292. typedef v_uint16x8 w_reg;
  293. typedef v_uint32x4 q_reg;
  294. typedef v_uint8x16 u_reg;
  295. static v_uint8x16 zero() { return v_setzero_u8(); }
  296. static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
  297. };
  298. template <> struct V_RegTrait128<schar> {
  299. typedef v_int8x16 reg;
  300. typedef v_int16x8 w_reg;
  301. typedef v_int32x4 q_reg;
  302. typedef v_uint8x16 u_reg;
  303. static v_int8x16 zero() { return v_setzero_s8(); }
  304. static v_int8x16 all(schar val) { return v_setall_s8(val); }
  305. };
  306. template <> struct V_RegTrait128<ushort> {
  307. typedef v_uint16x8 reg;
  308. typedef v_uint32x4 w_reg;
  309. typedef v_int16x8 int_reg;
  310. typedef v_uint16x8 u_reg;
  311. static v_uint16x8 zero() { return v_setzero_u16(); }
  312. static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
  313. };
  314. template <> struct V_RegTrait128<short> {
  315. typedef v_int16x8 reg;
  316. typedef v_int32x4 w_reg;
  317. typedef v_uint16x8 u_reg;
  318. static v_int16x8 zero() { return v_setzero_s16(); }
  319. static v_int16x8 all(short val) { return v_setall_s16(val); }
  320. };
  321. template <> struct V_RegTrait128<unsigned> {
  322. typedef v_uint32x4 reg;
  323. typedef v_uint64x2 w_reg;
  324. typedef v_int32x4 int_reg;
  325. typedef v_uint32x4 u_reg;
  326. static v_uint32x4 zero() { return v_setzero_u32(); }
  327. static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
  328. };
  329. template <> struct V_RegTrait128<int> {
  330. typedef v_int32x4 reg;
  331. typedef v_int64x2 w_reg;
  332. typedef v_uint32x4 u_reg;
  333. static v_int32x4 zero() { return v_setzero_s32(); }
  334. static v_int32x4 all(int val) { return v_setall_s32(val); }
  335. };
  336. template <> struct V_RegTrait128<uint64> {
  337. typedef v_uint64x2 reg;
  338. static v_uint64x2 zero() { return v_setzero_u64(); }
  339. static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
  340. };
  341. template <> struct V_RegTrait128<int64> {
  342. typedef v_int64x2 reg;
  343. static v_int64x2 zero() { return v_setzero_s64(); }
  344. static v_int64x2 all(int64 val) { return v_setall_s64(val); }
  345. };
  346. template <> struct V_RegTrait128<float> {
  347. typedef v_float32x4 reg;
  348. typedef v_int32x4 int_reg;
  349. typedef v_float32x4 u_reg;
  350. static v_float32x4 zero() { return v_setzero_f32(); }
  351. static v_float32x4 all(float val) { return v_setall_f32(val); }
  352. };
  // Register traits for double elements; compiled only when CV_SIMD128_64F is non-zero.
  #if CV_SIMD128_64F
  template <>
  struct V_RegTrait128<double>
  {
      typedef v_float64x2 reg;
      typedef v_int32x4 int_reg;
      typedef v_float64x2 u_reg;

      // Returns the result of v_setzero_f64().
      static reg zero()
      {
          return v_setzero_f64();
      }

      // Returns the result of v_setall_f64(value).
      static reg all(double value)
      {
          return v_setall_f64(value);
      }
  };
  #endif
  // Counts the trailing zero bits of a 32-bit value, i.e. returns the index of
  // its lowest set bit.
  //
  // @param value  input bit pattern
  // @return       index (0..31) of the least significant set bit, or 32 when
  //               value is 0. The zero case is defined explicitly on every
  //               code path: __builtin_ctz(0) is undefined, _BitScanForward
  //               leaves its output unspecified for 0, and _tzcnt_u32(0)
  //               already yields 32.
  inline unsigned int trailingZeros32(unsigned int value) {
  #if defined(_MSC_VER)
  #if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
      // _tzcnt_u32 is an x86 intrinsic; old compilers and ARM targets use the bit scan.
      unsigned long index = 0;
      // _BitScanForward returns 0 when no bit is set.
      return _BitScanForward(&index, value) ? (unsigned int)index : 32u;
  #elif defined(__clang__)
      // clang-cl: use the compiler builtin instead of the BMI intrinsic.
      return value ? (unsigned int)__builtin_ctz(value) : 32u;
  #else
      return _tzcnt_u32(value);
  #endif
  #elif defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
      return value ? (unsigned int)__builtin_ctz(value) : 32u;
  #elif defined(__ICC) || defined(__INTEL_COMPILER)
      return value ? (unsigned int)_bit_scan_forward(value) : 32u;
  #else
      // Portable fallback: isolate the lowest set bit and map it to its index
      // with a De Bruijn multiplication and table lookup.
      static const int MultiplyDeBruijnBitPosition[32] = {
          0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
          31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
      if (value == 0)
          return 32u;
      const unsigned int lowestBit = value & (0u - value);
      // Mask to 32 bits before shifting so the index stays within the table
      // even if unsigned int is wider than 32 bits.
      return (unsigned int)MultiplyDeBruijnBitPosition[((lowestBit * 0x077CB531U) & 0xFFFFFFFFU) >> 27];
  #endif
  }
  384. #ifndef CV_DOXYGEN
  385. CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
  386. #endif
  387. } // cv::
  388. //! @endcond
  389. #endif