Jeff 6 years ago
parent
commit
514aca9c87
100 changed files with 52893 additions and 0 deletions
  1. ThresholdTools/ThresholdTools.sln (+31 -0)
  2. ThresholdTools/ThresholdTools/CImgApply.cpp (+36 -0)
  3. ThresholdTools/ThresholdTools/CImgApply.h (+47 -0)
  4. ThresholdTools/ThresholdTools/ThresholdTools.cpp (BIN)
  5. ThresholdTools/ThresholdTools/ThresholdTools.h (BIN)
  6. ThresholdTools/ThresholdTools/ThresholdTools.rc (BIN)
  7. ThresholdTools/ThresholdTools/ThresholdTools.vcxproj (+234 -0)
  8. ThresholdTools/ThresholdTools/ThresholdTools.vcxproj.filters (+78 -0)
  9. ThresholdTools/ThresholdTools/ThresholdToolsDlg.cpp (BIN)
  10. ThresholdTools/ThresholdTools/ThresholdToolsDlg.h (BIN)
  11. ThresholdTools/ThresholdTools/res/ThresholdTools.ico (BIN)
  12. ThresholdTools/ThresholdTools/res/ThresholdTools.rc2 (BIN)
  13. ThresholdTools/ThresholdTools/resource.h (BIN)
  14. ThresholdTools/ThresholdTools/stdafx.cpp (BIN)
  15. ThresholdTools/ThresholdTools/stdafx.h (BIN)
  16. ThresholdTools/ThresholdTools/targetver.h (BIN)
  17. ThresholdTools/include/opencv/cv.h (+73 -0)
  18. ThresholdTools/include/opencv/cv.hpp (+60 -0)
  19. ThresholdTools/include/opencv/cvaux.h (+57 -0)
  20. ThresholdTools/include/opencv/cvaux.hpp (+52 -0)
  21. ThresholdTools/include/opencv/cvwimage.h (+46 -0)
  22. ThresholdTools/include/opencv/cxcore.h (+52 -0)
  23. ThresholdTools/include/opencv/cxcore.hpp (+53 -0)
  24. ThresholdTools/include/opencv/cxeigen.hpp (+48 -0)
  25. ThresholdTools/include/opencv/cxmisc.h (+8 -0)
  26. ThresholdTools/include/opencv/highgui.h (+48 -0)
  27. ThresholdTools/include/opencv/ml.h (+47 -0)
  28. ThresholdTools/include/opencv2/calib3d.hpp (+2458 -0)
  29. ThresholdTools/include/opencv2/calib3d/calib3d.hpp (+48 -0)
  30. ThresholdTools/include/opencv2/calib3d/calib3d_c.h (+427 -0)
  31. ThresholdTools/include/opencv2/core.hpp (+3273 -0)
  32. ThresholdTools/include/opencv2/core/affine.hpp (+678 -0)
  33. ThresholdTools/include/opencv2/core/base.hpp (+772 -0)
  34. ThresholdTools/include/opencv2/core/bufferpool.hpp (+40 -0)
  35. ThresholdTools/include/opencv2/core/check.hpp (+135 -0)
  36. ThresholdTools/include/opencv2/core/core.hpp (+48 -0)
  37. ThresholdTools/include/opencv2/core/core_c.h (+3175 -0)
  38. ThresholdTools/include/opencv2/core/cuda.hpp (+1049 -0)
  39. ThresholdTools/include/opencv2/core/cuda.inl.hpp (+631 -0)
  40. ThresholdTools/include/opencv2/core/cuda_stream_accessor.hpp (+86 -0)
  41. ThresholdTools/include/opencv2/core/cuda_types.hpp (+142 -0)
  42. ThresholdTools/include/opencv2/core/cv_cpu_dispatch.h (+239 -0)
  43. ThresholdTools/include/opencv2/core/cv_cpu_helper.h (+319 -0)
  44. ThresholdTools/include/opencv2/core/cvdef.h (+531 -0)
  45. ThresholdTools/include/opencv2/core/cvstd.hpp (+1040 -0)
  46. ThresholdTools/include/opencv2/core/cvstd.inl.hpp (+280 -0)
  47. ThresholdTools/include/opencv2/core/directx.hpp (+184 -0)
  48. ThresholdTools/include/opencv2/core/eigen.hpp (+280 -0)
  49. ThresholdTools/include/opencv2/core/fast_math.hpp (+271 -0)
  50. ThresholdTools/include/opencv2/core/hal/hal.hpp (+250 -0)
  51. ThresholdTools/include/opencv2/core/hal/interface.h (+182 -0)
  52. ThresholdTools/include/opencv2/core/hal/intrin.hpp (+472 -0)
  53. ThresholdTools/include/opencv2/core/hal/intrin_cpp.hpp (+1988 -0)
  54. ThresholdTools/include/opencv2/core/hal/intrin_neon.hpp (+1345 -0)
  55. ThresholdTools/include/opencv2/core/hal/intrin_sse.hpp (+2234 -0)
  56. ThresholdTools/include/opencv2/core/hal/intrin_vsx.hpp (+917 -0)
  57. ThresholdTools/include/opencv2/core/ippasync.hpp (+195 -0)
  58. ThresholdTools/include/opencv2/core/mat.hpp (+3669 -0)
  59. ThresholdTools/include/opencv2/core/mat.inl.hpp (+3945 -0)
  60. ThresholdTools/include/opencv2/core/matx.hpp (+1476 -0)
  61. ThresholdTools/include/opencv2/core/neon_utils.hpp (+128 -0)
  62. ThresholdTools/include/opencv2/core/ocl.hpp (+842 -0)
  63. ThresholdTools/include/opencv2/core/ocl_genbase.hpp (+69 -0)
  64. ThresholdTools/include/opencv2/core/opengl.hpp (+729 -0)
  65. ThresholdTools/include/opencv2/core/operations.hpp (+538 -0)
  66. ThresholdTools/include/opencv2/core/optim.hpp (+302 -0)
  67. ThresholdTools/include/opencv2/core/ovx.hpp (+28 -0)
  68. ThresholdTools/include/opencv2/core/persistence.hpp (+1361 -0)
  69. ThresholdTools/include/opencv2/core/ptr.inl.hpp (+379 -0)
  70. ThresholdTools/include/opencv2/core/saturate.hpp (+165 -0)
  71. ThresholdTools/include/opencv2/core/softfloat.hpp (+514 -0)
  72. ThresholdTools/include/opencv2/core/sse_utils.hpp (+652 -0)
  73. ThresholdTools/include/opencv2/core/traits.hpp (+397 -0)
  74. ThresholdTools/include/opencv2/core/types.hpp (+2372 -0)
  75. ThresholdTools/include/opencv2/core/types_c.h (+1837 -0)
  76. ThresholdTools/include/opencv2/core/utility.hpp (+1258 -0)
  77. ThresholdTools/include/opencv2/core/utils/filesystem.hpp (+71 -0)
  78. ThresholdTools/include/opencv2/core/utils/logger.defines.hpp (+22 -0)
  79. ThresholdTools/include/opencv2/core/utils/logger.hpp (+87 -0)
  80. ThresholdTools/include/opencv2/core/utils/trace.hpp (+250 -0)
  81. ThresholdTools/include/opencv2/core/va_intel.hpp (+77 -0)
  82. ThresholdTools/include/opencv2/core/version.hpp (+26 -0)
  83. ThresholdTools/include/opencv2/core/vsx_utils.hpp (+1009 -0)
  84. ThresholdTools/include/opencv2/core/wimage.hpp (+603 -0)
  85. ThresholdTools/include/opencv2/cvconfig.h (+248 -0)
  86. ThresholdTools/include/opencv2/dnn.hpp (+64 -0)
  87. ThresholdTools/include/opencv2/dnn/all_layers.hpp (+623 -0)
  88. ThresholdTools/include/opencv2/dnn/dict.hpp (+156 -0)
  89. ThresholdTools/include/opencv2/dnn/dnn.hpp (+860 -0)
  90. ThresholdTools/include/opencv2/dnn/dnn.inl.hpp (+390 -0)
  91. ThresholdTools/include/opencv2/dnn/layer.details.hpp (+78 -0)
  92. ThresholdTools/include/opencv2/dnn/layer.hpp (+85 -0)
  93. ThresholdTools/include/opencv2/dnn/shape_utils.hpp (+211 -0)
  94. ThresholdTools/include/opencv2/features2d.hpp (+1424 -0)
  95. ThresholdTools/include/opencv2/features2d/features2d.hpp (+48 -0)
  96. ThresholdTools/include/opencv2/features2d/hal/interface.h (+33 -0)
  97. ThresholdTools/include/opencv2/flann.hpp (+531 -0)
  98. ThresholdTools/include/opencv2/flann/all_indices.h (+155 -0)
  99. ThresholdTools/include/opencv2/flann/allocator.h (+192 -0)
  100. ThresholdTools/include/opencv2/flann/any.h (+330 -0)

+ 31 - 0
ThresholdTools/ThresholdTools.sln

@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28010.2026
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ThresholdTools", "ThresholdTools\ThresholdTools.vcxproj", "{12274645-7453-491B-8F8F-898729F14928}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{12274645-7453-491B-8F8F-898729F14928}.Debug|x64.ActiveCfg = Debug|x64
+		{12274645-7453-491B-8F8F-898729F14928}.Debug|x64.Build.0 = Debug|x64
+		{12274645-7453-491B-8F8F-898729F14928}.Debug|x86.ActiveCfg = Debug|Win32
+		{12274645-7453-491B-8F8F-898729F14928}.Debug|x86.Build.0 = Debug|Win32
+		{12274645-7453-491B-8F8F-898729F14928}.Release|x64.ActiveCfg = Release|x64
+		{12274645-7453-491B-8F8F-898729F14928}.Release|x64.Build.0 = Release|x64
+		{12274645-7453-491B-8F8F-898729F14928}.Release|x86.ActiveCfg = Release|Win32
+		{12274645-7453-491B-8F8F-898729F14928}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {F09512A4-37BF-4241-A4CC-96806525E5DC}
+	EndGlobalSection
+EndGlobal

+ 36 - 0
ThresholdTools/ThresholdTools/CImgApply.cpp

@@ -0,0 +1,36 @@
+#include "stdafx.h"
+#include "CImgApply.h"
+
+
+CImgApply::CImgApply():m_pLastEditImg(NULL)
+	,m_pCurrentImg(NULL)
+	,m_pOriginalImg(NULL)
+{
+}
+
+
+CImgApply::~CImgApply()
+{
+}
+
+void CImgApply::LoadImage(std::string strImagePath)
+{
+	if ( PathFileExists(strImagePath.c_str()) )
+	{
+		if ( m_pCurrentImg != NULL && m_pCurrentImg->data != NULL )
+		{
+		}
+	}
+	else
+	{
+		OutputDebugString(_T("File does not exist\n"));
+	}
+}
+
+void CImgApply::EditImage(Mat * pSrcImg)
+{
+}
+
+void CImgApply::SaveImage(std::string strSavePath, Mat * pSaveImg)
+{
+}
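
A minimal sketch of how the empty LoadImage / SaveImage stubs above could be filled in with OpenCV 3.4's imgcodecs API; the free-function names here are illustrative, not part of the commit. Note that cv::imread returns an empty Mat on failure, which also covers the missing-file case that PathFileExists guards against.

    #include <opencv2/imgcodecs.hpp>
    #include <string>

    // Load an image from disk; an empty Mat signals a missing or unreadable file.
    bool LoadImageFile(const std::string& path, cv::Mat& out)
    {
        out = cv::imread(path, cv::IMREAD_COLOR);
        return !out.empty();
    }

    // Write an image to disk; cv::imwrite returns false if encoding or saving fails.
    bool SaveImageFile(const std::string& path, const cv::Mat& img)
    {
        return !img.empty() && cv::imwrite(path, img);
    }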

+ 47 - 0
ThresholdTools/ThresholdTools/CImgApply.h

@@ -0,0 +1,47 @@
+#ifndef __IMG_APPLY__
+#define __IMG_APPLY__
+
+#pragma once
+class CImgApply
+{
+public:
+	CImgApply();
+	~CImgApply();
+
+public:
+	// Image currently being displayed;
+	Mat *m_pCurrentImg;
+	// Image from the previous edit;
+	Mat *m_pLastEditImg;
+	// Original image;
+	Mat *m_pOriginalImg;
+	// Edit history;
+	vector<Mat> m_historyEditImg;
+
+
+public:
+	// Load an image;
+	void LoadImage(std::string strImagePath);
+	// Edit an image;
+	void EditImage(Mat *pSrcImg);
+	// Save an image;
+	void SaveImage(std::string strSavePath, Mat *pSaveImg);
+
+public:
+	// Convert the image to grayscale;
+	BOOL EditImg2Gray(Mat *pSrcImg);
+	// Global thresholding;
+	BOOL EditImg2GlobalThreshold(Mat *pSrcImg);
+	// Adaptive thresholding;
+	BOOL EditImg2AdaptiveThreshold(Mat *pSrcImg);
+	// OTSU thresholding;
+	BOOL EditImg2OTSUThreshold(Mat *pSrcImg);
+	// Denoise the image;
+	BOOL EditImgDenoise(Mat *pSrcImg);
+	// Dilate the image;
+	BOOL EditImgDilate(Mat *pSrcImg);
+	// Erode the image;
+	BOOL EditImgErode(Mat *pSrcImg);
+};
+
+#endif
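
The thresholding operations declared above map directly onto OpenCV's imgproc API. A minimal sketch, assuming OpenCV 3.4 and using illustrative free functions in place of the class methods:

    #include <opencv2/imgproc.hpp>

    // Grayscale conversion, the usual first step before thresholding
    // (what EditImg2Gray would need).
    cv::Mat ToGray(const cv::Mat& src)
    {
        cv::Mat gray;
        cv::cvtColor(src, gray, cv::COLOR_BGR2GRAY);
        return gray;
    }

    // Otsu's method picks the global threshold automatically, so the
    // thresh argument (0 here) is ignored.
    cv::Mat OtsuThreshold(const cv::Mat& gray)
    {
        cv::Mat bin;
        cv::threshold(gray, bin, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
        return bin;
    }

    // Adaptive thresholding derives a per-pixel threshold from an 11x11
    // Gaussian-weighted neighbourhood, offset by the constant C = 2.
    cv::Mat AdaptiveThreshold(const cv::Mat& gray)
    {
        cv::Mat bin;
        cv::adaptiveThreshold(gray, bin, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv::THRESH_BINARY, 11, 2);
        return bin;
    }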

BIN
ThresholdTools/ThresholdTools/ThresholdTools.cpp


BIN
ThresholdTools/ThresholdTools/ThresholdTools.h


BIN
ThresholdTools/ThresholdTools/ThresholdTools.rc


+ 234 - 0
ThresholdTools/ThresholdTools/ThresholdTools.vcxproj

@@ -0,0 +1,234 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{12274645-7453-491B-8F8F-898729F14928}</ProjectGuid>
+    <Keyword>MFCProj</Keyword>
+    <RootNamespace>ThresholdTools</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+    <UseOfMfc>Dynamic</UseOfMfc>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <UseOfMfc>Static</UseOfMfc>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>Dynamic</UseOfMfc>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>Dynamic</UseOfMfc>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>..\..\..\bin\$(SolutionName)\</OutDir>
+    <IntDir>$(OutDir)$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>..\..\..\bin\$(SolutionName)\</OutDir>
+    <IntDir>$(OutDir)$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>..\..\..\bin\$(SolutionName)\</OutDir>
+    <IntDir>$(OutDir)$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\include;..\..\..\repos_main\common\STL-findfile</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalLibraryDirectories>..\lib\Debug\</AdditionalLibraryDirectories>
+      <AdditionalDependencies>opencv_core342d.lib;opencv_imgproc342d.lib;opencv_highgui342d.lib;opencv_ml342d.lib;opencv_video342d.lib;opencv_features2d342d.lib;opencv_calib3d342d.lib;opencv_objdetect342d.lib;opencv_flann342d.lib;opencv_dnn342d.lib;opencv_imgcodecs342d.lib;opencv_photo342d.lib;opencv_shape342d.lib;opencv_stitching342d.lib;opencv_superres342d.lib;opencv_videoio342d.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <Midl>
+      <MkTypLibCompatible>false</MkTypLibCompatible>
+      <ValidateAllParameters>true</ValidateAllParameters>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </Midl>
+    <ResourceCompile>
+      <Culture>0x0804</Culture>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\repos_main\common\STL-findfile</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+    </Link>
+    <Midl>
+      <MkTypLibCompatible>false</MkTypLibCompatible>
+      <ValidateAllParameters>true</ValidateAllParameters>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </Midl>
+    <ResourceCompile>
+      <Culture>0x0804</Culture>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\include;..\..\..\repos_main\common\STL-findfile;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>..\lib\Release\</AdditionalLibraryDirectories>
+      <AdditionalDependencies>opencv_core342.lib;opencv_imgproc342.lib;opencv_highgui342.lib;opencv_ml342.lib;opencv_video342.lib;opencv_features2d342.lib;opencv_calib3d342.lib;opencv_objdetect342.lib;opencv_flann342.lib;opencv_dnn342.lib;opencv_imgcodecs342.lib;opencv_photo342.lib;opencv_shape342.lib;opencv_stitching342.lib;opencv_superres342.lib;opencv_videoio342.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <Midl>
+      <MkTypLibCompatible>false</MkTypLibCompatible>
+      <ValidateAllParameters>true</ValidateAllParameters>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </Midl>
+    <ResourceCompile>
+      <Culture>0x0804</Culture>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\repos_main\common\STL-findfile</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+    <Midl>
+      <MkTypLibCompatible>false</MkTypLibCompatible>
+      <ValidateAllParameters>true</ValidateAllParameters>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </Midl>
+    <ResourceCompile>
+      <Culture>0x0804</Culture>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\repos_main\common\STL-findfile\filehelp.h" />
+    <ClInclude Include="..\..\..\repos_main\common\STL-findfile\findfile.h" />
+    <ClInclude Include="CImgApply.h" />
+    <ClInclude Include="Resource.h" />
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+    <ClInclude Include="ThresholdTools.h" />
+    <ClInclude Include="ThresholdToolsDlg.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\repos_main\common\STL-findfile\filehelp.cpp" />
+    <ClCompile Include="..\..\..\repos_main\common\STL-findfile\findfile.cpp" />
+    <ClCompile Include="CImgApply.cpp" />
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="ThresholdTools.cpp" />
+    <ClCompile Include="ThresholdToolsDlg.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="ThresholdTools.rc" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="res\ThresholdTools.rc2" />
+  </ItemGroup>
+  <ItemGroup>
+    <Image Include="res\ThresholdTools.ico" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>

+ 78 - 0
ThresholdTools/ThresholdTools/ThresholdTools.vcxproj.filters

@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="源文件">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="头文件">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="资源文件">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="ThresholdTools.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="ThresholdToolsDlg.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="stdafx.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="targetver.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Resource.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="CImgApply.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\repos_main\common\STL-findfile\filehelp.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\repos_main\common\STL-findfile\findfile.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="ThresholdTools.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="ThresholdToolsDlg.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="CImgApply.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\repos_main\common\STL-findfile\filehelp.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\repos_main\common\STL-findfile\findfile.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="ThresholdTools.rc">
+      <Filter>资源文件</Filter>
+    </ResourceCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="res\ThresholdTools.rc2">
+      <Filter>资源文件</Filter>
+    </None>
+  </ItemGroup>
+  <ItemGroup>
+    <Image Include="res\ThresholdTools.ico">
+      <Filter>资源文件</Filter>
+    </Image>
+  </ItemGroup>
+</Project>

BIN
ThresholdTools/ThresholdTools/ThresholdToolsDlg.cpp


BIN
ThresholdTools/ThresholdTools/ThresholdToolsDlg.h


BIN
ThresholdTools/ThresholdTools/res/ThresholdTools.ico


BIN
ThresholdTools/ThresholdTools/res/ThresholdTools.rc2


BIN
ThresholdTools/ThresholdTools/resource.h


BIN
ThresholdTools/ThresholdTools/stdafx.cpp


BIN
ThresholdTools/ThresholdTools/stdafx.h


BIN
ThresholdTools/ThresholdTools/targetver.h


+ 73 - 0
ThresholdTools/include/opencv/cv.h

@@ -0,0 +1,73 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_CV_H
+#define OPENCV_OLD_CV_H
+
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+    #define __CVSTR2__(x) #x
+    #define __CVSTR1__(x) __CVSTR2__(x)
+    #define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : "
+    #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg))
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+    #define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg))
+#else
+    #define CV_DO_PRAGMA(x)
+    #define CV_MSG_PRAGMA(_msg)
+#endif
+#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x)
+
+//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module")
+
+#include "opencv2/core/core_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+#include "opencv2/photo/photo_c.h"
+#include "opencv2/video/tracking_c.h"
+#include "opencv2/objdetect/objdetect_c.h"
+
+#if !defined(CV_IMPL)
+#define CV_IMPL extern "C"
+#endif //CV_IMPL
+
+#endif // OPENCV_OLD_CV_H
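
The CV_MSG_PRAGMA machinery above relies on the classic two-step stringification idiom: __CVSTR1__ expands its argument first, so __LINE__ becomes a number before the # operator in __CVSTR2__ stringizes it. A standalone sketch of the same idiom (macro names here are illustrative):

    #include <cstdio>

    #define STR2(x) #x      // stringizes the token exactly as written
    #define STR1(x) STR2(x) // expands x first, then stringizes the result

    int main()
    {
        std::printf("%s\n", STR2(__LINE__)); // prints: __LINE__
        std::printf("%s\n", STR1(__LINE__)); // prints the actual line number
        return 0;
    }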

+ 60 - 0
ThresholdTools/include/opencv/cv.hpp

@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_CV_HPP
+#define OPENCV_OLD_CV_HPP
+
+//#if defined(__GNUC__)
+//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
+//#endif
+
+#include "cv.h"
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/photo.hpp"
+#include "opencv2/video.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/calib3d.hpp"
+#include "opencv2/objdetect.hpp"
+
+#endif

+ 57 - 0
ThresholdTools/include/opencv/cvaux.h

@@ -0,0 +1,57 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_AUX_H
+#define OPENCV_OLD_AUX_H
+
+//#if defined(__GNUC__)
+//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
+//#endif
+
+#include "opencv2/core/core_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+#include "opencv2/photo/photo_c.h"
+#include "opencv2/video/tracking_c.h"
+#include "opencv2/objdetect/objdetect_c.h"
+
+#endif
+
+/* End of file. */

+ 52 - 0
ThresholdTools/include/opencv/cvaux.hpp

@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_AUX_HPP
+#define OPENCV_OLD_AUX_HPP
+
+//#if defined(__GNUC__)
+//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
+//#endif
+
+#include "cvaux.h"
+#include "opencv2/core/utility.hpp"
+
+#endif

+ 46 - 0
ThresholdTools/include/opencv/cvwimage.h

@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to
+//  this license.  If you do not agree to this license, do not download,
+//  install, copy or use the software.
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2008, Google, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//  * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//  * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//  * The name of Intel Corporation or contributors may not be used to endorse
+//     or promote products derived from this software without specific
+//     prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+
+
+#ifndef OPENCV_OLD_WIMAGE_HPP
+#define OPENCV_OLD_WIMAGE_HPP
+
+#include "opencv2/core/wimage.hpp"
+
+#endif

+ 52 - 0
ThresholdTools/include/opencv/cxcore.h

@@ -0,0 +1,52 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_CXCORE_H
+#define OPENCV_OLD_CXCORE_H
+
+//#if defined(__GNUC__)
+//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
+//#endif
+
+#include "opencv2/core/core_c.h"
+
+#endif

+ 53 - 0
ThresholdTools/include/opencv/cxcore.hpp

@@ -0,0 +1,53 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_CXCORE_HPP
+#define OPENCV_OLD_CXCORE_HPP
+
+//#if defined(__GNUC__)
+//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
+//#endif
+
+#include "cxcore.h"
+#include "opencv2/core.hpp"
+
+#endif

+ 48 - 0
ThresholdTools/include/opencv/cxeigen.hpp

@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_EIGEN_HPP
+#define OPENCV_OLD_EIGEN_HPP
+
+#include "opencv2/core/eigen.hpp"
+
+#endif

+ 8 - 0
ThresholdTools/include/opencv/cxmisc.h

@@ -0,0 +1,8 @@
+#ifndef OPENCV_OLD_CXMISC_H
+#define OPENCV_OLD_CXMISC_H
+
+#ifdef __cplusplus
+#  include "opencv2/core/utility.hpp"
+#endif
+
+#endif

+ 48 - 0
ThresholdTools/include/opencv/highgui.h

@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_HIGHGUI_H
+#define OPENCV_OLD_HIGHGUI_H
+
+#include "opencv2/core/core_c.h"
+#include "opencv2/highgui/highgui_c.h"
+
+#endif

+ 47 - 0
ThresholdTools/include/opencv/ml.h

@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OLD_ML_H
+#define OPENCV_OLD_ML_H
+
+#include "opencv2/core/core_c.h"
+#include "opencv2/ml.hpp"
+
+#endif

+ 2458 - 0
ThresholdTools/include/opencv2/calib3d.hpp

@@ -0,0 +1,2458 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CALIB3D_HPP
+#define OPENCV_CALIB3D_HPP
+
+#include "opencv2/core.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/core/affine.hpp"
+
+/**
+  @defgroup calib3d Camera Calibration and 3D Reconstruction
+
+The functions in this section use a so-called pinhole camera model. In this model, a scene view is
+formed by projecting 3D points into the image plane using a perspective transformation.
+
+\f[s  \; m' = A [R|t] M'\f]
+
+or
+
+\f[s  \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}
+\begin{bmatrix}
+r_{11} & r_{12} & r_{13} & t_1  \\
+r_{21} & r_{22} & r_{23} & t_2  \\
+r_{31} & r_{32} & r_{33} & t_3
+\end{bmatrix}
+\begin{bmatrix}
+X \\
+Y \\
+Z \\
+1
+\end{bmatrix}\f]
+
+where:
+
+-   \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space
+-   \f$(u, v)\f$ are the coordinates of the projection point in pixels
+-   \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters
+-   \f$(c_x, c_y)\f$ is a principal point that is usually at the image center
+-   \f$f_x, f_y\f$ are the focal lengths expressed in pixel units.
+
+Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled
+(multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not
+depend on the scene viewed. So, once estimated, it can be re-used as long as the focal length is
+fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of
+extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa,
+rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a
+point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above
+is equivalent to the following (when \f$z \ne 0\f$ ):
+
+\f[\begin{array}{l}
+\vecthree{x}{y}{z} = R  \vecthree{X}{Y}{Z} + t \\
+x' = x/z \\
+y' = y/z \\
+u = f_x*x' + c_x \\
+v = f_y*y' + c_y
+\end{array}\f]
+
+The following figure illustrates the pinhole camera model.
+
+![Pinhole camera model](pics/pinhole_camera_model.png)
+
+Real lenses usually have some distortion, mostly radial distortion and slight tangential distortion.
+So, the above model is extended as:
+
+\f[\begin{array}{l}
+\vecthree{x}{y}{z} = R  \vecthree{X}{Y}{Z} + t \\
+x' = x/z \\
+y' = y/z \\
+x'' = x'  \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\
+y'' = y'  \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
+\text{where} \quad r^2 = x'^2 + y'^2  \\
+u = f_x*x'' + c_x \\
+v = f_y*y'' + c_y
+\end{array}\f]
+
+\f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are
+tangential distortion coefficients. \f$s_1\f$, \f$s_2\f$, \f$s_3\f$, and \f$s_4\f$ are the thin prism distortion
+coefficients. Higher-order coefficients are not considered in OpenCV.
+
+The next figure shows two common types of radial distortion: barrel distortion (typically \f$ k_1 > 0 \f$) and pincushion distortion (typically \f$ k_1 < 0 \f$).
+
+![](pics/distortion_examples.png)
+
+In some cases the image sensor may be tilted in order to focus an oblique plane in front of the
+camera (Scheimpflug condition). This can be useful for particle image velocimetry (PIV) or
+triangulation with a laser fan. The tilt causes a perspective distortion of \f$x''\f$ and
+\f$y''\f$. This distortion can be modelled in the following way, see e.g. @cite Louhichi07.
+
+\f[\begin{array}{l}
+s\vecthree{x'''}{y'''}{1} =
+\vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}(\tau_x, \tau_y)}
+{0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
+{0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\
+u = f_x*x''' + c_x \\
+v = f_y*y''' + c_y
+\end{array}\f]
+
+where the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameters \f$\tau_x\f$
+and \f$\tau_y\f$, respectively,
+
+\f[
+R(\tau_x, \tau_y) =
+\vecthreethree{\cos(\tau_y)}{0}{-\sin(\tau_y)}{0}{1}{0}{\sin(\tau_y)}{0}{\cos(\tau_y)}
+\vecthreethree{1}{0}{0}{0}{\cos(\tau_x)}{\sin(\tau_x)}{0}{-\sin(\tau_x)}{\cos(\tau_x)} =
+\vecthreethree{\cos(\tau_y)}{\sin(\tau_y)\sin(\tau_x)}{-\sin(\tau_y)\cos(\tau_x)}
+{0}{\cos(\tau_x)}{\sin(\tau_x)}
+{\sin(\tau_y)}{-\cos(\tau_y)\sin(\tau_x)}{\cos(\tau_y)\cos(\tau_x)}.
+\f]
+
+In the functions below the coefficients are passed or returned as
+
+\f[(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f]
+
+vector. That is, if the vector contains four elements, it means that \f$k_3=0\f$ . The distortion
+coefficients do not depend on the scene viewed. Thus, they also belong to the intrinsic camera
+parameters. And they remain the same regardless of the captured image resolution. If, for example, a
+camera has been calibrated on images of 320 x 240 resolution, exactly the same distortion
+coefficients can be used for 640 x 480 images from the same camera, while \f$f_x\f$, \f$f_y\f$, \f$c_x\f$, and
+\f$c_y\f$ need to be scaled appropriately.
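+
+For example, a 5-element vector covering \f$(k_1, k_2, p_1, p_2, k_3)\f$ can be built like this
+(the values are hypothetical; the omitted higher-order coefficients are implicitly zero):
+
+@code
+    Mat distCoeffs = (Mat_<double>(1, 5) << -0.28, 0.07, 0.001, -0.0005, 0.0);
+@endcode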
+
+The functions below use the above model to do the following:
+
+-   Project 3D points to the image plane given intrinsic and extrinsic parameters.
+-   Compute extrinsic parameters given intrinsic parameters, a few 3D points, and their
+projections.
+-   Estimate intrinsic and extrinsic camera parameters from several views of a known calibration
+pattern (every view is described by several 3D-2D point correspondences).
+-   Estimate the relative position and orientation of the stereo camera "heads" and compute the
+*rectification* transformation that makes the camera optical axes parallel.
+
+@note
+    -   A calibration sample for 3 cameras in horizontal position can be found at
+        opencv_source_code/samples/cpp/3calibration.cpp
+    -   A calibration sample based on a sequence of images can be found at
+        opencv_source_code/samples/cpp/calibration.cpp
+    -   A calibration sample in order to do 3D reconstruction can be found at
+        opencv_source_code/samples/cpp/build3dmodel.cpp
+    -   A calibration sample of an artificially generated camera and chessboard patterns can be
+        found at opencv_source_code/samples/cpp/calibration_artificial.cpp
+    -   A calibration example on stereo calibration can be found at
+        opencv_source_code/samples/cpp/stereo_calib.cpp
+    -   A calibration example on stereo matching can be found at
+        opencv_source_code/samples/cpp/stereo_match.cpp
+    -   (Python) A camera calibration sample can be found at
+        opencv_source_code/samples/python/calibrate.py
+
+  @{
+    @defgroup calib3d_fisheye Fisheye camera model
+
+    Definitions: Let P be a point in 3D of coordinates X in the world reference frame (stored in the
+    matrix X). The coordinate vector of P in the camera reference frame is:
+
+    \f[Xc = R X + T\f]
+
+    where R is the rotation matrix corresponding to the rotation vector om: R = rodrigues(om); call x, y
+    and z the 3 coordinates of Xc:
+
+    \f[x = Xc_1 \\ y = Xc_2 \\ z = Xc_3\f]
+
+    The pinhole projection coordinates of P are [a; b] where
+
+    \f[a = x / z \quad \text{and} \quad b = y / z \\ r^2 = a^2 + b^2 \\ \theta = \arctan(r)\f]
+
+    Fisheye distortion:
+
+    \f[\theta_d = \theta (1 + k_1 \theta^2 + k_2 \theta^4 + k_3 \theta^6 + k_4 \theta^8)\f]
+
+    The distorted point coordinates are [x'; y'] where
+
+    \f[x' = (\theta_d / r) a \\ y' = (\theta_d / r) b \f]
+
+    Finally, conversion into pixel coordinates: the final pixel coordinate vector [u; v] is given by:
+
+    \f[u = f_x (x' + \alpha y') + c_x \\
+    v = f_y y' + c_y\f]
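+
+    A minimal sketch (with hypothetical distortion coefficients) of distorting one camera-frame
+    point with the model above:
+
+    @code
+        double x = 0.1, y = -0.2, z = 1.0;            // coordinates of Xc
+        double a = x / z, b = y / z;
+        double r = std::sqrt(a*a + b*b), theta = std::atan(r);
+        double k1 = 0.02, k2 = 0.001, k3 = 0, k4 = 0; // assumed coefficients
+        double t2 = theta*theta;
+        double theta_d = theta*(1 + k1*t2 + k2*t2*t2 + k3*t2*t2*t2 + k4*t2*t2*t2*t2);
+        double xd = (theta_d / r)*a, yd = (theta_d / r)*b; // distorted coordinates
+    @endcode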
+
+    @defgroup calib3d_c C API
+
+  @}
+ */
+
+namespace cv
+{
+
+//! @addtogroup calib3d
+//! @{
+
+//! type of the robust estimation algorithm
+enum { LMEDS  = 4, //!< least-median of squares algorithm
+       RANSAC = 8, //!< RANSAC algorithm
+       RHO    = 16 //!< RHO algorithm
+     };
+
+enum { SOLVEPNP_ITERATIVE = 0,
+       SOLVEPNP_EPNP      = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
+       SOLVEPNP_P3P       = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
+       SOLVEPNP_DLS       = 3, //!< A Direct Least-Squares (DLS) Method for PnP  @cite hesch2011direct
+       SOLVEPNP_UPNP      = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
+       SOLVEPNP_AP3P      = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17
+       SOLVEPNP_MAX_COUNT      //!< Used for count
+};
+
+enum { CALIB_CB_ADAPTIVE_THRESH = 1,
+       CALIB_CB_NORMALIZE_IMAGE = 2,
+       CALIB_CB_FILTER_QUADS    = 4,
+       CALIB_CB_FAST_CHECK      = 8
+     };
+
+enum { CALIB_CB_SYMMETRIC_GRID  = 1,
+       CALIB_CB_ASYMMETRIC_GRID = 2,
+       CALIB_CB_CLUSTERING      = 4
+     };
+
+enum { CALIB_USE_INTRINSIC_GUESS = 0x00001,
+       CALIB_FIX_ASPECT_RATIO    = 0x00002,
+       CALIB_FIX_PRINCIPAL_POINT = 0x00004,
+       CALIB_ZERO_TANGENT_DIST   = 0x00008,
+       CALIB_FIX_FOCAL_LENGTH    = 0x00010,
+       CALIB_FIX_K1              = 0x00020,
+       CALIB_FIX_K2              = 0x00040,
+       CALIB_FIX_K3              = 0x00080,
+       CALIB_FIX_K4              = 0x00800,
+       CALIB_FIX_K5              = 0x01000,
+       CALIB_FIX_K6              = 0x02000,
+       CALIB_RATIONAL_MODEL      = 0x04000,
+       CALIB_THIN_PRISM_MODEL    = 0x08000,
+       CALIB_FIX_S1_S2_S3_S4     = 0x10000,
+       CALIB_TILTED_MODEL        = 0x40000,
+       CALIB_FIX_TAUX_TAUY       = 0x80000,
+       CALIB_USE_QR              = 0x100000, //!< use QR instead of SVD decomposition for solving. Faster but potentially less precise
+       CALIB_FIX_TANGENT_DIST    = 0x200000,
+       // only for stereo
+       CALIB_FIX_INTRINSIC       = 0x00100,
+       CALIB_SAME_FOCAL_LENGTH   = 0x00200,
+       // for stereo rectification
+       CALIB_ZERO_DISPARITY      = 0x00400,
+       CALIB_USE_LU              = (1 << 17), //!< use LU instead of SVD decomposition for solving. much faster but potentially less precise
+       CALIB_USE_EXTRINSIC_GUESS = (1 << 22), //!< for stereoCalibrate
+     };
+
+//! the algorithm for finding fundamental matrix
+enum { FM_7POINT = 1, //!< 7-point algorithm
+       FM_8POINT = 2, //!< 8-point algorithm
+       FM_LMEDS  = 4, //!< least-median algorithm. 7-point algorithm is used.
+       FM_RANSAC = 8  //!< RANSAC algorithm. It needs at least 15 points. 7-point algorithm is used.
+     };
+
+
+
+/** @brief Converts a rotation matrix to a rotation vector or vice versa.
+
+@param src Input rotation vector (3x1 or 1x3) or rotation matrix (3x3).
+@param dst Output rotation matrix (3x3) or rotation vector (3x1 or 1x3), respectively.
+@param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial
+derivatives of the output array components with respect to the input array components.
+
+\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r  \leftarrow r/ \theta \\ R =  \cos{\theta} I + (1- \cos{\theta} ) r r^T +  \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f]
+
+The inverse transformation can also be done easily, since
+
+\f[\sin ( \theta ) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} = \frac{R - R^T}{2}\f]
+
+A rotation vector is a convenient and compact representation of a rotation matrix (since any
+rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry
+optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP.
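+
+A minimal usage sketch, converting a rotation vector to a matrix and back:
+@code
+    Mat rvec = (Mat_<double>(3, 1) << 0, CV_PI/4, 0); // 45 degrees about the y-axis
+    Mat R, rvec2;
+    Rodrigues(rvec, R);     // rotation vector -> 3x3 rotation matrix
+    Rodrigues(R, rvec2);    // rotation matrix -> rotation vector (recovers rvec)
+@endcode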
+ */
+CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() );
+
+/** @example pose_from_homography.cpp
+  An example program about pose estimation from coplanar points
+
+  Check @ref tutorial_homography "the corresponding tutorial" for more details
+ */
+
+/** @brief Finds a perspective transformation between two planes.
+
+@param srcPoints Coordinates of the points in the original plane, a matrix of the type CV_32FC2
+or vector\<Point2f\> .
+@param dstPoints Coordinates of the points in the target plane, a matrix of the type CV_32FC2 or
+a vector\<Point2f\> .
+@param method Method used to compute a homography matrix. The following methods are possible:
+-   **0** - a regular method using all the points, i.e., the least squares method
+-   **RANSAC** - RANSAC-based robust method
+-   **LMEDS** - Least-Median robust method
+-   **RHO** - PROSAC-based robust method
+@param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier
+(used in the RANSAC and RHO methods only). That is, if
+\f[\| \texttt{dstPoints} _i -  \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2  >  \texttt{ransacReprojThreshold}\f]
+then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels,
+it usually makes sense to set this parameter somewhere in the range of 1 to 10.
+@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input
+mask values are ignored.
+@param maxIters The maximum number of RANSAC iterations.
+@param confidence Confidence level, between 0 and 1.
+
+The function finds and returns the perspective transformation \f$H\f$ between the source and the
+destination planes:
+
+\f[s_i  \vecthree{x'_i}{y'_i}{1} \sim H  \vecthree{x_i}{y_i}{1}\f]
+
+so that the back-projection error
+
+\f[\sum _i \left ( x'_i- \frac{h_{11} x_i + h_{12} y_i + h_{13}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2+ \left ( y'_i- \frac{h_{21} x_i + h_{22} y_i + h_{23}}{h_{31} x_i + h_{32} y_i + h_{33}} \right )^2\f]
+
+is minimized. If the parameter method is set to the default value 0, the function uses all the point
+pairs to compute an initial homography estimate with a simple least-squares scheme.
+
+However, if not all of the point pairs ( \f$srcPoints_i\f$, \f$dstPoints_i\f$ ) fit the rigid perspective
+transformation (that is, there are some outliers), this initial estimate will be poor. In this case,
+you can use one of the three robust methods. The methods RANSAC, LMeDS and RHO try many different
+random subsets of the corresponding point pairs (of four pairs each, collinear pairs are discarded), estimate the homography matrix
+using this subset and a simple least-squares algorithm, and then compute the quality/goodness of the
+computed homography (which is the number of inliers for RANSAC or the least median re-projection error for
+LMeDS). The best subset is then used to produce the initial estimate of the homography matrix and
+the mask of inliers/outliers.
+
+Regardless of the method, robust or not, the computed homography matrix is refined further (using
+inliers only in case of a robust method) with the Levenberg-Marquardt method to reduce the
+re-projection error even more.
+
+The methods RANSAC and RHO can handle practically any ratio of outliers but need a threshold to
+distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
+correctly only when there are more than 50% of inliers. Finally, if there are no outliers and the
+noise is rather small, use the default method (method=0).
+
+The function is used to find initial intrinsic and extrinsic matrices. The homography matrix is
+determined up to a scale. Thus, it is normalized so that \f$h_{33}=1\f$. Note that whenever an \f$H\f$ matrix
+cannot be estimated, an empty one will be returned.
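+
+A minimal usage sketch; srcPts and dstPts are assumed to be filled with corresponding points, e.g.
+from feature matching:
+@code
+    std::vector<Point2f> srcPts, dstPts;
+    // ... fill srcPts and dstPts with corresponding points ...
+    Mat inlierMask;
+    Mat H = findHomography(srcPts, dstPts, RANSAC, 3.0, inlierMask);
+    std::vector<Point2f> mapped;
+    if (!H.empty())
+        perspectiveTransform(srcPts, mapped, H); // apply the estimated homography
+@endcode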
+
+@sa
+getAffineTransform, estimateAffine2D, estimateAffinePartial2D, getPerspectiveTransform, warpPerspective,
+perspectiveTransform
+ */
+CV_EXPORTS_W Mat findHomography( InputArray srcPoints, InputArray dstPoints,
+                                 int method = 0, double ransacReprojThreshold = 3,
+                                 OutputArray mask=noArray(), const int maxIters = 2000,
+                                 const double confidence = 0.995);
+
+/** @overload */
+CV_EXPORTS Mat findHomography( InputArray srcPoints, InputArray dstPoints,
+                               OutputArray mask, int method = 0, double ransacReprojThreshold = 3 );
+
+/** @brief Computes an RQ decomposition of 3x3 matrices.
+
+@param src 3x3 input matrix.
+@param mtxR Output 3x3 upper-triangular matrix.
+@param mtxQ Output 3x3 orthogonal matrix.
+@param Qx Optional output 3x3 rotation matrix around x-axis.
+@param Qy Optional output 3x3 rotation matrix around y-axis.
+@param Qz Optional output 3x3 rotation matrix around z-axis.
+
+The function computes an RQ decomposition using the given rotations. This function is used in
+decomposeProjectionMatrix to decompose the left 3x3 submatrix of a projection matrix into a camera
+and a rotation matrix.
+
+It optionally returns three rotation matrices, one for each axis, and the three Euler angles in
+degrees (as the return value) that could be used in OpenGL. Note, there is always more than one
+sequence of rotations about the three principal axes that results in the same orientation of an
+object, e.g. see @cite Slabaugh . The returned three rotation matrices and corresponding three Euler angles
+are only one of the possible solutions.
+ */
+CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mtxQ,
+                                OutputArray Qx = noArray(),
+                                OutputArray Qy = noArray(),
+                                OutputArray Qz = noArray());
+
+/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix.
+
+@param projMatrix 3x4 input projection matrix P.
+@param cameraMatrix Output 3x3 camera matrix K.
+@param rotMatrix Output 3x3 external rotation matrix R.
+@param transVect Output 4x1 translation vector T.
+@param rotMatrixX Optional 3x3 rotation matrix around x-axis.
+@param rotMatrixY Optional 3x3 rotation matrix around y-axis.
+@param rotMatrixZ Optional 3x3 rotation matrix around z-axis.
+@param eulerAngles Optional three-element vector containing three Euler angles of rotation in
+degrees.
+
+The function computes a decomposition of a projection matrix into a calibration and a rotation
+matrix and the position of a camera.
+
+It optionally returns three rotation matrices, one for each axis, and three Euler angles that could
+be used in OpenGL. Note, there is always more than one sequence of rotations about the three
+principal axes that results in the same orientation of an object, e.g. see @cite Slabaugh . The
+returned three rotation matrices and corresponding three Euler angles are only one of the possible solutions.
+
+The function is based on RQDecomp3x3 .
+ */
+CV_EXPORTS_W void decomposeProjectionMatrix( InputArray projMatrix, OutputArray cameraMatrix,
+                                             OutputArray rotMatrix, OutputArray transVect,
+                                             OutputArray rotMatrixX = noArray(),
+                                             OutputArray rotMatrixY = noArray(),
+                                             OutputArray rotMatrixZ = noArray(),
+                                             OutputArray eulerAngles =noArray() );
+
+/** @brief Computes partial derivatives of the matrix product for each multiplied matrix.
+
+@param A First multiplied matrix.
+@param B Second multiplied matrix.
+@param dABdA First output derivative matrix d(A\*B)/dA of size
+\f$\texttt{A.rows*B.cols} \times \texttt{A.rows*A.cols}\f$ .
+@param dABdB Second output derivative matrix d(A\*B)/dB of size
+\f$\texttt{A.rows*B.cols} \times \texttt{B.rows*B.cols}\f$ .
+
+The function computes partial derivatives of the elements of the matrix product \f$A*B\f$ with regard to
+the elements of each of the two input matrices. The function is used to compute the Jacobian
+matrices in stereoCalibrate but can also be used in any other similar optimization function.
+ */
+CV_EXPORTS_W void matMulDeriv( InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB );
+
+/** @brief Combines two rotation-and-shift transformations.
+
+@param rvec1 First rotation vector.
+@param tvec1 First translation vector.
+@param rvec2 Second rotation vector.
+@param tvec2 Second translation vector.
+@param rvec3 Output rotation vector of the superposition.
+@param tvec3 Output translation vector of the superposition.
+@param dr3dr1
+@param dr3dt1
+@param dr3dr2
+@param dr3dt2
+@param dt3dr1
+@param dt3dt1
+@param dt3dr2
+@param dt3dt2 Optional output derivatives of rvec3 or tvec3 with regard to rvec1, rvec2, tvec1 and
+tvec2, respectively.
+
+The functions compute:
+
+\f[\begin{array}{l} \texttt{rvec3} =  \mathrm{rodrigues} ^{-1} \left ( \mathrm{rodrigues} ( \texttt{rvec2} )  \cdot \mathrm{rodrigues} ( \texttt{rvec1} ) \right )  \\ \texttt{tvec3} =  \mathrm{rodrigues} ( \texttt{rvec2} )  \cdot \texttt{tvec1} +  \texttt{tvec2} \end{array} ,\f]
+
+where \f$\mathrm{rodrigues}\f$ denotes a rotation vector to a rotation matrix transformation, and
+\f$\mathrm{rodrigues}^{-1}\f$ denotes the inverse transformation. See Rodrigues for details.
+
+Also, the functions can compute the derivatives of the output vectors with regards to the input
+vectors (see matMulDeriv ). The functions are used inside stereoCalibrate but can also be used in
+your own code where Levenberg-Marquardt or another gradient-based solver is used to optimize a
+function that contains a matrix multiplication.
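+
+A minimal sketch of chaining two poses (with arbitrary example values):
+@code
+    Mat rvec1 = (Mat_<double>(3, 1) << 0, 0, 0.1), tvec1 = (Mat_<double>(3, 1) << 1, 0, 0);
+    Mat rvec2 = (Mat_<double>(3, 1) << 0, 0.2, 0), tvec2 = (Mat_<double>(3, 1) << 0, 0, 2);
+    Mat rvec3, tvec3;
+    composeRT(rvec1, tvec1, rvec2, tvec2, rvec3, tvec3); // pose 2 applied after pose 1
+@endcode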
+ */
+CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1,
+                             InputArray rvec2, InputArray tvec2,
+                             OutputArray rvec3, OutputArray tvec3,
+                             OutputArray dr3dr1 = noArray(), OutputArray dr3dt1 = noArray(),
+                             OutputArray dr3dr2 = noArray(), OutputArray dr3dt2 = noArray(),
+                             OutputArray dt3dr1 = noArray(), OutputArray dt3dt1 = noArray(),
+                             OutputArray dt3dr2 = noArray(), OutputArray dt3dt2 = noArray() );
+
+/** @brief Projects 3D points to an image plane.
+
+@param objectPoints Array of object points, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel (or
+vector\<Point3f\> ), where N is the number of points in the view.
+@param rvec Rotation vector. See Rodrigues for details.
+@param tvec Translation vector.
+@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
+@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or
+vector\<Point2f\> .
+@param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image
+points with respect to components of the rotation vector, translation vector, focal lengths,
+coordinates of the principal point and the distortion coefficients. In the old interface different
+components of the jacobian are returned via different output parameters.
+@param aspectRatio Optional "fixed aspect ratio" parameter. If the parameter is not 0, the
+function assumes that the aspect ratio (*fx/fy*) is fixed and correspondingly adjusts the jacobian
+matrix.
+
+The function computes projections of 3D points to the image plane given intrinsic and extrinsic
+camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of
+image point coordinates (as functions of all the input parameters) with respect to the particular
+parameters, intrinsic and/or extrinsic. The Jacobians are used during the global optimization in
+calibrateCamera, solvePnP, and stereoCalibrate . The function itself can also be used to compute a
+re-projection error given the current intrinsic and extrinsic parameters.
+
+@note By setting rvec=tvec=(0,0,0) or by setting cameraMatrix to a 3x3 identity matrix, or by
+passing zero distortion coefficients, you can get various useful partial cases of the function. This
+means that you can compute the distorted coordinates for a sparse set of points or apply a
+perspective transformation (and also compute the derivatives) in the ideal zero-distortion setup.
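+
+A minimal usage sketch with a hypothetical camera and zero distortion:
+@code
+    std::vector<Point3f> objectPoints;
+    objectPoints.push_back(Point3f(0, 0, 0));
+    objectPoints.push_back(Point3f(1, 0, 0));
+    objectPoints.push_back(Point3f(0, 1, 0));
+    Mat rvec = Mat::zeros(3, 1, CV_64F), tvec = (Mat_<double>(3, 1) << 0, 0, 5);
+    Mat K = (Mat_<double>(3, 3) << 800, 0, 320, 0, 800, 240, 0, 0, 1);
+    std::vector<Point2f> imagePoints;
+    projectPoints(objectPoints, rvec, tvec, K, Mat(), imagePoints);
+@endcode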
+ */
+CV_EXPORTS_W void projectPoints( InputArray objectPoints,
+                                 InputArray rvec, InputArray tvec,
+                                 InputArray cameraMatrix, InputArray distCoeffs,
+                                 OutputArray imagePoints,
+                                 OutputArray jacobian = noArray(),
+                                 double aspectRatio = 0 );
+
+/** @example homography_from_camera_displacement.cpp
+  An example program about computing a homography from a camera displacement
+
+  Check @ref tutorial_homography "the corresponding tutorial" for more details
+ */
+
+/** @brief Finds an object pose from 3D-2D point correspondences.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2f\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from
+the model coordinate system to the camera coordinate system.
+@param tvec Output translation vector.
+@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
+the provided rvec and tvec values as initial approximations of the rotation and translation
+vectors, respectively, and further optimizes them.
+@param flags Method for solving a PnP problem:
+-   **SOLVEPNP_ITERATIVE** Iterative method is based on Levenberg-Marquardt optimization. In
+this case the function finds such a pose that minimizes reprojection error, that is the sum
+of squared distances between the observed projections imagePoints and the projected (using
+projectPoints ) objectPoints .
+-   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+In this case the function requires exactly four object and image points.
+-   **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
+paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
+-   **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
+"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
+-   **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
+F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
+Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
+assuming that both have the same value. Then the cameraMatrix is updated with the estimated
+focal length.
+
+The function estimates the object pose given a set of object points, their corresponding image
+projections, as well as the camera matrix and the distortion coefficients, see the figure below
+(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
+and the Z-axis forward).
+
+![](pnp.jpg)
+
+Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
+using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{M}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  f_x & 0 & c_x \\
+  0 & f_y & c_y \\
+  0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  1 & 0 & 0 & 0 \\
+  0 & 1 & 0 & 0 \\
+  0 & 0 & 1 & 0
+  \end{bmatrix}
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
+a 3D point expressed in the world frame into the camera frame:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \hspace{0.2em} ^{c}\bf{M}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+@note
+   -   An example of how to use solvePnP for planar augmented reality can be found at
+        opencv_source_code/samples/python/plane_ar.py
+   -   If you are using Python:
+        - Numpy array slices won't work as input because solvePnP requires contiguous
+        arrays (enforced by the assertion using cv::Mat::checkVector() around line 55 of
+        modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        - The P3P algorithm requires image points to be in an array of shape (N,1,2) due
+        to its calling of cv::undistortPoints (around line 75 of modules/calib3d/src/solvepnp.cpp version 2.4.9)
+        which requires 2-channel information.
+        - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of
+        it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints =
+        np.ascontiguousarray(D[:,:2]).reshape((N,1,2))
+   -   The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are
+       unstable and sometimes give completely wrong results. If you pass one of these two
+       flags, **SOLVEPNP_EPNP** method will be used instead.
+   -   The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P**
+       methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions
+       of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error).
+   -   With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points
+       are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the
+       global solution to converge.
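+
+A minimal usage sketch; objPts and imgPts are assumed to hold at least 4 correspondences, and the
+camera matrix uses hypothetical values:
+@code
+    std::vector<Point3f> objPts; // 3D model points
+    std::vector<Point2f> imgPts; // their detected projections
+    // ... fill objPts and imgPts ...
+    Mat K = (Mat_<double>(3, 3) << 800, 0, 320, 0, 800, 240, 0, 0, 1);
+    Mat rvec, tvec;
+    bool ok = solvePnP(objPts, imgPts, K, Mat(), rvec, tvec);
+@endcode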
+ */
+CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
+                            InputArray cameraMatrix, InputArray distCoeffs,
+                            OutputArray rvec, OutputArray tvec,
+                            bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE );
+
+/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
+
+@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
+where N is the number of points. vector\<Point2f\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvec Output rotation vector (see Rodrigues ) that, together with tvec , brings points from
+the model coordinate system to the camera coordinate system.
+@param tvec Output translation vector.
+@param useExtrinsicGuess Parameter used for SOLVEPNP_ITERATIVE. If true (1), the function uses
+the provided rvec and tvec values as initial approximations of the rotation and translation
+vectors, respectively, and further optimizes them.
+@param iterationsCount Number of iterations.
+@param reprojectionError Inlier threshold value used by the RANSAC procedure. The parameter value
+is the maximum allowed distance between the observed and computed point projections to consider it
+an inlier.
+@param confidence The probability that the algorithm produces a useful result.
+@param inliers Output vector that contains indices of inliers in objectPoints and imagePoints .
+@param flags Method for solving a PnP problem (see solvePnP ).
+
+The function estimates an object pose given a set of object points, their corresponding image
+projections, as well as the camera matrix and the distortion coefficients. This function finds such
+a pose that minimizes reprojection error, that is, the sum of squared distances between the observed
+projections imagePoints and the projected (using projectPoints ) objectPoints. The use of RANSAC
+makes the function resistant to outliers.
+
+@note
+   -   An example of how to use solvePnPRansac for object detection can be found at
+        opencv_source_code/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/
+   -   The default method used to estimate the camera pose for the Minimal Sample Sets step
+       is #SOLVEPNP_EPNP. Exceptions are:
+         - if you choose #SOLVEPNP_P3P or #SOLVEPNP_AP3P, these methods will be used.
+         - if the number of input points is equal to 4, #SOLVEPNP_P3P is used.
+   -   The method used to estimate the camera pose using all the inliers is defined by the
+       flags parameters unless it is equal to #SOLVEPNP_P3P or #SOLVEPNP_AP3P. In this case,
+       the method #SOLVEPNP_EPNP will be used instead.
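+
+A minimal usage sketch (hypothetical camera matrix, default RANSAC parameters):
+@code
+    std::vector<Point3f> objPts; // 3D model points, assumed filled
+    std::vector<Point2f> imgPts; // detected projections, possibly with outliers
+    Mat K = (Mat_<double>(3, 3) << 800, 0, 320, 0, 800, 240, 0, 0, 1);
+    Mat rvec, tvec;
+    std::vector<int> inliers;
+    bool ok = solvePnPRansac(objPts, imgPts, K, Mat(), rvec, tvec,
+                             false, 100, 8.0f, 0.99, inliers);
+@endcode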
+ */
+CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
+                                  InputArray cameraMatrix, InputArray distCoeffs,
+                                  OutputArray rvec, OutputArray tvec,
+                                  bool useExtrinsicGuess = false, int iterationsCount = 100,
+                                  float reprojectionError = 8.0, double confidence = 0.99,
+                                  OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE );
+
+/** @brief Finds an object pose from 3 3D-2D point correspondences.
+
+@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
+1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
+@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
+ vector\<Point2f\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param rvecs Output rotation vectors (see Rodrigues ) that, together with tvecs , bring points from
+the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
+@param tvecs Output translation vectors.
+@param flags Method for solving a P3P problem:
+-   **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+-   **SOLVEPNP_AP3P** Method is based on the paper of Tong Ke and Stergios I. Roumeliotis.
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+
+The function estimates the object pose given 3 object points, their corresponding image
+projections, as well as the camera matrix and the distortion coefficients.
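+
+A minimal usage sketch; exactly 3 correspondences are assumed, and up to 4 candidate poses are
+returned:
+@code
+    std::vector<Point3f> objPts; // exactly 3 object points
+    std::vector<Point2f> imgPts; // their 3 projections
+    // ... fill objPts and imgPts ...
+    Mat K = (Mat_<double>(3, 3) << 800, 0, 320, 0, 800, 240, 0, 0, 1);
+    std::vector<Mat> rvecs, tvecs;
+    int nSolutions = solveP3P(objPts, imgPts, K, Mat(), rvecs, tvecs, SOLVEPNP_P3P);
+@endcode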
+ */
+CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
+                           InputArray cameraMatrix, InputArray distCoeffs,
+                           OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                           int flags );
+
+/** @brief Finds an initial camera matrix from 3D-2D point correspondences.
+
+@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern
+coordinate space. In the old interface all the per-view vectors are concatenated. See
+calibrateCamera for details.
+@param imagePoints Vector of vectors of the projections of the calibration pattern points. In the
+old interface all the per-view vectors are concatenated.
+@param imageSize Image size in pixels used to initialize the principal point.
+@param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently.
+Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ .
+
+The function estimates and returns an initial camera matrix for the camera calibration process.
+Currently, the function only supports planar calibration patterns, which are patterns where each
+object point has z-coordinate = 0.
+ */
+CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints,
+                                     Size imageSize, double aspectRatio = 1.0 );
+
+/** @brief Finds the positions of internal corners of the chessboard.
+
+@param image Source chessboard view. It must be an 8-bit grayscale or color image.
+@param patternSize Number of inner corners per a chessboard row and column
+( patternSize = cvSize(points_per_row,points_per_column) = cvSize(columns,rows) ).
+@param corners Output array of detected corners.
+@param flags Various operation flags that can be zero or a combination of the following values:
+-   **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black
+and white, rather than a fixed threshold level (computed from the average image brightness).
+-   **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before
+applying fixed or adaptive thresholding.
+-   **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter,
+square-like shape) to filter out false quads extracted at the contour retrieval stage.
+-   **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners,
+and shortcut the call if none is found. This can drastically speed up the call in the
+degenerate condition when no chessboard is observed.
+
+The function attempts to determine whether the input image is a view of the chessboard pattern and
+locate the internal chessboard corners. The function returns a non-zero value if all of the corners
+are found and they are placed in a certain order (row by row, left to right in every row).
+Otherwise, if the function fails to find all the corners or reorder them, it returns 0. For example,
+a regular chessboard has 8 x 8 squares and 7 x 7 internal corners, that is, points where the black
+squares touch each other. The detected coordinates are approximate, and to determine their positions
+more accurately, the function calls cornerSubPix. You may also use the function cornerSubPix with
+different parameters if the returned coordinates are not accurate enough.
+
+Sample usage of detecting and drawing chessboard corners:
+@code
+    Size patternsize(8,6); //interior number of corners
+    Mat gray = ....; //source image
+    vector<Point2f> corners; //this will be filled by the detected corners
+
+    //CALIB_CB_FAST_CHECK saves a lot of time on images
+    //that do not contain any chessboard corners
+    bool patternfound = findChessboardCorners(gray, patternsize, corners,
+            CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE
+            + CALIB_CB_FAST_CHECK);
+
+    if(patternfound)
+      cornerSubPix(gray, corners, Size(11, 11), Size(-1, -1),
+        TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 30, 0.1));
+
+    drawChessboardCorners(img, patternsize, Mat(corners), patternfound);
+@endcode
+@note The function requires white space (like a square-thick border, the wider the better) around
+the board to make the detection more robust in various environments. Otherwise, if there is no
+border and the background is dark, the outer black squares cannot be segmented properly and so the
+square grouping and ordering algorithm fails.
+ */
+CV_EXPORTS_W bool findChessboardCorners( InputArray image, Size patternSize, OutputArray corners,
+                                         int flags = CALIB_CB_ADAPTIVE_THRESH + CALIB_CB_NORMALIZE_IMAGE );
+
+//! finds subpixel-accurate positions of the chessboard corners
+CV_EXPORTS bool find4QuadCornerSubpix( InputArray img, InputOutputArray corners, Size region_size );
+
+/** @brief Renders the detected chessboard corners.
+
+@param image Destination image. It must be an 8-bit color image.
+@param patternSize Number of inner corners per a chessboard row and column
+(patternSize = cv::Size(points_per_row,points_per_column)).
+@param corners Array of detected corners, the output of findChessboardCorners.
+@param patternWasFound Parameter indicating whether the complete board was found or not. The
+return value of findChessboardCorners should be passed here.
+
+The function draws individual chessboard corners detected either as red circles if the board was not
+found, or as colored corners connected with lines if the board was found.
+ */
+CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSize,
+                                         InputArray corners, bool patternWasFound );
+
+struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters
+{
+    CV_WRAP CirclesGridFinderParameters();
+    CV_PROP_RW cv::Size2f densityNeighborhoodSize;
+    CV_PROP_RW float minDensity;
+    CV_PROP_RW int kmeansAttempts;
+    CV_PROP_RW int minDistanceToAddKeypoint;
+    CV_PROP_RW int keypointScale;
+    CV_PROP_RW float minGraphConfidence;
+    CV_PROP_RW float vertexGain;
+    CV_PROP_RW float vertexPenalty;
+    CV_PROP_RW float existingVertexGain;
+    CV_PROP_RW float edgeGain;
+    CV_PROP_RW float edgePenalty;
+    CV_PROP_RW float convexHullFactor;
+    CV_PROP_RW float minRNGEdgeSwitchDist;
+
+    enum GridType
+    {
+      SYMMETRIC_GRID, ASYMMETRIC_GRID
+    };
+    GridType gridType;
+};
+
+struct CV_EXPORTS_W_SIMPLE CirclesGridFinderParameters2 : public CirclesGridFinderParameters
+{
+    CV_WRAP CirclesGridFinderParameters2();
+
+    CV_PROP_RW float squareSize; //!< Distance between two adjacent points. Used by CALIB_CB_CLUSTERING.
+    CV_PROP_RW float maxRectifiedDistance; //!< Max deviation from prediction. Used by CALIB_CB_CLUSTERING.
+};
+
+/** @brief Finds centers in the grid of circles.
+
+@param image grid view of input circles; it must be an 8-bit grayscale or color image.
+@param patternSize number of circles per row and column
+( patternSize = Size(points_per_row, points_per_column) ).
+@param centers output array of detected centers.
+@param flags various operation flags that can be one of the following values:
+-   **CALIB_CB_SYMMETRIC_GRID** uses symmetric pattern of circles.
+-   **CALIB_CB_ASYMMETRIC_GRID** uses asymmetric pattern of circles.
+-   **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to
+perspective distortions but much more sensitive to background clutter.
+@param blobDetector feature detector that finds blobs like dark circles on light background.
+@param parameters struct for finding circles in a grid pattern.
+
+The function attempts to determine whether the input image contains a grid of circles. If it does, the
+function locates centers of the circles. The function returns a non-zero value if all of the centers
+have been found and they have been placed in a certain order (row by row, left to right in every
+row). Otherwise, if the function fails to find all the centers or reorder them, it returns 0.
+
+Sample usage of detecting and drawing the centers of circles:
+@code
+    Size patternsize(7,7); //number of centers
+    Mat gray = ....; //source image
+    vector<Point2f> centers; //this will be filled by the detected centers
+
+    bool patternfound = findCirclesGrid(gray, patternsize, centers);
+
+    drawChessboardCorners(img, patternsize, Mat(centers), patternfound);
+@endcode
+@note The function requires white space (like a square-thick border, the wider the better) around
+the board to make the detection more robust in various environments.
+ */
+CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags,
+                                   const Ptr<FeatureDetector> &blobDetector,
+                                   CirclesGridFinderParameters parameters);
+
+/** @overload */
+CV_EXPORTS_W bool findCirclesGrid2( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags,
+                                   const Ptr<FeatureDetector> &blobDetector,
+                                   CirclesGridFinderParameters2 parameters);
+
+/** @overload */
+CV_EXPORTS_W bool findCirclesGrid( InputArray image, Size patternSize,
+                                   OutputArray centers, int flags = CALIB_CB_SYMMETRIC_GRID,
+                                   const Ptr<FeatureDetector> &blobDetector = SimpleBlobDetector::create());
+
+/** @brief Finds the camera intrinsic and extrinsic parameters from several views of a calibration pattern.
+
+@param objectPoints In the new interface it is a vector of vectors of calibration pattern points in
+the calibration pattern coordinate space (e.g. std::vector<std::vector<cv::Vec3f>>). The outer
+vector contains as many elements as the number of the pattern views. If the same calibration pattern
+is shown in each view and it is fully visible, all the vectors will be the same. However, it is
+possible to use partially occluded patterns, or even different patterns in different views. Then,
+the vectors will be different. The points are 3D, but since they are in a pattern coordinate system,
+then, if the rig is planar, it may make sense to place the model in the XY coordinate plane so that
+the Z-coordinate of each input object point is 0.
+In the old interface all the vectors of object points from different views are concatenated
+together.
+@param imagePoints In the new interface it is a vector of vectors of the projections of calibration
+pattern points (e.g. std::vector<std::vector<cv::Vec2f>>). imagePoints.size() must be equal to
+objectPoints.size(), and imagePoints[i].size() must be equal to objectPoints[i].size() for each i.
+In the old interface all the vectors of image points from different views are concatenated
+together.
+@param imageSize Size of the image used only to initialize the intrinsic camera matrix.
+@param cameraMatrix Output 3x3 floating-point camera matrix
+\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CALIB_USE_INTRINSIC_GUESS
+and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+initialized before calling the function.
+@param distCoeffs Output vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements.
+@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view
+(e.g. std::vector<cv::Mat>). That is, each k-th rotation vector together with the corresponding
+k-th translation vector (see the next output parameter description) brings the calibration pattern
+from the model coordinate space (in which object points are specified) to the world coordinate
+space, that is, a real position of the calibration pattern in the k-th pattern view (k=0.. *M* -1).
+@param tvecs Output vector of translation vectors estimated for each pattern view.
+@param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+ Order of deviations values:
+\f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ s_4, \tau_x, \tau_y)\f$. If one of the parameters is not estimated, its deviation is equal to zero.
+@param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+ Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is the number of pattern views,
+ \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+@param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
+@param flags Different flags that may be zero or a combination of the following values:
+-   **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
+center ( imageSize is used), and focal distances are computed in a least-squares fashion.
+Note, that if intrinsic parameters are known, there is no need to use this function just to
+estimate extrinsic parameters. Use solvePnP instead.
+-   **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+optimization. It stays at the center or at a different location specified when
+CALIB_USE_INTRINSIC_GUESS is set too.
+-   **CALIB_FIX_ASPECT_RATIO** The function considers only fy as a free parameter. The
+ratio fx/fy stays the same as in the input cameraMatrix . When
+CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are
+ignored, only their ratio is computed and used further.
+-   **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
+to zeros and stay zero.
+-   **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion
+coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is
+set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+-   **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. To provide the
+backward compatibility, this extra flag should be explicitly specified to make the
+calibration function use the rational model and return 8 coefficients. If the flag is not
+set, the function computes and returns only 5 distortion coefficients.
+-   **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+backward compatibility, this extra flag should be explicitly specified to make the
+calibration function use the thin prism model and return 12 coefficients. If the flag is not
+set, the function computes and returns only 5 distortion coefficients.
+-   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+-   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+backward compatibility, this extra flag should be explicitly specified to make the
+calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
+set, the function computes and returns only 5 distortion coefficients.
+-   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+@param criteria Termination criteria for the iterative optimization algorithm.
+
+@return the overall RMS re-projection error.
+
+The function estimates the intrinsic camera parameters and extrinsic parameters for each of the
+views. The algorithm is based on @cite Zhang2000 and @cite BouguetMCT . The coordinates of 3D object
+points and their corresponding 2D projections in each view must be specified. That may be achieved
+by using an object with a known geometry and easily detectable feature points. Such an object is
+called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as
+a calibration rig (see findChessboardCorners ). Currently, initialization of intrinsic parameters
+(when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration
+patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also
+be used as long as initial cameraMatrix is provided.
+
+The algorithm performs the following steps:
+
+-   Compute the initial intrinsic parameters (the option only available for planar calibration
+    patterns) or read them from the input parameters. The distortion coefficients are all set to
+    zeros initially unless some of CALIB_FIX_K? are specified.
+
+-   Estimate the initial camera pose as if the intrinsic parameters have been already known. This is
+    done using solvePnP .
+
+-   Run the global Levenberg-Marquardt optimization algorithm to minimize the reprojection error,
+    that is, the total sum of squared distances between the observed feature points imagePoints and
+    the projected (using the current estimates for camera parameters and the poses) object points
+    objectPoints. See projectPoints for details.
+
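+A minimal usage sketch; objectPoints and imagePoints are assumed to be filled per view, e.g. with
+findChessboardCorners, and the image size is hypothetical:
+@code
+    std::vector<std::vector<Point3f> > objectPoints; // board coordinates per view (Z = 0)
+    std::vector<std::vector<Point2f> > imagePoints;  // detected corners per view
+    // ... fill objectPoints and imagePoints ...
+    Mat cameraMatrix, distCoeffs;
+    std::vector<Mat> rvecs, tvecs;
+    double rms = calibrateCamera(objectPoints, imagePoints, Size(640, 480),
+                                 cameraMatrix, distCoeffs, rvecs, tvecs);
+@endcode
+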
+@note
+   If you use a non-square (i.e. non-NxN) grid and findChessboardCorners for calibration, and
+    calibrateCamera returns bad values (zero distortion coefficients, an image center very far from
+    (w/2-0.5,h/2-0.5), and/or large differences between \f$f_x\f$ and \f$f_y\f$ (ratios of 10:1 or more)),
+    then you have probably used patternSize=cvSize(rows,cols) instead of using
+    patternSize=cvSize(cols,rows) in findChessboardCorners .
+
+@sa
+   findChessboardCorners, solvePnP, initCameraMatrix2D, stereoCalibrate, undistort
+ */
+CV_EXPORTS_AS(calibrateCameraExtended) double calibrateCamera( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     OutputArray stdDeviationsIntrinsics,
+                                     OutputArray stdDeviationsExtrinsics,
+                                     OutputArray perViewErrors,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+
+/** @overload double calibrateCamera( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     OutputArray stdDeviations, OutputArray perViewErrors,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) )
+ */
+CV_EXPORTS_W double calibrateCamera( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints, Size imageSize,
+                                     InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+                                     OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+                                     int flags = 0, TermCriteria criteria = TermCriteria(
+                                        TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
+
+/** @brief Computes useful camera characteristics from the camera matrix.
+
+@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or
+stereoCalibrate .
+@param imageSize Input image size in pixels.
+@param apertureWidth Physical width in mm of the sensor.
+@param apertureHeight Physical height in mm of the sensor.
+@param fovx Output field of view in degrees along the horizontal sensor axis.
+@param fovy Output field of view in degrees along the vertical sensor axis.
+@param focalLength Focal length of the lens in mm.
+@param principalPoint Principal point in mm.
+@param aspectRatio \f$f_y/f_x\f$
+
+The function computes various useful camera characteristics from the previously estimated camera
+matrix.
+
+@note
+   Do keep in mind that the unit 'mm' here stands for whatever unit of measure one chooses for
+    the chessboard pitch (it can thus be any value).
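+
+A minimal usage sketch with a hypothetical camera matrix and sensor dimensions:
+@code
+    Mat K = (Mat_<double>(3, 3) << 800, 0, 320, 0, 800, 240, 0, 0, 1);
+    double fovx, fovy, focalLength, aspectRatio;
+    Point2d principalPoint;
+    calibrationMatrixValues(K, Size(640, 480), 4.8, 3.6, // sensor size in mm
+                            fovx, fovy, focalLength, principalPoint, aspectRatio);
+@endcode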
+ */
+CV_EXPORTS_W void calibrationMatrixValues( InputArray cameraMatrix, Size imageSize,
+                                           double apertureWidth, double apertureHeight,
+                                           CV_OUT double& fovx, CV_OUT double& fovy,
+                                           CV_OUT double& focalLength, CV_OUT Point2d& principalPoint,
+                                           CV_OUT double& aspectRatio );
+
+/** @brief Calibrates the stereo camera.
+
+@param objectPoints Vector of vectors of the calibration pattern points.
+@param imagePoints1 Vector of vectors of the projections of the calibration pattern points,
+observed by the first camera.
+@param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
+observed by the second camera.
+@param cameraMatrix1 Input/output first camera matrix:
+\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
+any of CALIB_USE_INTRINSIC_GUESS , CALIB_FIX_ASPECT_RATIO ,
+CALIB_FIX_INTRINSIC , or CALIB_FIX_FOCAL_LENGTH are specified, some or all of the
+matrix components must be initialized. See the flags description for details.
+@param distCoeffs1 Input/output vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. The output vector length depends on the flags.
+@param cameraMatrix2 Input/output second camera matrix. The parameter is similar to cameraMatrix1
+@param distCoeffs2 Input/output lens distortion coefficients for the second camera. The parameter
+is similar to distCoeffs1 .
+@param imageSize Size of the image used only to initialize intrinsic camera matrix.
+@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
+@param T Output translation vector between the coordinate systems of the cameras.
+@param E Output essential matrix.
+@param F Output fundamental matrix.
+@param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view.
+@param flags Different flags that may be zero or a combination of the following values:
+-   **CALIB_FIX_INTRINSIC** Fix cameraMatrix1/cameraMatrix2 and distCoeffs1/distCoeffs2 so that
+only the R, T, E, and F matrices are estimated.
+-   **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters
+according to the specified flags. Initial values are provided by the user.
+-   **CALIB_USE_EXTRINSIC_GUESS** R, T contain valid initial values that are optimized further.
+Otherwise R, T are initialized to the median value of the pattern views (each dimension separately).
+-   **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization.
+-   **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ .
+-   **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ and fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$.
+-   **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ .
+-   **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to
+zero and keep them fixed.
+-   **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial
+distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set,
+the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+-   **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward
+compatibility, this extra flag should be explicitly specified to make the calibration
+function use the rational model and return 8 coefficients. If the flag is not set, the
+function computes and returns only 5 distortion coefficients.
+-   **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the
+backward compatibility, this extra flag should be explicitly specified to make the
+calibration function use the thin prism model and return 12 coefficients. If the flag is not
+set, the function computes and returns only 5 distortion coefficients.
+-   **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+-   **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the
+backward compatibility, this extra flag should be explicitly specified to make the
+calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
+set, the function computes and returns only 5 distortion coefficients.
+-   **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during
+the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
+supplied distCoeffs matrix is used. Otherwise, it is set to 0.
+@param criteria Termination criteria for the iterative optimization algorithm.
+
+The function estimates the transformation between the two cameras of a stereo pair. If you have a stereo
+camera where the relative position and orientation of two cameras is fixed, and if you computed
+poses of an object relative to the first camera and to the second camera, (R1, T1) and (R2, T2),
+respectively (this can be done with solvePnP ), then those poses are related by a fixed transformation.
+This means that, given ( \f$R_1\f$,\f$T_1\f$ ), it should be possible to compute ( \f$R_2\f$,\f$T_2\f$ ). You only
+need to know the position and orientation of the second camera relative to the first camera. This is
+what the described function does. It computes ( \f$R\f$,\f$T\f$ ) so that:
+
+\f[R_2=R*R_1\f]
+\f[T_2=R*T_1 + T,\f]
+
+Optionally, it computes the essential matrix E:
+
+\f[E= \vecthreethree{0}{-T_2}{T_1}{T_2}{0}{-T_0}{-T_1}{T_0}{0} *R\f]
+
+where \f$T_i\f$ are components of the translation vector \f$T\f$ : \f$T=[T_0, T_1, T_2]^T\f$ . And the function
+can also compute the fundamental matrix F:
+
+\f[F = cameraMatrix2^{-T} E cameraMatrix1^{-1}\f]
+
+Besides the stereo-related information, the function can also perform a full calibration of each of
+two cameras. However, due to the high dimensionality of the parameter space and noise in the input
+data, the function can diverge from the correct solution. If the intrinsic parameters can be
+estimated with high accuracy for each of the cameras individually (for example, using
+calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the
+function along with the computed intrinsic parameters. Otherwise, if all the parameters are
+estimated at once, it makes sense to restrict some parameters, for example, pass
+CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a
+reasonable assumption.
+
+Similarly to calibrateCamera , the function minimizes the total re-projection error for all the
+points in all the available views from both cameras. The function returns the final value of the
+re-projection error.
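+
+A minimal usage sketch, assuming both cameras' intrinsics were estimated beforehand with
+calibrateCamera and the pattern/image point vectors are already filled:
+@code
+    vector<vector<Point3f> > objectPoints;                 // assumed to be filled
+    vector<vector<Point2f> > imagePoints1, imagePoints2;   // assumed to be filled
+    Mat cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2; // from calibrateCamera
+    Mat R, T, E, F;
+    double rms = stereoCalibrate(objectPoints, imagePoints1, imagePoints2,
+                                 cameraMatrix1, distCoeffs1,
+                                 cameraMatrix2, distCoeffs2,
+                                 Size(640, 480), R, T, E, F, CALIB_FIX_INTRINSIC);
+@endcode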
+ */
+CV_EXPORTS_AS(stereoCalibrateExtended) double stereoCalibrate( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+                                     InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1,
+                                     InputOutputArray cameraMatrix2, InputOutputArray distCoeffs2,
+                                     Size imageSize, InputOutputArray R, InputOutputArray T, OutputArray E, OutputArray F,
+                                     OutputArray perViewErrors, int flags = CALIB_FIX_INTRINSIC,
+                                     TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) );
+
+/// @overload
+CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
+                                     InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+                                     InputOutputArray cameraMatrix1, InputOutputArray distCoeffs1,
+                                     InputOutputArray cameraMatrix2, InputOutputArray distCoeffs2,
+                                     Size imageSize, OutputArray R, OutputArray T, OutputArray E, OutputArray F,
+                                     int flags = CALIB_FIX_INTRINSIC,
+                                     TermCriteria criteria = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 1e-6) );
+
+/** @brief Computes rectification transforms for each head of a calibrated stereo camera.
+
+@param cameraMatrix1 First camera matrix.
+@param distCoeffs1 First camera distortion parameters.
+@param cameraMatrix2 Second camera matrix.
+@param distCoeffs2 Second camera distortion parameters.
+@param imageSize Size of the image used for stereo calibration.
+@param R Rotation matrix between the coordinate systems of the first and the second cameras.
+@param T Translation vector between coordinate systems of the cameras.
+@param R1 Output 3x3 rectification transform (rotation matrix) for the first camera.
+@param R2 Output 3x3 rectification transform (rotation matrix) for the second camera.
+@param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first
+camera.
+@param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
+camera.
+@param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
+@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+the function makes the principal points of each camera have the same pixel coordinates in the
+rectified views. And if the flag is not set, the function may still shift the images in the
+horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
+useful image area.
+@param alpha Free scaling parameter. If it is -1 or absent, the function performs the default
+scaling. Otherwise, the parameter should be between 0 and 1. alpha=0 means that the rectified
+images are zoomed and shifted so that only valid pixels are visible (no black areas after
+rectification). alpha=1 means that the rectified image is decimated and shifted so that all the
+pixels from the original images from the cameras are retained in the rectified images (no source
+image pixels are lost). Obviously, any intermediate value yields an intermediate result between
+those two extreme cases.
+@param newImageSize New image resolution after rectification. The same size should be passed to
+initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
+is passed (default), it is set to the original imageSize . Setting it to a larger value can help you
+preserve details in the original image, especially when there is a big radial distortion.
+@param validPixROI1 Optional output rectangles inside the rectified images where all the pixels
+are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller
+(see the picture below).
+@param validPixROI2 Optional output rectangles inside the rectified images where all the pixels
+are valid. If alpha=0 , the ROIs cover the whole images. Otherwise, they are likely to be smaller
+(see the picture below).
+
+The function computes the rotation matrices for each camera that (virtually) make both camera image
+planes the same plane. Consequently, this makes all the epipolar lines parallel and thus simplifies
+the dense stereo correspondence problem. The function takes the matrices computed by stereoCalibrate
+as input. As output, it provides two rotation matrices and also two projection matrices in the new
+coordinates. The function distinguishes the following two cases:
+
+-   **Horizontal stereo**: the first and the second camera views are shifted relative to each other
+    mainly along the x axis (with possible small vertical shift). In the rectified images, the
+    corresponding epipolar lines in the left and right cameras are horizontal and have the same
+    y-coordinate. P1 and P2 look like:
+
+    \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx_1 & 0 \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f]
+
+    \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx_2 & T_x*f \\ 0 & f & cy & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f]
+
+    where \f$T_x\f$ is a horizontal shift between the cameras and \f$cx_1=cx_2\f$ if
+    CALIB_ZERO_DISPARITY is set.
+
+-   **Vertical stereo**: the first and the second camera views are shifted relative to each other
+    mainly in vertical direction (and probably a bit in the horizontal direction too). The epipolar
+    lines in the rectified images are vertical and have the same x-coordinate. P1 and P2 look like:
+
+    \f[\texttt{P1} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_1 & 0 \\ 0 & 0 & 1 & 0 \end{bmatrix}\f]
+
+    \f[\texttt{P2} = \begin{bmatrix} f & 0 & cx & 0 \\ 0 & f & cy_2 & T_y*f \\ 0 & 0 & 1 & 0 \end{bmatrix} ,\f]
+
+    where \f$T_y\f$ is a vertical shift between the cameras and \f$cy_1=cy_2\f$ if CALIB_ZERO_DISPARITY is
+    set.
+
+As you can see, the first three columns of P1 and P2 will effectively be the new "rectified" camera
+matrices. The matrices, together with R1 and R2 , can then be passed to initUndistortRectifyMap to
+initialize the rectification map for each camera.
+
+See below the screenshot from the stereo_calib.cpp sample. Some red horizontal lines pass through
+the corresponding image regions. This means that the images are well rectified, which is what most
+stereo correspondence algorithms rely on. The green rectangles are roi1 and roi2 . You see that
+their interiors are all valid pixels.
+
+![image](pics/stereo_undistort.jpg)
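+
+A typical usage sketch that feeds the outputs of stereoCalibrate into initUndistortRectifyMap
+(the calibration data is assumed to be available):
+@code
+    // cameraMatrix1/2, distCoeffs1/2, R, T are assumed to come from stereoCalibrate
+    Mat R1, R2, P1, P2, Q;
+    Rect roi1, roi2;
+    stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2,
+                  imageSize, R, T, R1, R2, P1, P2, Q,
+                  CALIB_ZERO_DISPARITY, -1, imageSize, &roi1, &roi2);
+    Mat map11, map12, map21, map22;
+    initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, imageSize, CV_16SC2, map11, map12);
+    initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, imageSize, CV_16SC2, map21, map22);
+@endcode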
+ */
+CV_EXPORTS_W void stereoRectify( InputArray cameraMatrix1, InputArray distCoeffs1,
+                                 InputArray cameraMatrix2, InputArray distCoeffs2,
+                                 Size imageSize, InputArray R, InputArray T,
+                                 OutputArray R1, OutputArray R2,
+                                 OutputArray P1, OutputArray P2,
+                                 OutputArray Q, int flags = CALIB_ZERO_DISPARITY,
+                                 double alpha = -1, Size newImageSize = Size(),
+                                 CV_OUT Rect* validPixROI1 = 0, CV_OUT Rect* validPixROI2 = 0 );
+
+/** @brief Computes a rectification transform for an uncalibrated stereo camera.
+
+@param points1 Array of feature points in the first image.
+@param points2 The corresponding points in the second image. The same formats as in
+findFundamentalMat are supported.
+@param F Input fundamental matrix. It can be computed from the same set of point pairs using
+findFundamentalMat .
+@param imgSize Size of the image.
+@param H1 Output rectification homography matrix for the first image.
+@param H2 Output rectification homography matrix for the second image.
+@param threshold Optional threshold used to filter out the outliers. If the parameter is greater
+than zero, all the point pairs that do not comply with the epipolar geometry (that is, the points
+for which \f$|\texttt{points2[i]}^T*\texttt{F}*\texttt{points1[i]}|>\texttt{threshold}\f$ ) are
+rejected prior to computing the homographies. Otherwise, all the points are considered inliers.
+
+The function computes the rectification transformations without knowing intrinsic parameters of the
+cameras and their relative position in the space, which explains the suffix "uncalibrated". Another
+related difference from stereoRectify is that the function outputs not the rectification
+transformations in the object (3D) space, but the planar perspective transformations encoded by the
+homography matrices H1 and H2 . The function implements the algorithm @cite Hartley99 .
+
+@note
+   While the algorithm does not need to know the intrinsic parameters of the cameras, it heavily
+    depends on the epipolar geometry. Therefore, if the camera lenses have a significant distortion,
+    it would be better to correct it before computing the fundamental matrix and calling this
+    function. For example, distortion coefficients can be estimated for each head of a stereo camera
+    separately by using calibrateCamera . Then, the images can be corrected using undistort , or
+    just the point coordinates can be corrected with undistortPoints .
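+
+A minimal sketch, assuming matched point sets points1/points2 and the two images are available:
+@code
+    Mat F = findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99);
+    Mat H1, H2;
+    if( stereoRectifyUncalibrated(points1, points2, F, img1.size(), H1, H2) )
+    {
+        Mat rect1, rect2;
+        warpPerspective(img1, rect1, H1, img1.size());
+        warpPerspective(img2, rect2, H2, img2.size());
+    }
+@endcode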
+ */
+CV_EXPORTS_W bool stereoRectifyUncalibrated( InputArray points1, InputArray points2,
+                                             InputArray F, Size imgSize,
+                                             OutputArray H1, OutputArray H2,
+                                             double threshold = 5 );
+
+//! computes the rectification transformations for 3-head camera, where all the heads are on the same line.
+CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distCoeffs1,
+                                      InputArray cameraMatrix2, InputArray distCoeffs2,
+                                      InputArray cameraMatrix3, InputArray distCoeffs3,
+                                      InputArrayOfArrays imgpt1, InputArrayOfArrays imgpt3,
+                                      Size imageSize, InputArray R12, InputArray T12,
+                                      InputArray R13, InputArray T13,
+                                      OutputArray R1, OutputArray R2, OutputArray R3,
+                                      OutputArray P1, OutputArray P2, OutputArray P3,
+                                      OutputArray Q, double alpha, Size newImgSize,
+                                      CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags );
+
+/** @brief Returns the new camera matrix based on the free scaling parameter.
+
+@param cameraMatrix Input camera matrix.
+@param distCoeffs Input vector of distortion coefficients
+\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
+4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
+assumed.
+@param imageSize Original image size.
+@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are
+valid) and 1 (when all the source image pixels are retained in the undistorted image). See
+stereoRectify for details.
+@param newImgSize Image size after rectification. By default, it is set to imageSize .
+@param validPixROI Optional output rectangle that outlines all-good-pixels region in the
+undistorted image. See roi1, roi2 description in stereoRectify .
+@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the
+principal point should be at the image center or not. By default, the principal point is chosen to
+best fit a subset of the source image (determined by alpha) to the corrected image.
+@return new_camera_matrix Output new camera matrix.
+
+The function computes and returns the optimal new camera matrix based on the free scaling parameter.
+By varying this parameter, you may retrieve only sensible pixels (alpha=0), keep all the original
+image pixels if there is valuable information in the corners (alpha=1), or get something in between.
+When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to
+"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion
+coefficients, the computed new camera matrix, and newImageSize should be passed to
+initUndistortRectifyMap to produce the maps for remap .
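+
+A typical undistortion sketch (cameraMatrix, distCoeffs and the distorted image are assumed to
+be available, e.g. from calibrateCamera and a capture device):
+@code
+    Rect validROI;
+    Mat newCameraMatrix = getOptimalNewCameraMatrix(cameraMatrix, distCoeffs,
+                                                    imageSize, 1, imageSize, &validROI);
+    Mat map1, map2;
+    initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(), newCameraMatrix,
+                            imageSize, CV_16SC2, map1, map2);
+    remap(distorted, undistorted, map1, map2, INTER_LINEAR);
+@endcode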
+ */
+CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs,
+                                            Size imageSize, double alpha, Size newImgSize = Size(),
+                                            CV_OUT Rect* validPixROI = 0,
+                                            bool centerPrincipalPoint = false);
+
+/** @brief Converts points from Euclidean to homogeneous space.
+
+@param src Input vector of N-dimensional points.
+@param dst Output vector of N+1-dimensional points.
+
+The function converts points from Euclidean to homogeneous space by appending 1's to the tuple of
+point coordinates. That is, each point (x1, x2, ..., xn) is converted to (x1, x2, ..., xn, 1).
+ */
+CV_EXPORTS_W void convertPointsToHomogeneous( InputArray src, OutputArray dst );
+
+/** @brief Converts points from homogeneous to Euclidean space.
+
+@param src Input vector of N-dimensional points.
+@param dst Output vector of N-1-dimensional points.
+
+The function converts points from homogeneous to Euclidean space using perspective projection. That is,
+each point (x1, x2, ... x(n-1), xn) is converted to (x1/xn, x2/xn, ..., x(n-1)/xn). When xn=0, the
+output point coordinates will be (0,0,0,...).
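+
+A small round-trip sketch using this function together with convertPointsToHomogeneous:
+@code
+    vector<Point2f> pts2d;
+    pts2d.push_back(Point2f(1.f, 2.f));
+    pts2d.push_back(Point2f(3.f, 4.f));
+    Mat pts3d;                                   // each point becomes (x, y, 1)
+    convertPointsToHomogeneous(pts2d, pts3d);
+    Mat ptsBack;                                 // perspective division restores (x, y)
+    convertPointsFromHomogeneous(pts3d, ptsBack);
+@endcode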
+ */
+CV_EXPORTS_W void convertPointsFromHomogeneous( InputArray src, OutputArray dst );
+
+/** @brief Converts points to/from homogeneous coordinates.
+
+@param src Input array or vector of 2D, 3D, or 4D points.
+@param dst Output vector of 2D, 3D, or 4D points.
+
+The function converts 2D or 3D points from/to homogeneous coordinates by calling either
+convertPointsToHomogeneous or convertPointsFromHomogeneous.
+
+@note The function is obsolete. Use one of the previous two functions instead.
+ */
+CV_EXPORTS void convertPointsHomogeneous( InputArray src, OutputArray dst );
+
+/** @brief Calculates a fundamental matrix from the corresponding points in two images.
+
+@param points1 Array of N points from the first image. The point coordinates should be
+floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 .
+@param method Method for computing a fundamental matrix.
+-   **CV_FM_7POINT** for a 7-point algorithm. \f$N = 7\f$
+-   **CV_FM_8POINT** for an 8-point algorithm. \f$N \ge 8\f$
+-   **CV_FM_RANSAC** for the RANSAC algorithm. \f$N \ge 8\f$
+-   **CV_FM_LMEDS** for the LMedS algorithm. \f$N \ge 8\f$
+@param ransacReprojThreshold Parameter used only for RANSAC. It is the maximum distance from a point to an epipolar
+line in pixels, beyond which the point is considered an outlier and is not used for computing the
+final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
+point localization, image resolution, and the image noise.
+@param confidence Parameter used for the RANSAC and LMedS methods only. It specifies a desirable level
+of confidence (probability) that the estimated matrix is correct.
+@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1
+for the other points. The array is computed only in the RANSAC and LMedS methods.
+
+The epipolar geometry is described by the following equation:
+
+\f[[p_2; 1]^T F [p_1; 1] = 0\f]
+
+where \f$F\f$ is a fundamental matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the
+second images, respectively.
+
+The function calculates the fundamental matrix using one of four methods listed above and returns
+the found fundamental matrix. Normally just one matrix is found. But in case of the 7-point
+algorithm, the function may return up to 3 solutions ( \f$9 \times 3\f$ matrix that stores all 3
+matrices sequentially).
+
+The calculated fundamental matrix may be passed further to computeCorrespondEpilines that finds the
+epipolar lines corresponding to the specified points. It can also be passed to
+stereoRectifyUncalibrated to compute the rectification transformation. :
+@code
+    // Example. Estimation of fundamental matrix using the RANSAC algorithm
+    int point_count = 100;
+    vector<Point2f> points1(point_count);
+    vector<Point2f> points2(point_count);
+
+    // initialize the points here ...
+    for( int i = 0; i < point_count; i++ )
+    {
+        points1[i] = ...;
+        points2[i] = ...;
+    }
+
+    Mat fundamental_matrix =
+     findFundamentalMat(points1, points2, FM_RANSAC, 3, 0.99);
+@endcode
+ */
+CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
+                                     int method = FM_RANSAC,
+                                     double ransacReprojThreshold = 3., double confidence = 0.99,
+                                     OutputArray mask = noArray() );
+
+/** @overload */
+CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2,
+                                   OutputArray mask, int method = FM_RANSAC,
+                                   double ransacReprojThreshold = 3., double confidence = 0.99 );
+
+/** @brief Calculates an essential matrix from the corresponding points in two images.
+
+@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should
+be floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 .
+@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+Note that this function assumes that points1 and points2 are feature points from cameras with the
+same camera matrix.
+@param method Method for computing an essential matrix.
+-   **RANSAC** for the RANSAC algorithm.
+-   **LMEDS** for the LMedS algorithm.
+@param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of
+confidence (probability) that the estimated matrix is correct.
+@param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
+line in pixels, beyond which the point is considered an outlier and is not used for computing the
+final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
+point localization, image resolution, and the image noise.
+@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1
+for the other points. The array is computed only in the RANSAC and LMedS methods.
+
+This function estimates the essential matrix based on the five-point algorithm solver in @cite Nister03 .
+@cite SteweniusCFS is also a related work. The epipolar geometry is described by the following equation:
+
+\f[[p_2; 1]^T K^{-T} E K^{-1} [p_1; 1] = 0\f]
+
+where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding points in the first and the
+second images, respectively. The result of this function may be passed further to
+decomposeEssentialMat or recoverPose to recover the relative pose between cameras.
+ */
+CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
+                                 InputArray cameraMatrix, int method = RANSAC,
+                                 double prob = 0.999, double threshold = 1.0,
+                                 OutputArray mask = noArray() );
+
+/** @overload
+@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should
+be floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 .
+@param focal focal length of the camera. Note that this function assumes that points1 and points2
+are feature points from cameras with same focal length and principal point.
+@param pp principal point of the camera.
+@param method Method for computing a fundamental matrix.
+-   **RANSAC** for the RANSAC algorithm.
+-   **LMEDS** for the LMedS algorithm.
+@param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar
+line in pixels, beyond which the point is considered an outlier and is not used for computing the
+final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the
+point localization, image resolution, and the image noise.
+@param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of
+confidence (probability) that the estimated matrix is correct.
+@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1
+for the other points. The array is computed only in the RANSAC and LMedS methods.
+
+This function differs from the one above in that it computes the camera matrix from the focal length
+and principal point:
+
+\f[K =
+\begin{bmatrix}
+f & 0 & x_{pp}  \\
+0 & f & y_{pp}  \\
+0 & 0 & 1
+\end{bmatrix}\f]
+ */
+CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2,
+                                 double focal = 1.0, Point2d pp = Point2d(0, 0),
+                                 int method = RANSAC, double prob = 0.999,
+                                 double threshold = 1.0, OutputArray mask = noArray() );
+
+/** @brief Decompose an essential matrix to possible rotations and translation.
+
+@param E The input essential matrix.
+@param R1 One possible rotation matrix.
+@param R2 Another possible rotation matrix.
+@param t One possible translation.
+
+This function decomposes an essential matrix E using SVD @cite HartleyZ00 . In general, four
+possible poses exist for a given E: \f$[R_1, t]\f$, \f$[R_1, -t]\f$, \f$[R_2, t]\f$, \f$[R_2, -t]\f$. By
+decomposing E, you can only get the direction of the translation, so the function returns t with unit norm.
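+
+A minimal sketch (E is assumed to come from findEssentialMat); the four candidate poses can be
+disambiguated with the cheirality check performed by recoverPose:
+@code
+    Mat R1, R2, t;
+    decomposeEssentialMat(E, R1, R2, t);
+    // candidate poses: [R1, t], [R1, -t], [R2, t], [R2, -t]
+@endcode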
+ */
+CV_EXPORTS_W void decomposeEssentialMat( InputArray E, OutputArray R1, OutputArray R2, OutputArray t );
+
+/** @brief Recovers the relative camera rotation and translation from an estimated essential matrix and the
+corresponding points in two images, using the cheirality check. Returns the number of inliers that pass
+the check.
+
+@param E The input essential matrix.
+@param points1 Array of N 2D points from the first image. The point coordinates should be
+floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 .
+@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+Note that this function assumes that points1 and points2 are feature points from cameras with the
+same camera matrix.
+@param R Recovered relative rotation.
+@param t Recovered relative translation.
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover the pose. In the output mask only the inliers which pass the cheirality check are retained.
+
+This function decomposes an essential matrix using decomposeEssentialMat and then verifies possible
+pose hypotheses by doing cheirality check. The cheirality check basically means that the
+triangulated 3D points should have positive depth. Some details can be found in @cite Nister03 .
+
+This function can be used to process output E and mask from findEssentialMat. In this scenario,
+points1 and points2 are the same input for findEssentialMat. :
+@code
+    // Example. Estimation of the essential matrix and pose recovery using the RANSAC algorithm
+    int point_count = 100;
+    vector<Point2f> points1(point_count);
+    vector<Point2f> points2(point_count);
+
+    // initialize the points here ...
+    for( int i = 0; i < point_count; i++ )
+    {
+        points1[i] = ...;
+        points2[i] = ...;
+    }
+
+    // camera matrix with both focal lengths = 1 and principal point = (0, 0)
+    Mat cameraMatrix = Mat::eye(3, 3, CV_64F);
+
+    Mat E, R, t, mask;
+
+    E = findEssentialMat(points1, points2, cameraMatrix, RANSAC, 0.999, 1.0, mask);
+    recoverPose(E, points1, points2, cameraMatrix, R, t, mask);
+@endcode
+ */
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
+                            InputArray cameraMatrix, OutputArray R, OutputArray t,
+                            InputOutputArray mask = noArray() );
+
+/** @overload
+@param E The input essential matrix.
+@param points1 Array of N 2D points from the first image. The point coordinates should be
+floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1 .
+@param R Recovered relative rotation.
+@param t Recovered relative translation.
+@param focal Focal length of the camera. Note that this function assumes that points1 and points2
+are feature points from cameras with same focal length and principal point.
+@param pp principal point of the camera.
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover the pose. In the output mask only the inliers which pass the cheirality check are retained.
+
+This function differs from the one above in that it computes the camera matrix from the focal length
+and principal point:
+
+\f[K =
+\begin{bmatrix}
+f & 0 & x_{pp}  \\
+0 & f & y_{pp}  \\
+0 & 0 & 1
+\end{bmatrix}\f]
+ */
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
+                            OutputArray R, OutputArray t,
+                            double focal = 1.0, Point2d pp = Point2d(0, 0),
+                            InputOutputArray mask = noArray() );
+
+/** @overload
+@param E The input essential matrix.
+@param points1 Array of N 2D points from the first image. The point coordinates should be
+floating-point (single or double precision).
+@param points2 Array of the second image points of the same size and format as points1.
+@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
+Note that this function assumes that points1 and points2 are feature points from cameras with the
+same camera matrix.
+@param R Recovered relative rotation.
+@param t Recovered relative translation.
+@param distanceThresh threshold distance which is used to filter out far away points (i.e. infinite points).
+@param mask Input/output mask for inliers in points1 and points2. If it is not empty, then it marks
+inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
+recover the pose. In the output mask only the inliers which pass the cheirality check are retained.
+@param triangulatedPoints 3D points which were reconstructed by triangulation.
+ */
+
+CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray points2,
+                            InputArray cameraMatrix, OutputArray R, OutputArray t, double distanceThresh, InputOutputArray mask = noArray(),
+                            OutputArray triangulatedPoints = noArray());
+
+/** @brief For points in an image of a stereo pair, computes the corresponding epilines in the other image.
+
+@param points Input points. \f$N \times 1\f$ or \f$1 \times N\f$ matrix of type CV_32FC2 or
+vector\<Point2f\> .
+@param whichImage Index of the image (1 or 2) that contains the points .
+@param F Fundamental matrix that can be estimated using findFundamentalMat or stereoRectify .
+@param lines Output vector of the epipolar lines corresponding to the points in the other image.
+Each line \f$ax + by + c=0\f$ is encoded by 3 numbers \f$(a, b, c)\f$ .
+
+For every point in one of the two images of a stereo pair, the function finds the equation of the
+corresponding epipolar line in the other image.
+
+From the fundamental matrix definition (see findFundamentalMat ), line \f$l^{(2)}_i\f$ in the second
+image for the point \f$p^{(1)}_i\f$ in the first image (when whichImage=1 ) is computed as:
+
+\f[l^{(2)}_i = F p^{(1)}_i\f]
+
+And vice versa, when whichImage=2, \f$l^{(1)}_i\f$ is computed from \f$p^{(2)}_i\f$ as:
+
+\f[l^{(1)}_i = F^T p^{(2)}_i\f]
+
+Line coefficients are defined up to a scale. They are normalized so that \f$a_i^2+b_i^2=1\f$ .
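+
+A minimal sketch, assuming points from the first image and a fundamental matrix F are available:
+@code
+    vector<Vec3f> lines2; // epilines in the second image, one (a, b, c) triple per point
+    computeCorrespondEpilines(points1, 1, F, lines2);
+@endcode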
+ */
+CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, int whichImage,
+                                             InputArray F, OutputArray lines );
+
+/** @brief Reconstructs points by triangulation.
+
+@param projMatr1 3x4 projection matrix of the first camera.
+@param projMatr2 3x4 projection matrix of the second camera.
+@param projPoints1 2xN array of feature points in the first image. In case of c++ version it can
+be also a vector of feature points or two-channel matrix of size 1xN or Nx1.
+@param projPoints2 2xN array of corresponding points in the second image. In case of c++ version
+it can be also a vector of feature points or two-channel matrix of size 1xN or Nx1.
+@param points4D 4xN array of reconstructed points in homogeneous coordinates.
+
+The function reconstructs 3-dimensional points (in homogeneous coordinates) by using their
+observations with a stereo camera. Projection matrices can be obtained from stereoRectify.
+
+@note
+   Keep in mind that all input data should be of float type in order for this function to work.
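+
+A minimal sketch (P1/P2 are assumed to come from stereoRectify, and projPoints1/projPoints2 are
+matched float-type image points):
+@code
+    Mat points4D;
+    triangulatePoints(P1, P2, projPoints1, projPoints2, points4D); // 4xN, homogeneous
+    Mat points3D;
+    convertPointsFromHomogeneous(points4D.t(), points3D);          // Nx1, 3 channels
+@endcode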
+
+@sa
+   reprojectImageTo3D
+ */
+CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2,
+                                     InputArray projPoints1, InputArray projPoints2,
+                                     OutputArray points4D );
+
+/** @brief Refines coordinates of corresponding points.
+
+@param F 3x3 fundamental matrix.
+@param points1 1xN array containing the first set of points.
+@param points2 1xN array containing the second set of points.
+@param newPoints1 The optimized points1.
+@param newPoints2 The optimized points2.
+
+The function implements the Optimal Triangulation Method (see Multiple View Geometry for details).
+For each given point correspondence points1[i] \<-\> points2[i], and a fundamental matrix F, it
+computes the corrected correspondences newPoints1[i] \<-\> newPoints2[i] that minimize the geometric
+error \f$d(points1[i], newPoints1[i])^2 + d(points2[i],newPoints2[i])^2\f$ (where \f$d(a,b)\f$ is the
+geometric distance between points \f$a\f$ and \f$b\f$ ) subject to the epipolar constraint
+\f$newPoints2^T * F * newPoints1 = 0\f$ .
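+
+A minimal sketch (F and the matched 1xN point arrays are assumed to be available):
+@code
+    Mat newPoints1, newPoints2;
+    correctMatches(F, points1, points2, newPoints1, newPoints2);
+@endcode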
+ */
+CV_EXPORTS_W void correctMatches( InputArray F, InputArray points1, InputArray points2,
+                                  OutputArray newPoints1, OutputArray newPoints2 );
+
+/** @brief Filters off small noise blobs (speckles) in the disparity map
+
+@param img The input 16-bit signed disparity image
+@param newVal The disparity value used to paint-off the speckles
+@param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not
+affected by the algorithm
+@param maxDiff Maximum difference between neighbor disparity pixels to put them into the same
+blob. Note that since StereoBM, StereoSGBM and possibly other algorithms return a fixed-point
+disparity map, where disparity values are multiplied by 16, this scale factor should be taken into
+account when specifying this parameter value.
+@param buf The optional temporary buffer to avoid memory allocation within the function.
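+
+A minimal sketch for a fixed-point disparity map produced by StereoBM or StereoSGBM (the values
+are scaled by 16, so the thresholds below are scaled accordingly):
+@code
+    // disp is CV_16S; remove blobs up to 200 pixels whose internal variation is up to 2 disparities
+    filterSpeckles(disp, 0, 200, 2 * 16);
+@endcode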
+ */
+CV_EXPORTS_W void filterSpeckles( InputOutputArray img, double newVal,
+                                  int maxSpeckleSize, double maxDiff,
+                                  InputOutputArray buf = noArray() );
+
+//! computes valid disparity ROI from the valid ROIs of the rectified images (that are returned by cv::stereoRectify())
+CV_EXPORTS_W Rect getValidDisparityROI( Rect roi1, Rect roi2,
+                                        int minDisparity, int numberOfDisparities,
+                                        int SADWindowSize );
+
+//! validates disparity using the left-right check. The matrix "cost" should be computed by the stereo correspondence algorithm
+CV_EXPORTS_W void validateDisparity( InputOutputArray disparity, InputArray cost,
+                                     int minDisparity, int numberOfDisparities,
+                                     int disp12MaxDisp = 1 );
+
+/** @brief Reprojects a disparity image to 3D space.
+
+@param disparity Input single-channel 8-bit unsigned, 16-bit signed, 32-bit signed or 32-bit
+floating-point disparity image. If 16-bit signed format is used, the values are assumed to have no
+fractional bits.
+@param _3dImage Output 3-channel floating-point image of the same size as disparity . Each
+element of _3dImage(x,y) contains 3D coordinates of the point (x,y) computed from the disparity
+map.
+@param Q \f$4 \times 4\f$ perspective transformation matrix that can be obtained with stereoRectify.
+@param handleMissingValues Indicates whether the function should handle missing values (i.e.
+points where the disparity was not computed). If handleMissingValues=true, then pixels with the
+minimal disparity that corresponds to the outliers (see StereoMatcher::compute ) are transformed
+to 3D points with a very large Z value (currently set to 10000).
+@param ddepth The optional output array depth. If it is -1, the output image will have CV_32F
+depth. ddepth can also be set to CV_16S, CV_32S or CV_32F.
+
+The function transforms a single-channel disparity map to a 3-channel image representing a 3D
+surface. That is, for each pixel (x,y) and the corresponding disparity d=disparity(x,y) , it
+computes:
+
+\f[\begin{array}{l} [X \; Y \; Z \; W]^T =  \texttt{Q} *[x \; y \; \texttt{disparity} (x,y) \; 1]^T  \\ \texttt{\_3dImage} (x,y) = (X/W, \; Y/W, \; Z/W) \end{array}\f]
+
+The matrix Q can be an arbitrary \f$4 \times 4\f$ matrix (for example, the one computed by
+stereoRectify). To reproject a sparse set of points {(x,y,d),...} to 3D space, use
+perspectiveTransform .
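+
+A minimal sketch (the disparity map from a StereoMatcher and Q from stereoRectify are assumed):
+@code
+    Mat disp32f, xyz;
+    disp.convertTo(disp32f, CV_32F, 1.0/16); // undo the 4 fractional bits of StereoBM/StereoSGBM
+    reprojectImageTo3D(disp32f, xyz, Q, true);
+@endcode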
+ */
+CV_EXPORTS_W void reprojectImageTo3D( InputArray disparity,
+                                      OutputArray _3dImage, InputArray Q,
+                                      bool handleMissingValues = false,
+                                      int ddepth = -1 );
+
+/** @brief Calculates the Sampson Distance between two points.
+
+The function cv::sampsonDistance calculates and returns the first order approximation of the geometric error as:
+\f[
+sd( \texttt{pt1} , \texttt{pt2} )=
+\frac{(\texttt{pt2}^t \cdot \texttt{F} \cdot \texttt{pt1})^2}
+{((\texttt{F} \cdot \texttt{pt1})(0))^2 +
+((\texttt{F} \cdot \texttt{pt1})(1))^2 +
+((\texttt{F}^t \cdot \texttt{pt2})(0))^2 +
+((\texttt{F}^t \cdot \texttt{pt2})(1))^2}
+\f]
+The fundamental matrix may be calculated using the cv::findFundamentalMat function. See @cite HartleyZ00 11.4.3 for details.
+@param pt1 first homogeneous 2d point
+@param pt2 second homogeneous 2d point
+@param F fundamental matrix
+@return The computed Sampson distance.
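+
+A minimal sketch with homogeneous 2D points (F is assumed to come from findFundamentalMat):
+@code
+    Mat p1 = (Mat_<double>(3,1) << 100, 50, 1);
+    Mat p2 = (Mat_<double>(3,1) << 102, 50, 1);
+    double err = sampsonDistance(p1, p2, F);
+@endcode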
+*/
+CV_EXPORTS_W double sampsonDistance(InputArray pt1, InputArray pt2, InputArray F);
+
+/** @brief Computes an optimal affine transformation between two 3D point sets.
+
+It computes
+\f[
+\begin{bmatrix}
+x\\
+y\\
+z\\
+\end{bmatrix}
+=
+\begin{bmatrix}
+a_{11} & a_{12} & a_{13}\\
+a_{21} & a_{22} & a_{23}\\
+a_{31} & a_{32} & a_{33}\\
+\end{bmatrix}
+\begin{bmatrix}
+X\\
+Y\\
+Z\\
+\end{bmatrix}
++
+\begin{bmatrix}
+b_1\\
+b_2\\
+b_3\\
+\end{bmatrix}
+\f]
+
+@param src First input 3D point set containing \f$(X,Y,Z)\f$.
+@param dst Second input 3D point set containing \f$(x,y,z)\f$.
+@param out Output 3D affine transformation matrix \f$3 \times 4\f$ of the form
+\f[
+\begin{bmatrix}
+a_{11} & a_{12} & a_{13} & b_1\\
+a_{21} & a_{22} & a_{23} & b_2\\
+a_{31} & a_{32} & a_{33} & b_3\\
+\end{bmatrix}
+\f]
+@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
+@param ransacThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as
+an inlier.
+@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+
+The function estimates an optimal 3D affine transformation between two 3D point sets using the
+RANSAC algorithm.
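+
+A minimal sketch, assuming two matched 3D point sets are available:
+@code
+    vector<Point3f> src, dst;  // assumed to be filled with corresponding 3D points
+    Mat affine;                // 3x4 [A|b]
+    vector<uchar> inliers;
+    estimateAffine3D(src, dst, affine, inliers);
+@endcode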
+ */
+CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst,
+                                   OutputArray out, OutputArray inliers,
+                                   double ransacThreshold = 3, double confidence = 0.99);
+
+/** @brief Computes an optimal affine transformation between two 2D point sets.
+
+It computes
+\f[
+\begin{bmatrix}
+x\\
+y\\
+\end{bmatrix}
+=
+\begin{bmatrix}
+a_{11} & a_{12}\\
+a_{21} & a_{22}\\
+\end{bmatrix}
+\begin{bmatrix}
+X\\
+Y\\
+\end{bmatrix}
++
+\begin{bmatrix}
+b_1\\
+b_2\\
+\end{bmatrix}
+\f]
+
+@param from First input 2D point set containing \f$(X,Y)\f$.
+@param to Second input 2D point set containing \f$(x,y)\f$.
+@param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier).
+@param method Robust method used to compute transformation. The following methods are possible:
+-   cv::RANSAC - RANSAC-based robust method
+-   cv::LMEDS - Least-Median robust method
+RANSAC is the default method.
+@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
+a point as an inlier. Applies only to RANSAC.
+@param maxIters The maximum number of robust method iterations.
+@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+@param refineIters Maximum number of iterations of the refining algorithm (Levenberg-Marquardt).
+Passing 0 will disable refining, so the output matrix will be the output of the robust method.
+
+@return Output 2D affine transformation matrix \f$2 \times 3\f$ or empty matrix if transformation
+could not be estimated. The returned matrix has the following form:
+\f[
+\begin{bmatrix}
+a_{11} & a_{12} & b_1\\
+a_{21} & a_{22} & b_2\\
+\end{bmatrix}
+\f]
+
+The function estimates an optimal 2D affine transformation between two 2D point sets using the
+selected robust algorithm.
+
+The computed transformation is then refined further (using only inliers) with the
+Levenberg-Marquardt method to reduce the re-projection error even more.
+
+@note
+The RANSAC method can handle practically any ratio of outliers but needs a threshold to
+distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
+correctly only when there are more than 50% of inliers.
+
+@sa estimateAffinePartial2D, getAffineTransform
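+
+A minimal sketch, assuming two matched 2D point sets are available:
+@code
+    vector<Point2f> from, to;  // assumed to be filled with matched points
+    vector<uchar> inliers;
+    Mat A = estimateAffine2D(from, to, inliers);
+    if( !A.empty() )
+    {
+        // A is 2x3; apply it with cv::transform or cv::warpAffine
+    }
+@endcode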
+*/
+CV_EXPORTS_W cv::Mat estimateAffine2D(InputArray from, InputArray to, OutputArray inliers = noArray(),
+                                  int method = RANSAC, double ransacReprojThreshold = 3,
+                                  size_t maxIters = 2000, double confidence = 0.99,
+                                  size_t refineIters = 10);
+
+/** @brief Computes an optimal limited affine transformation with 4 degrees of freedom between
+two 2D point sets.
+
+@param from First input 2D point set.
+@param to Second input 2D point set.
+@param inliers Output vector indicating which points are inliers.
+@param method Robust method used to compute transformation. The following methods are possible:
+-   cv::RANSAC - RANSAC-based robust method
+-   cv::LMEDS - Least-Median robust method
+RANSAC is the default method.
+@param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider
+a point as an inlier. Applies only to RANSAC.
+@param maxIters The maximum number of robust method iterations.
+@param confidence Confidence level, between 0 and 1, for the estimated transformation. Anything
+between 0.95 and 0.99 is usually good enough. Values too close to 1 can slow down the estimation
+significantly. Values lower than 0.8-0.9 can result in an incorrectly estimated transformation.
+@param refineIters Maximum number of iterations of the refining algorithm (Levenberg-Marquardt).
+Passing 0 will disable refining, so the output matrix will be the output of the robust method.
+
+@return Output 2D affine transformation (4 degrees of freedom) matrix \f$2 \times 3\f$ or
+empty matrix if transformation could not be estimated.
+
+The function estimates an optimal 2D affine transformation with 4 degrees of freedom limited to
+combinations of translation, rotation, and uniform scaling. Uses the selected algorithm for robust
+estimation.
+
+The computed transformation is then refined further (using only inliers) with the
+Levenberg-Marquardt method to reduce the re-projection error even more.
+
+Estimated transformation matrix is:
+\f[ \begin{bmatrix} \cos(\theta) \cdot s & -\sin(\theta) \cdot s & t_x \\
+                \sin(\theta) \cdot s & \cos(\theta) \cdot s & t_y
+\end{bmatrix} \f]
+Where \f$ \theta \f$ is the rotation angle, \f$ s \f$ the scaling factor and \f$ t_x, t_y \f$ are
+translations in \f$ x, y \f$ axes respectively.
+
+@note
+The RANSAC method can handle practically any ratio of outliers but needs a threshold to
+distinguish inliers from outliers. The method LMeDS does not need any threshold but it works
+correctly only when there are more than 50% of inliers.
+
+@sa estimateAffine2D, getAffineTransform
+*/
+CV_EXPORTS_W cv::Mat estimateAffinePartial2D(InputArray from, InputArray to, OutputArray inliers = noArray(),
+                                  int method = RANSAC, double ransacReprojThreshold = 3,
+                                  size_t maxIters = 2000, double confidence = 0.99,
+                                  size_t refineIters = 10);
+
+/** @example decompose_homography.cpp
+  An example program with homography decomposition.
+
+  Check @ref tutorial_homography "the corresponding tutorial" for more details.
+ */
+
+/** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s).
+
+@param H The input homography matrix between two images.
+@param K The input intrinsic camera calibration matrix.
+@param rotations Array of rotation matrices.
+@param translations Array of translation matrices.
+@param normals Array of plane normal matrices.
+
+This function extracts relative camera motion between two views observing a planar object from the
+homography H induced by the plane. The intrinsic camera matrix K must also be provided. The function
+may return up to four mathematical solution sets. If point correspondences are available, at least
+two of the solutions may further be invalidated by applying the positive depth constraint (all
+points must be in front of the camera). The decomposition method is described in detail in @cite Malis .
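+
+A minimal sketch (H from findHomography and the intrinsic matrix K are assumed to be available):
+@code
+    vector<Mat> rotations, translations, normals;
+    int nsols = decomposeHomographyMat(H, K, rotations, translations, normals);
+@endcode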
+ */
+CV_EXPORTS_W int decomposeHomographyMat(InputArray H,
+                                        InputArray K,
+                                        OutputArrayOfArrays rotations,
+                                        OutputArrayOfArrays translations,
+                                        OutputArrayOfArrays normals);
+
+/** @brief Filters homography decompositions based on additional information.
+
+@param rotations Vector of rotation matrices.
+@param normals Vector of plane normal matrices.
+@param beforePoints Vector of (rectified) visible reference points before the homography is applied
+@param afterPoints Vector of (rectified) visible reference points after the homography is applied
+@param possibleSolutions Vector of int indices representing the viable solution set after filtering
+@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the findHomography function
+
+This function is intended to filter the output of the decomposeHomographyMat based on additional
+information as described in @cite Malis . The summary of the method: the decomposeHomographyMat function
+returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the
+sets of points visible in the camera frame before and after the homography transformation is applied,
+we can determine which are the true potential solutions and which are the opposites by verifying which
+homographies are consistent with all visible reference points being in front of the camera. The inputs
+are left unchanged; the filtered solution set is returned as indices into the existing one.
+
+*/
+CV_EXPORTS_W void filterHomographyDecompByVisibleRefpoints(InputArrayOfArrays rotations,
+                                                           InputArrayOfArrays normals,
+                                                           InputArray beforePoints,
+                                                           InputArray afterPoints,
+                                                           OutputArray possibleSolutions,
+                                                           InputArray pointsMask = noArray());
+
+/** @brief The base class for stereo correspondence algorithms.
+ */
+class CV_EXPORTS_W StereoMatcher : public Algorithm
+{
+public:
+    enum { DISP_SHIFT = 4,
+           DISP_SCALE = (1 << DISP_SHIFT)
+         };
+
+    /** @brief Computes disparity map for the specified stereo pair
+
+    @param left Left 8-bit single-channel image.
+    @param right Right image of the same size and the same type as the left one.
+    @param disparity Output disparity map. It has the same size as the input images. Some algorithms,
+    like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value
+    has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map.
+     */
+    CV_WRAP virtual void compute( InputArray left, InputArray right,
+                                  OutputArray disparity ) = 0;
+
+    CV_WRAP virtual int getMinDisparity() const = 0;
+    CV_WRAP virtual void setMinDisparity(int minDisparity) = 0;
+
+    CV_WRAP virtual int getNumDisparities() const = 0;
+    CV_WRAP virtual void setNumDisparities(int numDisparities) = 0;
+
+    CV_WRAP virtual int getBlockSize() const = 0;
+    CV_WRAP virtual void setBlockSize(int blockSize) = 0;
+
+    CV_WRAP virtual int getSpeckleWindowSize() const = 0;
+    CV_WRAP virtual void setSpeckleWindowSize(int speckleWindowSize) = 0;
+
+    CV_WRAP virtual int getSpeckleRange() const = 0;
+    CV_WRAP virtual void setSpeckleRange(int speckleRange) = 0;
+
+    CV_WRAP virtual int getDisp12MaxDiff() const = 0;
+    CV_WRAP virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0;
+};
+
+
+/** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and
+contributed to OpenCV by K. Konolige.
+ */
+class CV_EXPORTS_W StereoBM : public StereoMatcher
+{
+public:
+    enum { PREFILTER_NORMALIZED_RESPONSE = 0,
+           PREFILTER_XSOBEL              = 1
+         };
+
+    CV_WRAP virtual int getPreFilterType() const = 0;
+    CV_WRAP virtual void setPreFilterType(int preFilterType) = 0;
+
+    CV_WRAP virtual int getPreFilterSize() const = 0;
+    CV_WRAP virtual void setPreFilterSize(int preFilterSize) = 0;
+
+    CV_WRAP virtual int getPreFilterCap() const = 0;
+    CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0;
+
+    CV_WRAP virtual int getTextureThreshold() const = 0;
+    CV_WRAP virtual void setTextureThreshold(int textureThreshold) = 0;
+
+    CV_WRAP virtual int getUniquenessRatio() const = 0;
+    CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0;
+
+    CV_WRAP virtual int getSmallerBlockSize() const = 0;
+    CV_WRAP virtual void setSmallerBlockSize(int blockSize) = 0;
+
+    CV_WRAP virtual Rect getROI1() const = 0;
+    CV_WRAP virtual void setROI1(Rect roi1) = 0;
+
+    CV_WRAP virtual Rect getROI2() const = 0;
+    CV_WRAP virtual void setROI2(Rect roi2) = 0;
+
+    /** @brief Creates StereoBM object
+
+    @param numDisparities the disparity search range. For each pixel algorithm will find the best
+    disparity from 0 (default minimum disparity) to numDisparities. The search range can then be
+    shifted by changing the minimum disparity.
+    @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd
+    (as the block is centered at the current pixel). Larger block size implies smoother, though less
+    accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher
+    chance for algorithm to find a wrong correspondence.
+
+    The function creates a StereoBM object. You can then call StereoBM::compute() to compute the
+    disparity for a specific stereo pair.
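+
+    A minimal usage sketch (left/right are assumed to be rectified 8-bit grayscale images):
+    @code
+        Ptr<StereoBM> bm = StereoBM::create(64, 15);
+        Mat disp;
+        bm->compute(left, right, disp); // CV_16S, disparity values scaled by 16
+    @endcode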
+     */
+    CV_WRAP static Ptr<StereoBM> create(int numDisparities = 0, int blockSize = 21);
+};
+
+/** @brief The class implements the modified H. Hirschmuller algorithm @cite HH08 that differs from the original
+one as follows:
+
+-   By default, the algorithm is single-pass, which means that only 5 directions are considered
+instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the
+algorithm but beware that it may consume a lot of memory.
+-   The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the
+blocks to single pixels.
+-   Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi
+sub-pixel metric from @cite BT98 is used; color images are supported as well.
+-   Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for
+example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness
+check, quadratic interpolation and speckle filtering).
+
+@note
+   -   (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found
+        at opencv_source_code/samples/python/stereo_match.py
+ */
+class CV_EXPORTS_W StereoSGBM : public StereoMatcher
+{
+public:
+    enum
+    {
+        MODE_SGBM = 0,
+        MODE_HH   = 1,
+        MODE_SGBM_3WAY = 2,
+        MODE_HH4  = 3
+    };
+
+    CV_WRAP virtual int getPreFilterCap() const = 0;
+    CV_WRAP virtual void setPreFilterCap(int preFilterCap) = 0;
+
+    CV_WRAP virtual int getUniquenessRatio() const = 0;
+    CV_WRAP virtual void setUniquenessRatio(int uniquenessRatio) = 0;
+
+    CV_WRAP virtual int getP1() const = 0;
+    CV_WRAP virtual void setP1(int P1) = 0;
+
+    CV_WRAP virtual int getP2() const = 0;
+    CV_WRAP virtual void setP2(int P2) = 0;
+
+    CV_WRAP virtual int getMode() const = 0;
+    CV_WRAP virtual void setMode(int mode) = 0;
+
+    /** @brief Creates StereoSGBM object
+
+    @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes
+    rectification algorithms can shift images, so this parameter needs to be adjusted accordingly.
+    @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than
+    zero. In the current implementation, this parameter must be divisible by 16.
+    @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be
+    somewhere in the 3..11 range.
+    @param P1 The first parameter controlling the disparity smoothness. See below.
+    @param P2 The second parameter controlling the disparity smoothness. The larger the values are,
+    the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1
+    between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor
+    pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good
+    P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and
+    32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively).
+    @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right
+    disparity check. Set it to a non-positive value to disable the check.
+    @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first
+    computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval.
+    The result values are passed to the Birchfield-Tomasi pixel cost function.
+    @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function
+    value should "win" the second best value to consider the found match correct. Normally, a value
+    within the 5-15 range is good enough.
+    @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles
+    and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the
+    50-200 range.
+    @param speckleRange Maximum disparity variation within each connected component. If you do speckle
+    filtering, set the parameter to a positive value; it will be implicitly multiplied by 16.
+    Normally, 1 or 2 is good enough.
+    @param mode Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming
+    algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and
+    huge for HD-size pictures. By default, it is set to StereoSGBM::MODE_SGBM .
+
+    The method creates a StereoSGBM object with the given parameters. With all the defaults, you only
+    have to set StereoSGBM::numDisparities at minimum; each of the other parameters can be set to a
+    custom value (a usage sketch follows the class definition below).
+     */
+    CV_WRAP static Ptr<StereoSGBM> create(int minDisparity = 0, int numDisparities = 16, int blockSize = 3,
+                                          int P1 = 0, int P2 = 0, int disp12MaxDiff = 0,
+                                          int preFilterCap = 0, int uniquenessRatio = 0,
+                                          int speckleWindowSize = 0, int speckleRange = 0,
+                                          int mode = StereoSGBM::MODE_SGBM);
+};
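+
+/* A minimal usage sketch with the commonly recommended P1/P2 settings from the
+   documentation above (`left` and `right` are assumed to be rectified input images):
+@code{.cpp}
+    int blockSize = 5, cn = left.channels();
+    cv::Ptr<cv::StereoSGBM> sgbm = cv::StereoSGBM::create(
+        0, 96, blockSize,
+        8  * cn * blockSize * blockSize,   // P1
+        32 * cn * blockSize * blockSize,   // P2 (must be > P1)
+        1, 63, 10, 100, 2, cv::StereoSGBM::MODE_SGBM);
+    cv::Mat disp;
+    sgbm->compute(left, right, disp);      // CV_16S disparity, scaled by 16
+@endcode
+*/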
+
+//! @} calib3d
+
+/** @brief The methods in this namespace use a so-called fisheye camera model.
+  @ingroup calib3d_fisheye
+*/
+namespace fisheye
+{
+//! @addtogroup calib3d_fisheye
+//! @{
+
+    enum{
+        CALIB_USE_INTRINSIC_GUESS   = 1 << 0,
+        CALIB_RECOMPUTE_EXTRINSIC   = 1 << 1,
+        CALIB_CHECK_COND            = 1 << 2,
+        CALIB_FIX_SKEW              = 1 << 3,
+        CALIB_FIX_K1                = 1 << 4,
+        CALIB_FIX_K2                = 1 << 5,
+        CALIB_FIX_K3                = 1 << 6,
+        CALIB_FIX_K4                = 1 << 7,
+        CALIB_FIX_INTRINSIC         = 1 << 8,
+        CALIB_FIX_PRINCIPAL_POINT   = 1 << 9
+    };
+
+    /** @brief Projects points using fisheye model
+
+    @param objectPoints Array of object points, 1xN/Nx1 3-channel (or vector\<Point3f\> ), where N is
+    the number of points in the view.
+    @param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or
+    vector\<Point2f\>.
+    @param affine Affine transformation (rotation and translation) from the object coordinate space
+    to the camera coordinate space.
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param alpha The skew coefficient.
+    @param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect
+    to components of the focal lengths, coordinates of the principal point, distortion coefficients,
+    rotation vector, translation vector, and the skew. In the old interface different components of
+    the jacobian are returned via different output parameters.
+
+    The function computes projections of 3D points to the image plane given intrinsic and extrinsic
+    camera parameters. Optionally, the function computes Jacobians - matrices of partial derivatives of
+    image point coordinates (as functions of all the input parameters) with respect to the particular
+    parameters, intrinsic and/or extrinsic.
+     */
+    CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine,
+        InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray());
+
+    /** @overload */
+    CV_EXPORTS_W void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec,
+        InputArray K, InputArray D, double alpha = 0, OutputArray jacobian = noArray());
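+
+/* A small sketch of the rvec/tvec overload; the intrinsics below are arbitrary
+   example values, not defaults:
+@code{.cpp}
+    std::vector<cv::Point3f> obj = { {0.1f, -0.1f, 1.f} };
+    std::vector<cv::Point2f> img;
+    cv::Matx33d K(300, 0, 320,  0, 300, 240,  0, 0, 1);
+    cv::Vec4d   D(0.1, 0.01, 0.0, 0.0);      // k1..k4
+    cv::Vec3d   rvec(0, 0, 0), tvec(0, 0, 0);
+    cv::fisheye::projectPoints(obj, img, rvec, tvec, K, D);
+@endcode
+*/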
+
+    /** @brief Distorts 2D points using fisheye model.
+
+    @param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is
+    the number of points in the view.
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param alpha The skew coefficient.
+    @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
+
+    Note that the function assumes the camera matrix of the undistorted points to be identity.
+    This means that if you want to transform back points undistorted with undistortPoints(), you
+    have to multiply them by \f$P^{-1}\f$ first (see the sketch after the declaration below).
+     */
+    CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0);
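+
+/* A sketch of the round trip implied by the note above, with K and D as in the
+   functions above and P an assumed 3x3 floating-point cv::Mat (new camera matrix):
+@code{.cpp}
+    std::vector<cv::Point2f> pts = { {400.f, 300.f} }, und, normed, redist;
+    cv::fisheye::undistortPoints(pts, und, K, D, cv::noArray(), P);
+    cv::perspectiveTransform(und, normed, P.inv());   // undo P (P must be 3x3 here)
+    cv::fisheye::distortPoints(normed, redist, K, D); // redist is approximately pts
+@endcode
+*/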
+
+    /** @brief Undistorts 2D points using fisheye model
+
+    @param distorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is the
+    number of points in the view.
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
+    1-channel or 1x1 3-channel
+    @param P New camera matrix (3x3) or new projection matrix (3x4)
+    @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
+     */
+    CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted,
+        InputArray K, InputArray D, InputArray R = noArray(), InputArray P  = noArray());
+
+    /** @brief Computes undistortion and rectification maps for an image transform by cv::remap(). If D
+    is empty, zero distortion is used; if R or P is empty, identity matrices are used.
+
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
+    1-channel or 1x1 3-channel
+    @param P New camera matrix (3x3) or new projection matrix (3x4)
+    @param size Undistorted image size.
+    @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
+    for details.
+    @param map1 The first output map.
+    @param map2 The second output map.
+     */
+    CV_EXPORTS_W void initUndistortRectifyMap(InputArray K, InputArray D, InputArray R, InputArray P,
+        const cv::Size& size, int m1type, OutputArray map1, OutputArray map2);
+
+    /** @brief Transforms an image to compensate for fisheye lens distortion.
+
+    @param distorted image with fisheye lens distortion.
+    @param undistorted Output image with compensated fisheye lens distortion.
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you
+    may additionally scale and shift the result by using a different matrix.
+    @param new_size The new size of the output (undistorted) image.
+
+    The function transforms an image to compensate for radial and tangential lens distortion.
+
+    The function is simply a combination of fisheye::initUndistortRectifyMap (with unity R ) and remap
+    (with bilinear interpolation). See the former function for details of the transformation being
+    performed.
+
+    See below the results of undistortImage.
+        -   a\) result of undistort of perspective camera model (all possible coefficients (k_1, k_2, k_3,
+            k_4, k_5, k_6) of distortion were optimized under calibration)
+        -   b\) result of fisheye::undistortImage of fisheye camera model (all possible coefficients (k_1, k_2,
+            k_3, k_4) of fisheye distortion were optimized under calibration)
+        -   c\) original image was captured with fisheye lens
+
+    Pictures a) and b) are almost the same, but if we consider points located far from the image
+    center, we can notice that on image a) these points are distorted.
+
+    ![image](pics/fisheye_undistorted.jpg)
+     */
+    CV_EXPORTS_W void undistortImage(InputArray distorted, OutputArray undistorted,
+        InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size());
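+
+/* A sketch of the equivalence described above: building the maps explicitly with
+   identity R and remapping gives the same result as undistortImage (cv::remap is
+   declared in opencv2/imgproc.hpp; `distorted` and `Knew` are assumed inputs):
+@code{.cpp}
+    cv::Mat map1, map2, und;
+    cv::fisheye::initUndistortRectifyMap(K, D, cv::Mat::eye(3, 3, CV_64F), Knew,
+                                         distorted.size(), CV_16SC2, map1, map2);
+    cv::remap(distorted, und, map1, map2, cv::INTER_LINEAR);
+    // equivalent to: cv::fisheye::undistortImage(distorted, und, K, D, Knew);
+@endcode
+*/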
+
+    /** @brief Estimates new camera matrix for undistortion or rectification.
+
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$.
+    @param image_size Size of the image.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
+    1-channel or 1x1 3-channel
+    @param P New camera matrix (3x3) or new projection matrix (3x4)
+    @param balance Sets the new focal length in range between the min focal length and the max focal
+    length. Balance is in range of [0, 1].
+    @param new_size The new size of the image.
+    @param fov_scale Divisor for new focal length.
+     */
+    CV_EXPORTS_W void estimateNewCameraMatrixForUndistortRectify(InputArray K, InputArray D, const Size &image_size, InputArray R,
+        OutputArray P, double balance = 0.0, const Size& new_size = Size(), double fov_scale = 1.0);
+
+    /** @brief Performs camera calibration
+
+    @param objectPoints vector of vectors of calibration pattern points in the calibration pattern
+    coordinate space.
+    @param imagePoints vector of vectors of the projections of calibration pattern points.
+    imagePoints.size() must be equal to objectPoints.size(), and imagePoints[i].size() must be equal
+    to objectPoints[i].size() for each i.
+    @param image_size Size of the image used only to initialize the intrinsic camera matrix.
+    @param K Output 3x3 floating-point camera matrix
+    \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If
+    fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be
+    initialized before calling the function.
+    @param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
+    @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view.
+    That is, each k-th rotation vector together with the corresponding k-th translation vector (see
+    the next output parameter description) brings the calibration pattern from the model coordinate
+    space (in which object points are specified) to the world coordinate space, that is, a real
+    position of the calibration pattern in the k-th pattern view (k=0.. *M* -1).
+    @param tvecs Output vector of translation vectors estimated for each pattern view.
+    @param flags Different flags that may be zero or a combination of the following values:
+    -   **fisheye::CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of
+    fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
+    center ( imageSize is used), and focal distances are computed in a least-squares fashion.
+    -   **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration
+    of intrinsic optimization.
+    -   **fisheye::CALIB_CHECK_COND** The functions will check the validity of the condition number.
+    -   **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stays zero.
+    -   **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients
+    are set to zero and stay zero.
+    -   **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global
+    optimization. It stays at the center or at a different location specified when
+    CALIB_USE_INTRINSIC_GUESS is set too.
+    @param criteria Termination criteria for the iterative optimization algorithm.
+     */
+    CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size,
+        InputOutputArray K, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags = 0,
+            TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON));
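+
+/* A typical calibration sketch; objectPoints/imagePoints are assumed to be filled
+   from a detected pattern (e.g. chessboard corners), one entry per view, and
+   imageSize is the cv::Size of the input images:
+@code{.cpp}
+    std::vector<std::vector<cv::Point3f> > objectPoints; // pattern points per view
+    std::vector<std::vector<cv::Point2f> > imagePoints;  // detected projections per view
+    cv::Mat K, D;
+    std::vector<cv::Mat> rvecs, tvecs;
+    double rms = cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, K, D,
+        rvecs, tvecs, cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC | cv::fisheye::CALIB_FIX_SKEW);
+@endcode
+*/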
+
+    /** @brief Stereo rectification for fisheye camera model
+
+    @param K1 First camera matrix.
+    @param D1 First camera distortion parameters.
+    @param K2 Second camera matrix.
+    @param D2 Second camera distortion parameters.
+    @param imageSize Size of the image used for stereo calibration.
+    @param R Rotation matrix between the coordinate systems of the first and the second
+    cameras.
+    @param tvec Translation vector between coordinate systems of the cameras.
+    @param R1 Output 3x3 rectification transform (rotation matrix) for the first camera.
+    @param R2 Output 3x3 rectification transform (rotation matrix) for the second camera.
+    @param P1 Output 3x4 projection matrix in the new (rectified) coordinate systems for the first
+    camera.
+    @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second
+    camera.
+    @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ).
+    @param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set,
+    the function makes the principal points of each camera have the same pixel coordinates in the
+    rectified views. And if the flag is not set, the function may still shift the images in the
+    horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the
+    useful image area.
+    @param newImageSize New image resolution after rectification. The same size should be passed to
+    initUndistortRectifyMap (see the stereo_calib.cpp sample in OpenCV samples directory). When (0,0)
+    is passed (default), it is set to the original imageSize . Setting it to larger value can help you
+    preserve details in the original image, especially when there is a big radial distortion.
+    @param balance Sets the new focal length in range between the min focal length and the max focal
+    length. Balance is in range of [0, 1].
+    @param fov_scale Divisor for new focal length.
+     */
+    CV_EXPORTS_W void stereoRectify(InputArray K1, InputArray D1, InputArray K2, InputArray D2, const Size &imageSize, InputArray R, InputArray tvec,
+        OutputArray R1, OutputArray R2, OutputArray P1, OutputArray P2, OutputArray Q, int flags, const Size &newImageSize = Size(),
+        double balance = 0.0, double fov_scale = 1.0);
+
+    /** @brief Performs stereo calibration
+
+    @param objectPoints Vector of vectors of the calibration pattern points.
+    @param imagePoints1 Vector of vectors of the projections of the calibration pattern points,
+    observed by the first camera.
+    @param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
+    observed by the second camera.
+    @param K1 Input/output first camera matrix:
+    \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
+    any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified,
+    some or all of the matrix components must be initialized.
+    @param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements.
+    @param K2 Input/output second camera matrix. The parameter is similar to K1 .
+    @param D2 Input/output lens distortion coefficients for the second camera. The parameter is
+    similar to D1 .
+    @param imageSize Size of the image used only to initialize intrinsic camera matrix.
+    @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
+    @param T Output translation vector between the coordinate systems of the cameras.
+    @param flags Different flags that may be zero or a combination of the following values:
+    -   **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2 and D1, D2 so that only R, T matrices
+    are estimated.
+    -   **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contain valid initial values of
+    fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
+    center (imageSize is used), and focal distances are computed in a least-squares fashion.
+    -   **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration
+    of intrinsic optimization.
+    -   **fisheye::CALIB_CHECK_COND** The functions will check the validity of the condition number.
+    -   **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stays zero.
+    -   **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zero and stay
+    zero.
+    @param criteria Termination criteria for the iterative optimization algorithm.
+     */
+    CV_EXPORTS_W double stereoCalibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+                                  InputOutputArray K1, InputOutputArray D1, InputOutputArray K2, InputOutputArray D2, Size imageSize,
+                                  OutputArray R, OutputArray T, int flags = fisheye::CALIB_FIX_INTRINSIC,
+                                  TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, DBL_EPSILON));
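+
+/* A sketch chaining stereo calibration and rectification (K1/D1/K2/D2 may be
+   precomputed with fisheye::calibrate and kept fixed here; objectPoints,
+   imagePoints1/2 and imageSize are assumed inputs as documented above):
+@code{.cpp}
+    cv::Mat R, T;
+    cv::fisheye::stereoCalibrate(objectPoints, imagePoints1, imagePoints2,
+        K1, D1, K2, D2, imageSize, R, T, cv::fisheye::CALIB_FIX_INTRINSIC);
+    cv::Mat R1, R2, P1, P2, Q;
+    cv::fisheye::stereoRectify(K1, D1, K2, D2, imageSize, R, T,
+        R1, R2, P1, P2, Q, cv::CALIB_ZERO_DISPARITY);
+@endcode
+*/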
+
+//! @} calib3d_fisheye
+} // end namespace fisheye
+
+} //end namespace cv
+
+#ifndef DISABLE_OPENCV_24_COMPATIBILITY
+#include "opencv2/calib3d/calib3d_c.h"
+#endif
+
+#endif

+ 48 - 0
ThresholdTools/include/opencv2/calib3d/calib3d.hpp

@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/calib3d.hpp"

+ 427 - 0
ThresholdTools/include/opencv2/calib3d/calib3d_c.h

@@ -0,0 +1,427 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CALIB3D_C_H
+#define OPENCV_CALIB3D_C_H
+
+#include "opencv2/core/core_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @addtogroup calib3d_c
+  @{
+  */
+
+/****************************************************************************************\
+*                      Camera Calibration, Pose Estimation and Stereo                    *
+\****************************************************************************************/
+
+typedef struct CvPOSITObject CvPOSITObject;
+
+/* Allocates and initializes CvPOSITObject structure before doing cvPOSIT */
+CVAPI(CvPOSITObject*)  cvCreatePOSITObject( CvPoint3D32f* points, int point_count );
+
+
+/* Runs POSIT (POSe from ITeration) algorithm for determining 3d position of
+   an object given its model and projection in a weak-perspective case */
+CVAPI(void)  cvPOSIT(  CvPOSITObject* posit_object, CvPoint2D32f* image_points,
+                       double focal_length, CvTermCriteria criteria,
+                       float* rotation_matrix, float* translation_vector);
+
+/* Releases CvPOSITObject structure */
+CVAPI(void)  cvReleasePOSITObject( CvPOSITObject**  posit_object );
+
+/* updates the number of RANSAC iterations */
+CVAPI(int) cvRANSACUpdateNumIters( double p, double err_prob,
+                                   int model_points, int max_iters );
+
+CVAPI(void) cvConvertPointsHomogeneous( const CvMat* src, CvMat* dst );
+
+/* Calculates fundamental matrix given a set of corresponding points */
+#define CV_FM_7POINT 1
+#define CV_FM_8POINT 2
+
+#define CV_LMEDS 4
+#define CV_RANSAC 8
+
+#define CV_FM_LMEDS_ONLY  CV_LMEDS
+#define CV_FM_RANSAC_ONLY CV_RANSAC
+#define CV_FM_LMEDS CV_LMEDS
+#define CV_FM_RANSAC CV_RANSAC
+
+enum
+{
+    CV_ITERATIVE = 0,
+    CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
+    CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
+    CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
+};
+
+CVAPI(int) cvFindFundamentalMat( const CvMat* points1, const CvMat* points2,
+                                 CvMat* fundamental_matrix,
+                                 int method CV_DEFAULT(CV_FM_RANSAC),
+                                 double param1 CV_DEFAULT(3.), double param2 CV_DEFAULT(0.99),
+                                 CvMat* status CV_DEFAULT(NULL) );
+
+/* For each input point on one of images
+   computes parameters of the corresponding
+   epipolar line on the other image */
+CVAPI(void) cvComputeCorrespondEpilines( const CvMat* points,
+                                         int which_image,
+                                         const CvMat* fundamental_matrix,
+                                         CvMat* correspondent_lines );
+
+/* Triangulation functions */
+
+CVAPI(void) cvTriangulatePoints(CvMat* projMatr1, CvMat* projMatr2,
+                                CvMat* projPoints1, CvMat* projPoints2,
+                                CvMat* points4D);
+
+CVAPI(void) cvCorrectMatches(CvMat* F, CvMat* points1, CvMat* points2,
+                             CvMat* new_points1, CvMat* new_points2);
+
+
+/* Computes the optimal new camera matrix according to the free scaling parameter alpha:
+   alpha=0 - only valid pixels will be retained in the undistorted image
+   alpha=1 - all the source image pixels will be retained in the undistorted image
+*/
+CVAPI(void) cvGetOptimalNewCameraMatrix( const CvMat* camera_matrix,
+                                         const CvMat* dist_coeffs,
+                                         CvSize image_size, double alpha,
+                                         CvMat* new_camera_matrix,
+                                         CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
+                                         CvRect* valid_pixel_ROI CV_DEFAULT(0),
+                                         int center_principal_point CV_DEFAULT(0));
+
+/* Converts rotation vector to rotation matrix or vice versa */
+CVAPI(int) cvRodrigues2( const CvMat* src, CvMat* dst,
+                         CvMat* jacobian CV_DEFAULT(0) );
+
+/* Finds perspective transformation between the object plane and image (view) plane */
+CVAPI(int) cvFindHomography( const CvMat* src_points,
+                             const CvMat* dst_points,
+                             CvMat* homography,
+                             int method CV_DEFAULT(0),
+                             double ransacReprojThreshold CV_DEFAULT(3),
+                             CvMat* mask CV_DEFAULT(0),
+                             int maxIters CV_DEFAULT(2000),
+                             double confidence CV_DEFAULT(0.995));
+
+/* Computes RQ decomposition for 3x3 matrices */
+CVAPI(void) cvRQDecomp3x3( const CvMat *matrixM, CvMat *matrixR, CvMat *matrixQ,
+                           CvMat *matrixQx CV_DEFAULT(NULL),
+                           CvMat *matrixQy CV_DEFAULT(NULL),
+                           CvMat *matrixQz CV_DEFAULT(NULL),
+                           CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
+
+/* Computes projection matrix decomposition */
+CVAPI(void) cvDecomposeProjectionMatrix( const CvMat *projMatr, CvMat *calibMatr,
+                                         CvMat *rotMatr, CvMat *posVect,
+                                         CvMat *rotMatrX CV_DEFAULT(NULL),
+                                         CvMat *rotMatrY CV_DEFAULT(NULL),
+                                         CvMat *rotMatrZ CV_DEFAULT(NULL),
+                                         CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
+
+/* Computes d(AB)/dA and d(AB)/dB */
+CVAPI(void) cvCalcMatMulDeriv( const CvMat* A, const CvMat* B, CvMat* dABdA, CvMat* dABdB );
+
+/* Computes r3 = rodrigues(rodrigues(r2)*rodrigues(r1)),
+   t3 = rodrigues(r2)*t1 + t2 and the respective derivatives */
+CVAPI(void) cvComposeRT( const CvMat* _rvec1, const CvMat* _tvec1,
+                         const CvMat* _rvec2, const CvMat* _tvec2,
+                         CvMat* _rvec3, CvMat* _tvec3,
+                         CvMat* dr3dr1 CV_DEFAULT(0), CvMat* dr3dt1 CV_DEFAULT(0),
+                         CvMat* dr3dr2 CV_DEFAULT(0), CvMat* dr3dt2 CV_DEFAULT(0),
+                         CvMat* dt3dr1 CV_DEFAULT(0), CvMat* dt3dt1 CV_DEFAULT(0),
+                         CvMat* dt3dr2 CV_DEFAULT(0), CvMat* dt3dt2 CV_DEFAULT(0) );
+
+/* Projects object points to the view plane using
+   the specified extrinsic and intrinsic camera parameters */
+CVAPI(void) cvProjectPoints2( const CvMat* object_points, const CvMat* rotation_vector,
+                              const CvMat* translation_vector, const CvMat* camera_matrix,
+                              const CvMat* distortion_coeffs, CvMat* image_points,
+                              CvMat* dpdrot CV_DEFAULT(NULL), CvMat* dpdt CV_DEFAULT(NULL),
+                              CvMat* dpdf CV_DEFAULT(NULL), CvMat* dpdc CV_DEFAULT(NULL),
+                              CvMat* dpddist CV_DEFAULT(NULL),
+                              double aspect_ratio CV_DEFAULT(0));
+
+/* Finds extrinsic camera parameters from
+   a few known corresponding point pairs and intrinsic parameters */
+CVAPI(void) cvFindExtrinsicCameraParams2( const CvMat* object_points,
+                                          const CvMat* image_points,
+                                          const CvMat* camera_matrix,
+                                          const CvMat* distortion_coeffs,
+                                          CvMat* rotation_vector,
+                                          CvMat* translation_vector,
+                                          int use_extrinsic_guess CV_DEFAULT(0) );
+
+/* Computes initial estimate of the intrinsic camera parameters
+   in case of planar calibration target (e.g. chessboard) */
+CVAPI(void) cvInitIntrinsicParams2D( const CvMat* object_points,
+                                     const CvMat* image_points,
+                                     const CvMat* npoints, CvSize image_size,
+                                     CvMat* camera_matrix,
+                                     double aspect_ratio CV_DEFAULT(1.) );
+
+#define CV_CALIB_CB_ADAPTIVE_THRESH  1
+#define CV_CALIB_CB_NORMALIZE_IMAGE  2
+#define CV_CALIB_CB_FILTER_QUADS     4
+#define CV_CALIB_CB_FAST_CHECK       8
+
+// Performs a fast check if a chessboard is in the input image. This is a workaround to
+// a problem of cvFindChessboardCorners being slow on images with no chessboard
+// - src: input image
+// - size: chessboard size
+// Returns 1 if a chessboard can be in this image and findChessboardCorners should be called,
+// 0 if there is no chessboard, -1 in case of error
+CVAPI(int) cvCheckChessboard(IplImage* src, CvSize size);
+
+/* Detects corners on a chessboard calibration pattern */
+CVAPI(int) cvFindChessboardCorners( const void* image, CvSize pattern_size,
+                                    CvPoint2D32f* corners,
+                                    int* corner_count CV_DEFAULT(NULL),
+                                    int flags CV_DEFAULT(CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE) );
+
+/* Draws individual chessboard corners or the whole chessboard detected */
+CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size,
+                                     CvPoint2D32f* corners,
+                                     int count, int pattern_was_found );
+
+#define CV_CALIB_USE_INTRINSIC_GUESS  1
+#define CV_CALIB_FIX_ASPECT_RATIO     2
+#define CV_CALIB_FIX_PRINCIPAL_POINT  4
+#define CV_CALIB_ZERO_TANGENT_DIST    8
+#define CV_CALIB_FIX_FOCAL_LENGTH 16
+#define CV_CALIB_FIX_K1  32
+#define CV_CALIB_FIX_K2  64
+#define CV_CALIB_FIX_K3  128
+#define CV_CALIB_FIX_K4  2048
+#define CV_CALIB_FIX_K5  4096
+#define CV_CALIB_FIX_K6  8192
+#define CV_CALIB_RATIONAL_MODEL 16384
+#define CV_CALIB_THIN_PRISM_MODEL 32768
+#define CV_CALIB_FIX_S1_S2_S3_S4  65536
+#define CV_CALIB_TILTED_MODEL  262144
+#define CV_CALIB_FIX_TAUX_TAUY  524288
+#define CV_CALIB_FIX_TANGENT_DIST 2097152
+
+#define CV_CALIB_NINTRINSIC 18
+
+/* Finds intrinsic and extrinsic camera parameters
+   from a few views of known calibration pattern */
+CVAPI(double) cvCalibrateCamera2( const CvMat* object_points,
+                                const CvMat* image_points,
+                                const CvMat* point_counts,
+                                CvSize image_size,
+                                CvMat* camera_matrix,
+                                CvMat* distortion_coeffs,
+                                CvMat* rotation_vectors CV_DEFAULT(NULL),
+                                CvMat* translation_vectors CV_DEFAULT(NULL),
+                                int flags CV_DEFAULT(0),
+                                CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
+                                    CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,DBL_EPSILON)) );
+
+/* Computes various useful characteristics of the camera from the data computed by
+   cvCalibrateCamera2 */
+CVAPI(void) cvCalibrationMatrixValues( const CvMat *camera_matrix,
+                                CvSize image_size,
+                                double aperture_width CV_DEFAULT(0),
+                                double aperture_height CV_DEFAULT(0),
+                                double *fovx CV_DEFAULT(NULL),
+                                double *fovy CV_DEFAULT(NULL),
+                                double *focal_length CV_DEFAULT(NULL),
+                                CvPoint2D64f *principal_point CV_DEFAULT(NULL),
+                                double *pixel_aspect_ratio CV_DEFAULT(NULL));
+
+#define CV_CALIB_FIX_INTRINSIC  256
+#define CV_CALIB_SAME_FOCAL_LENGTH 512
+
+/* Computes the transformation from one camera coordinate system to another one
+   from a few correspondent views of the same calibration target. Optionally, calibrates
+   both cameras */
+CVAPI(double) cvStereoCalibrate( const CvMat* object_points, const CvMat* image_points1,
+                               const CvMat* image_points2, const CvMat* npoints,
+                               CvMat* camera_matrix1, CvMat* dist_coeffs1,
+                               CvMat* camera_matrix2, CvMat* dist_coeffs2,
+                               CvSize image_size, CvMat* R, CvMat* T,
+                               CvMat* E CV_DEFAULT(0), CvMat* F CV_DEFAULT(0),
+                               int flags CV_DEFAULT(CV_CALIB_FIX_INTRINSIC),
+                               CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
+                                   CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,1e-6)) );
+
+#define CV_CALIB_ZERO_DISPARITY 1024
+
+/* Computes 3D rotations (+ optional shift) for each camera coordinate system to make both
+   views parallel (=> to make all the epipolar lines horizontal or vertical) */
+CVAPI(void) cvStereoRectify( const CvMat* camera_matrix1, const CvMat* camera_matrix2,
+                             const CvMat* dist_coeffs1, const CvMat* dist_coeffs2,
+                             CvSize image_size, const CvMat* R, const CvMat* T,
+                             CvMat* R1, CvMat* R2, CvMat* P1, CvMat* P2,
+                             CvMat* Q CV_DEFAULT(0),
+                             int flags CV_DEFAULT(CV_CALIB_ZERO_DISPARITY),
+                             double alpha CV_DEFAULT(-1),
+                             CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
+                             CvRect* valid_pix_ROI1 CV_DEFAULT(0),
+                             CvRect* valid_pix_ROI2 CV_DEFAULT(0));
+
+/* Computes rectification transformations for uncalibrated pair of images using a set
+   of point correspondences */
+CVAPI(int) cvStereoRectifyUncalibrated( const CvMat* points1, const CvMat* points2,
+                                        const CvMat* F, CvSize img_size,
+                                        CvMat* H1, CvMat* H2,
+                                        double threshold CV_DEFAULT(5));
+
+
+
+/* stereo correspondence parameters and functions */
+
+#define CV_STEREO_BM_NORMALIZED_RESPONSE  0
+#define CV_STEREO_BM_XSOBEL               1
+
+/* Block matching algorithm structure */
+typedef struct CvStereoBMState
+{
+    // pre-filtering (normalization of input images)
+    int preFilterType; // =CV_STEREO_BM_NORMALIZED_RESPONSE now
+    int preFilterSize; // averaging window size: ~5x5..21x21
+    int preFilterCap; // the output of pre-filtering is clipped by [-preFilterCap,preFilterCap]
+
+    // correspondence using Sum of Absolute Difference (SAD)
+    int SADWindowSize; // ~5x5..21x21
+    int minDisparity;  // minimum disparity (can be negative)
+    int numberOfDisparities; // maximum disparity - minimum disparity (> 0)
+
+    // post-filtering
+    int textureThreshold;  // the disparity is only computed for pixels
+                           // with textured enough neighborhood
+    int uniquenessRatio;   // accept the computed disparity d* only if
+                           // SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.)
+                           // for any d != d*+/-1 within the search range.
+    int speckleWindowSize; // disparity variation window
+    int speckleRange; // acceptable range of variation in window
+
+    int trySmallerWindows; // if 1, the results may be more accurate,
+                           // at the expense of slower processing
+    CvRect roi1, roi2;
+    int disp12MaxDiff;
+
+    // temporary buffers
+    CvMat* preFilteredImg0;
+    CvMat* preFilteredImg1;
+    CvMat* slidingSumBuf;
+    CvMat* cost;
+    CvMat* disp;
+} CvStereoBMState;
+
+#define CV_STEREO_BM_BASIC 0
+#define CV_STEREO_BM_FISH_EYE 1
+#define CV_STEREO_BM_NARROW 2
+
+CVAPI(CvStereoBMState*) cvCreateStereoBMState(int preset CV_DEFAULT(CV_STEREO_BM_BASIC),
+                                              int numberOfDisparities CV_DEFAULT(0));
+
+CVAPI(void) cvReleaseStereoBMState( CvStereoBMState** state );
+
+CVAPI(void) cvFindStereoCorrespondenceBM( const CvArr* left, const CvArr* right,
+                                          CvArr* disparity, CvStereoBMState* state );
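+
+/* A minimal sketch of the legacy C pipeline (left/right: 8-bit single-channel
+   CvMat/IplImage of equal size; disp: 16-bit signed single-channel of the same size):
+
+       CvStereoBMState* state = cvCreateStereoBMState(CV_STEREO_BM_BASIC, 64);
+       cvFindStereoCorrespondenceBM(left, right, disp, state);
+       cvReleaseStereoBMState(&state);
+*/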
+
+CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
+                                      int numberOfDisparities, int SADWindowSize );
+
+CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost,
+                                 int minDisparity, int numberOfDisparities,
+                                 int disp12MaxDiff CV_DEFAULT(1) );
+
+/* Reprojects the computed disparity image to the 3D space using the specified 4x4 matrix */
+CVAPI(void)  cvReprojectImageTo3D( const CvArr* disparityImage,
+                                   CvArr* _3dImage, const CvMat* Q,
+                                   int handleMissingValues CV_DEFAULT(0) );
+
+/** @} calib3d_c */
+
+#ifdef __cplusplus
+} // extern "C"
+
+//////////////////////////////////////////////////////////////////////////////////////////
+class CV_EXPORTS CvLevMarq
+{
+public:
+    CvLevMarq();
+    CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
+              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
+              bool completeSymmFlag=false );
+    ~CvLevMarq();
+    void init( int nparams, int nerrs, CvTermCriteria criteria=
+              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
+              bool completeSymmFlag=false );
+    bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
+    bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
+
+    void clear();
+    void step();
+    enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
+
+    cv::Ptr<CvMat> mask;
+    cv::Ptr<CvMat> prevParam;
+    cv::Ptr<CvMat> param;
+    cv::Ptr<CvMat> J;
+    cv::Ptr<CvMat> err;
+    cv::Ptr<CvMat> JtJ;
+    cv::Ptr<CvMat> JtJN;
+    cv::Ptr<CvMat> JtErr;
+    cv::Ptr<CvMat> JtJV;
+    cv::Ptr<CvMat> JtJW;
+    double prevErrNorm, errNorm;
+    int lambdaLg10;
+    CvTermCriteria criteria;
+    int state;
+    int iters;
+    bool completeSymmFlag;
+    int solveMethod;
+};
+
+#endif
+
+#endif /* OPENCV_CALIB3D_C_H */

+ 3273 - 0
ThresholdTools/include/opencv2/core.hpp

@@ -0,0 +1,3273 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
+// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_HPP
+#define OPENCV_CORE_HPP
+
+#ifndef __cplusplus
+#  error core.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/version.hpp"
+#include "opencv2/core/base.hpp"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/traits.hpp"
+#include "opencv2/core/matx.hpp"
+#include "opencv2/core/types.hpp"
+#include "opencv2/core/mat.hpp"
+#include "opencv2/core/persistence.hpp"
+
+/**
+@defgroup core Core functionality
+@{
+    @defgroup core_basic Basic structures
+    @defgroup core_c C structures and operations
+    @{
+        @defgroup core_c_glue Connections with C++
+    @}
+    @defgroup core_array Operations on arrays
+    @defgroup core_xml XML/YAML Persistence
+    @defgroup core_cluster Clustering
+    @defgroup core_utils Utility and system functions and macros
+    @{
+        @defgroup core_utils_sse SSE utilities
+        @defgroup core_utils_neon NEON utilities
+        @defgroup core_utils_softfloat Softfloat support
+    @}
+    @defgroup core_opengl OpenGL interoperability
+    @defgroup core_ipp Intel IPP Asynchronous C/C++ Converters
+    @defgroup core_optim Optimization Algorithms
+    @defgroup core_directx DirectX interoperability
+    @defgroup core_eigen Eigen support
+    @defgroup core_opencl OpenCL support
+    @defgroup core_va_intel Intel VA-API/OpenCL (CL-VA) interoperability
+    @defgroup core_hal Hardware Acceleration Layer
+    @{
+        @defgroup core_hal_functions Functions
+        @defgroup core_hal_interface Interface
+        @defgroup core_hal_intrin Universal intrinsics
+        @{
+            @defgroup core_hal_intrin_impl Private implementation helpers
+        @}
+    @}
+@}
+ */
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+/*! @brief Class passed to an error.
+
+This class encapsulates all or almost all necessary
+information about the error that happened in the program. The exception is
+usually constructed and thrown implicitly via the CV_Error and CV_Error_ macros.
+@see error
+ */
+class CV_EXPORTS Exception : public std::exception
+{
+public:
+    /*!
+     Default constructor
+     */
+    Exception();
+    /*!
+     Full constructor. Normally the constructor is not called explicitly.
+     Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used.
+    */
+    Exception(int _code, const String& _err, const String& _func, const String& _file, int _line);
+    virtual ~Exception() throw();
+
+    /*!
+     \return the error description and the context as a text string.
+    */
+    virtual const char *what() const throw() CV_OVERRIDE;
+    void formatMessage();
+
+    String msg; ///< the formatted error message
+
+    int code; ///< error code @see CVStatus
+    String err; ///< error description
+    String func; ///< function name. Available only when the compiler supports getting it
+    String file; ///< source file name where the error has occurred
+    int line; ///< line number in the source file where the error has occurred
+};
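+
+/* A small sketch of the usual flow: CV_Error throws a cv::Exception, which a caller
+   can catch and inspect (assumes <iostream> for the output):
+@code{.cpp}
+    try {
+        CV_Error(cv::Error::StsBadArg, "example error text");
+    } catch (const cv::Exception& e) {
+        std::cerr << e.what() << std::endl; // formatted message with code, function, file, line
+    }
+@endcode
+*/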
+
+/*! @brief Signals an error and raises the exception.
+
+By default the function prints information about the error to stderr,
+then it either stops if cv::setBreakOnError() had been called before or raises the exception.
+It is possible to alter error processing by using #redirectError().
+@param exc the exception raised.
+@deprecated drop this version
+ */
+CV_EXPORTS void error( const Exception& exc );
+
+enum SortFlags { SORT_EVERY_ROW    = 0, //!< each matrix row is sorted independently
+                 SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
+                                        //!< independently; this flag and the previous one are
+                                        //!< mutually exclusive.
+                 SORT_ASCENDING    = 0, //!< each matrix row is sorted in the ascending
+                                        //!< order.
+                 SORT_DESCENDING   = 16 //!< each matrix row is sorted in the
+                                        //!< descending order; this flag and the previous one are also
+                                        //!< mutually exclusive.
+               };
+
+//! @} core_utils
+
+//! @addtogroup core
+//! @{
+
+//! Covariation flags
+enum CovarFlags {
+    /** The output covariance matrix is calculated as:
+       \f[\texttt{scale}   \cdot  [  \texttt{vects}  [0]-  \texttt{mean}  , \texttt{vects}  [1]-  \texttt{mean}  ,...]^T  \cdot  [ \texttt{vects}  [0]- \texttt{mean}  , \texttt{vects}  [1]- \texttt{mean}  ,...],\f]
+       The covariance matrix will be nsamples x nsamples. Such an unusual covariance matrix is used
+       for fast PCA of a set of very large vectors (see, for example, the EigenFaces technique for
+       face recognition). Eigenvalues of this "scrambled" matrix match the eigenvalues of the true
+       covariance matrix. The "true" eigenvectors can be easily calculated from the eigenvectors of
+       the "scrambled" covariance matrix. */
+    COVAR_SCRAMBLED = 0,
+    /** The output covariance matrix is calculated as:
+        \f[\texttt{scale}   \cdot  [  \texttt{vects}  [0]-  \texttt{mean}  , \texttt{vects}  [1]-  \texttt{mean}  ,...]  \cdot  [ \texttt{vects}  [0]- \texttt{mean}  , \texttt{vects}  [1]- \texttt{mean}  ,...]^T,\f]
+        covar will be a square matrix of the same size as the total number of elements in each input
+        vector. One and only one of #COVAR_SCRAMBLED and #COVAR_NORMAL must be specified.*/
+    COVAR_NORMAL    = 1,
+    /** If the flag is specified, the function does not calculate mean from
+        the input vectors but, instead, uses the passed mean vector. This is useful if mean has been
+        pre-calculated or known in advance, or if the covariance matrix is calculated by parts. In
+        this case, mean is not a mean vector of the input sub-set of vectors but rather the mean
+        vector of the whole set.*/
+    COVAR_USE_AVG   = 2,
+    /** If the flag is specified, the covariance matrix is scaled. In the
+        "normal" mode, scale is 1./nsamples . In the "scrambled" mode, scale is the reciprocal of the
+        total number of elements in each input vector. By default (if the flag is not specified), the
+        covariance matrix is not scaled ( scale=1 ).*/
+    COVAR_SCALE     = 4,
+    /** If the flag is
+        specified, all the input vectors are stored as rows of the samples matrix. mean should be a
+        single-row vector in this case.*/
+    COVAR_ROWS      = 8,
+    /** If the flag is
+        specified, all the input vectors are stored as columns of the samples matrix. mean should be a
+        single-column vector in this case.*/
+    COVAR_COLS      = 16
+};
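+
+/* A sketch of the common "one observation per row" case (`samples` is assumed to be
+   a filled CV_32F or CV_64F matrix with one sample per row):
+@code{.cpp}
+    cv::Mat covar, mean;
+    cv::calcCovarMatrix(samples, covar, mean,
+                        cv::COVAR_NORMAL | cv::COVAR_ROWS | cv::COVAR_SCALE, CV_64F);
+    // covar is dims x dims; mean holds the per-dimension sample mean
+@endcode
+*/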
+
+//! k-Means flags
+enum KmeansFlags {
+    /** Select random initial centers in each attempt.*/
+    KMEANS_RANDOM_CENTERS     = 0,
+    /** Use kmeans++ center initialization by Arthur and Vassilvitskii [Arthur2007].*/
+    KMEANS_PP_CENTERS         = 2,
+    /** During the first (and possibly the only) attempt, use the
+        user-supplied labels instead of computing them from the initial centers. For the second and
+        further attempts, use the random or semi-random centers. Use one of KMEANS_\*_CENTERS flag
+        to specify the exact method.*/
+    KMEANS_USE_INITIAL_LABELS = 1
+};
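+
+/* A sketch of a typical kmeans call (`points` is assumed to be an N x dims CV_32F
+   matrix, one sample per row):
+@code{.cpp}
+    cv::Mat labels, centers;
+    double compactness = cv::kmeans(points, 3, labels,
+        cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 10, 1.0),
+        5, cv::KMEANS_PP_CENTERS, centers); // 3 clusters, 5 attempts
+@endcode
+*/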
+
+//! type of line
+enum LineTypes {
+    FILLED  = -1,
+    LINE_4  = 4, //!< 4-connected line
+    LINE_8  = 8, //!< 8-connected line
+    LINE_AA = 16 //!< antialiased line
+};
+
+//! Only a subset of Hershey fonts <https://en.wikipedia.org/wiki/Hershey_fonts> is supported
+enum HersheyFonts {
+    FONT_HERSHEY_SIMPLEX        = 0, //!< normal size sans-serif font
+    FONT_HERSHEY_PLAIN          = 1, //!< small size sans-serif font
+    FONT_HERSHEY_DUPLEX         = 2, //!< normal size sans-serif font (more complex than FONT_HERSHEY_SIMPLEX)
+    FONT_HERSHEY_COMPLEX        = 3, //!< normal size serif font
+    FONT_HERSHEY_TRIPLEX        = 4, //!< normal size serif font (more complex than FONT_HERSHEY_COMPLEX)
+    FONT_HERSHEY_COMPLEX_SMALL  = 5, //!< smaller version of FONT_HERSHEY_COMPLEX
+    FONT_HERSHEY_SCRIPT_SIMPLEX = 6, //!< hand-writing style font
+    FONT_HERSHEY_SCRIPT_COMPLEX = 7, //!< more complex variant of FONT_HERSHEY_SCRIPT_SIMPLEX
+    FONT_ITALIC                 = 16 //!< flag for italic font
+};
+
+enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
+                   REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
+                   REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
+                   REDUCE_MIN = 3  //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix.
+                 };
+
+
+/** @brief Swaps two matrices
+*/
+CV_EXPORTS void swap(Mat& a, Mat& b);
+/** @overload */
+CV_EXPORTS void swap( UMat& a, UMat& b );
+
+//! @} core
+
+//! @addtogroup core_array
+//! @{
+
+/** @brief Computes the source location of an extrapolated pixel.
+
+The function computes and returns the coordinate of a donor pixel corresponding to the specified
+extrapolated pixel when using the specified extrapolation border mode. For example, if you use
+cv::BORDER_WRAP mode in the horizontal direction, cv::BORDER_REFLECT_101 in the vertical direction and
+want to compute the value of the "virtual" pixel Point(-5, 100) in a floating-point image img, it
+looks like:
+@code{.cpp}
+    float val = img.at<float>(borderInterpolate(100, img.rows, cv::BORDER_REFLECT_101),
+                              borderInterpolate(-5, img.cols, cv::BORDER_WRAP));
+@endcode
+Normally, the function is not called directly. It is used inside filtering functions and also in
+copyMakeBorder.
+@param p 0-based coordinate of the extrapolated pixel along one of the axes, likely \<0 or \>= len
+@param len Length of the array along the corresponding axis.
+@param borderType Border type, one of the #BorderTypes, except for #BORDER_TRANSPARENT and
+#BORDER_ISOLATED . When borderType==#BORDER_CONSTANT , the function always returns -1, regardless
+of p and len.
+
+@sa copyMakeBorder
+*/
+CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
+
+/** @example copyMakeBorder_demo.cpp
+An example using copyMakeBorder function
+ */
+/** @brief Forms a border around an image.
+
+The function copies the source image into the middle of the destination image. The areas to the
+left, to the right, above and below the copied source image will be filled with extrapolated
+pixels. This is not what filtering functions based on it do (they extrapolate pixels on the fly), but
+what other more complex functions, including your own, may do to simplify image boundary handling.
+
+The function supports the mode when src is already in the middle of dst . In this case, the
+function does not copy src itself but simply constructs the border, for example:
+
+@code{.cpp}
+    // let border be the same in all directions
+    int border=2;
+    // constructs a larger image to fit both the image and the border
+    Mat gray_buf(rgb.rows + border*2, rgb.cols + border*2, rgb.depth());
+    // select the middle part of it w/o copying data
+    Mat gray(gray_buf, Rect(border, border, rgb.cols, rgb.rows));
+    // convert image from RGB to grayscale
+    cvtColor(rgb, gray, COLOR_RGB2GRAY);
+    // form a border in-place
+    copyMakeBorder(gray, gray_buf, border, border,
+                   border, border, BORDER_REPLICATE);
+    // now do some custom filtering ...
+    ...
+@endcode
+@note When the source image is a part (ROI) of a bigger image, the function will try to use the
+pixels outside of the ROI to form a border. To disable this feature and always do extrapolation, as
+if src was not a ROI, use borderType | #BORDER_ISOLATED.
+
+@param src Source image.
+@param dst Destination image of the same type as src and the size Size(src.cols+left+right,
+src.rows+top+bottom) .
+@param top
+@param bottom
+@param left
+@param right Parameter specifying how many pixels in each direction from the source image rectangle
+to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs
+to be built.
+@param borderType Border type. See borderInterpolate for details.
+@param value Border value if borderType==BORDER_CONSTANT .
+
+@sa  borderInterpolate
+*/
+CV_EXPORTS_W void copyMakeBorder(InputArray src, OutputArray dst,
+                                 int top, int bottom, int left, int right,
+                                 int borderType, const Scalar& value = Scalar() );
+
+/** @brief Calculates the per-element sum of two arrays or an array and a scalar.
+
+The function add calculates:
+- Sum of two arrays when both input arrays have the same size and the same number of channels:
+\f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1}(I) +  \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f]
+- Sum of an array and a scalar when src2 is constructed from Scalar or has the same number of
+elements as `src1.channels()`:
+\f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1}(I) +  \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f]
+- Sum of a scalar and an array when src1 is constructed from Scalar or has the same number of
+elements as `src2.channels()`:
+\f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1} +  \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+where `I` is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+
+The first function in the list above can be replaced with matrix expressions:
+@code{.cpp}
+    dst = src1 + src2;
+    dst += src1; // equivalent to add(dst, src1, dst);
+@endcode
+The input arrays and the output array can all have the same or different depths. For example, you
+can add a 16-bit unsigned array to an 8-bit signed array and store the sum as a 32-bit
+floating-point array. Depth of the output array is determined by the dtype parameter. In the second
+and third cases above, as well as in the first case, when src1.depth() == src2.depth(), dtype can
+be set to the default -1. In this case, the output array will have the same depth as the input
+array, be it src1, src2 or both.
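+For instance, a minimal sketch of the mixed-depth case described above (values are illustrative):
+@code{.cpp}
+    Mat a(3, 3, CV_16U, Scalar(50000));
+    Mat b(3, 3, CV_8S, Scalar(-100));
+    Mat dst;
+    add(a, b, dst, noArray(), CV_32F); // dst is CV_32F; each element is 49900
+@endcode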
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get
+result of an incorrect sign in the case of overflow.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and number of channels as the input array(s); the
+depth is defined by dtype or src1/src2.
+@param mask optional operation mask - 8-bit single channel array, that specifies elements of the
+output array to be changed.
+@param dtype optional depth of the output array (see the discussion below).
+@sa subtract, addWeighted, scaleAdd, Mat::convertTo
+*/
+CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst,
+                      InputArray mask = noArray(), int dtype = -1);
+
+/** @brief Calculates the per-element difference between two arrays or array and a scalar.
+
+The function subtract calculates:
+- Difference between two arrays, when both input arrays have the same size and the same number of
+channels:
+    \f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1}(I) -  \texttt{src2}(I)) \quad \texttt{if mask}(I) \ne0\f]
+- Difference between an array and a scalar, when src2 is constructed from Scalar or has the same
+number of elements as `src1.channels()`:
+    \f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1}(I) -  \texttt{src2} ) \quad \texttt{if mask}(I) \ne0\f]
+- Difference between a scalar and an array, when src1 is constructed from Scalar or has the same
+number of elements as `src2.channels()`:
+    \f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src1} -  \texttt{src2}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+- The reverse difference between a scalar and an array in the case of `SubRS`:
+    \f[\texttt{dst}(I) =  \texttt{saturate} ( \texttt{src2} -  \texttt{src1}(I) ) \quad \texttt{if mask}(I) \ne0\f]
+where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+
+The first function in the list above can be replaced with matrix expressions:
+@code{.cpp}
+    dst = src1 - src2;
+    dst -= src1; // equivalent to subtract(dst, src1, dst);
+@endcode
+The input arrays and the output array can all have the same or different depths. For example, you
+can subtract two 8-bit unsigned arrays and store the difference in a 16-bit signed array. Depth of
+the output array is determined by the dtype parameter. In the second and third cases above, as well as
+in the first case, when src1.depth() == src2.depth(), dtype can be set to the default -1. In this
+case the output array will have the same depth as the input array, be it src1, src2 or both.
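+For instance, a minimal sketch of the widening case described above (values are illustrative):
+@code{.cpp}
+    Mat a(3, 3, CV_8U, Scalar(10));
+    Mat b(3, 3, CV_8U, Scalar(20));
+    Mat dst;
+    subtract(a, b, dst, noArray(), CV_16S); // dst is CV_16S; each element is -10
+@endcode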
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get
+result of an incorrect sign in the case of overflow.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array of the same size and the same number of channels as the input array.
+@param mask optional operation mask; this is an 8-bit single channel array that specifies elements
+of the output array to be changed.
+@param dtype optional depth of the output array
+@sa  add, addWeighted, scaleAdd, Mat::convertTo
+  */
+CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst,
+                           InputArray mask = noArray(), int dtype = -1);
+
+
+/** @brief Calculates the per-element scaled product of two arrays.
+
+The function multiply calculates the per-element product of two arrays:
+
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{scale} \cdot \texttt{src1} (I)  \cdot \texttt{src2} (I))\f]
+
+There is also a @ref MatrixExpressions -friendly variant of this function. See Mat::mul .
+
+For a not-per-element matrix product, see gemm .
+
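+A minimal sketch of the scaled product (values are illustrative):
+@code{.cpp}
+    Mat a(2, 2, CV_32F, Scalar(0.5f));
+    Mat b(2, 2, CV_32F, Scalar(4.f));
+    Mat prod;
+    multiply(a, b, prod, 0.25); // each element: 0.25 * 0.5 * 4 = 0.5
+@endcode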
+@note Saturation is not applied when the output array has the depth
+CV_32S. You may even get result of an incorrect sign in the case of
+overflow.
+@param src1 first input array.
+@param src2 second input array of the same size and the same type as src1.
+@param dst output array of the same size and type as src1.
+@param scale optional scale factor.
+@param dtype optional depth of the output array
+@sa add, subtract, divide, scaleAdd, addWeighted, accumulate, accumulateProduct, accumulateSquare,
+Mat::convertTo
+*/
+CV_EXPORTS_W void multiply(InputArray src1, InputArray src2,
+                           OutputArray dst, double scale = 1, int dtype = -1);
+
+/** @brief Performs per-element division of two arrays or a scalar by an array.
+
+The function cv::divide divides one array by another:
+\f[\texttt{dst(I) = saturate(src1(I)*scale/src2(I))}\f]
+or a scalar by an array when there is no src1 :
+\f[\texttt{dst(I) = saturate(scale/src2(I))}\f]
+
+When src2(I) is zero, dst(I) will also be zero. Different channels of
+multi-channel arrays are processed independently.
+
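+A minimal sketch of the scalar-by-array form (values are illustrative):
+@code{.cpp}
+    Mat src(2, 2, CV_32F, Scalar(4.f));
+    Mat rec;
+    divide(1.0, src, rec); // per-element reciprocal; each element becomes 0.25
+@endcode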
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get
+result of an incorrect sign in the case of overflow.
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1.
+@param scale scalar factor.
+@param dst output array of the same size and type as src2.
+@param dtype optional depth of the output array; if -1, dst will have depth src2.depth(), but in
+case of an array-by-array division, you can only pass -1 when src1.depth()==src2.depth().
+@sa  multiply, add, subtract
+*/
+CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst,
+                         double scale = 1, int dtype = -1);
+
+/** @overload */
+CV_EXPORTS_W void divide(double scale, InputArray src2,
+                         OutputArray dst, int dtype = -1);
+
+/** @brief Calculates the sum of a scaled array and another array.
+
+The function scaleAdd is one of the classical primitive linear algebra operations, known as DAXPY
+or SAXPY in [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms). It calculates
+the sum of a scaled array and another array:
+\f[\texttt{dst} (I)= \texttt{scale} \cdot \texttt{src1} (I) +  \texttt{src2} (I)\f]
+The function can also be emulated with a matrix expression, for example:
+@code{.cpp}
+    Mat A(3, 3, CV_64F);
+    ...
+    A.row(0) = A.row(1)*2 + A.row(2);
+@endcode
+@param src1 first input array.
+@param alpha scale factor for the first array.
+@param src2 second input array of the same size and type as src1.
+@param dst output array of the same size and type as src1.
+@sa add, addWeighted, subtract, Mat::dot, Mat::convertTo
+*/
+CV_EXPORTS_W void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst);
+
+/** @example AddingImagesTrackbar.cpp
+An example using the addWeighted function
+ */
+/** @brief Calculates the weighted sum of two arrays.
+
+The function addWeighted calculates the weighted sum of two arrays as follows:
+\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} +  \texttt{src2} (I)* \texttt{beta} +  \texttt{gamma} )\f]
+where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each
+channel is processed independently.
+The function can be replaced with a matrix expression:
+@code{.cpp}
+    dst = src1*alpha + src2*beta + gamma;
+@endcode
+@note Saturation is not applied when the output array has the depth CV_32S. You may even get
+result of an incorrect sign in the case of overflow.
+@param src1 first input array.
+@param alpha weight of the first array elements.
+@param src2 second input array of the same size and channel number as src1.
+@param beta weight of the second array elements.
+@param gamma scalar added to each sum.
+@param dst output array that has the same size and number of channels as the input arrays.
+@param dtype optional depth of the output array; when both input arrays have the same depth, dtype
+can be set to -1, which will be equivalent to src1.depth().
+@sa  add, subtract, scaleAdd, Mat::convertTo
+*/
+CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2,
+                              double beta, double gamma, OutputArray dst, int dtype = -1);
+
+/** @brief Scales, calculates absolute values, and converts the result to 8-bit.
+
+On each element of the input array, the function convertScaleAbs
+performs three operations sequentially: scaling, taking an absolute
+value, conversion to an unsigned 8-bit type:
+\f[\texttt{dst} (I)= \texttt{saturate\_cast<uchar>} (| \texttt{src} (I)* \texttt{alpha} +  \texttt{beta} |)\f]
+In case of multi-channel arrays, the function processes each channel
+independently. When the output is not 8-bit, the operation can be
+emulated by calling the Mat::convertTo method (or by using matrix
+expressions) and then by calculating an absolute value of the result.
+For example:
+@code{.cpp}
+    Mat_<float> A(30,30);
+    randu(A, Scalar(-100), Scalar(100));
+    Mat_<float> B = A*5 + 3;
+    B = abs(B);
+    // Mat_<float> B = abs(A*5+3) will also do the job,
+    // but it will allocate a temporary matrix
+@endcode
+@param src input array.
+@param dst output array.
+@param alpha optional scale factor.
+@param beta optional delta added to the scaled values.
+@sa  Mat::convertTo, cv::abs(const Mat&)
+*/
+CV_EXPORTS_W void convertScaleAbs(InputArray src, OutputArray dst,
+                                  double alpha = 1, double beta = 0);
+
+/** @brief Converts an array to half precision floating number.
+
+This function converts between FP32 (single precision floating point) and FP16 (half precision
+floating point). The CV_16S format is used to represent FP16 data. There are two use modes
+(src -> dst): CV_32F -> CV_16S and CV_16S -> CV_32F. The input array must have type CV_32F or
+CV_16S; otherwise, the function raises an error.
+The format of half precision floating point is defined in IEEE 754-2008.
+
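+A minimal sketch of a round trip (values are illustrative):
+@code{.cpp}
+    Mat f32(4, 4, CV_32F, Scalar(3.14f));
+    Mat f16, back;
+    convertFp16(f32, f16);  // f16 has type CV_16S, holding FP16 bit patterns
+    convertFp16(f16, back); // back is CV_32F again, up to FP16 rounding
+@endcode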
+@param src input array.
+@param dst output array.
+*/
+CV_EXPORTS_W void convertFp16(InputArray src, OutputArray dst);
+
+/** @brief Performs a look-up table transform of an array.
+
+The function LUT fills the output array with values from the look-up table. Indices of the entries
+are taken from the input array. That is, the function processes each element of src as follows:
+\f[\texttt{dst} (I)  \leftarrow \texttt{lut(src(I) + d)}\f]
+where
+\f[d =  \fork{0}{if \(\texttt{src}\) has depth \(\texttt{CV_8U}\)}{128}{if \(\texttt{src}\) has depth \(\texttt{CV_8S}\)}\f]
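+A minimal sketch that inverts an 8-bit image through a 256-entry table (values are illustrative):
+@code{.cpp}
+    Mat table(1, 256, CV_8U);
+    for (int i = 0; i < 256; i++)
+        table.at<uchar>(i) = (uchar)(255 - i);
+    Mat src(10, 10, CV_8UC1, Scalar(40)), dst;
+    LUT(src, table, dst); // every element becomes 255 - 40 = 215
+@endcode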
+@param src input array of 8-bit elements.
+@param lut look-up table of 256 elements; in case of multi-channel input array, the table should
+either have a single channel (in this case the same table is used for all channels) or the same
+number of channels as in the input array.
+@param dst output array of the same size and number of channels as src, and the same depth as lut.
+@sa  convertScaleAbs, Mat::convertTo
+*/
+CV_EXPORTS_W void LUT(InputArray src, InputArray lut, OutputArray dst);
+
+/** @brief Calculates the sum of array elements.
+
+The function cv::sum calculates and returns the sum of array elements,
+independently for each channel.
+@param src input array that must have from 1 to 4 channels.
+@sa  countNonZero, mean, meanStdDev, norm, minMaxLoc, reduce
+*/
+CV_EXPORTS_AS(sumElems) Scalar sum(InputArray src);
+
+/** @brief Counts non-zero array elements.
+
+The function returns the number of non-zero elements in src :
+\f[\sum _{I: \; \texttt{src} (I) \ne0 } 1\f]
+@param src single-channel array.
+@sa  mean, meanStdDev, norm, minMaxLoc, calcCovarMatrix
+*/
+CV_EXPORTS_W int countNonZero( InputArray src );
+
+/** @brief Returns the list of locations of non-zero pixels
+
+Given a binary matrix (likely returned from an operation such
+as threshold(), compare(), >, ==, etc.), returns all of
+the non-zero indices as a cv::Mat or std::vector<cv::Point> (x,y).
+For example:
+@code{.cpp}
+    cv::Mat binaryImage; // input, binary image
+    cv::Mat locations;   // output, locations of non-zero pixels
+    cv::findNonZero(binaryImage, locations);
+
+    // access pixel coordinates
+    Point pnt = locations.at<Point>(i);
+@endcode
+or
+@code{.cpp}
+    cv::Mat binaryImage; // input, binary image
+    vector<Point> locations;   // output, locations of non-zero pixels
+    cv::findNonZero(binaryImage, locations);
+
+    // access pixel coordinates
+    Point pnt = locations[i];
+@endcode
+@param src single-channel array (type CV_8UC1)
+@param idx the output array, type of cv::Mat or std::vector<Point>, corresponding to non-zero indices in the input
+*/
+CV_EXPORTS_W void findNonZero( InputArray src, OutputArray idx );
+
+/** @brief Calculates an average (mean) of array elements.
+
+The function cv::mean calculates the mean value M of array elements,
+independently for each channel, and returns it:
+\f[\begin{array}{l} N =  \sum _{I: \; \texttt{mask} (I) \ne 0} 1 \\ M_c =  \left ( \sum _{I: \; \texttt{mask} (I) \ne 0}{ \texttt{mtx} (I)_c} \right )/N \end{array}\f]
+When all the mask elements are 0's, the function returns Scalar::all(0)
+@param src input array that should have from 1 to 4 channels so that the result can be stored in
+Scalar_ .
+@param mask optional operation mask.
+@sa  countNonZero, meanStdDev, norm, minMaxLoc
+*/
+CV_EXPORTS_W Scalar mean(InputArray src, InputArray mask = noArray());
+
+/** @brief Calculates a mean and standard deviation of array elements.
+
+The function cv::meanStdDev calculates the mean and the standard deviation M
+of array elements independently for each channel and returns it via the
+output parameters:
+\f[\begin{array}{l} N =  \sum _{I, \texttt{mask} (I)  \ne 0} 1 \\ \texttt{mean} _c =  \frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \texttt{src} (I)_c}{N} \\ \texttt{stddev} _c =  \sqrt{\frac{\sum_{ I: \; \texttt{mask}(I) \ne 0} \left ( \texttt{src} (I)_c -  \texttt{mean} _c \right )^2}{N}} \end{array}\f]
+When all the mask elements are 0's, the function returns
+mean=stddev=Scalar::all(0).
+@note The calculated standard deviation is only the diagonal of the
+complete normalized covariance matrix. If the full matrix is needed, you
+can reshape the multi-channel array M x N to the single-channel array
+M\*N x mtx.channels() (only possible when the matrix is continuous) and
+then pass the matrix to calcCovarMatrix .
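+A minimal sketch (the uniform-noise input is illustrative):
+@code{.cpp}
+    Mat img(100, 100, CV_8UC1);
+    randu(img, Scalar(0), Scalar(256)); // uniform noise in [0, 255]
+    Scalar m, s;
+    meanStdDev(img, m, s); // expect m[0] ~ 127.5 and s[0] ~ 73.9
+@endcode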
+@param src input array that should have from 1 to 4 channels so that the results can be stored in
+Scalar_ 's.
+@param mean output parameter: calculated mean value.
+@param stddev output parameter: calculated standard deviation.
+@param mask optional operation mask.
+@sa  countNonZero, mean, norm, minMaxLoc, calcCovarMatrix
+*/
+CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray mean, OutputArray stddev,
+                             InputArray mask=noArray());
+
+/** @brief Calculates the  absolute norm of an array.
+
+This version of #norm calculates the absolute norm of src1. The type of norm to calculate is specified using #NormTypes.
+
+As an example for one array, consider the function \f$r(x)= \begin{pmatrix} x \\ 1-x \end{pmatrix}, x \in [-1;1]\f$.
+The \f$ L_{1}, L_{2} \f$ and \f$ L_{\infty} \f$ norm for the sample value \f$r(-1) = \begin{pmatrix} -1 \\ 2 \end{pmatrix}\f$
+is calculated as follows
+\f{align*}
+    \| r(-1) \|_{L_1} &= |-1| + |2| = 3 \\
+    \| r(-1) \|_{L_2} &= \sqrt{(-1)^{2} + (2)^{2}} = \sqrt{5} \\
+    \| r(-1) \|_{L_\infty} &= \max(|-1|,|2|) = 2
+\f}
+and for \f$r(0.5) = \begin{pmatrix} 0.5 \\ 0.5 \end{pmatrix}\f$ the calculation is
+\f{align*}
+    \| r(0.5) \|_{L_1} &= |0.5| + |0.5| = 1 \\
+    \| r(0.5) \|_{L_2} &= \sqrt{(0.5)^{2} + (0.5)^{2}} = \sqrt{0.5} \\
+    \| r(0.5) \|_{L_\infty} &= \max(|0.5|,|0.5|) = 0.5.
+\f}
+The following graphic shows all values for the three norm functions \f$\| r(x) \|_{L_1}, \| r(x) \|_{L_2}\f$ and \f$\| r(x) \|_{L_\infty}\f$.
+It is notable that the \f$ L_{1} \f$ norm forms the upper and the \f$ L_{\infty} \f$ norm forms the lower border for the example function \f$ r(x) \f$.
+![Graphs for the different norm functions from the above example](pics/NormTypes_OneArray_1-2-INF.png)
+
+When the mask parameter is specified and it is not empty, the norm is
+calculated only over the region specified by the mask.
+
+If normType is not specified, #NORM_L2 is used.
+
+Multi-channel input arrays are treated as single-channel arrays, that is,
+the results for all channels are combined.
+
+Hamming norms can only be calculated with CV_8U depth arrays.
+
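+A minimal sketch comparing the three common norms (values are illustrative):
+@code{.cpp}
+    Mat v = (Mat_<float>(3, 1) << 1.f, 2.f, 2.f);
+    double l2 = norm(v);           // sqrt(1 + 4 + 4) = 3
+    double l1 = norm(v, NORM_L1);  // 1 + 2 + 2 = 5
+    double li = norm(v, NORM_INF); // max(1, 2, 2) = 2
+@endcode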
+@param src1 first input array.
+@param normType type of the norm (see #NormTypes).
+@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
+*/
+CV_EXPORTS_W double norm(InputArray src1, int normType = NORM_L2, InputArray mask = noArray());
+
+/** @brief Calculates an absolute difference norm or a relative difference norm.
+
+This version of cv::norm calculates the absolute difference norm
+or the relative difference norm of arrays src1 and src2.
+The type of norm to calculate is specified using #NormTypes.
+
+@param src1 first input array.
+@param src2 second input array of the same size and the same type as src1.
+@param normType type of the norm (see #NormTypes).
+@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
+*/
+CV_EXPORTS_W double norm(InputArray src1, InputArray src2,
+                         int normType = NORM_L2, InputArray mask = noArray());
+/** @overload
+@param src first input array.
+@param normType type of the norm (see #NormTypes).
+*/
+CV_EXPORTS double norm( const SparseMat& src, int normType );
+
+/** @brief Computes the Peak Signal-to-Noise Ratio (PSNR) image quality metric.
+
+This function calculates the Peak Signal-to-Noise Ratio (PSNR) image quality metric in decibels (dB) between two input arrays src1 and src2. The arrays must have depth CV_8U.
+
+The PSNR is calculated as follows:
+
+\f[
+\texttt{PSNR} = 10 \cdot \log_{10}{\left( \frac{R^2}{MSE} \right) }
+\f]
+
+where R is the maximum integer value of depth CV_8U (255) and MSE is the mean squared error between the two arrays.
+
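+A minimal sketch (the uniform distortion is illustrative):
+@code{.cpp}
+    Mat img(64, 64, CV_8UC1, Scalar(100));
+    Mat approx = img.clone();
+    approx += 5;                     // MSE becomes 25
+    double psnr = PSNR(img, approx); // 10 * log10(255^2 / 25) ~ 34.15 dB
+@endcode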
+@param src1 first input array.
+@param src2 second input array of the same size as src1.
+
+  */
+CV_EXPORTS_W double PSNR(InputArray src1, InputArray src2);
+
+/** @brief naive nearest neighbor finder
+
+see http://en.wikipedia.org/wiki/Nearest_neighbor_search
+@todo document
+  */
+CV_EXPORTS_W void batchDistance(InputArray src1, InputArray src2,
+                                OutputArray dist, int dtype, OutputArray nidx,
+                                int normType = NORM_L2, int K = 0,
+                                InputArray mask = noArray(), int update = 0,
+                                bool crosscheck = false);
+
+/** @brief Normalizes the norm or value range of an array.
+
+The function cv::normalize scales and shifts the input array elements so that
+\f[\| \texttt{dst} \| _{L_p}= \texttt{alpha}\f]
+(where p=Inf, 1 or 2) when normType=NORM_INF, NORM_L1, or NORM_L2, respectively; or so that
+\f[\min _I  \texttt{dst} (I)= \texttt{alpha} , \, \, \max _I  \texttt{dst} (I)= \texttt{beta}\f]
+
+when normType=NORM_MINMAX (for dense arrays only). The optional mask specifies a sub-array to be
+normalized. This means that the norm or the min/max values are calculated over the sub-array, and then this
+sub-array is modified to be normalized. If you want to only use the mask to calculate the norm or
+min-max but modify the whole array, you can use norm and Mat::convertTo.
+
+In case of sparse matrices, only the non-zero values are analyzed and transformed. Because of this,
+the range transformation for sparse matrices is not allowed since it can shift the zero level.
+
+Possible usage with some positive example data:
+@code{.cpp}
+    vector<double> positiveData = { 2.0, 8.0, 10.0 };
+    vector<double> normalizedData_l1, normalizedData_l2, normalizedData_inf, normalizedData_minmax;
+
+    // Norm to probability (total count)
+    // sum(numbers) = 20.0
+    // 2.0      0.1     (2.0/20.0)
+    // 8.0      0.4     (8.0/20.0)
+    // 10.0     0.5     (10.0/20.0)
+    normalize(positiveData, normalizedData_l1, 1.0, 0.0, NORM_L1);
+
+    // Norm to unit vector: ||positiveData|| = 1.0
+    // 2.0      0.15
+    // 8.0      0.62
+    // 10.0     0.77
+    normalize(positiveData, normalizedData_l2, 1.0, 0.0, NORM_L2);
+
+    // Norm to max element
+    // 2.0      0.2     (2.0/10.0)
+    // 8.0      0.8     (8.0/10.0)
+    // 10.0     1.0     (10.0/10.0)
+    normalize(positiveData, normalizedData_inf, 1.0, 0.0, NORM_INF);
+
+    // Norm to range [0.0;1.0]
+    // 2.0      0.0     (shift to left border)
+    // 8.0      0.75    (6.0/8.0)
+    // 10.0     1.0     (shift to right border)
+    normalize(positiveData, normalizedData_minmax, 1.0, 0.0, NORM_MINMAX);
+@endcode
+
+@param src input array.
+@param dst output array of the same size as src .
+@param alpha norm value to normalize to or the lower range boundary in case of the range
+normalization.
+@param beta upper range boundary in case of the range normalization; it is not used for the norm
+normalization.
+@param norm_type normalization type (see cv::NormTypes).
+@param dtype when negative, the output array has the same type as src; otherwise, it has the same
+number of channels as src and the depth =CV_MAT_DEPTH(dtype).
+@param mask optional operation mask.
+@sa norm, Mat::convertTo, SparseMat::convertTo
+*/
+CV_EXPORTS_W void normalize( InputArray src, InputOutputArray dst, double alpha = 1, double beta = 0,
+                             int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray());
+
+/** @overload
+@param src input array.
+@param dst output array of the same size as src .
+@param alpha norm value to normalize to or the lower range boundary in case of the range
+normalization.
+@param normType normalization type (see cv::NormTypes).
+*/
+CV_EXPORTS void normalize( const SparseMat& src, SparseMat& dst, double alpha, int normType );
+
+/** @brief Finds the global minimum and maximum in an array.
+
+The function cv::minMaxLoc finds the minimum and maximum element values and their positions. The
+extremums are searched across the whole array or, if mask is not an empty array, in the specified
+array region.
+
+The function does not work with multi-channel arrays. If you need to find minimum or maximum
+elements across all the channels, use Mat::reshape first to reinterpret the array as
+single-channel. Or you may extract the particular channel using either extractImageCOI , or
+mixChannels , or split .
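+A minimal sketch (the matrix contents are illustrative):
+@code{.cpp}
+    Mat img = (Mat_<uchar>(2, 3) << 5, 80, 12,
+                                    9,  1, 200);
+    double minV, maxV;
+    Point minLoc, maxLoc;
+    minMaxLoc(img, &minV, &maxV, &minLoc, &maxLoc);
+    // minV = 1 at Point(1, 1), maxV = 200 at Point(2, 1); Point is (x, y)
+@endcode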
+@param src input single-channel array.
+@param minVal pointer to the returned minimum value; NULL is used if not required.
+@param maxVal pointer to the returned maximum value; NULL is used if not required.
+@param minLoc pointer to the returned minimum location (in 2D case); NULL is used if not required.
+@param maxLoc pointer to the returned maximum location (in 2D case); NULL is used if not required.
+@param mask optional mask used to select a sub-array.
+@sa max, min, compare, inRange, extractImageCOI, mixChannels, split, Mat::reshape
+*/
+CV_EXPORTS_W void minMaxLoc(InputArray src, CV_OUT double* minVal,
+                            CV_OUT double* maxVal = 0, CV_OUT Point* minLoc = 0,
+                            CV_OUT Point* maxLoc = 0, InputArray mask = noArray());
+
+
+/** @brief Finds the global minimum and maximum in an array
+
+The function cv::minMaxIdx finds the minimum and maximum element values and their positions. The
+extremums are searched across the whole array or, if mask is not an empty array, in the specified
+array region. The function does not work with multi-channel arrays. If you need to find minimum or
+maximum elements across all the channels, use Mat::reshape first to reinterpret the array as
+single-channel. Or you may extract the particular channel using either extractImageCOI , or
+mixChannels , or split . In case of a sparse matrix, the minimum is found among non-zero elements
+only.
+@note When minIdx is not NULL, it must have at least 2 elements (as well as maxIdx), even if src is
+a single-row or single-column matrix. In OpenCV (following MATLAB) each array has at least 2
+dimensions, i.e. single-column matrix is Mx1 matrix (and therefore minIdx/maxIdx will be
+(i1,0)/(i2,0)) and single-row matrix is 1xN matrix (and therefore minIdx/maxIdx will be
+(0,j1)/(0,j2)).
+@param src input single-channel array.
+@param minVal pointer to the returned minimum value; NULL is used if not required.
+@param maxVal pointer to the returned maximum value; NULL is used if not required.
+@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required;
+Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element
+in each dimension are stored there sequentially.
+@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required.
+@param mask optional mask used to select a sub-array.
+*/
+CV_EXPORTS void minMaxIdx(InputArray src, double* minVal, double* maxVal = 0,
+                          int* minIdx = 0, int* maxIdx = 0, InputArray mask = noArray());
+
+/** @overload
+@param a input single-channel array.
+@param minVal pointer to the returned minimum value; NULL is used if not required.
+@param maxVal pointer to the returned maximum value; NULL is used if not required.
+@param minIdx pointer to the returned minimum location (in nD case); NULL is used if not required;
+Otherwise, it must point to an array of src.dims elements, the coordinates of the minimum element
+in each dimension are stored there sequentially.
+@param maxIdx pointer to the returned maximum location (in nD case). NULL is used if not required.
+*/
+CV_EXPORTS void minMaxLoc(const SparseMat& a, double* minVal,
+                          double* maxVal, int* minIdx = 0, int* maxIdx = 0);
+
+/** @brief Reduces a matrix to a vector.
+
+The function #reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of
+1D vectors and performing the specified operation on the vectors until a single row/column is
+obtained. For example, the function can be used to compute horizontal and vertical projections of a
+raster image. In case of #REDUCE_MAX and #REDUCE_MIN , the output image should have the same type as the source one.
+In case of #REDUCE_SUM and #REDUCE_AVG , the output may have a larger element bit-depth to preserve accuracy.
+Multi-channel arrays are also supported in these two reduction modes.
+
+The following code demonstrates its usage for a single channel matrix.
+@snippet snippets/core_reduce.cpp example
+
+And the following code demonstrates its usage for a two-channel matrix.
+@snippet snippets/core_reduce.cpp example2
+
+@param src input 2D matrix.
+@param dst output vector. Its size and type is defined by dim and dtype parameters.
+@param dim dimension index along which the matrix is reduced. 0 means that the matrix is reduced to
+a single row. 1 means that the matrix is reduced to a single column.
+@param rtype reduction operation that could be one of #ReduceTypes
+@param dtype when negative, the output vector will have the same type as the input matrix,
+otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()).
+@sa repeat
+*/
+CV_EXPORTS_W void reduce(InputArray src, OutputArray dst, int dim, int rtype, int dtype = -1);
+
+/** @brief Creates one multi-channel array out of several single-channel ones.
+
+The function cv::merge merges several arrays to make a single multi-channel array. That is, each
+element of the output array will be a concatenation of the elements of the input arrays, where
+elements of i-th input array are treated as mv[i].channels()-element vectors.
+
+The function cv::split does the reverse operation. If you need to shuffle channels in some other
+advanced way, use cv::mixChannels.
+
+The following example shows how to merge 3 single channel matrices into a single 3-channel matrix.
+@snippet snippets/core_merge.cpp example
+
+@param mv input array of matrices to be merged; all the matrices in mv must have the same
+size and the same depth.
+@param count number of input matrices when mv is a plain C array; it must be greater than zero.
+@param dst output array of the same size and the same depth as mv[0]; The number of channels will
+be equal to the parameter count.
+@sa  mixChannels, split, Mat::reshape
+*/
+CV_EXPORTS void merge(const Mat* mv, size_t count, OutputArray dst);
+
+/** @overload
+@param mv input vector of matrices to be merged; all the matrices in mv must have the same
+size and the same depth.
+@param dst output array of the same size and the same depth as mv[0]; The number of channels will
+be the total number of channels in the matrix array.
+  */
+CV_EXPORTS_W void merge(InputArrayOfArrays mv, OutputArray dst);
+
+/** @brief Divides a multi-channel array into several single-channel arrays.
+
+The function cv::split splits a multi-channel array into separate single-channel arrays:
+\f[\texttt{mv} [c](I) =  \texttt{src} (I)_c\f]
+If you need to extract a single channel or do some other sophisticated channel permutation, use
+mixChannels .
+
+The following example demonstrates how to split a 3-channel matrix into 3 single channel matrices.
+@snippet snippets/core_split.cpp example
+
+@param src input multi-channel array.
+@param mvbegin output array; the number of arrays must match src.channels(); the arrays themselves are
+reallocated, if needed.
+@sa merge, mixChannels, cvtColor
+*/
+CV_EXPORTS void split(const Mat& src, Mat* mvbegin);
+
+/** @overload
+@param m input multi-channel array.
+@param mv output vector of arrays; the arrays themselves are reallocated, if needed.
+*/
+CV_EXPORTS_W void split(InputArray m, OutputArrayOfArrays mv);
+
+/** @brief Copies specified channels from input arrays to the specified channels of
+output arrays.
+
+The function cv::mixChannels provides an advanced mechanism for shuffling image channels.
+
+cv::split,cv::merge,cv::extractChannel,cv::insertChannel and some forms of cv::cvtColor are partial cases of cv::mixChannels.
+
+In the example below, the code splits a 4-channel BGRA image into a 3-channel BGR (with B and R
+channels swapped) and a separate alpha-channel image:
+@code{.cpp}
+    Mat bgra( 100, 100, CV_8UC4, Scalar(255,0,0,255) );
+    Mat bgr( bgra.rows, bgra.cols, CV_8UC3 );
+    Mat alpha( bgra.rows, bgra.cols, CV_8UC1 );
+
+    // forming an array of matrices is a quite efficient operation,
+    // because the matrix data is not copied, only the headers
+    Mat out[] = { bgr, alpha };
+    // bgra[0] -> bgr[2], bgra[1] -> bgr[1],
+    // bgra[2] -> bgr[0], bgra[3] -> alpha[0]
+    int from_to[] = { 0,2, 1,1, 2,0, 3,3 };
+    mixChannels( &bgra, 1, out, 2, from_to, 4 );
+@endcode
+@note Unlike many other new-style C++ functions in OpenCV (see the introduction section and
+Mat::create ), cv::mixChannels requires the output arrays to be pre-allocated before calling the
+function.
+@param src input array or vector of matrices; all of the matrices must have the same size and the
+same depth.
+@param nsrcs number of matrices in `src`.
+@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and
+depth must be the same as in `src[0]`.
+@param ndsts number of matrices in `dst`.
+@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is
+a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in
+dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to
+src[0].channels()-1, the second input image channels are indexed from src[0].channels() to
+src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image
+channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is
+filled with zero .
+@param npairs number of index pairs in `fromTo`.
+@sa split, merge, extractChannel, insertChannel, cvtColor
+*/
+CV_EXPORTS void mixChannels(const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts,
+                            const int* fromTo, size_t npairs);
+
+/** @overload
+@param src input array or vector of matrices; all of the matrices must have the same size and the
+same depth.
+@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and
+depth must be the same as in src[0].
+@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is
+a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in
+dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to
+src[0].channels()-1, the second input image channels are indexed from src[0].channels() to
+src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image
+channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is
+filled with zero .
+@param npairs number of index pairs in fromTo.
+*/
+CV_EXPORTS void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
+                            const int* fromTo, size_t npairs);
+
+/** @overload
+@param src input array or vector of matrices; all of the matrices must have the same size and the
+same depth.
+@param dst output array or vector of matrices; all the matrices **must be allocated**; their size and
+depth must be the same as in src[0].
+@param fromTo array of index pairs specifying which channels are copied and where; fromTo[k\*2] is
+a 0-based index of the input channel in src, fromTo[k\*2+1] is an index of the output channel in
+dst; the continuous channel numbering is used: the first input image channels are indexed from 0 to
+src[0].channels()-1, the second input image channels are indexed from src[0].channels() to
+src[0].channels() + src[1].channels()-1, and so on, the same scheme is used for the output image
+channels; as a special case, when fromTo[k\*2] is negative, the corresponding output channel is
+filled with zero .
+*/
+CV_EXPORTS_W void mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
+                              const std::vector<int>& fromTo);
+
+/** @brief Extracts a single channel from src (coi is 0-based index)
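+
+A minimal sketch (the BGR image and its values are illustrative):
+@code{.cpp}
+    Mat bgr(10, 10, CV_8UC3, Scalar(255, 128, 0));
+    Mat green;
+    extractChannel(bgr, green, 1); // green is CV_8UC1, every element is 128
+@endcode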
+@param src input array
+@param dst output array
+@param coi index of channel to extract
+@sa mixChannels, split
+*/
+CV_EXPORTS_W void extractChannel(InputArray src, OutputArray dst, int coi);
+
+/** @brief Inserts a single channel to dst (coi is 0-based index)
+@param src input array
+@param dst output array
+@param coi index of channel for insertion
+@sa mixChannels, merge
+*/
+CV_EXPORTS_W void insertChannel(InputArray src, InputOutputArray dst, int coi);
+
+/** @brief Flips a 2D array around vertical, horizontal, or both axes.
+
+The function cv::flip flips the array in one of three different ways (row
+and column indices are 0-based):
+\f[\texttt{dst} _{ij} =
+\left\{
+\begin{array}{l l}
+\texttt{src} _{\texttt{src.rows}-i-1,j} & if\;  \texttt{flipCode} = 0 \\
+\texttt{src} _{i, \texttt{src.cols} -j-1} & if\;  \texttt{flipCode} > 0 \\
+\texttt{src} _{ \texttt{src.rows} -i-1, \texttt{src.cols} -j-1} & if\; \texttt{flipCode} < 0 \\
+\end{array}
+\right.\f]
+The example scenarios of using the function are the following:
+*   Vertical flipping of the image (flipCode == 0) to switch between
+    top-left and bottom-left image origin. This is a typical operation
+    in video processing on Microsoft Windows\* OS.
+*   Horizontal flipping of the image with the subsequent horizontal
+    shift and absolute difference calculation to check for a
+    vertical-axis symmetry (flipCode \> 0).
+*   Simultaneous horizontal and vertical flipping of the image with
+    the subsequent shift and absolute difference calculation to check
+    for a central symmetry (flipCode \< 0).
+*   Reversing the order of point arrays (flipCode \> 0 or
+    flipCode == 0).
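+A minimal sketch of a horizontal mirror (values are illustrative):
+@code{.cpp}
+    Mat img = (Mat_<uchar>(2, 2) << 1, 2,
+                                    3, 4);
+    Mat flipped;
+    flip(img, flipped, 1); // flip around the y-axis: [2, 1; 4, 3]
+@endcode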
+@param src input array.
+@param dst output array of the same size and type as src.
+@param flipCode a flag to specify how to flip the array; 0 means
+flipping around the x-axis and positive value (for example, 1) means
+flipping around y-axis. Negative value (for example, -1) means flipping
+around both axes.
+@sa transpose , repeat , completeSymm
+*/
+CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode);
+
+enum RotateFlags {
+    ROTATE_90_CLOCKWISE = 0, //!<Rotate 90 degrees clockwise
+    ROTATE_180 = 1, //!<Rotate 180 degrees clockwise
+    ROTATE_90_COUNTERCLOCKWISE = 2, //!<Rotate 270 degrees clockwise
+};
+/** @brief Rotates a 2D array in multiples of 90 degrees.
+The function cv::rotate rotates the array in one of three different ways:
+*   Rotate by 90 degrees clockwise (rotateCode = ROTATE_90_CLOCKWISE).
+*   Rotate by 180 degrees clockwise (rotateCode = ROTATE_180).
+*   Rotate by 270 degrees clockwise (rotateCode = ROTATE_90_COUNTERCLOCKWISE).
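+A minimal sketch (the matrix contents are illustrative):
+@code{.cpp}
+    Mat m = (Mat_<uchar>(2, 3) << 1, 2, 3,
+                                  4, 5, 6);
+    Mat r;
+    rotate(m, r, ROTATE_90_CLOCKWISE); // r is 3x2: [4, 1; 5, 2; 6, 3]
+@endcode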
+@param src input array.
+@param dst output array of the same type as src. Its size is the same as that of src with ROTATE_180,
+and the rows and cols are swapped for ROTATE_90_CLOCKWISE and ROTATE_90_COUNTERCLOCKWISE.
+@param rotateCode an enum to specify how to rotate the array; see the enum #RotateFlags
+@sa transpose , repeat , completeSymm, flip, RotateFlags
+*/
+CV_EXPORTS_W void rotate(InputArray src, OutputArray dst, int rotateCode);
+
+/** @brief Fills the output array with repeated copies of the input array.
+
+The function cv::repeat duplicates the input array one or more times along each of the two axes:
+\f[\texttt{dst} _{ij}= \texttt{src} _{i\mod src.rows, \; j\mod src.cols }\f]
+The second variant of the function is more convenient to use with @ref MatrixExpressions.
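+A minimal sketch of tiling (values are illustrative):
+@code{.cpp}
+    Mat tile = (Mat_<uchar>(1, 2) << 1, 2);
+    Mat dst;
+    repeat(tile, 2, 2, dst); // dst is 2x4: [1, 2, 1, 2; 1, 2, 1, 2]
+@endcode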
+@param src input array to replicate.
+@param ny Number of times the `src` is repeated along the
+vertical axis.
+@param nx Number of times the `src` is repeated along the
+horizontal axis.
+@param dst output array of the same type as `src`.
+@sa cv::reduce
+*/
+CV_EXPORTS_W void repeat(InputArray src, int ny, int nx, OutputArray dst);
+
+/** @overload
+@param src input array to replicate.
+@param ny Number of times the `src` is repeated along the
+vertical axis.
+@param nx Number of times the `src` is repeated along the
+horizontal axis.
+  */
+CV_EXPORTS Mat repeat(const Mat& src, int ny, int nx);
+
+/** @brief Applies horizontal concatenation to given matrices.
+
+The function horizontally concatenates two or more cv::Mat matrices (with the same number of rows).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matArray, 3, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+@endcode
+@param src input array or vector of matrices. All of the matrices must have the same number of rows and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+@sa cv::vconcat(const Mat*, size_t, OutputArray), cv::vconcat(InputArrayOfArrays, OutputArray) and cv::vconcat(InputArray, InputArray, OutputArray)
+*/
+CV_EXPORTS void hconcat(const Mat* src, size_t nsrc, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 4,
+                                                  2, 5,
+                                                  3, 6);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 7, 10,
+                                                  8, 11,
+                                                  9, 12);
+
+    cv::Mat C;
+    cv::hconcat(A, B, C);
+    //C:
+    //[1, 4, 7, 10;
+    // 2, 5, 8, 11;
+    // 3, 6, 9, 12]
+ @endcode
+ @param src1 first input array to be considered for horizontal concatenation.
+ @param src2 second input array to be considered for horizontal concatenation.
+ @param dst output array. It has the same number of rows and depth as the src1 and src2, and the sum of cols of the src1 and src2.
+ */
+CV_EXPORTS void hconcat(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(4, 1, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(4, 1, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::hconcat( matrices, out );
+    //out:
+    //[1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3;
+    // 1, 2, 3]
+ @endcode
+ @param src input array or vector of matrices. All of the matrices must have the same number of rows and the same depth.
+ @param dst output array. It has the same number of rows and depth as the src, and the sum of cols of the src.
+ */
+CV_EXPORTS_W void hconcat(InputArrayOfArrays src, OutputArray dst);
+
+/** @brief Applies vertical concatenation to given matrices.
+
+The function vertically concatenates two or more cv::Mat matrices (with the same number of cols).
+@code{.cpp}
+    cv::Mat matArray[] = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                           cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matArray, 3, out );
+    //out:
+    //[1,   1,   1,   1;
+    // 2,   2,   2,   2;
+    // 3,   3,   3,   3]
+@endcode
+@param src input array or vector of matrices. All of the matrices must have the same number of cols and the same depth.
+@param nsrc number of matrices in src.
+@param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+@sa cv::hconcat(const Mat*, size_t, OutputArray), cv::hconcat(InputArrayOfArrays, OutputArray) and cv::hconcat(InputArray, InputArray, OutputArray)
+*/
+CV_EXPORTS void vconcat(const Mat* src, size_t nsrc, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    cv::Mat_<float> A = (cv::Mat_<float>(3, 2) << 1, 7,
+                                                  2, 8,
+                                                  3, 9);
+    cv::Mat_<float> B = (cv::Mat_<float>(3, 2) << 4, 10,
+                                                  5, 11,
+                                                  6, 12);
+
+    cv::Mat C;
+    cv::vconcat(A, B, C);
+    //C:
+    //[1, 7;
+    // 2, 8;
+    // 3, 9;
+    // 4, 10;
+    // 5, 11;
+    // 6, 12]
+ @endcode
+ @param src1 first input array to be considered for vertical concatenation.
+ @param src2 second input array to be considered for vertical concatenation.
+ @param dst output array. It has the same number of cols and depth as the src1 and src2, and the sum of rows of the src1 and src2.
+ */
+CV_EXPORTS void vconcat(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+ @code{.cpp}
+    std::vector<cv::Mat> matrices = { cv::Mat(1, 4, CV_8UC1, cv::Scalar(1)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(2)),
+                                      cv::Mat(1, 4, CV_8UC1, cv::Scalar(3)),};
+
+    cv::Mat out;
+    cv::vconcat( matrices, out );
+    //out:
+    //[1,   1,   1,   1;
+    // 2,   2,   2,   2;
+    // 3,   3,   3,   3]
+ @endcode
+ @param src input array or vector of matrices. All of the matrices must have the same number of cols and the same depth.
+ @param dst output array. It has the same number of cols and depth as the src, and the sum of rows of the src.
+ */
+CV_EXPORTS_W void vconcat(InputArrayOfArrays src, OutputArray dst);
+
+/** @brief computes bitwise conjunction of the two arrays (dst = src1 & src2)
+Calculates the per-element bit-wise conjunction of two arrays or an
+array and a scalar.
+
+The function cv::bitwise_and calculates the per-element bit-wise logical conjunction for:
+*   Two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+*   An array and a scalar when src2 is constructed from Scalar or has
+    the same number of elements as `src1.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \wedge \texttt{src2} \quad \texttt{if mask} (I) \ne0\f]
+*   A scalar and an array when src1 is constructed from Scalar or has
+    the same number of elements as `src2.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1}  \wedge \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+In case of floating-point arrays, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel arrays, each channel is processed
+independently. In the second and third cases above, the scalar is first
+converted to the array type.
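+A minimal sketch that keeps only the pixels selected by a binary mask (values are illustrative):
+@code{.cpp}
+    Mat img(4, 4, CV_8UC1, Scalar(200));
+    Mat mask = Mat::zeros(4, 4, CV_8UC1);
+    mask(Rect(0, 0, 2, 2)) = 255;  // select the top-left quadrant
+    Mat kept;
+    bitwise_and(img, mask, kept);  // 200 & 255 = 200 inside, 200 & 0 = 0 outside
+@endcode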
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as the input
+arrays.
+@param mask optional operation mask, 8-bit single channel array, that
+specifies elements of the output array to be changed.
+*/
+CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2,
+                              OutputArray dst, InputArray mask = noArray());
+
+/** @brief Calculates the per-element bit-wise disjunction of two arrays or an
+array and a scalar.
+
+The function cv::bitwise_or calculates the per-element bit-wise logical disjunction for:
+*   Two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+*   An array and a scalar when src2 is constructed from Scalar or has
+    the same number of elements as `src1.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \vee \texttt{src2} \quad \texttt{if mask} (I) \ne0\f]
+*   A scalar and an array when src1 is constructed from Scalar or has
+    the same number of elements as `src2.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1}  \vee \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+In case of floating-point arrays, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel arrays, each channel is processed
+independently. In the second and third cases above, the scalar is first
+converted to the array type.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as the input
+arrays.
+@param mask optional operation mask, 8-bit single channel array, that
+specifies elements of the output array to be changed.
+*/
+CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2,
+                             OutputArray dst, InputArray mask = noArray());
+
+/** @brief Calculates the per-element bit-wise "exclusive or" operation on two
+arrays or an array and a scalar.
+
+The function cv::bitwise_xor calculates the per-element bit-wise logical "exclusive-or"
+operation for:
+*   Two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+*   An array and a scalar when src2 is constructed from Scalar or has
+    the same number of elements as `src1.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \oplus \texttt{src2} \quad \texttt{if mask} (I) \ne0\f]
+*   A scalar and an array when src1 is constructed from Scalar or has
+    the same number of elements as `src2.channels()`:
+    \f[\texttt{dst} (I) =  \texttt{src1}  \oplus \texttt{src2} (I) \quad \texttt{if mask} (I) \ne0\f]
+In case of floating-point arrays, their machine-specific bit
+representations (usually IEEE754-compliant) are used for the operation.
+In case of multi-channel arrays, each channel is processed
+independently. In the 2nd and 3rd cases above, the scalar is first
+converted to the array type.
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as the input
+arrays.
+@param mask optional operation mask, 8-bit single channel array, that
+specifies elements of the output array to be changed.
+*/
+CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2,
+                              OutputArray dst, InputArray mask = noArray());
+
+/** @brief  Inverts every bit of an array.
+
+The function cv::bitwise_not calculates per-element bit-wise inversion of the input
+array:
+\f[\texttt{dst} (I) =  \neg \texttt{src} (I)\f]
+In case of a floating-point input array, its machine-specific bit
+representation (usually IEEE754-compliant) is used for the operation. In
+case of multi-channel arrays, each channel is processed independently.
+@param src input array.
+@param dst output array that has the same size and type as the input
+array.
+@param mask optional operation mask, 8-bit single channel array, that
+specifies elements of the output array to be changed.
+*/
+CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst,
+                              InputArray mask = noArray());
+
+/** @brief Calculates the per-element absolute difference between two arrays or between an array and a scalar.
+
+The function cv::absdiff calculates:
+*   Absolute difference between two arrays when they have the same
+    size and type:
+    \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src1}(I) -  \texttt{src2}(I)|)\f]
+*   Absolute difference between an array and a scalar when the second
+    array is constructed from Scalar or has as many elements as the
+    number of channels in `src1`:
+    \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src1}(I) -  \texttt{src2} |)\f]
+*   Absolute difference between a scalar and an array when the first
+    array is constructed from Scalar or has as many elements as the
+    number of channels in `src2`:
+    \f[\texttt{dst}(I) =  \texttt{saturate} (| \texttt{src1} -  \texttt{src2}(I) |)\f]
+    where I is a multi-dimensional index of array elements. In case of
+    multi-channel arrays, each channel is processed independently.
+@note Saturation is not applied when the arrays have the depth CV_32S.
+You may even get a negative value in the case of overflow.
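+A minimal sketch (values are illustrative):
+@code{.cpp}
+    Mat a(1, 3, CV_8UC1, Scalar(10));
+    Mat b = (Mat_<uchar>(1, 3) << 5, 10, 250);
+    Mat d;
+    absdiff(a, b, d); // d = [5, 0, 240]
+@endcode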
+@param src1 first input array or a scalar.
+@param src2 second input array or a scalar.
+@param dst output array that has the same size and type as input arrays.
+@sa cv::abs(const Mat&)
+*/
+CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst);
+
+/** @brief  Checks if array elements lie between the elements of two other arrays.
+
+The function checks the range as follows:
+-   For every element of a single-channel input array:
+    \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0  \leq \texttt{src} (I)_0 \leq  \texttt{upperb} (I)_0\f]
+-   For two-channel arrays:
+    \f[\texttt{dst} (I)= \texttt{lowerb} (I)_0  \leq \texttt{src} (I)_0 \leq  \texttt{upperb} (I)_0  \land \texttt{lowerb} (I)_1  \leq \texttt{src} (I)_1 \leq  \texttt{upperb} (I)_1\f]
+-   and so forth.
+
+That is, dst (I) is set to 255 (all 1-bits) if src (I) is within the
+specified 1D, 2D, 3D, ... box and 0 otherwise.
+
+When the lower and/or upper boundary parameters are scalars, the indexes
+(I) at lowerb and upperb in the above formulas should be omitted.
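+A minimal sketch that selects a value band in a grayscale row (values are illustrative):
+@code{.cpp}
+    Mat gray = (Mat_<uchar>(1, 4) << 30, 60, 90, 120);
+    Mat band;
+    inRange(gray, Scalar(50), Scalar(100), band); // band = [0, 255, 255, 0]
+@endcode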
+@param src first input array.
+@param lowerb inclusive lower boundary array or a scalar.
+@param upperb inclusive upper boundary array or a scalar.
+@param dst output array of the same size as src and CV_8U type.
+*/
+CV_EXPORTS_W void inRange(InputArray src, InputArray lowerb,
+                          InputArray upperb, OutputArray dst);
+
+/** @brief Performs the per-element comparison of two arrays or an array and scalar value.
+
+The function compares:
+*   Elements of two arrays when src1 and src2 have the same size:
+    \f[\texttt{dst} (I) =  \texttt{src1} (I)  \,\texttt{cmpop}\, \texttt{src2} (I)\f]
+*   Elements of src1 with a scalar src2 when src2 is constructed from
+    Scalar or has a single element:
+    \f[\texttt{dst} (I) =  \texttt{src1}(I) \,\texttt{cmpop}\,  \texttt{src2}\f]
+*   src1 with elements of src2 when src1 is constructed from Scalar or
+    has a single element:
+    \f[\texttt{dst} (I) =  \texttt{src1}  \,\texttt{cmpop}\, \texttt{src2} (I)\f]
+When the comparison result is true, the corresponding element of output
+array is set to 255. The comparison operations can be replaced with the
+equivalent matrix expressions:
+@code{.cpp}
+    Mat dst1 = src1 >= src2;
+    Mat dst2 = src1 < 8;
+    ...
+@endcode
+@param src1 first input array or a scalar; when it is an array, it must have a single channel.
+@param src2 second input array or a scalar; when it is an array, it must have a single channel.
+@param dst output array of type CV_8U that has the same size and the same number of channels as
+    the input arrays.
+@param cmpop a flag, that specifies correspondence between the arrays (cv::CmpTypes)
+@sa checkRange, min, max, threshold
+*/
+CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop);
+
+/** @brief Calculates per-element minimum of two arrays or an array and a scalar.
+
+The function cv::min calculates the per-element minimum of two arrays:
+\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{src2} (I))\f]
+or array and a scalar:
+\f[\texttt{dst} (I)= \min ( \texttt{src1} (I), \texttt{value} )\f]
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1.
+@param dst output array of the same size and type as src1.
+@sa max, compare, inRange, minMaxLoc
+*/
+CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::min(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst);
+
+/** @brief Calculates per-element maximum of two arrays or an array and a scalar.
+
+The function cv::max calculates the per-element maximum of two arrays:
+\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{src2} (I))\f]
+or array and a scalar:
+\f[\texttt{dst} (I)= \max ( \texttt{src1} (I), \texttt{value} )\f]
+@param src1 first input array.
+@param src2 second input array of the same size and type as src1 .
+@param dst output array of the same size and type as src1.
+@sa  min, compare, inRange, minMaxLoc, @ref MatrixExpressions
+*/
+CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::max(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst);
+/** @overload
+needed to avoid conflicts with const _Tp& std::max(const _Tp&, const _Tp&, _Compare)
+*/
+CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst);
+
+/** @brief Calculates a square root of array elements.
+
+The function cv::sqrt calculates a square root of each input array element.
+In case of multi-channel arrays, each channel is processed
+independently. The accuracy is approximately the same as of the built-in
+std::sqrt .
+@param src input floating-point array.
+@param dst output array of the same size and type as src.
+*/
+CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst);
+
+/** @brief Raises every array element to a power.
+
+The function cv::pow raises every element of the input array to the given power:
+\f[\texttt{dst} (I) =  \fork{\texttt{src}(I)^{power}}{if \(\texttt{power}\) is integer}{|\texttt{src}(I)|^{power}}{otherwise}\f]
+
+So, for a non-integer power exponent, the absolute values of input array
+elements are used. However, the true values for negative inputs can be
+recovered with a few extra operations, as the example below, which
+computes the 5th root of array src, shows:
+@code{.cpp}
+    Mat mask = src < 0;
+    pow(src, 1./5, dst);
+    subtract(Scalar::all(0), dst, dst, mask);
+@endcode
+For some values of power, such as integer values, 0.5 and -0.5,
+specialized faster algorithms are used.
+
+Special values (NaN, Inf) are not handled.
+@param src input array.
+@param power exponent of power.
+@param dst output array of the same size and type as src.
+@sa sqrt, exp, log, cartToPolar, polarToCart
+*/
+CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst);
+
+/** @brief Calculates the exponential of every array element.
+
+The function cv::exp calculates the exponential of every element of the input
+array:
+\f[\texttt{dst} [I] = e^{ src(I) }\f]
+
+The maximum relative error is about 7e-6 for single-precision input and
+less than 1e-10 for double-precision input. Currently, the function
+converts denormalized values to zeros on output. Special values (NaN,
+Inf) are not handled.
+@param src input array.
+@param dst output array of the same size and type as src.
+@sa log, cartToPolar, polarToCart, phase, pow, sqrt, magnitude
+*/
+CV_EXPORTS_W void exp(InputArray src, OutputArray dst);
+
+/** @brief Calculates the natural logarithm of every array element.
+
+The function cv::log calculates the natural logarithm of every element of the input array:
+\f[\texttt{dst} (I) =  \log (\texttt{src}(I)) \f]
+
+Output on zero, negative and special (NaN, Inf) values is undefined.
+
+@param src input array.
+@param dst output array of the same size and type as src.
+@sa exp, cartToPolar, polarToCart, phase, pow, sqrt, magnitude
+*/
+CV_EXPORTS_W void log(InputArray src, OutputArray dst);
+
+/** @brief Calculates x and y coordinates of 2D vectors from their magnitude and angle.
+
+The function cv::polarToCart calculates the Cartesian coordinates of each 2D
+vector represented by the corresponding elements of magnitude and angle:
+\f[\begin{array}{l} \texttt{x} (I) =  \texttt{magnitude} (I) \cos ( \texttt{angle} (I)) \\ \texttt{y} (I) =  \texttt{magnitude} (I) \sin ( \texttt{angle} (I)) \\ \end{array}\f]
+
+The relative accuracy of the estimated coordinates is about 1e-6.
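+For example, a round trip through cartToPolar and back (a minimal sketch;
+x and y are assumed to be CV_32F arrays of the same size):
+@code{.cpp}
+    Mat mag, ang, x2, y2;
+    cartToPolar(x, y, mag, ang);     // to magnitude/angle
+    polarToCart(mag, ang, x2, y2);   // back to Cartesian; x2 ~ x, y2 ~ y
+@endcode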
+@param magnitude input floating-point array of magnitudes of 2D vectors;
+it can be an empty matrix (=Mat()), in this case, the function assumes
+that all the magnitudes are =1; if it is not empty, it must have the
+same size and type as angle.
+@param angle input floating-point array of angles of 2D vectors.
+@param x output array of x-coordinates of 2D vectors; it has the same
+size and type as angle.
+@param y output array of y-coordinates of 2D vectors; it has the same
+size and type as angle.
+@param angleInDegrees when true, the input angles are measured in
+degrees, otherwise, they are measured in radians.
+@sa cartToPolar, magnitude, phase, exp, log, pow, sqrt
+*/
+CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle,
+                              OutputArray x, OutputArray y, bool angleInDegrees = false);
+
+/** @brief Calculates the magnitude and angle of 2D vectors.
+
+The function cv::cartToPolar calculates either the magnitude, angle, or both
+for every 2D vector (x(I),y(I)):
+\f[\begin{array}{l} \texttt{magnitude} (I)= \sqrt{\texttt{x}(I)^2+\texttt{y}(I)^2} , \\ \texttt{angle} (I)= \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))[ \cdot180 / \pi ] \end{array}\f]
+
+The angles are calculated with accuracy about 0.3 degrees. For the point
+(0,0), the angle is set to 0.
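+For example, to compute the gradient magnitude and orientation of an image (a
+minimal sketch; img is assumed to be a CV_32F single-channel image, and Sobel
+requires the imgproc module):
+@code{.cpp}
+    Mat gx, gy, mag, ang;
+    Sobel(img, gx, CV_32F, 1, 0);           // derivative along x
+    Sobel(img, gy, CV_32F, 0, 1);           // derivative along y
+    cartToPolar(gx, gy, mag, ang, true);    // angles in degrees
+@endcode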
+@param x array of x-coordinates; this must be a single-precision or
+double-precision floating-point array.
+@param y array of y-coordinates that must have the same size and same type as x.
+@param magnitude output array of magnitudes of the same size and type as x.
+@param angle output array of angles that has the same size and type as
+x; the angles are measured in radians (from 0 to 2\*Pi) or in degrees (0 to 360 degrees).
+@param angleInDegrees a flag indicating whether the angles are measured
+in radians (the default) or in degrees.
+@sa Sobel, Scharr
+*/
+CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y,
+                              OutputArray magnitude, OutputArray angle,
+                              bool angleInDegrees = false);
+
+/** @brief Calculates the rotation angle of 2D vectors.
+
+The function cv::phase calculates the rotation angle of each 2D vector that
+is formed from the corresponding elements of x and y :
+\f[\texttt{angle} (I) =  \texttt{atan2} ( \texttt{y} (I), \texttt{x} (I))\f]
+
+The angle estimation accuracy is about 0.3 degrees. When x(I)=y(I)=0 ,
+the corresponding angle(I) is set to 0.
+@param x input floating-point array of x-coordinates of 2D vectors.
+@param y input array of y-coordinates of 2D vectors; it must have the
+same size and the same type as x.
+@param angle output array of vector angles; it has the same size and
+same type as x .
+@param angleInDegrees when true, the function calculates the angle in
+degrees; otherwise, it is measured in radians.
+*/
+CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle,
+                        bool angleInDegrees = false);
+
+/** @brief Calculates the magnitude of 2D vectors.
+
+The function cv::magnitude calculates the magnitude of 2D vectors formed
+from the corresponding elements of x and y arrays:
+\f[\texttt{dst} (I) =  \sqrt{\texttt{x}(I)^2 + \texttt{y}(I)^2}\f]
+@param x floating-point array of x-coordinates of the vectors.
+@param y floating-point array of y-coordinates of the vectors; it must
+have the same size as x.
+@param magnitude output array of the same size and type as x.
+@sa cartToPolar, polarToCart, phase, sqrt
+*/
+CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude);
+
+/** @brief Checks every element of an input array for invalid values.
+
+The function cv::checkRange checks that every array element is neither NaN nor infinite. When minVal \>
+-DBL_MAX and maxVal \< DBL_MAX, the function also checks that each value is between minVal and
+maxVal. In case of multi-channel arrays, each channel is processed independently. If some values
+are out of range, the position of the first outlier is stored in pos (when pos != NULL), and the
+function either returns false (when quiet=true) or throws an exception.
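+For example (a minimal sketch):
+@code{.cpp}
+    Mat m = (Mat_<double>(1,3) << 1., std::numeric_limits<double>::quiet_NaN(), 3.);
+    Point pos;
+    if (!checkRange(m, true, &pos))
+        printf("invalid value at (%d,%d)\n", pos.x, pos.y);   // prints (1,0)
+@endcode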
+@param a input array.
+@param quiet a flag indicating whether the function quietly returns false when array elements
+are out of range, or throws an exception instead.
+@param pos optional output parameter, when not NULL, must be a pointer to array of src.dims
+elements.
+@param minVal inclusive lower boundary of valid values range.
+@param maxVal exclusive upper boundary of valid values range.
+*/
+CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0,
+                            double minVal = -DBL_MAX, double maxVal = DBL_MAX);
+
+/** @brief Converts NaNs to the given number.
+*/
+CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
+
+/** @brief Performs generalized matrix multiplication.
+
+The function cv::gemm performs generalized matrix multiplication similar to the
+gemm functions in BLAS level 3. For example,
+`gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`
+corresponds to
+\f[\texttt{dst} =  \texttt{alpha} \cdot \texttt{src1} ^T  \cdot \texttt{src2} +  \texttt{beta} \cdot \texttt{src3} ^T\f]
+
+In case of complex (two-channel) data, a complex matrix
+multiplication is performed.
+
+The function can be replaced with a matrix expression. For example, the
+above call can be replaced with:
+@code{.cpp}
+    dst = alpha*src1.t()*src2 + beta*src3.t();
+@endcode
+@param src1 first multiplied input matrix that could be real (CV_32FC1,
+CV_64FC1) or complex (CV_32FC2, CV_64FC2).
+@param src2 second multiplied input matrix of the same type as src1.
+@param alpha weight of the matrix product.
+@param src3 third optional delta matrix added to the matrix product; it
+should have the same type as src1 and src2.
+@param beta weight of src3.
+@param dst output matrix; it has the proper size and the same type as
+input matrices.
+@param flags operation flags (cv::GemmFlags)
+@sa mulTransposed , transform
+*/
+CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha,
+                       InputArray src3, double beta, OutputArray dst, int flags = 0);
+
+/** @brief Calculates the product of a matrix and its transposition.
+
+The function cv::mulTransposed calculates the product of src and its
+transposition:
+\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} )^T ( \texttt{src} - \texttt{delta} )\f]
+if aTa=true , and
+\f[\texttt{dst} = \texttt{scale} ( \texttt{src} - \texttt{delta} ) ( \texttt{src} - \texttt{delta} )^T\f]
+otherwise. The function is used to calculate the covariance matrix. With
+zero delta, it can be used as a faster substitute for general matrix
+product A\*B when B=A'.
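+For example, computing A^T*A (a minimal sketch):
+@code{.cpp}
+    Mat A = Mat::ones(100, 3, CV_32F), AtA;
+    mulTransposed(A, AtA, true);   // AtA = A^T * A, a 3x3 matrix
+@endcode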
+@param src input single-channel matrix. Note that unlike gemm, the
+function can multiply not only floating-point matrices.
+@param dst output square matrix.
+@param aTa Flag specifying the multiplication ordering. See the
+description below.
+@param delta Optional delta matrix subtracted from src before the
+multiplication. When the matrix is empty ( delta=noArray() ), it is
+assumed to be zero, that is, nothing is subtracted. If it has the same
+size as src , it is simply subtracted. Otherwise, it is "repeated" (see
+repeat ) to cover the full src and then subtracted. The type of the delta
+matrix, when it is not empty, must be the same as the type of the created
+output matrix. See the dtype parameter description below.
+@param scale Optional scale factor for the matrix product.
+@param dtype Optional type of the output matrix. When it is negative,
+the output matrix will have the same type as src . Otherwise, it will be
+type=CV_MAT_DEPTH(dtype) that should be either CV_32F or CV_64F .
+@sa calcCovarMatrix, gemm, repeat, reduce
+*/
+CV_EXPORTS_W void mulTransposed( InputArray src, OutputArray dst, bool aTa,
+                                 InputArray delta = noArray(),
+                                 double scale = 1, int dtype = -1 );
+
+/** @brief Transposes a matrix.
+
+The function cv::transpose transposes the matrix src :
+\f[\texttt{dst} (i,j) =  \texttt{src} (j,i)\f]
+@note No complex conjugation is done in case of a complex matrix. It
+should be done separately if needed.
+@param src input array.
+@param dst output array of the same type as src.
+*/
+CV_EXPORTS_W void transpose(InputArray src, OutputArray dst);
+
+/** @brief Performs the matrix transformation of every array element.
+
+The function cv::transform performs the matrix transformation of every
+element of the array src and stores the results in dst :
+\f[\texttt{dst} (I) =  \texttt{m} \cdot \texttt{src} (I)\f]
+(when m.cols=src.channels() ), or
+\f[\texttt{dst} (I) =  \texttt{m} \cdot [ \texttt{src} (I); 1]\f]
+(when m.cols=src.channels()+1 )
+
+Every element of the N-channel array src is interpreted as an N-element
+vector that is transformed using the M x N or M x (N+1) matrix m into an
+M-element vector - the corresponding element of the output array dst.
+
+The function may be used for geometrical transformation of
+N -dimensional points, arbitrary linear color space transformation (such
+as various kinds of RGB to YUV transforms), shuffling the image
+channels, and so forth.
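+For example, a weighted BGR-to-gray conversion with a 1x3 matrix (a minimal
+sketch; bgr is assumed to be a CV_8UC3 image, and the weights are the usual
+Rec. 601 luma coefficients):
+@code{.cpp}
+    Mat m = (Mat_<float>(1,3) << 0.114f, 0.587f, 0.299f);  // B, G, R weights
+    Mat gray;
+    transform(bgr, gray, m);   // gray has m.rows == 1 channel
+@endcode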
+@param src input array that must have as many channels (1 to 4) as
+m.cols or m.cols-1.
+@param dst output array of the same size and depth as src; it has as
+many channels as m.rows.
+@param m transformation M x N or M x (N+1) floating-point matrix.
+@sa perspectiveTransform, getAffineTransform, estimateAffine2D, warpAffine, warpPerspective
+*/
+CV_EXPORTS_W void transform(InputArray src, OutputArray dst, InputArray m );
+
+/** @brief Performs the perspective matrix transformation of vectors.
+
+The function cv::perspectiveTransform transforms every element of src by
+treating it as a 2D or 3D vector, in the following way:
+\f[(x, y, z)  \rightarrow (x'/w, y'/w, z'/w)\f]
+where
+\f[(x', y', z', w') =  \texttt{mat} \cdot \begin{bmatrix} x & y & z & 1  \end{bmatrix}\f]
+and
+\f[w =  \fork{w'}{if \(w' \ne 0\)}{\infty}{otherwise}\f]
+
+Here a 3D vector transformation is shown. In case of a 2D vector
+transformation, the z component is omitted.
+
+@note The function transforms a sparse set of 2D or 3D vectors. If you
+want to transform an image using perspective transformation, use
+warpPerspective . If you have an inverse problem, that is, you want to
+compute the most probable perspective transformation out of several
+pairs of corresponding points, you can use getPerspectiveTransform or
+findHomography .
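+For example, mapping a set of 2D points with a 3x3 homography (a minimal
+sketch; H is assumed to be a CV_64F 3x3 matrix, e.g. from findHomography):
+@code{.cpp}
+    std::vector<Point2f> src = { {0,0}, {1,0}, {1,1}, {0,1} }, dst;
+    perspectiveTransform(src, dst, H);   // dst[i] = projection of H*[src[i];1]
+@endcode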
+@param src input two-channel or three-channel floating-point array; each
+element is a 2D/3D vector to be transformed.
+@param dst output array of the same size and type as src.
+@param m 3x3 or 4x4 floating-point transformation matrix.
+@sa  transform, warpPerspective, getPerspectiveTransform, findHomography
+*/
+CV_EXPORTS_W void perspectiveTransform(InputArray src, OutputArray dst, InputArray m );
+
+/** @brief Copies the lower or the upper half of a square matrix to its another half.
+
+The function cv::completeSymm copies the lower or the upper half of a square matrix to
+its another half. The matrix diagonal remains unchanged:
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i > j\f$ if
+    lowerToUpper=false
+ - \f$\texttt{m}_{ij}=\texttt{m}_{ji}\f$ for \f$i < j\f$ if
+    lowerToUpper=true
+
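+For example (a minimal sketch):
+@code{.cpp}
+    Mat A = (Mat_<float>(3,3) << 1, 2, 3,
+                                 0, 4, 5,
+                                 0, 0, 6);
+    completeSymm(A);   // copies the upper half over the lower half
+@endcode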
+@param m input-output floating-point square matrix.
+@param lowerToUpper operation flag; if true, the lower half is copied to
+the upper half. Otherwise, the upper half is copied to the lower half.
+@sa flip, transpose
+*/
+CV_EXPORTS_W void completeSymm(InputOutputArray m, bool lowerToUpper = false);
+
+/** @brief Initializes a scaled identity matrix.
+
+The function cv::setIdentity initializes a scaled identity matrix:
+\f[\texttt{mtx} (i,j)= \fork{\texttt{value}}{ if \(i=j\)}{0}{otherwise}\f]
+
+The function can also be emulated using the matrix initializers and the
+matrix expressions:
+@code
+    Mat A = Mat::eye(4, 3, CV_32F)*5;
+    // A will be set to [[5, 0, 0], [0, 5, 0], [0, 0, 5], [0, 0, 0]]
+@endcode
+@param mtx matrix to initialize (not necessarily square).
+@param s value to assign to diagonal elements.
+@sa Mat::zeros, Mat::ones, Mat::setTo, Mat::operator=
+*/
+CV_EXPORTS_W void setIdentity(InputOutputArray mtx, const Scalar& s = Scalar(1));
+
+/** @brief Returns the determinant of a square floating-point matrix.
+
+The function cv::determinant calculates and returns the determinant of the
+specified matrix. For small matrices ( mtx.cols=mtx.rows\<=3 ), the
+direct method is used. For larger matrices, the function uses LU
+factorization with partial pivoting.
+
+For symmetric positive-definite matrices, it is also possible to use
+eigen decomposition to calculate the determinant.
+@param mtx input matrix that must have CV_32FC1 or CV_64FC1 type and
+square size.
+@sa trace, invert, solve, eigen, @ref MatrixExpressions
+*/
+CV_EXPORTS_W double determinant(InputArray mtx);
+
+/** @brief Returns the trace of a matrix.
+
+The function cv::trace returns the sum of the diagonal elements of the
+matrix mtx .
+\f[\mathrm{tr} ( \texttt{mtx} ) =  \sum _i  \texttt{mtx} (i,i)\f]
+@param mtx input matrix.
+*/
+CV_EXPORTS_W Scalar trace(InputArray mtx);
+
+/** @brief Finds the inverse or pseudo-inverse of a matrix.
+
+The function cv::invert inverts the matrix src and stores the result in dst
+. When the matrix src is singular or non-square, the function calculates
+the pseudo-inverse matrix (the dst matrix) so that norm(src\*dst - I) is
+minimal, where I is an identity matrix.
+
+In case of the #DECOMP_LU method, the function returns non-zero value if
+the inverse has been successfully calculated and 0 if src is singular.
+
+In case of the #DECOMP_SVD method, the function returns the inverse
+condition number of src (the ratio of the smallest singular value to the
+largest singular value) and 0 if src is singular. The SVD method
+calculates a pseudo-inverse matrix if src is singular.
+
+Similarly to #DECOMP_LU, the method #DECOMP_CHOLESKY works only with
+non-singular square matrices that should also be symmetric and
+positive-definite. In this case, the function stores the inverted
+matrix in dst and returns non-zero. Otherwise, it returns 0.
+
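+For example, computing a pseudo-inverse (a minimal sketch; A is assumed to be
+a filled CV_64F matrix):
+@code{.cpp}
+    Mat Ainv;
+    double invCond = invert(A, Ainv, DECOMP_SVD);   // pseudo-inverse of A
+    // invCond == 0 indicates that A is singular
+@endcode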
+@param src input floating-point M x N matrix.
+@param dst output matrix of N x M size and the same type as src.
+@param flags inversion method (cv::DecompTypes)
+@sa solve, SVD
+*/
+CV_EXPORTS_W double invert(InputArray src, OutputArray dst, int flags = DECOMP_LU);
+
+/** @brief Solves one or more linear systems or least-squares problems.
+
+The function cv::solve solves a linear system or least-squares problem (the
+latter is possible with SVD or QR methods, or by specifying the flag
+#DECOMP_NORMAL ):
+\f[\texttt{dst} =  \arg \min _X \| \texttt{src1} \cdot \texttt{X} -  \texttt{src2} \|\f]
+
+If #DECOMP_LU or #DECOMP_CHOLESKY method is used, the function returns 1
+if src1 (or \f$\texttt{src1}^T\texttt{src1}\f$ ) is non-singular. Otherwise,
+it returns 0. In the latter case, dst is not valid. Other methods find a
+pseudo-solution in case of a singular left-hand side part.
+
+@note If you want to find a unit-norm solution of an under-determined
+singular system \f$\texttt{src1}\cdot\texttt{dst}=0\f$ , the function solve
+will not do the work. Use SVD::solveZ instead.
+
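+For example, solving an over-determined system in the least-squares sense (a
+minimal sketch; A is MxN with M > N and b is Mx1, both assumed filled):
+@code{.cpp}
+    Mat x;
+    solve(A, b, x, DECOMP_SVD);   // x minimizes ||A*x - b||
+@endcode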
+@param src1 input matrix on the left-hand side of the system.
+@param src2 input matrix on the right-hand side of the system.
+@param dst output solution.
+@param flags solution (matrix inversion) method (#DecompTypes)
+@sa invert, SVD, eigen
+*/
+CV_EXPORTS_W bool solve(InputArray src1, InputArray src2,
+                        OutputArray dst, int flags = DECOMP_LU);
+
+/** @brief Sorts each row or each column of a matrix.
+
+The function cv::sort sorts each matrix row or each matrix column in
+ascending or descending order, so you need to pass a combination of two
+operation flags (one selecting rows or columns, the other the sort order)
+to get the desired behaviour. If you want to sort matrix rows or columns
+lexicographically, you can use the STL std::sort generic function with a
+proper comparison predicate.
+
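+For example (a minimal sketch):
+@code{.cpp}
+    Mat sorted;
+    sort(src, sorted, SORT_EVERY_ROW | SORT_DESCENDING);   // each row, descending
+@endcode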
+@param src input single-channel array.
+@param dst output array of the same size and type as src.
+@param flags operation flags, a combination of #SortFlags
+@sa sortIdx, randShuffle
+*/
+CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags);
+
+/** @brief Sorts each row or each column of a matrix.
+
+The function cv::sortIdx sorts each matrix row or each matrix column in
+ascending or descending order, as selected by a combination of two
+operation flags. Instead of reordering the elements themselves, it
+stores the indices of sorted elements in the output array. For example:
+@code
+    Mat A = Mat::eye(3,3,CV_32F), B;
+    sortIdx(A, B, SORT_EVERY_ROW + SORT_ASCENDING);
+    // B will probably contain
+    // (because of equal elements in A some permutations are possible):
+    // [[1, 2, 0], [0, 2, 1], [0, 1, 2]]
+@endcode
+@param src input single-channel array.
+@param dst output integer array of the same size as src.
+@param flags operation flags that could be a combination of cv::SortFlags
+@sa sort, randShuffle
+*/
+CV_EXPORTS_W void sortIdx(InputArray src, OutputArray dst, int flags);
+
+/** @brief Finds the real roots of a cubic equation.
+
+The function solveCubic finds the real roots of a cubic equation:
+-   if coeffs is a 4-element vector:
+\f[\texttt{coeffs} [0] x^3 +  \texttt{coeffs} [1] x^2 +  \texttt{coeffs} [2] x +  \texttt{coeffs} [3] = 0\f]
+-   if coeffs is a 3-element vector:
+\f[x^3 +  \texttt{coeffs} [0] x^2 +  \texttt{coeffs} [1] x +  \texttt{coeffs} [2] = 0\f]
+
+The roots are stored in the roots array.
+@param coeffs equation coefficients, an array of 3 or 4 elements.
+@param roots output array of real roots that has 1 or 3 elements.
+@return number of real roots. It can be 0, 1, 2 or 3.
+*/
+CV_EXPORTS_W int solveCubic(InputArray coeffs, OutputArray roots);
+
+/** @brief Finds the real or complex roots of a polynomial equation.
+
+The function cv::solvePoly finds real and complex roots of a polynomial equation:
+\f[\texttt{coeffs} [n] x^{n} +  \texttt{coeffs} [n-1] x^{n-1} + ... +  \texttt{coeffs} [1] x +  \texttt{coeffs} [0] = 0\f]
+@param coeffs array of polynomial coefficients.
+@param roots output (complex) array of roots.
+@param maxIters maximum number of iterations the algorithm does.
+*/
+CV_EXPORTS_W double solvePoly(InputArray coeffs, OutputArray roots, int maxIters = 300);
+
+/** @brief Calculates eigenvalues and eigenvectors of a symmetric matrix.
+
+The function cv::eigen calculates just eigenvalues, or eigenvalues and eigenvectors of the symmetric
+matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@note Use cv::eigenNonSymmetric for calculation of real eigenvalues and eigenvectors of a non-symmetric matrix.
+
+@param src input matrix that must have CV_32FC1 or CV_64FC1 type, square size and be symmetrical
+(src ^T^ == src).
+@param eigenvalues output vector of eigenvalues of the same type as src; the eigenvalues are stored
+in the descending order.
+@param eigenvectors output matrix of eigenvectors; it has the same size and type as src; the
+eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding
+eigenvalues.
+@sa eigenNonSymmetric, completeSymm , PCA
+*/
+CV_EXPORTS_W bool eigen(InputArray src, OutputArray eigenvalues,
+                        OutputArray eigenvectors = noArray());
+
+/** @brief Calculates eigenvalues and eigenvectors of a non-symmetric matrix (real eigenvalues only).
+
+@note Assumes real eigenvalues.
+
+The function calculates eigenvalues and eigenvectors (optional) of the square matrix src:
+@code
+    src*eigenvectors.row(i).t() = eigenvalues.at<srcType>(i)*eigenvectors.row(i).t()
+@endcode
+
+@param src input matrix (CV_32FC1 or CV_64FC1 type).
+@param eigenvalues output vector of eigenvalues (type is the same type as src).
+@param eigenvectors output matrix of eigenvectors (type is the same type as src). The eigenvectors are stored as subsequent matrix rows, in the same order as the corresponding eigenvalues.
+@sa eigen
+*/
+CV_EXPORTS_W void eigenNonSymmetric(InputArray src, OutputArray eigenvalues,
+                                    OutputArray eigenvectors);
+
+/** @brief Calculates the covariance matrix of a set of vectors.
+
+The function cv::calcCovarMatrix calculates the covariance matrix and, optionally, the mean vector of
+the set of input vectors.
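+For example, with samples stored as rows of a single matrix (a minimal sketch
+using the InputArray overload below):
+@code{.cpp}
+    Mat samples;   // NxD, one sample per row, assumed filled
+    Mat covar, mean;
+    calcCovarMatrix(samples, covar, mean,
+                    COVAR_ROWS | COVAR_NORMAL | COVAR_SCALE);   // covar is DxD
+@endcode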
+@param samples samples stored as separate matrices
+@param nsamples number of samples
+@param covar output covariance matrix of the type ctype and square size.
+@param mean input or output (depending on the flags) array as the average value of the input vectors.
+@param flags operation flags as a combination of #CovarFlags
+@param ctype type of the matrix; it equals CV_64F by default.
+@sa PCA, mulTransposed, Mahalanobis
+@todo InputArrayOfArrays
+*/
+CV_EXPORTS void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean,
+                                 int flags, int ctype = CV_64F);
+
+/** @overload
+@note use #COVAR_ROWS or #COVAR_COLS flag
+@param samples samples stored as rows/columns of a single matrix.
+@param covar output covariance matrix of the type ctype and square size.
+@param mean input or output (depending on the flags) array as the average value of the input vectors.
+@param flags operation flags as a combination of #CovarFlags
+@param ctype type of the matrix; it equals CV_64F by default.
+*/
+CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
+                                   InputOutputArray mean, int flags, int ctype = CV_64F);
+
+/** wrap PCA::operator() */
+CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
+                             OutputArray eigenvectors, int maxComponents = 0);
+
+/** wrap PCA::operator() */
+CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
+                             OutputArray eigenvectors, double retainedVariance);
+
+/** wrap PCA::project */
+CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean,
+                             InputArray eigenvectors, OutputArray result);
+
+/** wrap PCA::backProject */
+CV_EXPORTS_W void PCABackProject(InputArray data, InputArray mean,
+                                 InputArray eigenvectors, OutputArray result);
+
+/** wrap SVD::compute */
+CV_EXPORTS_W void SVDecomp( InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags = 0 );
+
+/** wrap SVD::backSubst */
+CV_EXPORTS_W void SVBackSubst( InputArray w, InputArray u, InputArray vt,
+                               InputArray rhs, OutputArray dst );
+
+/** @brief Calculates the Mahalanobis distance between two vectors.
+
+The function cv::Mahalanobis calculates and returns the weighted distance between two vectors:
+\f[d( \texttt{vec1} , \texttt{vec2} )= \sqrt{\sum_{i,j}{\texttt{icovar}(i,j)\cdot(\texttt{vec1}(i)-\texttt{vec2}(i))\cdot(\texttt{vec1}(j)-\texttt{vec2}(j))}}\f]
+The covariance matrix may be calculated using the #calcCovarMatrix function and then inverted using
+the invert function (preferably using the #DECOMP_SVD method, as the most accurate).
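+For example (a minimal sketch; samples is assumed to be a CV_64F NxD matrix of
+observations):
+@code{.cpp}
+    Mat covar, mean, icovar;
+    calcCovarMatrix(samples, covar, mean, COVAR_ROWS | COVAR_NORMAL | COVAR_SCALE);
+    invert(covar, icovar, DECOMP_SVD);
+    double d = Mahalanobis(samples.row(0), samples.row(1), icovar);
+@endcode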
+@param v1 first 1D input vector.
+@param v2 second 1D input vector.
+@param icovar inverse covariance matrix.
+*/
+CV_EXPORTS_W double Mahalanobis(InputArray v1, InputArray v2, InputArray icovar);
+
+/** @brief Performs a forward or inverse Discrete Fourier transform of a 1D or 2D floating-point array.
+
+The function cv::dft performs one of the following:
+-   Forward Fourier transform of a 1D vector of N elements:
+    \f[Y = F^{(N)}  \cdot X,\f]
+    where \f$F^{(N)}_{jk}=\exp(-2\pi i j k/N)\f$ and \f$i=\sqrt{-1}\f$
+-   Inverse Fourier transform of a 1D vector of N elements:
+    \f[\begin{array}{l} X'=  \left (F^{(N)} \right )^{-1}  \cdot Y =  \left (F^{(N)} \right )^*  \cdot Y  \\ X = (1/N)  \cdot X', \end{array}\f]
+    where \f$F^*=\left(\textrm{Re}(F^{(N)})-\textrm{Im}(F^{(N)})\right)^T\f$
+-   Forward 2D Fourier transform of an M x N matrix:
+    \f[Y = F^{(M)}  \cdot X  \cdot F^{(N)}\f]
+-   Inverse 2D Fourier transform of an M x N matrix:
+    \f[\begin{array}{l} X'=  \left (F^{(M)} \right )^*  \cdot Y  \cdot \left (F^{(N)} \right )^* \\ X =  \frac{1}{M \cdot N} \cdot X' \end{array}\f]
+
+In case of real (single-channel) data, the output spectrum of the forward Fourier transform or input
+spectrum of the inverse Fourier transform can be represented in a packed format called *CCS*
+(complex-conjugate-symmetrical). It was borrowed from IPL (Intel\* Image Processing Library). Here
+is how 2D *CCS* spectrum looks:
+\f[\begin{bmatrix} Re Y_{0,0} & Re Y_{0,1} & Im Y_{0,1} & Re Y_{0,2} & Im Y_{0,2} &  \cdots & Re Y_{0,N/2-1} & Im Y_{0,N/2-1} & Re Y_{0,N/2}  \\ Re Y_{1,0} & Re Y_{1,1} & Im Y_{1,1} & Re Y_{1,2} & Im Y_{1,2} &  \cdots & Re Y_{1,N/2-1} & Im Y_{1,N/2-1} & Re Y_{1,N/2}  \\ Im Y_{1,0} & Re Y_{2,1} & Im Y_{2,1} & Re Y_{2,2} & Im Y_{2,2} &  \cdots & Re Y_{2,N/2-1} & Im Y_{2,N/2-1} & Im Y_{1,N/2}  \\ \hdotsfor{9} \\ Re Y_{M/2-1,0} &  Re Y_{M-3,1}  & Im Y_{M-3,1} &  \hdotsfor{3} & Re Y_{M-3,N/2-1} & Im Y_{M-3,N/2-1}& Re Y_{M/2-1,N/2}  \\ Im Y_{M/2-1,0} &  Re Y_{M-2,1}  & Im Y_{M-2,1} &  \hdotsfor{3} & Re Y_{M-2,N/2-1} & Im Y_{M-2,N/2-1}& Im Y_{M/2-1,N/2}  \\ Re Y_{M/2,0}  &  Re Y_{M-1,1} &  Im Y_{M-1,1} &  \hdotsfor{3} & Re Y_{M-1,N/2-1} & Im Y_{M-1,N/2-1}& Re Y_{M/2,N/2} \end{bmatrix}\f]
+
+In case of 1D transform of a real vector, the output looks like the first row of the matrix above.
+
+So, the function chooses an operation mode depending on the flags and size of the input array:
+-   If #DFT_ROWS is set or the input array has a single row or single column, the function
+    performs a 1D forward or inverse transform of each row of the matrix.
+    Otherwise, it performs a 2D transform.
+-   If the input array is real and #DFT_INVERSE is not set, the function performs a forward 1D or
+    2D transform:
+    -   When #DFT_COMPLEX_OUTPUT is set, the output is a complex matrix of the same size as
+        input.
+    -   When #DFT_COMPLEX_OUTPUT is not set, the output is a real matrix of the same size as
+        input. In case of 2D transform, it uses the packed format as shown above. In case of a
+        single 1D transform, it looks like the first row of the matrix above. In case of
+        multiple 1D transforms (when using the #DFT_ROWS flag), each row of the output matrix
+        looks like the first row of the matrix above.
+-   If the input array is complex and either #DFT_INVERSE or #DFT_REAL_OUTPUT is not set, the
+    output is a complex array of the same size as input. The function performs a forward or
+    inverse 1D or 2D transform of the whole input array or each row of the input array
+    independently, depending on the flags DFT_INVERSE and DFT_ROWS.
+-   When #DFT_INVERSE is set and the input array is real, or it is complex but #DFT_REAL_OUTPUT
+    is set, the output is a real array of the same size as input. The function performs a 1D or 2D
+    inverse transformation of the whole input array or each individual row, depending on the flags
+    #DFT_INVERSE and #DFT_ROWS.
+
+If #DFT_SCALE is set, the scaling is done after the transformation.
+
+Unlike dct, the function supports arrays of arbitrary size. But only those arrays are processed
+efficiently whose sizes can be factorized into a product of small prime numbers (2, 3, and 5 in the
+current implementation). Such an efficient DFT size can be calculated using the getOptimalDFTSize
+method.
+
+The sample below illustrates how to calculate a DFT-based convolution of two 2D real arrays:
+@code
+    void convolveDFT(InputArray A, InputArray B, OutputArray C)
+    {
+        // reallocate the output array if needed
+        C.create(abs(A.rows - B.rows)+1, abs(A.cols - B.cols)+1, A.type());
+        Size dftSize;
+        // calculate the size of DFT transform
+        dftSize.width = getOptimalDFTSize(A.cols + B.cols - 1);
+        dftSize.height = getOptimalDFTSize(A.rows + B.rows - 1);
+
+        // allocate temporary buffers and initialize them with 0's
+        Mat tempA(dftSize, A.type(), Scalar::all(0));
+        Mat tempB(dftSize, B.type(), Scalar::all(0));
+
+        // copy A and B to the top-left corners of tempA and tempB, respectively
+        Mat roiA(tempA, Rect(0,0,A.cols,A.rows));
+        A.copyTo(roiA);
+        Mat roiB(tempB, Rect(0,0,B.cols,B.rows));
+        B.copyTo(roiB);
+
+        // now transform the padded A & B in-place;
+        // use "nonzeroRows" hint for faster processing
+        dft(tempA, tempA, 0, A.rows);
+        dft(tempB, tempB, 0, B.rows);
+
+        // multiply the spectrums;
+        // the function handles packed spectrum representations well
+        mulSpectrums(tempA, tempB, tempA, 0);
+
+        // transform the product back from the frequency domain.
+        // Even though all the result rows will be non-zero,
+        // you need only the first C.rows of them, and thus you
+        // pass nonzeroRows == C.rows
+        dft(tempA, tempA, DFT_INVERSE + DFT_SCALE, C.rows);
+
+        // now copy the result back to C.
+        tempA(Rect(0, 0, C.cols, C.rows)).copyTo(C);
+
+        // all the temporary buffers will be deallocated automatically
+    }
+@endcode
+To optimize this sample, consider the following approaches:
+-   Since nonzeroRows != 0 is passed to the forward transform calls and since A and B are copied to
+    the top-left corners of tempA and tempB, respectively, it is not necessary to clear the whole
+    tempA and tempB. It is only necessary to clear the tempA.cols - A.cols ( tempB.cols - B.cols)
+    rightmost columns of the matrices.
+-   This DFT-based convolution does not have to be applied to the whole big arrays, especially if B
+    is significantly smaller than A or vice versa. Instead, you can calculate convolution by parts.
+    To do this, you need to split the output array C into multiple tiles. For each tile, estimate
+    which parts of A and B are required to calculate convolution in this tile. If the tiles in C are
+    too small, the speed will decrease a lot because of repeated work. In the ultimate case, when
+    each tile in C is a single pixel, the algorithm becomes equivalent to the naive convolution
+    algorithm. If the tiles are too big, the temporary arrays tempA and tempB become too big and
+    there is also a slowdown because of bad cache locality. So, there is an optimal tile size
+    somewhere in the middle.
+-   If different tiles in C can be calculated in parallel and, thus, the convolution is done by
+    parts, the loop can be threaded.
+
+All of the above improvements have been implemented in #matchTemplate and #filter2D . Therefore, by
+using them, you can get the performance even better than with the above theoretically optimal
+implementation. Though, those two functions actually calculate cross-correlation, not convolution,
+so you need to "flip" the second convolution operand B vertically and horizontally using flip .
+@note
+-   An example using the discrete Fourier transform can be found at
+    opencv_source_code/samples/cpp/dft.cpp
+-   (Python) An example using the dft functionality to perform Wiener deconvolution can be found
+    at opencv_source/samples/python/deconvolution.py
+-   (Python) An example rearranging the quadrants of a Fourier image can be found at
+    opencv_source/samples/python/dft.py
+@param src input array that could be real or complex.
+@param dst output array whose size and type depend on the flags.
+@param flags transformation flags, representing a combination of the #DftFlags
+@param nonzeroRows when the parameter is not zero, the function assumes that only the first
+nonzeroRows rows of the input array (#DFT_INVERSE is not set) or only the first nonzeroRows rows of the
+output array (#DFT_INVERSE is set) contain non-zeros, thus, the function can handle the rest of the
+rows more efficiently and save some time; this technique is very useful for calculating array
+cross-correlation or convolution using DFT.
+@sa dct , getOptimalDFTSize , mulSpectrums, filter2D , matchTemplate , flip , cartToPolar ,
+magnitude , phase
+*/
+CV_EXPORTS_W void dft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0);
+
+/** @brief Calculates the inverse Discrete Fourier Transform of a 1D or 2D array.
+
+idft(src, dst, flags) is equivalent to dft(src, dst, flags | #DFT_INVERSE) .
+@note Neither dft nor idft scales the result by default. So, you should pass #DFT_SCALE to one of
+dft or idft explicitly to make these transforms mutually inverse.
+@sa dft, dct, idct, mulSpectrums, getOptimalDFTSize
+@param src input floating-point real or complex array.
+@param dst output array whose size and type depend on the flags.
+@param flags operation flags (see dft and #DftFlags).
+@param nonzeroRows number of dst rows to process; the rest of the rows have undefined content (see
+the convolution sample in the dft description).
+*/
+CV_EXPORTS_W void idft(InputArray src, OutputArray dst, int flags = 0, int nonzeroRows = 0);
+
+/** @brief Performs a forward or inverse discrete Cosine transform of 1D or 2D array.
+
+The function cv::dct performs a forward or inverse discrete Cosine transform (DCT) of a 1D or 2D
+floating-point array:
+-   Forward Cosine transform of a 1D vector of N elements:
+    \f[Y = C^{(N)}  \cdot X\f]
+    where
+    \f[C^{(N)}_{jk}= \sqrt{\alpha_j/N} \cos \left ( \frac{\pi(2k+1)j}{2N} \right )\f]
+    and
+    \f$\alpha_0=1\f$, \f$\alpha_j=2\f$ for *j \> 0*.
+-   Inverse Cosine transform of a 1D vector of N elements:
+    \f[X =  \left (C^{(N)} \right )^{-1}  \cdot Y =  \left (C^{(N)} \right )^T  \cdot Y\f]
+    (since \f$C^{(N)}\f$ is an orthogonal matrix, \f$C^{(N)} \cdot \left(C^{(N)}\right)^T = I\f$ )
+-   Forward 2D Cosine transform of M x N matrix:
+    \f[Y = C^{(N)}  \cdot X  \cdot \left (C^{(N)} \right )^T\f]
+-   Inverse 2D Cosine transform of M x N matrix:
+    \f[X =  \left (C^{(N)} \right )^T  \cdot X  \cdot C^{(N)}\f]
+
+The function chooses the mode of operation by looking at the flags and size of the input array:
+-   If (flags & #DCT_INVERSE) == 0 , the function does a forward 1D or 2D transform. Otherwise, it
+    is an inverse 1D or 2D transform.
+-   If (flags & #DCT_ROWS) != 0 , the function performs a 1D transform of each row.
+-   If the array is a single column or a single row, the function performs a 1D transform.
+-   If none of the above is true, the function performs a 2D transform.
+
+@note Currently dct supports even-size arrays (2, 4, 6 ...). For data analysis and approximation, you
+can pad the array when necessary.
+Also, the function performance depends very much, and not monotonically, on the array size (see
+getOptimalDFTSize ). In the current implementation DCT of a vector of size N is calculated via DFT
+of a vector of size N/2 . Thus, the optimal DCT size N1 \>= N can be calculated as:
+@code
+    size_t getOptimalDCTSize(size_t N) { return 2*getOptimalDFTSize((N+1)/2); }
+    N1 = getOptimalDCTSize(N);
+@endcode
+@param src input floating-point array.
+@param dst output array of the same size and type as src .
+@param flags transformation flags as a combination of cv::DftFlags (DCT_*)
+@sa dft , getOptimalDFTSize , idct
+*/
+CV_EXPORTS_W void dct(InputArray src, OutputArray dst, int flags = 0);
+
+/** @brief Calculates the inverse Discrete Cosine Transform of a 1D or 2D array.
+
+idct(src, dst, flags) is equivalent to dct(src, dst, flags | DCT_INVERSE).
+@param src input floating-point single-channel array.
+@param dst output array of the same size and type as src.
+@param flags operation flags.
+@sa  dct, dft, idft, getOptimalDFTSize
+*/
+CV_EXPORTS_W void idct(InputArray src, OutputArray dst, int flags = 0);
+
+/** @brief Performs the per-element multiplication of two Fourier spectrums.
+
+The function cv::mulSpectrums performs the per-element multiplication of the two CCS-packed or complex
+matrices that are results of a real or complex Fourier transform.
+
+The function, together with dft and idft , may be used to calculate convolution (pass conjB=false )
+or correlation (pass conjB=true ) of two arrays rapidly. When the arrays are complex, they are
+simply multiplied (per element) with an optional conjugation of the second-array elements. When the
+arrays are real, they are assumed to be CCS-packed (see dft for details).
+@param a first input array.
+@param b second input array of the same size and type as a.
+@param c output array of the same size and type as a.
+@param flags operation flags; currently, the only supported flag is cv::DFT_ROWS, which indicates that
+each row of a and b is an independent 1D Fourier spectrum. If you do not want to use this flag, simply pass `0`.
+@param conjB optional flag that conjugates the second input array before the multiplication (true)
+or not (false).
+*/
+CV_EXPORTS_W void mulSpectrums(InputArray a, InputArray b, OutputArray c,
+                               int flags, bool conjB = false);
+
+/** @brief Returns the optimal DFT size for a given vector size.
+
+DFT performance is not a monotonic function of a vector size. Therefore, when you calculate
+convolution of two arrays or perform the spectral analysis of an array, it usually makes sense to
+pad the input data with zeros to get a bit larger array that can be transformed much faster than the
+original one. Arrays whose size is a power-of-two (2, 4, 8, 16, 32, ...) are the fastest to process.
+Though, the arrays whose size is a product of 2's, 3's, and 5's (for example, 300 = 5\*5\*3\*2\*2)
+are also processed quite efficiently.
+
+The function cv::getOptimalDFTSize returns the minimum number N that is greater than or equal to vecsize
+so that the DFT of a vector of size N can be processed efficiently. In the current implementation N
+= 2 ^p^ \* 3 ^q^ \* 5 ^r^ for some integer p, q, r.
+
+The function returns a negative number if vecsize is too large (very close to INT_MAX ).
+
+While the function cannot be used directly to estimate the optimal vector size for DCT transform
+(since the current DCT implementation supports only even-size vectors), it can easily be computed
+as getOptimalDFTSize((vecsize+1)/2)\*2.
+@param vecsize vector size.
+@sa dft , dct , idft , idct , mulSpectrums
+*/
+CV_EXPORTS_W int getOptimalDFTSize(int vecsize);
+
+/** @brief Returns the default random number generator.
+
+The function cv::theRNG returns the default random number generator. For each thread, there is a
+separate random number generator, so you can use the function safely in multi-thread environments.
+If you just need to get a single random number using this generator or initialize an array, you can
+use randu or randn instead. But if you are going to generate many random numbers inside a loop, it
+is much faster to use this function to retrieve the generator and then use RNG::operator _Tp() .
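+For example (a minimal sketch):
+@code{.cpp}
+    RNG& rng = theRNG();
+    for (int i = 0; i < 10; i++)
+        printf("%d ", rng.uniform(0, 100));   // ten uniform ints from [0, 100)
+@endcode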
+@sa RNG, randu, randn
+*/
+CV_EXPORTS RNG& theRNG();
+
+/** @brief Sets the state of the default random number generator.
+
+The function cv::setRNGSeed sets the state of the default random number generator to a custom value.
+@param seed new state for the default random number generator
+@sa RNG, randu, randn
+*/
+CV_EXPORTS_W void setRNGSeed(int seed);
+
+/** @brief Generates a single uniformly-distributed random number or an array of random numbers.
+
+The non-template variant of the function fills the matrix dst with uniformly-distributed
+random numbers from the specified range:
+\f[\texttt{low} _c  \leq \texttt{dst} (I)_c <  \texttt{high} _c\f]
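+For example (a minimal sketch):
+@code{.cpp}
+    Mat noise(480, 640, CV_8UC3);
+    randu(noise, Scalar::all(0), Scalar::all(256));   // each channel in [0, 256)
+@endcode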
+@param dst output array of random numbers; the array must be pre-allocated.
+@param low inclusive lower boundary of the generated random numbers.
+@param high exclusive upper boundary of the generated random numbers.
+@sa RNG, randn, theRNG
+*/
+CV_EXPORTS_W void randu(InputOutputArray dst, InputArray low, InputArray high);
+
+/** @brief Fills the array with normally distributed random numbers.
+
+The function cv::randn fills the matrix dst with normally distributed random numbers with the specified
+mean vector and the standard deviation matrix. The generated random numbers are clipped to fit the
+value range of the output array data type.
+@param dst output array of random numbers; the array must be pre-allocated and have 1 to 4 channels.
+@param mean mean value (expectation) of the generated random numbers.
+@param stddev standard deviation of the generated random numbers; it can be either a vector (in
+which case a diagonal standard deviation matrix is assumed) or a square matrix.
+@sa RNG, randu
+*/
+CV_EXPORTS_W void randn(InputOutputArray dst, InputArray mean, InputArray stddev);
+
+/** @brief Shuffles the array elements randomly.
+
+The function cv::randShuffle shuffles the specified 1D array by randomly choosing pairs of elements and
+swapping them. The number of such swap operations will be dst.rows\*dst.cols\*iterFactor.
+@param dst input/output numerical 1D array.
+@param iterFactor scale factor that determines the number of random swap operations (see the
+description above).
+@param rng optional random number generator used for shuffling; if it is zero, theRNG () is used
+instead.
+@sa RNG, sort
+*/
+CV_EXPORTS_W void randShuffle(InputOutputArray dst, double iterFactor = 1., RNG* rng = 0);
+
+/** @brief Principal Component Analysis
+
+The class is used to calculate a special basis for a set of vectors. The
+basis will consist of eigenvectors of the covariance matrix calculated
+from the input set of vectors. The class %PCA can also transform
+vectors to/from the new coordinate space defined by the basis. Usually,
+in this new coordinate system, each vector from the original set (and
+any linear combination of such vectors) can be quite accurately
+approximated by taking its first few components, corresponding to the
+eigenvectors of the largest eigenvalues of the covariance matrix.
+Geometrically it means that you calculate a projection of the vector to
+a subspace formed by a few eigenvectors corresponding to the dominant
+eigenvalues of the covariance matrix. And usually such a projection is
+very close to the original vector. So, you can represent the original
+vector from a high-dimensional space with a much shorter vector
+consisting of the projected vector's coordinates in the subspace. Such a
+transformation is also known as Karhunen-Loeve Transform, or KLT.
+See http://en.wikipedia.org/wiki/Principal_component_analysis
+
+The sample below is a function that takes two matrices. The first
+matrix stores a set of vectors (a row per vector) that is used to
+calculate PCA. The second matrix stores another "test" set of vectors
+(a row per vector). First, these vectors are compressed with PCA, then
+reconstructed back, and then the reconstruction error norm is computed
+and printed for each vector:
+
+@code{.cpp}
+using namespace cv;
+
+PCA compressPCA(const Mat& pcaset, int maxComponents,
+                const Mat& testset, Mat& compressed)
+{
+    PCA pca(pcaset, // pass the data
+            Mat(), // we do not have a pre-computed mean vector,
+                   // so let the PCA engine compute it
+            PCA::DATA_AS_ROW, // indicate that the vectors
+                                // are stored as matrix rows
+                                // (use PCA::DATA_AS_COL if the vectors are
+                                // the matrix columns)
+            maxComponents // specify how many principal components to retain
+            );
+    // if there is no test data, just return the computed basis, ready-to-use
+    if( !testset.data )
+        return pca;
+    CV_Assert( testset.cols == pcaset.cols );
+
+    compressed.create(testset.rows, maxComponents, testset.type());
+
+    Mat reconstructed;
+    for( int i = 0; i < testset.rows; i++ )
+    {
+        Mat vec = testset.row(i), coeffs = compressed.row(i);
+        // compress the vector, the result will be stored
+        // in the i-th row of the output matrix
+        pca.project(vec, coeffs);
+        // and then reconstruct it
+        pca.backProject(coeffs, reconstructed);
+        // and measure the error
+        printf("%d. diff = %g\n", i, norm(vec, reconstructed, NORM_L2));
+    }
+    return pca;
+}
+@endcode
+@sa calcCovarMatrix, mulTransposed, SVD, dft, dct
+*/
+class CV_EXPORTS PCA
+{
+public:
+    enum Flags { DATA_AS_ROW = 0, //!< indicates that the input samples are stored as matrix rows
+                 DATA_AS_COL = 1, //!< indicates that the input samples are stored as matrix columns
+                 USE_AVG     = 2  //!< use a pre-computed mean vector supplied by the caller instead of computing it from the data
+               };
+
+    /** @brief default constructor
+
+    The default constructor initializes an empty %PCA structure. The other
+    constructors initialize the structure and call PCA::operator()().
+    */
+    PCA();
+
+    /** @overload
+    @param data input samples stored as matrix rows or matrix columns.
+    @param mean optional mean value; if the matrix is empty (@c noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout (PCA::Flags)
+    @param maxComponents maximum number of components that %PCA should
+    retain; by default, all the components are retained.
+    */
+    PCA(InputArray data, InputArray mean, int flags, int maxComponents = 0);
+
+    /** @overload
+    @param data input samples stored as matrix rows or matrix columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout (PCA::Flags)
+    @param retainedVariance Percentage of variance that PCA should retain.
+    Using this parameter will let the PCA decide how many components to
+    retain, but it will always keep at least 2.
+    */
+    PCA(InputArray data, InputArray mean, int flags, double retainedVariance);
+
+    /** @brief performs %PCA
+
+    The operator performs %PCA of the supplied dataset. It is safe to reuse
+    the same PCA structure for multiple datasets. That is, if the structure
+    has been previously used with another dataset, the existing internal
+    data is reclaimed and the new @ref eigenvalues, @ref eigenvectors and @ref
+    mean are allocated and computed.
+
+    The computed @ref eigenvalues are sorted from the largest to the smallest and
+    the corresponding @ref eigenvectors are stored as eigenvectors rows.
+
+    @param data input samples stored as the matrix rows or as the matrix
+    columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout. (Flags)
+    @param maxComponents maximum number of components that PCA should
+    retain; by default, all the components are retained.
+    */
+    PCA& operator()(InputArray data, InputArray mean, int flags, int maxComponents = 0);
+
+    /** @overload
+    @param data input samples stored as the matrix rows or as the matrix
+    columns.
+    @param mean optional mean value; if the matrix is empty (noArray()),
+    the mean is computed from the data.
+    @param flags operation flags; currently the parameter is only used to
+    specify the data layout. (PCA::Flags)
+    @param retainedVariance Percentage of variance that %PCA should retain.
+    Using this parameter will let the %PCA decide how many components to
+    retain, but it will always keep at least 2.
+     */
+    PCA& operator()(InputArray data, InputArray mean, int flags, double retainedVariance);
+
+    /** @brief Projects vector(s) to the principal component subspace.
+
+    The methods project one or more vectors to the principal component
+    subspace, where each vector projection is represented by coefficients in
+    the principal component basis. The first form of the method returns the
+    matrix that the second form writes to the result. So the first form can
+    be used as a part of expression while the second form can be more
+    efficient in a processing loop.
+    @param vec input vector(s); must have the same dimensionality and the
+    same layout as the input data used at the %PCA phase; that is, if
+    DATA_AS_ROW was specified, then `vec.cols==data.cols`
+    (vector dimensionality) and `vec.rows` is the number of vectors to
+    project, and the same is true for the PCA::DATA_AS_COL case.
+    */
+    Mat project(InputArray vec) const;
+
+    /** @overload
+    @param vec input vector(s); must have the same dimensionality and the
+    same layout as the input data used at the PCA phase; that is, if
+    DATA_AS_ROW was specified, then `vec.cols==data.cols`
+    (vector dimensionality) and `vec.rows` is the number of vectors to
+    project, and the same is true for the PCA::DATA_AS_COL case.
+    @param result output vectors; in case of PCA::DATA_AS_COL, the
+    output matrix has as many columns as the number of input vectors; this
+    means that `result.cols==vec.cols` and the number of rows matches the
+    number of principal components (for example, the `maxComponents`
+    parameter passed to the constructor).
+     */
+    void project(InputArray vec, OutputArray result) const;
+
+    /** @brief Reconstructs vectors from their PC projections.
+
+    The methods are inverse operations to PCA::project. They take PC
+    coordinates of projected vectors and reconstruct the original vectors.
+    Unless all the principal components have been retained, the
+    reconstructed vectors are different from the originals. But typically,
+    the difference is small if the number of components is large enough (but
+    still much smaller than the original vector dimensionality). This is
+    precisely why PCA is used for dimensionality reduction.
+    @param vec coordinates of the vectors in the principal component
+    subspace, the layout and size are the same as of PCA::project output
+    vectors.
+     */
+    Mat backProject(InputArray vec) const;
+
+    /** @overload
+    @param vec coordinates of the vectors in the principal component
+    subspace, the layout and size are the same as of PCA::project output
+    vectors.
+    @param result reconstructed vectors; the layout and size are the same as
+    of PCA::project input vectors.
+     */
+    void backProject(InputArray vec, OutputArray result) const;
+
+    /** @brief write PCA objects
+
+    Writes @ref eigenvalues, @ref eigenvectors and @ref mean to the specified FileStorage.
+     */
+    void write(FileStorage& fs) const;
+
+    /** @brief load PCA objects
+
+    Loads @ref eigenvalues, @ref eigenvectors and @ref mean from the specified FileNode.
+     */
+    void read(const FileNode& fn);
+
+    Mat eigenvectors; //!< eigenvectors of the covariance matrix
+    Mat eigenvalues; //!< eigenvalues of the covariance matrix
+    Mat mean; //!< mean value subtracted before the projection and added after the back projection
+};
+
+/** @example pca.cpp
+  An example using %PCA for dimensionality reduction while maintaining an amount of variance
+ */
+
+/**
+   @brief Linear Discriminant Analysis
+   @todo document this class
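+
+   A minimal usage sketch (assuming row-aligned samples in `data` and integer
+   class labels in `labels`):
+   @code{.cpp}
+       LDA lda(data, labels);               // compute the discriminants
+       Mat projected = lda.project(data);   // project into the LDA subspace
+   @endcode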
+ */
+class CV_EXPORTS LDA
+{
+public:
+    /** @brief constructor
+    Initializes an LDA with num_components (default 0).
+    */
+    explicit LDA(int num_components = 0);
+
+    /** Initializes and performs a Discriminant Analysis with Fisher's
+     Optimization Criterion on the given data in src and the corresponding
+     labels in labels. If num_components is 0 (or less), the number of
+     components is determined automatically from the given data.
+    */
+    LDA(InputArrayOfArrays src, InputArray labels, int num_components = 0);
+
+    /** Serializes this object to a given filename.
+      */
+    void save(const String& filename) const;
+
+    /** Deserializes this object from a given filename.
+      */
+    void load(const String& filename);
+
+    /** Serializes this object to a given cv::FileStorage.
+      */
+    void save(FileStorage& fs) const;
+
+    /** Deserializes this object from a given cv::FileStorage.
+      */
+    void load(const FileStorage& node);
+
+    /** destructor
+      */
+    ~LDA();
+
+    /** Compute the discriminants for data in src (row aligned) and labels.
+      */
+    void compute(InputArrayOfArrays src, InputArray labels);
+
+    /** Projects samples into the LDA subspace.
+        src may be one or more row aligned samples.
+      */
+    Mat project(InputArray src);
+
+    /** Reconstructs projections from the LDA subspace.
+        src may be one or more row aligned projections.
+      */
+    Mat reconstruct(InputArray src);
+
+    /** Returns the eigenvectors of this LDA.
+      */
+    Mat eigenvectors() const { return _eigenvectors; }
+
+    /** Returns the eigenvalues of this LDA.
+      */
+    Mat eigenvalues() const { return _eigenvalues; }
+
+    static Mat subspaceProject(InputArray W, InputArray mean, InputArray src);
+    static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
+
+protected:
+    bool _dataAsRow; // unused, but needed for 3.0 ABI compatibility.
+    int _num_components;
+    Mat _eigenvectors;
+    Mat _eigenvalues;
+    void lda(InputArrayOfArrays src, InputArray labels);
+};
+
+/** @brief Singular Value Decomposition
+
+Class for computing Singular Value Decomposition of a floating-point
+matrix. The Singular Value Decomposition is used to solve least-squares
+problems, under-determined linear systems, invert matrices, compute
+condition numbers, and so on.
+
+If you want to compute a condition number of a matrix or an absolute value of
+its determinant, you do not need `u` and `vt`. You can pass
+flags=SVD::NO_UV|... . Another flag SVD::FULL_UV indicates that full-size u
+and vt must be computed, which is not necessary most of the time.
+
+@sa invert, solve, eigen, determinant
+*/
+class CV_EXPORTS SVD
+{
+public:
+    enum Flags {
+        /** allow the algorithm to modify the decomposed matrix; it can save space and speed up
+            processing. Currently ignored. */
+        MODIFY_A = 1,
+        /** indicates that only a vector of singular values `w` is to be processed, while u and vt
+            will be set to empty matrices */
+        NO_UV    = 2,
+        /** when the matrix is not square, by default the algorithm produces u and vt matrices of
+            sufficiently large size for the further A reconstruction; if, however, FULL_UV flag is
+            specified, u and vt will be full-size square orthogonal matrices.*/
+        FULL_UV  = 4
+    };
+
+    /** @brief the default constructor
+
+    initializes an empty SVD structure
+      */
+    SVD();
+
+    /** @overload
+    initializes an empty SVD structure and then calls SVD::operator()
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param flags operation flags (SVD::Flags)
+      */
+    SVD( InputArray src, int flags = 0 );
+
+    /** @brief the operator that performs SVD. The previously allocated u, w and vt are released.
+
+    The operator performs the singular value decomposition of the supplied
+    matrix. The `u`, `vt`, and the vector of singular values `w` are stored in
+    the structure. The same SVD structure can be reused many times with
+    different matrices. Each time, if needed, the previous `u`, `vt`, and `w`
+    are reclaimed and the new matrices are created, which is all handled by
+    Mat::create.
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param flags operation flags (SVD::Flags)
+      */
+    SVD& operator ()( InputArray src, int flags = 0 );
+
+    /** @brief decomposes matrix and stores the results to user-provided matrices
+
+    The methods/functions perform SVD of matrix. Unlike SVD::SVD constructor
+    and SVD::operator(), they store the results to the user-provided
+    matrices:
+
+    @code{.cpp}
+    Mat A, w, u, vt;
+    SVD::compute(A, w, u, vt);
+    @endcode
+
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param w calculated singular values
+    @param u calculated left singular vectors
+    @param vt transposed matrix of right singular vectors
+    @param flags operation flags - see SVD::Flags.
+      */
+    static void compute( InputArray src, OutputArray w,
+                         OutputArray u, OutputArray vt, int flags = 0 );
+
+    /** @overload
+    computes singular values of a matrix
+    @param src decomposed matrix. The depth has to be CV_32F or CV_64F.
+    @param w calculated singular values
+    @param flags operation flags - see SVD::Flags.
+      */
+    static void compute( InputArray src, OutputArray w, int flags = 0 );
+
+    /** @brief performs back substitution
+      */
+    static void backSubst( InputArray w, InputArray u,
+                           InputArray vt, InputArray rhs,
+                           OutputArray dst );
+
+    /** @brief solves an under-determined singular linear system
+
+    The method finds a unit-length solution x of a singular linear system
+    A\*x = 0. Depending on the rank of A, there can be no solutions, a
+    single solution or an infinite number of solutions. In general, the
+    algorithm solves the following problem:
+    \f[dst =  \arg \min _{x:  \| x \| =1}  \| src  \cdot x  \|\f]
+    @param src left-hand-side matrix.
+    @param dst found solution.
+      */
+    static void solveZ( InputArray src, OutputArray dst );
+
+    /** @brief performs a singular value back substitution.
+
+    The method calculates a back substitution for the specified right-hand
+    side:
+
+    \f[\texttt{x} =  \texttt{vt} ^T  \cdot diag( \texttt{w} )^{-1}  \cdot \texttt{u} ^T  \cdot \texttt{rhs} \sim \texttt{A} ^{-1}  \cdot \texttt{rhs}\f]
+
+    Using this technique you can either get a very accurate solution of a
+    consistent linear system, or the best (in the least-squares terms)
+    pseudo-solution of an overdetermined linear system.
+
+    @param rhs right-hand side of a linear system (u\*w\*v')\*dst = rhs to
+    be solved, where A has been previously decomposed.
+
+    @param dst found solution of the system.
+
+    @note Explicit SVD with the further back substitution only makes sense
+    if you need to solve many linear systems with the same left-hand side
+    (for example, src ). If all you need is to solve a single system
+    (possibly with multiple rhs immediately available), simply call solve
+    and pass #DECOMP_SVD there. It does absolutely the same thing.
+      */
+    void backSubst( InputArray rhs, OutputArray dst ) const;
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm> static
+    void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt );
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm> static
+    void compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w );
+
+    /** @todo document */
+    template<typename _Tp, int m, int n, int nm, int nb> static
+    void backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u, const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs, Matx<_Tp, n, nb>& dst );
+
+    Mat u, w, vt;
+};
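+
+/*  A minimal sketch: decompose once, then reuse the factorization for back
+    substitution, as suggested in the backSubst note above; the sizes are
+    illustrative:
+
+    @code{.cpp}
+    cv::Mat A(5, 3, CV_64F), b(5, 1, CV_64F), x;
+    cv::randu(A, cv::Scalar::all(-1), cv::Scalar::all(1));
+    cv::randu(b, cv::Scalar::all(-1), cv::Scalar::all(1));
+
+    cv::SVD svd(A);          // factorize A once
+    svd.backSubst(b, x);     // least-squares solution of A*x = b
+    // one-off alternative: cv::solve(A, b, x, cv::DECOMP_SVD);
+    @endcode
+*/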
+
+/** @brief Random Number Generator
+
+Random number generator. It encapsulates the state (currently, a 64-bit
+integer) and has methods to return scalar random values and to fill
+arrays with random values. Currently it supports uniform and Gaussian
+(normal) distributions. The generator uses Multiply-With-Carry
+algorithm, introduced by G. Marsaglia (
+<http://en.wikipedia.org/wiki/Multiply-with-carry> ).
+Gaussian-distribution random numbers are generated using the Ziggurat
+algorithm ( <http://en.wikipedia.org/wiki/Ziggurat_algorithm> ),
+introduced by G. Marsaglia and W. W. Tsang.
+*/
+class CV_EXPORTS RNG
+{
+public:
+    enum { UNIFORM = 0,
+           NORMAL  = 1
+         };
+
+    /** @brief constructor
+
+    These are the RNG constructors. The first form sets the state to some
+    pre-defined value, equal to 2\*\*32-1 in the current implementation. The
+    second form sets the state to the specified value. If you passed state=0
+    , the constructor uses the above default value instead to avoid the
+    singular random number sequence, consisting of all zeros.
+    */
+    RNG();
+    /** @overload
+    @param state 64-bit value used to initialize the RNG.
+    */
+    RNG(uint64 state);
+    /** The method updates the state using the MWC algorithm and returns the
+    next 32-bit random number.*/
+    unsigned next();
+
+    /** Each of the methods updates the state using the MWC algorithm and
+    returns the next random number of the specified type. In case of integer
+    types, the returned number is from the available value range for the
+    specified type. In case of floating-point types, the returned value is
+    from [0,1) range.
+    */
+    operator uchar();
+    /** @overload */
+    operator schar();
+    /** @overload */
+    operator ushort();
+    /** @overload */
+    operator short();
+    /** @overload */
+    operator unsigned();
+    /** @overload */
+    operator int();
+    /** @overload */
+    operator float();
+    /** @overload */
+    operator double();
+
+    /** @brief returns a random integer sampled uniformly from [0, N).
+
+    The methods transform the state using the MWC algorithm and return the
+    next random number. The first form is equivalent to RNG::next . The
+    second form returns the random number modulo N , which means that the
+    result is in the range [0, N) .
+    */
+    unsigned operator ()();
+    /** @overload
+    @param N upper non-inclusive boundary of the returned random number.
+    */
+    unsigned operator ()(unsigned N);
+
+    /** @brief returns uniformly distributed integer random number from [a,b) range
+
+    The methods transform the state using the MWC algorithm and return the
+    next uniformly-distributed random number of the specified type, deduced
+    from the input parameter type, from the range [a, b) . There is a nuance
+    illustrated by the following sample:
+
+    @code{.cpp}
+    RNG rng;
+
+    // always produces 0
+    double a = rng.uniform(0, 1);
+
+    // produces double from [0, 1)
+    double a1 = rng.uniform((double)0, (double)1);
+
+    // produces float from [0, 1)
+    float b = rng.uniform(0.f, 1.f);
+
+    // produces double from [0, 1)
+    double c = rng.uniform(0., 1.);
+
+    // may cause compiler error because of ambiguity:
+    //  RNG::uniform(0, (int)0.999999)? or RNG::uniform((double)0, 0.99999)?
+    double d = rng.uniform(0, 0.999999);
+    @endcode
+
+    The compiler does not take into account the type of the variable to
+    which you assign the result of RNG::uniform . The only thing that
+    matters to the compiler is the type of a and b parameters. So, if you
+    want a floating-point random number, but the range boundaries are
+    integer numbers, either put dots in the end, if they are constants, or
+    use explicit type cast operators, as in the a1 initialization above.
+    @param a lower inclusive boundary of the returned random number.
+    @param b upper non-inclusive boundary of the returned random number.
+      */
+    int uniform(int a, int b);
+    /** @overload */
+    float uniform(float a, float b);
+    /** @overload */
+    double uniform(double a, double b);
+
+    /** @brief Fills arrays with random numbers.
+
+    @param mat 2D or N-dimensional matrix; currently matrices with more than
+    4 channels are not supported by the methods; use Mat::reshape as a
+    possible workaround.
+    @param distType distribution type, RNG::UNIFORM or RNG::NORMAL.
+    @param a first distribution parameter; in case of the uniform
+    distribution, this is an inclusive lower boundary, in case of the normal
+    distribution, this is a mean value.
+    @param b second distribution parameter; in case of the uniform
+    distribution, this is a non-inclusive upper boundary, in case of the
+    normal distribution, this is a standard deviation (diagonal of the
+    standard deviation matrix or the full standard deviation matrix).
+    @param saturateRange pre-saturation flag; for uniform distribution only;
+    if true, the method will first convert a and b to the acceptable value
+    range (according to the mat datatype) and then will generate uniformly
+    distributed random numbers within the range [saturate(a), saturate(b)),
+    if saturateRange=false, the method will generate uniformly distributed
+    random numbers in the original range [a, b) and then will saturate them,
+    this means, for example, that
+    <tt>theRNG().fill(mat_8u, RNG::UNIFORM, -DBL_MAX, DBL_MAX)</tt> will likely
+    produce an array mostly filled with 0's and 255's, since the range [0, 255]
+    is significantly smaller than [-DBL_MAX, DBL_MAX).
+
+    Each of the methods fills the matrix with the random values from the
+    specified distribution. As the new numbers are generated, the RNG state
+    is updated accordingly. In case of multiple-channel images, every
+    channel is filled independently, which means that RNG cannot generate
+    samples from the multi-dimensional Gaussian distribution with
+    non-diagonal covariance matrix directly. To do that, the method
+    generates samples from multi-dimensional standard Gaussian distribution
+    with zero mean and identity covariance matrix, and then transforms them
+    using transform to get samples from the specified Gaussian distribution.
+    */
+    void fill( InputOutputArray mat, int distType, InputArray a, InputArray b, bool saturateRange = false );
+
+    /** @brief Returns the next random number sampled from the Gaussian distribution
+    @param sigma standard deviation of the distribution.
+
+    The method transforms the state using the MWC algorithm and returns the
+    next random number from the Gaussian distribution N(0,sigma) . That is,
+    the mean value of the returned random numbers is zero and the standard
+    deviation is the specified sigma .
+    */
+    double gaussian(double sigma);
+
+    uint64 state;
+
+    bool operator ==(const RNG& other) const;
+};
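+
+/*  A minimal sketch of typical RNG use; the seed and ranges are illustrative:
+
+    @code{.cpp}
+    cv::RNG rng(12345);
+    int    i = rng.uniform(0, 10);     // integer from [0, 10)
+    double g = rng.gaussian(2.0);      // sample from N(0, 2.0)
+
+    cv::Mat img(4, 4, CV_8UC3);
+    rng.fill(img, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));
+    @endcode
+*/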
+
+/** @brief Mersenne Twister random number generator
+
+Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
+@todo document
+ */
+class CV_EXPORTS RNG_MT19937
+{
+public:
+    RNG_MT19937();
+    RNG_MT19937(unsigned s);
+    void seed(unsigned s);
+
+    unsigned next();
+
+    operator int();
+    operator unsigned();
+    operator float();
+    operator double();
+
+    unsigned operator ()(unsigned N);
+    unsigned operator ()();
+
+    /** @brief returns a uniformly distributed integer random number from the [a,b) range */
+    int uniform(int a, int b);
+    /** @brief returns a uniformly distributed floating-point random number from the [a,b) range */
+    float uniform(float a, float b);
+    /** @brief returns a uniformly distributed double-precision floating-point random number from the [a,b) range */
+    double uniform(double a, double b);
+
+private:
+    enum PeriodParameters {N = 624, M = 397};
+    unsigned state[N];
+    int mti;
+};
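+
+/*  A minimal sketch; the seed value is illustrative:
+
+    @code{.cpp}
+    cv::RNG_MT19937 rng(5489u);
+    unsigned raw = rng();              // raw 32-bit draw
+    int die     = rng.uniform(1, 7);   // integer from [1, 7)
+    @endcode
+*/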
+
+//! @} core_array
+
+//! @addtogroup core_cluster
+//!  @{
+
+/** @example kmeans.cpp
+  An example on K-means clustering
+*/
+
+/** @brief Finds centers of clusters and groups input samples around the clusters.
+
+The function kmeans implements a k-means algorithm that finds the centers of cluster_count clusters
+and groups the input samples around the clusters. As an output, \f$\texttt{labels}_i\f$ contains a
+0-based cluster index for the sample stored in the \f$i^{th}\f$ row of the samples matrix.
+
+@note
+-   (Python) An example on K-means clustering can be found at
+    opencv_source_code/samples/python/kmeans.py
+@param data Data for clustering. An array of N-dimensional points with float coordinates is needed.
+Examples of this array can be:
+-   Mat points(count, 2, CV_32F);
+-   Mat points(count, 1, CV_32FC2);
+-   Mat points(1, count, CV_32FC2);
+-   std::vector\<cv::Point2f\> points(sampleCount);
+@param K Number of clusters to split the set by.
+@param bestLabels Input/output integer array that stores the cluster indices for every sample.
+@param criteria The algorithm termination criteria, that is, the maximum number of iterations and/or
+the desired accuracy. The accuracy is specified as criteria.epsilon. As soon as each of the cluster
+centers moves by less than criteria.epsilon on some iteration, the algorithm stops.
+@param attempts Flag to specify the number of times the algorithm is executed using different
+initial labellings. The algorithm returns the labels that yield the best compactness (see the last
+function parameter).
+@param flags Flag that can take values of cv::KmeansFlags
+@param centers Output matrix of the cluster centers, one row per each cluster center.
+@return The function returns the compactness measure that is computed as
+\f[\sum _i  \| \texttt{samples} _i -  \texttt{centers} _{ \texttt{labels} _i} \| ^2\f]
+after every attempt. The best (minimum) value is chosen and the corresponding labels and the
+compactness value are returned by the function. Basically, you can use only the core of the
+function, set the number of attempts to 1, initialize labels each time using a custom algorithm,
+pass them with the ( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best
+(most-compact) clustering.
+*/
+CV_EXPORTS_W double kmeans( InputArray data, int K, InputOutputArray bestLabels,
+                            TermCriteria criteria, int attempts,
+                            int flags, OutputArray centers = noArray() );
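+
+/*  A minimal sketch of a kmeans call on random 2-D points; K, the number of
+    attempts and the termination criteria are illustrative:
+
+    @code{.cpp}
+    cv::Mat points(100, 2, CV_32F), labels, centers;
+    cv::randu(points, cv::Scalar::all(0), cv::Scalar::all(100));
+    double compactness = cv::kmeans(points, 3, labels,
+        cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 10, 1.0),
+        3, cv::KMEANS_PP_CENTERS, centers);
+    @endcode
+*/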
+
+//! @} core_cluster
+
+//! @addtogroup core_basic
+//! @{
+
+/////////////////////////////// Formatted output of cv::Mat ///////////////////////////
+
+/** @todo document */
+class CV_EXPORTS Formatted
+{
+public:
+    virtual const char* next() = 0;
+    virtual void reset() = 0;
+    virtual ~Formatted();
+};
+
+/** @todo document */
+class CV_EXPORTS Formatter
+{
+public:
+    enum { FMT_DEFAULT = 0,
+           FMT_MATLAB  = 1,
+           FMT_CSV     = 2,
+           FMT_PYTHON  = 3,
+           FMT_NUMPY   = 4,
+           FMT_C       = 5
+         };
+
+    virtual ~Formatter();
+
+    virtual Ptr<Formatted> format(const Mat& mtx) const = 0;
+
+    virtual void set32fPrecision(int p = 8) = 0;
+    virtual void set64fPrecision(int p = 16) = 0;
+    virtual void setMultiline(bool ml = true) = 0;
+
+    static Ptr<Formatter> get(int fmt = FMT_DEFAULT);
+
+};
+
+static inline
+String& operator << (String& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        out += cv::String(str);
+    return out;
+}
+
+static inline
+String& operator << (String& out, const Mat& mtx)
+{
+    return out << Formatter::get()->format(mtx);
+}
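+
+/*  A minimal sketch using the operators above to format a matrix into a
+    cv::String; FMT_NUMPY is one of the Formatter constants:
+
+    @code{.cpp}
+    cv::Mat m = (cv::Mat_<float>(2, 2) << 1, 2, 3, 4);
+    cv::String s;
+    s << cv::Formatter::get(cv::Formatter::FMT_NUMPY)->format(m);
+    // s now holds something like "array([[1., 2.], [3., 4.]], dtype=float32)"
+    @endcode
+*/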
+
+//////////////////////////////////////// Algorithm ////////////////////////////////////
+
+class CV_EXPORTS Algorithm;
+
+template<typename _Tp> struct ParamType {};
+
+
+/** @brief This is a base class for all more or less complex algorithms in OpenCV
+
+especially for classes of algorithms, for which there can be multiple implementations. The examples
+are stereo correspondence (for which there are algorithms like block matching, semi-global block
+matching, graph-cut etc.), background subtraction (which can be done using mixture-of-gaussians
+models, codebook-based algorithm etc.), optical flow (block matching, Lucas-Kanade, Horn-Schunck
+etc.).
+
+Here is an example of using SimpleBlobDetector in your application via the Algorithm interface:
+@snippet snippets/core_various.cpp Algorithm
+ */
+class CV_EXPORTS_W Algorithm
+{
+public:
+    Algorithm();
+    virtual ~Algorithm();
+
+    /** @brief Clears the algorithm state
+    */
+    CV_WRAP virtual void clear() {}
+
+    /** @brief Stores algorithm parameters in a file storage
+    */
+    virtual void write(FileStorage& fs) const { (void)fs; }
+
+    /** @brief simplified API for language bindings
+     * @overload
+     */
+    CV_WRAP void write(const Ptr<FileStorage>& fs, const String& name = String()) const;
+
+    /** @brief Reads algorithm parameters from a file storage
+    */
+    CV_WRAP virtual void read(const FileNode& fn) { (void)fn; }
+
+    /** @brief Returns true if the Algorithm is empty (e.g. in the very beginning or after unsuccessful read)
+     */
+    CV_WRAP virtual bool empty() const { return false; }
+
+    /** @brief Reads algorithm from the file node
+
+     This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+     @code
+     cv::FileStorage fsRead("example.xml", FileStorage::READ);
+     Ptr<SVM> svm = Algorithm::read<SVM>(fsRead.root());
+     @endcode
+     In order to make this method work, the derived class must override Algorithm::read(const
+     FileNode& fn) and also have a static create() method without parameters
+     (or with all of its parameters optional)
+     */
+    template<typename _Tp> static Ptr<_Tp> read(const FileNode& fn)
+    {
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** @brief Loads algorithm from the file
+
+     @param filename Name of the file to read.
+     @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+
+     This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+     @code
+     Ptr<SVM> svm = Algorithm::load<SVM>("my_svm_model.xml");
+     @endcode
+     In order to make this method work, the derived class must override Algorithm::read(const
+     FileNode& fn).
+     */
+    template<typename _Tp> static Ptr<_Tp> load(const String& filename, const String& objname=String())
+    {
+        FileStorage fs(filename, FileStorage::READ);
+        CV_Assert(fs.isOpened());
+        FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname];
+        if (fn.empty()) return Ptr<_Tp>();
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** @brief Loads algorithm from a String
+
+     @param strModel The string variable containing the model you want to load.
+     @param objname The optional name of the node to read (if empty, the first top-level node will be used)
+
+     This is a static template method of Algorithm. Its usage is as follows (in the case of SVM):
+     @code
+     Ptr<SVM> svm = Algorithm::loadFromString<SVM>(myStringModel);
+     @endcode
+     */
+    template<typename _Tp> static Ptr<_Tp> loadFromString(const String& strModel, const String& objname=String())
+    {
+        FileStorage fs(strModel, FileStorage::READ + FileStorage::MEMORY);
+        FileNode fn = objname.empty() ? fs.getFirstTopLevelNode() : fs[objname];
+        Ptr<_Tp> obj = _Tp::create();
+        obj->read(fn);
+        return !obj->empty() ? obj : Ptr<_Tp>();
+    }
+
+    /** Saves the algorithm to a file.
+     In order to make this method work, the derived class must implement Algorithm::write(FileStorage& fs). */
+    CV_WRAP virtual void save(const String& filename) const;
+
+    /** Returns the algorithm string identifier.
+     This string is used as top level xml/yml node tag when the object is saved to a file or string. */
+    CV_WRAP virtual String getDefaultName() const;
+
+protected:
+    void writeFormat(FileStorage& fs) const;
+};
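+
+/*  A minimal sketch of the write/read contract described above; MyFilter and
+    its ksize parameter are hypothetical, the file name is illustrative:
+
+    @code{.cpp}
+    class MyFilter : public cv::Algorithm
+    {
+    public:
+        int ksize;
+        MyFilter() : ksize(3) {}
+        static cv::Ptr<MyFilter> create() { return cv::makePtr<MyFilter>(); }
+        virtual void write(cv::FileStorage& fs) const { fs << "ksize" << ksize; }
+        virtual void read(const cv::FileNode& fn) { fn["ksize"] >> ksize; }
+    };
+
+    MyFilter().save("myfilter.yml");                                     // calls write()
+    cv::Ptr<MyFilter> f = cv::Algorithm::load<MyFilter>("myfilter.yml"); // calls read()
+    @endcode
+*/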
+
+struct Param {
+    enum { INT=0, BOOLEAN=1, REAL=2, STRING=3, MAT=4, MAT_VECTOR=5, ALGORITHM=6, FLOAT=7,
+           UNSIGNED_INT=8, UINT64=9, UCHAR=11, SCALAR=12 };
+};
+
+
+
+template<> struct ParamType<bool>
+{
+    typedef bool const_param_type;
+    typedef bool member_type;
+
+    enum { type = Param::BOOLEAN };
+};
+
+template<> struct ParamType<int>
+{
+    typedef int const_param_type;
+    typedef int member_type;
+
+    enum { type = Param::INT };
+};
+
+template<> struct ParamType<double>
+{
+    typedef double const_param_type;
+    typedef double member_type;
+
+    enum { type = Param::REAL };
+};
+
+template<> struct ParamType<String>
+{
+    typedef const String& const_param_type;
+    typedef String member_type;
+
+    enum { type = Param::STRING };
+};
+
+template<> struct ParamType<Mat>
+{
+    typedef const Mat& const_param_type;
+    typedef Mat member_type;
+
+    enum { type = Param::MAT };
+};
+
+template<> struct ParamType<std::vector<Mat> >
+{
+    typedef const std::vector<Mat>& const_param_type;
+    typedef std::vector<Mat> member_type;
+
+    enum { type = Param::MAT_VECTOR };
+};
+
+template<> struct ParamType<Algorithm>
+{
+    typedef const Ptr<Algorithm>& const_param_type;
+    typedef Ptr<Algorithm> member_type;
+
+    enum { type = Param::ALGORITHM };
+};
+
+template<> struct ParamType<float>
+{
+    typedef float const_param_type;
+    typedef float member_type;
+
+    enum { type = Param::FLOAT };
+};
+
+template<> struct ParamType<unsigned>
+{
+    typedef unsigned const_param_type;
+    typedef unsigned member_type;
+
+    enum { type = Param::UNSIGNED_INT };
+};
+
+template<> struct ParamType<uint64>
+{
+    typedef uint64 const_param_type;
+    typedef uint64 member_type;
+
+    enum { type = Param::UINT64 };
+};
+
+template<> struct ParamType<uchar>
+{
+    typedef uchar const_param_type;
+    typedef uchar member_type;
+
+    enum { type = Param::UCHAR };
+};
+
+template<> struct ParamType<Scalar>
+{
+    typedef const Scalar& const_param_type;
+    typedef Scalar member_type;
+
+    enum { type = Param::SCALAR };
+};
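+
+/*  The specializations above map a C++ type to its Param tag and to the way it
+    is passed when setting parameters; a couple of compile-time checks as a
+    sketch:
+
+    @code{.cpp}
+    static_assert(cv::ParamType<int>::type == cv::Param::INT, "int maps to Param::INT");
+    static_assert(cv::ParamType<cv::Mat>::type == cv::Param::MAT, "Mat maps to Param::MAT");
+    // heavyweight types are passed by const reference:
+    //   ParamType<cv::Mat>::const_param_type is const cv::Mat&
+    @endcode
+*/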
+
+//! @} core_basic
+
+} //namespace cv
+
+#include "opencv2/core/operations.hpp"
+#include "opencv2/core/cvstd.inl.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/core/optim.hpp"
+#include "opencv2/core/ovx.hpp"
+
+#endif /*OPENCV_CORE_HPP*/

+ 678 - 0
ThresholdTools/include/opencv2/core/affine.hpp

@@ -0,0 +1,678 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_AFFINE3_HPP
+#define OPENCV_CORE_AFFINE3_HPP
+
+#ifdef __cplusplus
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+    /** @brief Affine transform
+     *
+     * It represents a 4x4 homogeneous transformation matrix \f$T\f$
+     *
+     *  \f[T =
+     *  \begin{bmatrix}
+     *  R & t\\
+     *  0 & 1\\
+     *  \end{bmatrix}
+     *  \f]
+     *
+     *  where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
+     *
+     *  You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
+     *  which is converted to a 3x3 rotation matrix by the Rodrigues formula.
+     *
+     *  To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
+     *  angle \f$|r|\f$ in radians (right-hand rule) and then translation by the vector \f$t\f$, you can use
+     *
+     *  @code
+     *  cv::Vec3f r, t;
+     *  cv::Affine3f T(r, t);
+     *  @endcode
+     *
+     *  If you already have the rotation matrix \f$R\f$, then you can use
+     *
+     *  @code
+     *  cv::Matx33f R;
+     *  cv::Affine3f T(R, t);
+     *  @endcode
+     *
+     *  To extract the rotation matrix \f$R\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Matx33f R = T.rotation();
+     *  @endcode
+     *
+     *  To extract the translation vector \f$t\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f t = T.translation();
+     *  @endcode
+     *
+     *  To extract the rotation vector \f$r\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f r = T.rvec();
+     *  @endcode
+     *
+     *  Note that since the mapping from rotation vectors to rotation matrices
+     *  is many to one, the returned rotation vector is not necessarily the one
+     *  you used before to set the matrix.
+     *
+     *  If you have two transformations \f$T = T_1 * T_2\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T1, T2;
+     *  T = T2.concatenate(T1);
+     *  @endcode
+     *
+     *  To get the inverse transform of \f$T\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T_inv;
+     *  T_inv = T.inv();
+     *  @endcode
+     *
+     */
+    template<typename T>
+    class Affine3
+    {
+    public:
+        typedef T float_type;
+        typedef Matx<float_type, 3, 3> Mat3;
+        typedef Matx<float_type, 4, 4> Mat4;
+        typedef Vec<float_type, 3> Vec3;
+
+        //! Default constructor. It represents a 4x4 identity matrix.
+        Affine3();
+
+        //! Augmented affine matrix
+        Affine3(const Mat4& affine);
+
+        /**
+         *  The resulting 4x4 matrix is
+         *
+         *  \f[
+         *  \begin{bmatrix}
+         *  R & t\\
+         *  0 & 1\\
+         *  \end{bmatrix}
+         *  \f]
+         *
+         * @param R 3x3 rotation matrix.
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Rodrigues vector.
+         *
+         * The last row of the current matrix is set to [0,0,0,1].
+         *
+         * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
+         *             indicates the rotation angle in radians (right-hand rule).
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
+         *
+         * @param data 1-channel matrix.
+         *             when it is 4x4, it is copied to the current matrix and t is not used.
+         *             When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
+         *             When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
+         *             When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
+         *                             to compute a 3x3 rotation matrix.
+         * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
+         */
+        explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
+
+        //! From 16-element array
+        explicit Affine3(const float_type* vals);
+
+        //! Create a 4x4 identity transform
+        static Affine3 Identity();
+
+        /**
+         * Rotation matrix.
+         *
+         * Copy the rotation matrix to the upper left 3x3 part of the current matrix.
+         * The remaining elements of the current matrix are not changed.
+         *
+         * @param R 3x3 rotation matrix.
+         *
+         */
+        void rotation(const Mat3& R);
+
+        /**
+         * Rodrigues vector.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
+         *             its length indicates the rotation angle in radians (right-hand rule).
+         */
+        void rotation(const Vec3& rvec);
+
+        /**
+         * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param data 1-channel matrix.
+         *             When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
+         *             When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
+         *             is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
+         */
+        void rotation(const Mat& data);
+
+        /**
+         * Copy the 3x3 matrix L to the upper left part of the current matrix
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param L 3x3 matrix.
+         */
+        void linear(const Mat3& L);
+
+        /**
+         * Copy t to the first three elements of the last column of the current matrix
+         *
+         * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
+         *
+         * @param t 3x1 translation vector.
+         */
+        void translation(const Vec3& t);
+
+        //! @return the upper left 3x3 part
+        Mat3 rotation() const;
+
+        //! @return the upper left 3x3 part
+        Mat3 linear() const;
+
+        //! @return the upper right 3x1 part
+        Vec3 translation() const;
+
+        //! Rodrigues vector.
+        //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
+        //! @warning  Since the mapping between rotation vectors and rotation matrices is many to one,
+        //!           this function returns only one rotation vector that represents the current rotation matrix,
+        //!           which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
+        Vec3 rvec() const;
+
+        //! @return the inverse of the current matrix.
+        Affine3 inv(int method = cv::DECOMP_SVD) const;
+
+        //! a.rotate(R) is equivalent to Affine(R, 0) * a;
+        Affine3 rotate(const Mat3& R) const;
+
+        //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
+        Affine3 rotate(const Vec3& rvec) const;
+
+        //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
+        Affine3 translate(const Vec3& t) const;
+
+        //! a.concatenate(affine) is equivalent to affine * a;
+        Affine3 concatenate(const Affine3& affine) const;
+
+        template <typename Y> operator Affine3<Y>() const;
+
+        template <typename Y> Affine3<Y> cast() const;
+
+        Mat4 matrix;
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
+        operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
+        operator Eigen::Transform<T, 3, Eigen::Affine>() const;
+#endif
+    };
+
+    template<typename T> static
+    Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
+
+    //! V is a 3-element vector with member fields x, y and z
+    template<typename T, typename V> static
+    V operator*(const Affine3<T>& affine, const V& vector);
+
+    typedef Affine3<float> Affine3f;
+    typedef Affine3<double> Affine3d;
+
+    static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
+    static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
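+
+    /*  A minimal sketch composing a rotation with a translation and applying
+        the result to a point; the angle and vectors are illustrative:
+
+        @code{.cpp}
+        cv::Vec3f rvec(0.f, 0.f, (float)(CV_PI/2));       // 90 degrees about Z
+        cv::Affine3f T(rvec, cv::Vec3f(1.f, 0.f, 0.f));   // rotate, then translate
+        cv::Vec3f p = T * cv::Vec3f(1.f, 0.f, 0.f);       // approximately (1, 1, 0)
+        cv::Affine3f Tinv = T.inv();                      // maps p back to (1, 0, 0)
+        @endcode
+    */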
+
+    template<typename _Tp> class DataType< Affine3<_Tp> >
+    {
+    public:
+        typedef Affine3<_Tp>                               value_type;
+        typedef Affine3<typename DataType<_Tp>::work_type> work_type;
+        typedef _Tp                                        channel_type;
+
+        enum { generic_type = 0,
+               channels     = 16,
+               fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+               ,depth        = DataType<channel_type>::depth
+               ,type         = CV_MAKETYPE(depth, channels)
+#endif
+             };
+
+        typedef Vec<channel_type, channels> vec_type;
+    };
+
+    namespace traits {
+    template<typename _Tp>
+    struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
+    template<typename _Tp>
+    struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
+    } // namespace
+
+//! @} core
+
+}
+
+//! @cond IGNORED
+
+///////////////////////////////////////////////////////////////////////////////////
+// Implementation
+
+template<typename T> inline
+cv::Affine3<T>::Affine3()
+    : matrix(Mat4::eye())
+{}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Mat4& affine)
+    : matrix(affine)
+{}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
+{
+    rotation(R);
+    translation(t);
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
+{
+    rotation(_rvec);
+    translation(t);
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
+{
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
+
+    if (data.cols == 4 && data.rows == 4)
+    {
+        data.copyTo(matrix);
+        return;
+    }
+    else if (data.cols == 4 && data.rows == 3)
+    {
+        rotation(data(Rect(0, 0, 3, 3)));
+        translation(data(Rect(3, 0, 1, 3)));
+    }
+    else
+    {
+        rotation(data);
+        translation(t);
+    }
+
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
+{}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::Identity()
+{
+    return Affine3<T>(cv::Affine3<T>::Mat4::eye());
+}
+
+template<typename T> inline
+void cv::Affine3<T>::rotation(const Mat3& R)
+{
+    linear(R);
+}
+
+template<typename T> inline
+void cv::Affine3<T>::rotation(const Vec3& _rvec)
+{
+    double theta = norm(_rvec);
+
+    if (theta < DBL_EPSILON)
+        rotation(Mat3::eye());
+    else
+    {
+        double c = std::cos(theta);
+        double s = std::sin(theta);
+        double c1 = 1. - c;
+        double itheta = (theta != 0) ? 1./theta : 0.;
+
+        Point3_<T> r = _rvec*itheta;
+
+        Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
+        Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );
+
+        // R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
+        // where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
+        Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;
+
+        rotation(R);
+    }
+}
+
+// Combines the rotation methods above. Supports 3x3, 1x3 and 3x1 data matrices.
+template<typename T> inline
+void cv::Affine3<T>::rotation(const cv::Mat& data)
+{
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
+
+    if (data.cols == 3 && data.rows == 3)
+    {
+        Mat3 R;
+        data.copyTo(R);
+        rotation(R);
+    }
+    else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
+    {
+        Vec3 _rvec;
+        data.reshape(1, 3).copyTo(_rvec);
+        rotation(_rvec);
+    }
+    else
+        CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
+}
+
+template<typename T> inline
+void cv::Affine3<T>::linear(const Mat3& L)
+{
+    matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1];  matrix.val[ 2] = L.val[2];
+    matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4];  matrix.val[ 6] = L.val[5];
+    matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7];  matrix.val[10] = L.val[8];
+}
+
+template<typename T> inline
+void cv::Affine3<T>::translation(const Vec3& t)
+{
+    matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
+{
+    return linear();
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
+{
+    typename cv::Affine3<T>::Mat3 R;
+    R.val[0] = matrix.val[0];  R.val[1] = matrix.val[1];  R.val[2] = matrix.val[ 2];
+    R.val[3] = matrix.val[4];  R.val[4] = matrix.val[5];  R.val[5] = matrix.val[ 6];
+    R.val[6] = matrix.val[8];  R.val[7] = matrix.val[9];  R.val[8] = matrix.val[10];
+    return R;
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
+{
+    return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
+}
+
+template<typename T> inline
+typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
+{
+    cv::Vec3d w;
+    cv::Matx33d u, vt, R = rotation();
+    cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
+    R = u * vt;
+
+    double rx = R.val[7] - R.val[5];
+    double ry = R.val[2] - R.val[6];
+    double rz = R.val[3] - R.val[1];
+
+    double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
+    double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
+    c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
+    double theta = acos(c);
+
+    if( s < 1e-5 )
+    {
+        if( c > 0 )
+            rx = ry = rz = 0;
+        else
+        {
+            double t;
+            t = (R.val[0] + 1) * 0.5;
+            rx = std::sqrt(std::max(t, 0.0));
+            t = (R.val[4] + 1) * 0.5;
+            ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
+            t = (R.val[8] + 1) * 0.5;
+            rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
+
+            if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
+                rz = -rz;
+            theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
+            rx *= theta;
+            ry *= theta;
+            rz *= theta;
+        }
+    }
+    else
+    {
+        double vth = 1/(2*s);
+        vth *= theta;
+        rx *= vth; ry *= vth; rz *= vth;
+    }
+
+    return cv::Vec3d(rx, ry, rz);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::inv(int method) const
+{
+    return matrix.inv(method);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
+{
+    Mat3 Lc = linear();
+    Vec3 tc = translation();
+    Mat4 result;
+    result.val[12] = result.val[13] = result.val[14] = 0;
+    result.val[15] = 1;
+
+    for(int j = 0; j < 3; ++j)
+    {
+        for(int i = 0; i < 3; ++i)
+        {
+            float_type value = 0;
+            for(int k = 0; k < 3; ++k)
+                value += R(j, k) * Lc(k, i);
+            result(j, i) = value;
+        }
+
+        result(j, 3) = R.row(j).dot(tc.t());
+    }
+    return result;
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
+{
+    return rotate(Affine3f(_rvec).rotation());
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
+{
+    Mat4 m = matrix;
+    m.val[ 3] += t[0];
+    m.val[ 7] += t[1];
+    m.val[11] += t[2];
+    return m;
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
+{
+    return (*this).rotate(affine.rotation()).translate(affine.translation());
+}
+
+template<typename T> template <typename Y> inline
+cv::Affine3<T>::operator Affine3<Y>() const
+{
+    return Affine3<Y>(matrix);
+}
+
+template<typename T> template <typename Y> inline
+cv::Affine3<Y> cv::Affine3<T>::cast() const
+{
+    return Affine3<Y>(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
+{
+    return affine2.concatenate(affine1);
+}
+
+template<typename T, typename V> inline
+V cv::operator*(const cv::Affine3<T>& affine, const V& v)
+{
+    const typename Affine3<T>::Mat4& m = affine.matrix;
+
+    V r;
+    r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
+    r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
+    r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
+    return r;
+}
+
+static inline
+cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
+{
+    const cv::Matx44f& m = affine.matrix;
+    cv::Vec3f r;
+    r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
+    r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
+    r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
+    return r;
+}
+
+static inline
+cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
+{
+    const cv::Matx44d& m = affine.matrix;
+    cv::Vec3d r;
+    r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
+    r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
+    r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
+    return r;
+}
+
+
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
+{
+    cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
+    cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
+}
+
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
+    cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
+    cv::Mat(matrix, false).copyTo(hdr);
+    return r;
+}
+
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
+{
+    return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
+}
+
+#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
+
+//! @endcond
+
+#endif /* __cplusplus */
+
+#endif /* OPENCV_CORE_AFFINE3_HPP */

+ 772 - 0
ThresholdTools/include/opencv2/core/base.hpp

@@ -0,0 +1,772 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_BASE_HPP
+#define OPENCV_CORE_BASE_HPP
+
+#ifndef __cplusplus
+#  error base.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/opencv_modules.hpp"
+
+#include <climits>
+#include <algorithm>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+namespace Error {
+//! error codes
+enum Code {
+    StsOk=                       0,  //!< everything is ok
+    StsBackTrace=               -1,  //!< pseudo error for back trace
+    StsError=                   -2,  //!< unknown /unspecified error
+    StsInternal=                -3,  //!< internal error (bad state)
+    StsNoMem=                   -4,  //!< insufficient memory
+    StsBadArg=                  -5,  //!< function arg/param is bad
+    StsBadFunc=                 -6,  //!< unsupported function
+    StsNoConv=                  -7,  //!< iteration didn't converge
+    StsAutoTrace=               -8,  //!< tracing
+    HeaderIsNull=               -9,  //!< image header is NULL
+    BadImageSize=              -10,  //!< image size is invalid
+    BadOffset=                 -11,  //!< offset is invalid
+    BadDataPtr=                -12,  //!<
+    BadStep=                   -13,  //!< image step is wrong, this may happen for a non-continuous matrix.
+    BadModelOrChSeq=           -14,  //!<
+    BadNumChannels=            -15,  //!< bad number of channels, for example, some functions accept only single channel matrices.
+    BadNumChannel1U=           -16,  //!<
+    BadDepth=                  -17,  //!< input image depth is not supported by the function
+    BadAlphaChannel=           -18,  //!<
+    BadOrder=                  -19,  //!< number of dimensions is out of range
+    BadOrigin=                 -20,  //!< incorrect input origin
+    BadAlign=                  -21,  //!< incorrect input align
+    BadCallBack=               -22,  //!<
+    BadTileSize=               -23,  //!<
+    BadCOI=                    -24,  //!< input COI is not supported
+    BadROISize=                -25,  //!< incorrect input roi
+    MaskIsTiled=               -26,  //!<
+    StsNullPtr=                -27,  //!< null pointer
+    StsVecLengthErr=           -28,  //!< incorrect vector length
+    StsFilterStructContentErr= -29,  //!< incorrect filter structure content
+    StsKernelStructContentErr= -30,  //!< incorrect transform kernel content
+    StsFilterOffsetErr=        -31,  //!< incorrect filter offset value
+    StsBadSize=                -201, //!< the input/output structure size is incorrect
+    StsDivByZero=              -202, //!< division by zero
+    StsInplaceNotSupported=    -203, //!< in-place operation is not supported
+    StsObjectNotFound=         -204, //!< request can't be completed
+    StsUnmatchedFormats=       -205, //!< formats of input/output arrays differ
+    StsBadFlag=                -206, //!< flag is wrong or not supported
+    StsBadPoint=               -207, //!< bad CvPoint
+    StsBadMask=                -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
+    StsUnmatchedSizes=         -209, //!< sizes of input/output structures do not match
+    StsUnsupportedFormat=      -210, //!< the data format/type is not supported by the function
+    StsOutOfRange=             -211, //!< some of parameters are out of range
+    StsParseError=             -212, //!< invalid syntax/structure of the parsed file
+    StsNotImplemented=         -213, //!< the requested function/feature is not implemented
+    StsBadMemBlock=            -214, //!< an allocated block has been corrupted
+    StsAssert=                 -215, //!< assertion failed
+    GpuNotSupported=           -216, //!< no CUDA support
+    GpuApiCallError=           -217, //!< GPU API call error
+    OpenGlNotSupported=        -218, //!< no OpenGL support
+    OpenGlApiCallError=        -219, //!< OpenGL API call error
+    OpenCLApiCallError=        -220, //!< OpenCL API call error
+    OpenCLDoubleNotSupported=  -221,
+    OpenCLInitError=           -222, //!< OpenCL initialization error
+    OpenCLNoAMDBlasFft=        -223
+};
+} //Error
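+
+/*  Error codes surface through cv::Exception; a minimal sketch, assuming the
+    CV_Error macro defined later in this header:
+
+    @code{.cpp}
+    try {
+        CV_Error(cv::Error::StsBadArg, "demonstration only");
+    } catch (const cv::Exception& e) {
+        std::cerr << e.code << ": " << e.what() << std::endl;  // e.code == -5
+    }
+    @endcode
+*/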
+
+//! @} core_utils
+
+//! @addtogroup core_array
+//! @{
+
+//! matrix decomposition types
+enum DecompTypes {
+    /** Gaussian elimination with the optimal pivot element chosen. */
+    DECOMP_LU       = 0,
+    /** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
+    src1 can be singular */
+    DECOMP_SVD      = 1,
+    /** eigenvalue decomposition; the matrix src1 must be symmetric */
+    DECOMP_EIG      = 2,
+    /** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetric and
+    positive-definite */
+    DECOMP_CHOLESKY = 3,
+    /** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
+    DECOMP_QR       = 4,
+    /** while all the previous flags are mutually exclusive, this flag can be used together with
+    any of the previous; it means that the normal equations
+    \f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
+    solved instead of the original system
+    \f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
+    DECOMP_NORMAL   = 16
+};
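+
+/*  A minimal sketch of combining DECOMP_NORMAL with another method: the normal
+    equations make \f$\texttt{src1}^T\cdot\texttt{src1}\f$ square and (for a
+    full-rank src1) positive-definite, so Cholesky applies even though the
+    matrix itself is rectangular; the sizes are illustrative:
+
+    @code{.cpp}
+    cv::Mat A(10, 3, CV_64F), b(10, 1, CV_64F), x;
+    cv::randu(A, cv::Scalar::all(-1), cv::Scalar::all(1));
+    cv::randu(b, cv::Scalar::all(-1), cv::Scalar::all(1));
+    cv::solve(A, b, x, cv::DECOMP_CHOLESKY | cv::DECOMP_NORMAL);  // least-squares x
+    @endcode
+*/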
+
+/** norm types
+
+src1 and src2 denote input arrays.
+*/
+
+enum NormTypes {
+                /**
+                \f[
+                norm =  \forkthree
+                {\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
+                \f]
+                */
+                NORM_INF       = 1,
+                /**
+                \f[
+                norm =  \forkthree
+                {\| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\)}
+                { \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
+                { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
+                \f]*/
+                 NORM_L1        = 2,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
+                 \f]
+                 */
+                 NORM_L2        = 4,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if  \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} =  \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if  \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
+                 { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
+                 \f]
+                 */
+                 NORM_L2SQR     = 5,
+                 /**
+                 In the case of one input array, calculates the Hamming distance of the array from zero.
+                 In the case of two input arrays, calculates the Hamming distance between the arrays.
+                 */
+                 NORM_HAMMING   = 6,
+                 /**
+                 Similar to NORM_HAMMING, but in the calculation, every two bits of the input
+                 sequence are added together and treated as a single bit to be used in the same
+                 calculation as NORM_HAMMING.
+                 */
+                 NORM_HAMMING2  = 7,
+                 NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
+                 NORM_RELATIVE  = 8, //!< flag
+                 NORM_MINMAX    = 32 //!< flag
+               };
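+
+/* A short usage sketch (not part of the original header); cv::norm is declared elsewhere
+   in the core API:
+@code
+    cv::Mat a = (cv::Mat_<float>(1, 3) << 1, -2, 3);
+    cv::Mat b = (cv::Mat_<float>(1, 3) << 1,  0, 0);
+    double linf = cv::norm(a, cv::NORM_INF);                       // max|a(I)| == 3
+    double l1   = cv::norm(a, b, cv::NORM_L1);                     // sum|a(I)-b(I)| == 5
+    double rel  = cv::norm(a, b, cv::NORM_RELATIVE | cv::NORM_L2); // ||a-b||_2 / ||b||_2
+@endcode
+*/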
+
+//! comparison types
+enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
+                CMP_GT = 1, //!< src1 is greater than src2.
+                CMP_GE = 2, //!< src1 is greater than or equal to src2.
+                CMP_LT = 3, //!< src1 is less than src2.
+                CMP_LE = 4, //!< src1 is less than or equal to src2.
+                CMP_NE = 5  //!< src1 is unequal to src2.
+              };
+
+//! generalized matrix multiplication flags
+enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
+                 GEMM_2_T = 2, //!< transposes src2
+                 GEMM_3_T = 4 //!< transposes src3
+               };
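+
+/* A usage sketch (not part of the original header); cv::gemm is declared elsewhere in core:
+@code
+    cv::Mat A(3, 2, CV_64F), B(3, 2, CV_64F), C(2, 2, CV_64F), D;
+    cv::randu(A, 0, 1); cv::randu(B, 0, 1); cv::randu(C, 0, 1);
+    // D = 1.0 * A^T * B + 0.5 * C   (GEMM_1_T transposes the first operand)
+    cv::gemm(A, B, 1.0, C, 0.5, D, cv::GEMM_1_T);
+@endcode
+*/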
+
+enum DftFlags {
+    /** performs an inverse 1D or 2D transform instead of the default forward
+        transform. */
+    DFT_INVERSE        = 1,
+    /** scales the result: divide it by the number of array elements. Normally, it is
+        combined with DFT_INVERSE. */
+    DFT_SCALE          = 2,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself)
+        when performing 3D and higher-dimensional transformations and so forth.*/
+    DFT_ROWS           = 4,
+    /** performs a forward transformation of 1D or 2D real array; the result,
+        though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
+        description below for details), and such an array can be packed into a real array of the same
+        size as input, which is the fastest option and which is what the function does by default;
+        however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
+        pass the flag to enable the function to produce a full-size complex output array. */
+    DFT_COMPLEX_OUTPUT = 16,
+    /** performs an inverse transformation of a 1D or 2D complex array; the
+        result is normally a complex array of the same size, however, if the input array has
+        conjugate-complex symmetry (for example, it is a result of forward transformation with
+        DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
+        check whether the input is symmetrical or not, you can pass the flag and then the function
+        will assume the symmetry and produce the real output array (note that when the input is packed
+        into a real array and inverse transformation is executed, the function treats the input as a
+        packed complex-conjugate symmetrical array, and the output will also be a real array). */
+    DFT_REAL_OUTPUT    = 32,
+    /** specifies that the input is complex. If this flag is set, the input must have 2 channels.
+        On the other hand, for backward compatibility reasons, if the input has 2 channels, it is
+        already considered complex. */
+    DFT_COMPLEX_INPUT  = 64,
+    /** performs an inverse 1D or 2D transform instead of the default forward transform. */
+    DCT_INVERSE        = DFT_INVERSE,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself)
+        when performing 3D and higher-dimensional transforms and so forth.*/
+    DCT_ROWS           = DFT_ROWS
+};
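+
+/* A usage sketch (not part of the original header); cv::dft/cv::idft are declared elsewhere:
+@code
+    cv::Mat signal(1, 8, CV_32F), spectrum, restored;
+    cv::randu(signal, -1, 1);
+    cv::dft(signal, spectrum, cv::DFT_COMPLEX_OUTPUT);   // full-size complex spectrum
+    cv::dft(spectrum, restored, cv::DFT_INVERSE | cv::DFT_SCALE | cv::DFT_REAL_OUTPUT);
+    // restored is approximately equal to signal
+@endcode
+*/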
+
+//! Various border types, image boundaries are denoted with `|`
+//! @see borderInterpolate, copyMakeBorder
+enum BorderTypes {
+    BORDER_CONSTANT    = 0, //!< `iiiiii|abcdefgh|iiiiiii`  with some specified `i`
+    BORDER_REPLICATE   = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
+    BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
+    BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
+    BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
+
+    BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_ISOLATED    = 16 //!< do not look outside of ROI
+};
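+
+/* A usage sketch (not part of the original header); cv::copyMakeBorder is declared in core:
+@code
+    cv::Mat src(4, 4, CV_8UC1, cv::Scalar(7)), dst;
+    // pad 2 pixels on every side; BORDER_CONSTANT fills with the supplied scalar
+    cv::copyMakeBorder(src, dst, 2, 2, 2, 2, cv::BORDER_CONSTANT, cv::Scalar(0));
+    // BORDER_REFLECT_101 (the default of most filters) mirrors without repeating the edge
+    cv::copyMakeBorder(src, dst, 2, 2, 2, 2, cv::BORDER_REFLECT_101);
+@endcode
+*/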
+
+//! @} core_array
+
+//! @addtogroup core_utils
+//! @{
+
+//! @cond IGNORED
+
+//////////////// static assert /////////////////
+#define CVAUX_CONCAT_EXP(a, b) a##b
+#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
+
+#if defined(__clang__)
+#  ifndef __has_extension
+#    define __has_extension __has_feature /* compatibility, for older versions of clang */
+#  endif
+#  if __has_extension(cxx_static_assert)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  elif __has_extension(c_static_assert)
+#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(__GNUC__)
+#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(_MSC_VER)
+#  if _MSC_VER >= 1600 /* MSVC 10 */
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#endif
+#ifndef CV_StaticAssert
+#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
+#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
+#  else
+     template <bool x> struct CV_StaticAssert_failed;
+     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
+     template<int x> struct CV_StaticAssert_test {};
+#    define CV_StaticAssert(condition, reason)\
+       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
+#  endif
+#endif
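+
+/* Usage sketch (not part of the original header): the condition must be a compile-time
+   constant; the reason string is appended to the diagnostic.
+@code
+    CV_StaticAssert(sizeof(int) >= 4, "int is expected to be at least 32 bits");
+@endcode
+*/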
+
+// Suppress warning "-Wdeprecated-declarations" / C4996
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+#else
+    #define CV_DO_PRAGMA(x)
+#endif
+
+#ifdef _MSC_VER
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(warning(push)) \
+    CV_DO_PRAGMA(warning(disable: 4996))
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
+#elif defined (__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(GCC diagnostic push) \
+    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
+#else
+#define CV_SUPPRESS_DEPRECATED_START
+#define CV_SUPPRESS_DEPRECATED_END
+#endif
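+
+/* Usage sketch (not part of the original header): wrap a call to a deprecated function so
+   the build stays warning-clean; the hypothetical legacyFunc() stands in for any such call.
+@code
+    CV_SUPPRESS_DEPRECATED_START
+    legacyFunc();   // deprecated; the warning is suppressed only inside this region
+    CV_SUPPRESS_DEPRECATED_END
+@endcode
+*/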
+
+#define CV_UNUSED(name) (void)name
+
+#if defined __GNUC__ && !defined __EXCEPTIONS
+#define CV_TRY
+#define CV_CATCH(A, B) for (A B; false; )
+#define CV_CATCH_ALL if (false)
+#define CV_THROW(A) abort()
+#define CV_RETHROW() abort()
+#else
+#define CV_TRY try
+#define CV_CATCH(A, B) catch(const A & B)
+#define CV_CATCH_ALL catch(...)
+#define CV_THROW(A) throw A
+#define CV_RETHROW() throw
+#endif
+
+//! @endcond
+
+/*! @brief Signals an error and raises the exception.
+
+By default the function prints information about the error to stderr,
+then it either stops if setBreakOnError() had been called before or raises the exception.
+It is possible to alter error processing by using redirectError().
+@param _code - error code (Error::Code)
+@param _err - error description
+@param _func - function name. Available only when the compiler supports getting it
+@param _file - source file name where the error has occurred
+@param _line - line number in the source file where the error has occurred
+@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
+ */
+CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
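+
+/* A sketch (not part of the original header) of installing a custom handler via
+   cv::redirectError, declared in opencv2/core/utility.hpp; the callback below is assumed
+   to match the cv::ErrorCallback signature.
+@code
+    static int quietHandler(int status, const char* func_name, const char* err_msg,
+                            const char* file_name, int line, void* userdata)
+    {
+        // log somewhere instead of printing to stderr; return 0 to continue
+        return 0;
+    }
+    // ...
+    cv::redirectError(quietHandler);
+@endcode
+*/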
+
+#ifdef __GNUC__
+# if defined __clang__ || defined __APPLE__
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Winvalid-noreturn"
+# endif
+#endif
+
+/** same as cv::error, but does not return */
+CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
+{
+    error(_code, _err, _func, _file, _line);
+#ifdef __GNUC__
+# if !defined __clang__ && !defined __APPLE__
+    // this suppresses this warning: "noreturn" function does return [enabled by default]
+    __builtin_trap();
+    // or use infinite loop: for (;;) {}
+# endif
+#endif
+}
+#ifdef __GNUC__
+# if defined __clang__ || defined __APPLE__
+#   pragma GCC diagnostic pop
+# endif
+#endif
+
+#if defined __GNUC__
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
+#else
+#define CV_Func ""
+#endif
+
+#ifdef CV_STATIC_ANALYSIS
+
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definition for them.
+#define CV_Error(...) do { abort(); } while (0)
+#define CV_Error_( code, args ) do { cv::format args; abort(); } while (0)
+#define CV_Assert_1( expr ) do { if (!(expr)) abort(); } while (0)
+
+#else // CV_STATIC_ANALYSIS
+
+/** @brief Call the error handler.
+
+Currently, the error handler prints the error code and the error message to the standard
+error stream `stderr`. In the Debug configuration, it then provokes a memory access violation, so that
+the execution stack and all the parameters can be analyzed by the debugger. In the Release
+configuration, the exception is thrown.
+
+@param code one of Error::Code
+@param msg error message
+*/
+#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
+
+/**  @brief Call the error handler.
+
+This macro can be used to construct an error message on the fly to include some dynamic information,
+for example:
+@code
+    // note the extra parentheses around the formatted text message
+    CV_Error_( CV_StsOutOfRange,
+    ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
+@endcode
+@param code one of Error::Code
+@param args printf-like formatted error message in parentheses
+*/
+#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+
+#define CV_Assert_1( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
+
+//! @cond IGNORED
+#define CV__ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
+#define CV__ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+#ifdef __OPENCV_BUILD
+#undef CV_Error
+#define CV_Error CV__ErrorNoReturn
+#undef CV_Error_
+#define CV_Error_ CV__ErrorNoReturn_
+#undef CV_Assert_1
+#define CV_Assert_1( expr ) if(!!(expr)) ; else cv::errorNoReturn( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
+#else
+// backward compatibility
+#define CV_ErrorNoReturn CV__ErrorNoReturn
+#define CV_ErrorNoReturn_ CV__ErrorNoReturn_
+#endif
+//! @endcond
+
+#endif // CV_STATIC_ANALYSIS
+
+#define CV_Assert_2( expr1, expr2 ) CV_Assert_1(expr1); CV_Assert_1(expr2)
+#define CV_Assert_3( expr1, expr2, expr3 ) CV_Assert_2(expr1, expr2); CV_Assert_1(expr3)
+#define CV_Assert_4( expr1, expr2, expr3, expr4 ) CV_Assert_3(expr1, expr2, expr3); CV_Assert_1(expr4)
+#define CV_Assert_5( expr1, expr2, expr3, expr4, expr5 ) CV_Assert_4(expr1, expr2, expr3, expr4); CV_Assert_1(expr5)
+#define CV_Assert_6( expr1, expr2, expr3, expr4, expr5, expr6 ) CV_Assert_5(expr1, expr2, expr3, expr4, expr5); CV_Assert_1(expr6)
+#define CV_Assert_7( expr1, expr2, expr3, expr4, expr5, expr6, expr7 ) CV_Assert_6(expr1, expr2, expr3, expr4, expr5, expr6 ); CV_Assert_1(expr7)
+#define CV_Assert_8( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ) CV_Assert_7(expr1, expr2, expr3, expr4, expr5, expr6, expr7 ); CV_Assert_1(expr8)
+#define CV_Assert_9( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ) CV_Assert_8(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ); CV_Assert_1(expr9)
+#define CV_Assert_10( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10 ) CV_Assert_9(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ); CV_Assert_1(expr10)
+
+#define CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
+#define CV_VA_NUM_ARGS(...) CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/** @brief Checks a condition at runtime and throws exception if it fails
+
+The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
+raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
+configurations while CV_DbgAssert is only retained in the Debug configuration.
+*/
+#define CV_Assert(...) do { CVAUX_CONCAT(CV_Assert_, CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__); } while(0)
+
+/** replaced with CV_Assert(expr) in Debug configuration */
+#ifdef _DEBUG
+#  define CV_DbgAssert(expr) CV_Assert(expr)
+#else
+#  define CV_DbgAssert(expr)
+#endif
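+
+/* Usage sketch (not part of the original header): CV_Assert accepts up to ten
+   comma-separated expressions, each checked in turn.
+@code
+    void process(const cv::Mat& m)
+    {
+        CV_Assert(!m.empty(), m.type() == CV_8UC1);  // two checks in one macro
+        CV_DbgAssert(m.isContinuous());              // checked only in Debug builds
+    }
+@endcode
+*/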
+
+/*
+ * Hamming distance functor - counts the bit differences between two strings; useful for the BRIEF descriptor.
+ * Computes the bit count (popcount) of A XOR B.
+ */
+struct CV_EXPORTS Hamming
+{
+    enum { normType = NORM_HAMMING };
+    typedef unsigned char ValueType;
+    typedef int ResultType;
+
+    /** this will count the bits in a ^ b
+     */
+    ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
+};
+
+typedef Hamming HammingLUT;
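+
+/* Usage sketch (not part of the original header): the functor operates on raw byte strings,
+   e.g. two BRIEF/ORB descriptors of equal length.
+@code
+    unsigned char d1[32], d2[32];    // e.g. two 256-bit binary descriptors
+    // ... fill d1 and d2 ...
+    cv::Hamming hamming;
+    int dist = hamming(d1, d2, 32);  // popcount of d1 ^ d2
+@endcode
+*/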
+
+/////////////////////////////////// inline norms ////////////////////////////////////
+
+template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
+inline int cv_abs(uchar x) { return x; }
+inline int cv_abs(schar x) { return std::abs(x); }
+inline int cv_abs(ushort x) { return x; }
+inline int cv_abs(short x) { return std::abs(x); }
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL2Sqr(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i=0;
+#if CV_ENABLE_UNROLLED
+    for( ; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = a[i];
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL1(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i = 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
+            (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
+    }
+#endif
+    for( ; i < n; i++ )
+        s += cv_abs(a[i]);
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normInf(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    for( int i = 0; i < n; i++ )
+        s = std::max(s, (_AccTp)cv_abs(a[i]));
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);
+        s += v*v;
+    }
+    return s;
+}
+
+static inline float normL2Sqr(const float* a, const float* b, int n)
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )
+    {
+        float v = a[i] - b[i];
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normL1(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
+    }
+#endif
+    for( ; i < n; i++ )
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);
+        s += std::abs(v);
+    }
+    return s;
+}
+
+inline float normL1(const float* a, const float* b, int n)
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);
+    }
+    return s;
+}
+
+inline int normL1(const uchar* a, const uchar* b, int n)
+{
+    int s = 0;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline
+_AccTp normInf(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    for( int i = 0; i < n; i++ )
+    {
+        _AccTp v0 = a[i] - b[i];
+        s = std::max(s, std::abs(v0));
+    }
+    return s;
+}
+
+/** @brief Computes the cube root of an argument.
+
+ The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
+ NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
+ single-precision data.
+ @param val A function argument.
+ */
+CV_EXPORTS_W float cubeRoot(float val);
+
+/** @brief Calculates the angle of a 2D vector in degrees.
+
+ The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
+ in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
+ @param x x-coordinate of the vector.
+ @param y y-coordinate of the vector.
+ */
+CV_EXPORTS_W float fastAtan2(float y, float x);
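+
+/* Usage sketch (not part of the original header):
+@code
+    float r   = cv::cubeRoot(-27.0f);        // -3, negative input handled correctly
+    float ang = cv::fastAtan2(1.0f, 1.0f);   // ~45 degrees, accurate to about 0.3 deg
+@endcode
+*/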
+
+/** proxy for hal::LU */
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::LU */
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
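+
+/* A sketch (not part of the original header) of solving A*x = b in place via the LU proxy;
+   it is assumed here that the steps are in bytes (as for cv::Mat::step) and that a zero
+   return value indicates a singular matrix.
+@code
+    cv::Mat A = (cv::Mat_<double>(2, 2) << 2, 1, 1, 3);
+    cv::Mat b = (cv::Mat_<double>(2, 1) << 3, 5);
+    int ret = cv::LU(A.ptr<double>(), A.step, A.rows, b.ptr<double>(), b.step, b.cols);
+    // on success b now holds the solution x; A has been overwritten by its LU factors
+@endcode
+*/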
+
+////////////////// forward declarations for important OpenCV types //////////////////
+
+//! @cond IGNORED
+
+template<typename _Tp, int cn> class Vec;
+template<typename _Tp, int m, int n> class Matx;
+
+template<typename _Tp> class Complex;
+template<typename _Tp> class Point_;
+template<typename _Tp> class Point3_;
+template<typename _Tp> class Size_;
+template<typename _Tp> class Rect_;
+template<typename _Tp> class Scalar_;
+
+class CV_EXPORTS RotatedRect;
+class CV_EXPORTS Range;
+class CV_EXPORTS TermCriteria;
+class CV_EXPORTS KeyPoint;
+class CV_EXPORTS DMatch;
+class CV_EXPORTS RNG;
+
+class CV_EXPORTS Mat;
+class CV_EXPORTS MatExpr;
+
+class CV_EXPORTS UMat;
+
+class CV_EXPORTS SparseMat;
+typedef Mat MatND;
+
+template<typename _Tp> class Mat_;
+template<typename _Tp> class SparseMat_;
+
+class CV_EXPORTS MatConstIterator;
+class CV_EXPORTS SparseMatIterator;
+class CV_EXPORTS SparseMatConstIterator;
+template<typename _Tp> class MatIterator_;
+template<typename _Tp> class MatConstIterator_;
+template<typename _Tp> class SparseMatIterator_;
+template<typename _Tp> class SparseMatConstIterator_;
+
+namespace ogl
+{
+    class CV_EXPORTS Buffer;
+    class CV_EXPORTS Texture2D;
+    class CV_EXPORTS Arrays;
+}
+
+namespace cuda
+{
+    class CV_EXPORTS GpuMat;
+    class CV_EXPORTS HostMem;
+    class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
+}
+
+namespace cudev
+{
+    template <typename _Tp> class GpuMat_;
+}
+
+namespace ipp
+{
+#if OPENCV_ABI_COMPATIBILITY > 300
+CV_EXPORTS   unsigned long long getIppFeatures();
+#else
+CV_EXPORTS   int getIppFeatures();
+#endif
+CV_EXPORTS   void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
+                             int line = 0);
+CV_EXPORTS   int getIppStatus();
+CV_EXPORTS   String getIppErrorLocation();
+CV_EXPORTS_W bool   useIPP();
+CV_EXPORTS_W void   setUseIPP(bool flag);
+CV_EXPORTS_W String getIppVersion();
+
+// IPP Not-Exact mode. These functions may force the use of IPP when both IPP and OpenCV produce
+// correct results but have internal accuracy differences with too much direct or indirect impact
+// on accuracy tests.
+CV_EXPORTS_W bool useIPP_NE();
+CV_EXPORTS_W void setUseIPP_NE(bool flag);
+
+} // ipp
+
+//! @endcond
+
+//! @} core_utils
+
+
+
+
+} // cv
+
+#include "opencv2/core/neon_utils.hpp"
+#include "opencv2/core/vsx_utils.hpp"
+#include "opencv2/core/check.hpp"
+
+#endif //OPENCV_CORE_BASE_HPP

+ 40 - 0
ThresholdTools/include/opencv2/core/bufferpool.hpp

@@ -0,0 +1,40 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#ifndef OPENCV_CORE_BUFFER_POOL_HPP
+#define OPENCV_CORE_BUFFER_POOL_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4265)
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+class BufferPoolController
+{
+protected:
+    ~BufferPoolController() { }
+public:
+    virtual size_t getReservedSize() const = 0;
+    virtual size_t getMaxReservedSize() const = 0;
+    virtual void setMaxReservedSize(size_t size) = 0;
+    virtual void freeAllReservedBuffers() = 0;
+};
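+
+/* A minimal implementation sketch (not part of the original header): allocators that keep a
+   reusable memory pool can expose their budget through this interface.
+@code
+    class MyPoolController : public cv::BufferPoolController
+    {
+        size_t reserved_ = 0, maxReserved_ = 64 * 1024 * 1024;
+    public:
+        size_t getReservedSize() const { return reserved_; }
+        size_t getMaxReservedSize() const { return maxReserved_; }
+        void setMaxReservedSize(size_t size) { maxReserved_ = size; }
+        void freeAllReservedBuffers() { reserved_ = 0; }
+    };
+@endcode
+*/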
+
+//! @}
+
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // OPENCV_CORE_BUFFER_POOL_HPP

+ 135 - 0
ThresholdTools/include/opencv2/core/check.hpp

@@ -0,0 +1,135 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CHECK_HPP
+#define OPENCV_CORE_CHECK_HPP
+
+#include <opencv2/core/base.hpp>
+
+namespace cv {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
+CV_EXPORTS const char* depthToString(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
+CV_EXPORTS const String typeToString(int type);
+
+
+//! @cond IGNORED
+namespace detail {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
+CV_EXPORTS const char* depthToString_(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or cv::String() */
+CV_EXPORTS const cv::String typeToString_(int type);
+
+enum TestOp {
+  TEST_CUSTOM = 0,
+  TEST_EQ = 1,
+  TEST_NE = 2,
+  TEST_LE = 3,
+  TEST_LT = 4,
+  TEST_GE = 5,
+  TEST_GT = 6,
+  CV__LAST_TEST_OP
+};
+
+struct CheckContext {
+    const char* func;
+    const char* file;
+    int line;
+    enum TestOp testOp;
+    const char* message;
+    const char* p1_str;
+    const char* p2_str;
+};
+
+#ifndef CV__CHECK_FILENAME
+# define CV__CHECK_FILENAME __FILE__
+#endif
+
+#ifndef CV__CHECK_FUNCTION
+# if defined _MSC_VER
+#   define CV__CHECK_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__CHECK_FUNCTION "<unknown>"
+# endif
+#endif
+
+#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
+#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
+    static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
+            { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
+
+
+#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
+#define CV__TEST_NE(v1, v2) ((v1) != (v2))
+#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
+#define CV__TEST_LT(v1, v2) ((v1) < (v2))
+#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
+#define CV__TEST_GT(v1, v2) ((v1) > (v2))
+
+#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
+    if(CV__TEST_##op((v1), (v2))) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
+        cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0)
+
+#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
+    if(!!(test_expr)) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
+        cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0)
+
+} // namespace
+//! @endcond
+
+
+/// Supported argument types: int, float, double
+#define CV_CheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+
+/// Check with additional "decoding" of type values in error message
+#define CV_CheckTypeEQ(t1, t2, msg)  CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
+/// Check with additional "decoding" of depth values in error message
+#define CV_CheckDepthEQ(d1, d2, msg)  CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
+
+#define CV_CheckChannelsEQ(c1, c2, msg)  CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
+
+
+/// Example: type == CV_8UC1 || type == CV_8UC3
+#define CV_CheckType(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
+
+/// Example: depth == CV_32F || depth == CV_64F
+#define CV_CheckDepth(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
+
+/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
+// TODO define pretty-printers: #define CV_Check(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
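+
+/* Usage sketch (not part of the original header): on failure these macros raise cv::error
+   with a message that includes the decoded values.
+@code
+    void validate(const cv::Mat& m)
+    {
+        CV_CheckGE(m.rows, 2, "at least two rows are required");
+        CV_CheckChannelsEQ(m.channels(), 1, "single-channel input expected");
+        CV_CheckDepth(m.depth(), m.depth() == CV_32F || m.depth() == CV_64F,
+                      "unsupported input depth");
+    }
+@endcode
+*/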
+
+} // namespace
+
+#endif // OPENCV_CORE_CHECK_HPP

+ 48 - 0
ThresholdTools/include/opencv2/core/core.hpp

@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/core.hpp"

+ 3175 - 0
ThresholdTools/include/opencv2/core/core_c.h

@@ -0,0 +1,3175 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef OPENCV_CORE_C_H
+#define OPENCV_CORE_C_H
+
+#include "opencv2/core/types_c.h"
+
+#ifdef __cplusplus
+#  ifdef _MSC_VER
+/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename'
+                          which is incompatible with C
+
+   It is OK to disable it because we only extend a few plain structures with
+   C++ constructors for simpler interoperability with the C++ API of the library
+*/
+#    pragma warning(disable:4190)
+#  elif defined __clang__ && __clang_major__ >= 3
+#    pragma GCC diagnostic ignored "-Wreturn-type-c-linkage"
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @addtogroup core_c
+    @{
+*/
+
+/****************************************************************************************\
+*          Array allocation, deallocation, initialization and access to elements         *
+\****************************************************************************************/
+
+/** `malloc` wrapper.
+   If there is not enough memory, the function
+   (as well as other OpenCV functions that call cvAlloc)
+   raises an error. */
+CVAPI(void*)  cvAlloc( size_t size );
+
+/** `free` wrapper.
+   Here and further, all the memory releasing functions
+   (which all call cvFree) take a double pointer in order
+   to clear the pointer to the data after releasing it.
+   Passing a pointer to a NULL pointer is OK: nothing happens in this case
+*/
+CVAPI(void)   cvFree_( void* ptr );
+#define cvFree(ptr) (cvFree_(*(ptr)), *(ptr)=0)
+
+/** @brief Creates an image header but does not allocate the image data.
+
+@param size Image width and height
+@param depth Image depth (see cvCreateImage )
+@param channels Number of channels (see cvCreateImage )
+ */
+CVAPI(IplImage*)  cvCreateImageHeader( CvSize size, int depth, int channels );
+
+/** @brief Initializes an image header that was previously allocated.
+
+The returned IplImage\* points to the initialized header.
+@param image Image header to initialize
+@param size Image width and height
+@param depth Image depth (see cvCreateImage )
+@param channels Number of channels (see cvCreateImage )
+@param origin Top-left IPL_ORIGIN_TL or bottom-left IPL_ORIGIN_BL
+@param align Alignment for image rows, typically 4 or 8 bytes
+ */
+CVAPI(IplImage*) cvInitImageHeader( IplImage* image, CvSize size, int depth,
+                                   int channels, int origin CV_DEFAULT(0),
+                                   int align CV_DEFAULT(4));
+
+/** @brief Creates an image header and allocates the image data.
+
+This function call is equivalent to the following code:
+@code
+    header = cvCreateImageHeader(size, depth, channels);
+    cvCreateData(header);
+@endcode
+@param size Image width and height
+@param depth Bit depth of image elements. See IplImage for valid depths.
+@param channels Number of channels per pixel. See IplImage for details. This function only creates
+images with interleaved channels.
+ */
+CVAPI(IplImage*)  cvCreateImage( CvSize size, int depth, int channels );
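+
+/* A usage sketch (not part of the original header) of the create/release pair:
+@code
+    IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3);
+    // ... use the image ...
+    cvReleaseImage(&img);   // deallocates and clears the pointer
+@endcode
+*/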
+
+/** @brief Deallocates an image header.
+
+This call is an analogue of :
+@code
+    if(image )
+    {
+        iplDeallocate(*image, IPL_IMAGE_HEADER | IPL_IMAGE_ROI);
+        *image = 0;
+    }
+@endcode
+but it does not use IPL functions by default (see the CV_TURN_ON_IPL_COMPATIBILITY macro).
+@param image Double pointer to the image header
+ */
+CVAPI(void)  cvReleaseImageHeader( IplImage** image );
+
+/** @brief Deallocates the image header and the image data.
+
+This call is a shortened form of :
+@code
+    if(*image )
+    {
+        cvReleaseData(*image);
+        cvReleaseImageHeader(image);
+    }
+@endcode
+@param image Double pointer to the image header
+*/
+CVAPI(void)  cvReleaseImage( IplImage** image );
+
+/** Creates a copy of IPL image (widthStep may differ) */
+CVAPI(IplImage*) cvCloneImage( const IplImage* image );
+
+/** @brief Sets the channel of interest in an IplImage.
+
+If the ROI is set to NULL and the coi is *not* 0, the ROI is allocated. Most OpenCV functions do
+*not* support the COI setting, so to process an individual image/matrix channel one may copy (via
+cvCopy or cvSplit) the channel to a separate image/matrix, process it and then copy the result
+back (via cvCopy or cvMerge) if needed.
+@param image A pointer to the image header
+@param coi The channel of interest. 0 - all channels are selected, 1 - first channel is selected,
+etc. Note that the channel indices become 1-based.
+ */
+CVAPI(void)  cvSetImageCOI( IplImage* image, int coi );
+
+/** @brief Returns the index of the channel of interest.
+
+Returns the channel of interest in an IplImage. Returned values correspond to the coi in
+cvSetImageCOI.
+@param image A pointer to the image header
+ */
+CVAPI(int)  cvGetImageCOI( const IplImage* image );
+
+/** @brief Sets an image Region Of Interest (ROI) for a given rectangle.
+
+If the original image ROI was NULL and the rect is not the whole image, the ROI structure is
+allocated.
+
+Most OpenCV functions support the use of ROI and treat the image rectangle as a separate image. For
+example, all of the pixel coordinates are counted from the top-left (or bottom-left) corner of the
+ROI, not the original image.
+@param image A pointer to the image header
+@param rect The ROI rectangle
+ */
+CVAPI(void)  cvSetImageROI( IplImage* image, CvRect rect );
+
+/** @brief Resets the image ROI to include the entire image and releases the ROI structure.
+
+This produces a similar result to the following, but in addition it releases the ROI structure. :
+@code
+    cvSetImageROI(image, cvRect(0, 0, image->width, image->height ));
+    cvSetImageCOI(image, 0);
+@endcode
+@param image A pointer to the image header
+ */
+CVAPI(void)  cvResetImageROI( IplImage* image );
+
+/** @brief Returns the image ROI.
+
+If there is no ROI set, cvRect(0,0,image-\>width,image-\>height) is returned.
+@param image A pointer to the image header
+ */
+CVAPI(CvRect) cvGetImageROI( const IplImage* image );
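+
+/* A usage sketch (not part of the original header): while a ROI is set, most functions
+   operate on the sub-rectangle only. Given an IplImage* img:
+@code
+    cvSetImageROI(img, cvRect(10, 10, 100, 100));
+    cvSet(img, cvScalar(255, 0, 0));   // fills only the 100x100 region
+    cvResetImageROI(img);              // back to the whole image
+@endcode
+*/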
+
+/** @brief Creates a matrix header but does not allocate the matrix data.
+
+The function allocates a new matrix header and returns a pointer to it. The matrix data can then be
+allocated using cvCreateData or set explicitly to user-allocated data via cvSetData.
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type Type of the matrix elements, see cvCreateMat
+ */
+CVAPI(CvMat*)  cvCreateMatHeader( int rows, int cols, int type );
+
+#define CV_AUTOSTEP  0x7fffffff
+
+/** @brief Initializes a pre-allocated matrix header.
+
+This function is often used to process raw data with OpenCV matrix functions. For example, the
+following code computes the matrix product of two matrices, stored as ordinary arrays:
+@code
+    double a[] = { 1, 2, 3, 4,
+                   5, 6, 7, 8,
+                   9, 10, 11, 12 };
+
+    double b[] = { 1, 5, 9,
+                   2, 6, 10,
+                   3, 7, 11,
+                   4, 8, 12 };
+
+    double c[9];
+    CvMat Ma, Mb, Mc ;
+
+    cvInitMatHeader(&Ma, 3, 4, CV_64FC1, a);
+    cvInitMatHeader(&Mb, 4, 3, CV_64FC1, b);
+    cvInitMatHeader(&Mc, 3, 3, CV_64FC1, c);
+
+    cvMatMulAdd(&Ma, &Mb, 0, &Mc);
+    // the c array now contains the product of a (3x4) and b (4x3)
+@endcode
+@param mat A pointer to the matrix header to be initialized
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type Type of the matrix elements, see cvCreateMat .
+@param data Optional: data pointer assigned to the matrix header
+@param step Optional: full row width in bytes of the assigned data. By default, the minimal
+possible step is used which assumes there are no gaps between subsequent rows of the matrix.
+ */
+CVAPI(CvMat*) cvInitMatHeader( CvMat* mat, int rows, int cols,
+                              int type, void* data CV_DEFAULT(NULL),
+                              int step CV_DEFAULT(CV_AUTOSTEP) );
+
+/** @brief Creates a matrix header and allocates the matrix data.
+
+The function call is equivalent to the following code:
+@code
+    CvMat* mat = cvCreateMatHeader(rows, cols, type);
+    cvCreateData(mat);
+@endcode
+@param rows Number of rows in the matrix
+@param cols Number of columns in the matrix
+@param type The type of the matrix elements in the form
+CV_\<bit depth\>\<S|U|F\>C\<number of channels\> , where S=signed, U=unsigned, F=float. For
+example, CV_8UC1 means the elements are 8-bit unsigned and there is 1 channel, and
+CV_32SC2 means the elements are 32-bit signed and there are 2 channels.
+ */
+CVAPI(CvMat*)  cvCreateMat( int rows, int cols, int type );
+
+/** @brief Deallocates a matrix.
+
+The function decrements the matrix data reference counter and deallocates matrix header. If the data
+reference counter is 0, it also deallocates the data. :
+@code
+    if(*mat )
+        cvDecRefData(*mat);
+    cvFree((void**)mat);
+@endcode
+@param mat Double pointer to the matrix
+ */
+CVAPI(void)  cvReleaseMat( CvMat** mat );
+
+/** @brief Decrements an array data reference counter.
+
+The function decrements the data reference counter in a CvMat or CvMatND if the reference counter
+pointer is not NULL. If the counter reaches zero, the data is deallocated. In the current
+implementation the reference counter is not NULL only if the data was allocated using the
+cvCreateData function. The counter will be NULL in other cases such as: external data was assigned
+to the header using cvSetData, header is part of a larger matrix or image, or the header was
+converted from an image or n-dimensional matrix header.
+@param arr Pointer to an array header
+ */
+CV_INLINE  void  cvDecRefData( CvArr* arr )
+{
+    if( CV_IS_MAT( arr ))
+    {
+        CvMat* mat = (CvMat*)arr;
+        mat->data.ptr = NULL;
+        if( mat->refcount != NULL && --*mat->refcount == 0 )
+            cvFree( &mat->refcount );
+        mat->refcount = NULL;
+    }
+    else if( CV_IS_MATND( arr ))
+    {
+        CvMatND* mat = (CvMatND*)arr;
+        mat->data.ptr = NULL;
+        if( mat->refcount != NULL && --*mat->refcount == 0 )
+            cvFree( &mat->refcount );
+        mat->refcount = NULL;
+    }
+}
+
+/** @brief Increments array data reference counter.
+
+The function increments CvMat or CvMatND data reference counter and returns the new counter value if
+the reference counter pointer is not NULL, otherwise it returns zero.
+@param arr Array header
+ */
+CV_INLINE  int  cvIncRefData( CvArr* arr )
+{
+    int refcount = 0;
+    if( CV_IS_MAT( arr ))
+    {
+        CvMat* mat = (CvMat*)arr;
+        if( mat->refcount != NULL )
+            refcount = ++*mat->refcount;
+    }
+    else if( CV_IS_MATND( arr ))
+    {
+        CvMatND* mat = (CvMatND*)arr;
+        if( mat->refcount != NULL )
+            refcount = ++*mat->refcount;
+    }
+    return refcount;
+}
+
+
+/** Creates an exact copy of the input matrix (except, possibly, the step value) */
+CVAPI(CvMat*) cvCloneMat( const CvMat* mat );
+
+
+/** @brief Returns matrix header corresponding to the rectangular sub-array of input image or matrix.
+
+The function returns the header corresponding to a specified rectangle of the input array. In other
+words, it allows the user to treat a rectangular part of the input array as a stand-alone array. ROI is
+taken into account by the function so the sub-array of ROI is actually extracted.
+@param arr Input array
+@param submat Pointer to the resultant sub-array header
+@param rect Zero-based coordinates of the rectangle of interest
+ */
+CVAPI(CvMat*) cvGetSubRect( const CvArr* arr, CvMat* submat, CvRect rect );
+#define cvGetSubArr cvGetSubRect
+
+/** @brief Returns array row or row span.
+
+The function returns the header, corresponding to a specified row/row span of the input array.
+cvGetRow(arr, submat, row) is a shortcut for cvGetRows(arr, submat, row, row+1).
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param start_row Zero-based index of the starting row (inclusive) of the span
+@param end_row Zero-based index of the ending row (exclusive) of the span
+@param delta_row Index step in the row span. That is, the function extracts every delta_row -th
+row from start_row and up to (but not including) end_row .
+ */
+CVAPI(CvMat*) cvGetRows( const CvArr* arr, CvMat* submat,
+                        int start_row, int end_row,
+                        int delta_row CV_DEFAULT(1));
+
+/** @overload
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param row Zero-based index of the selected row
+*/
+CV_INLINE  CvMat*  cvGetRow( const CvArr* arr, CvMat* submat, int row )
+{
+    return cvGetRows( arr, submat, row, row + 1, 1 );
+}
+
+
+/** @brief Returns one or more array columns.
+
+The function returns the header corresponding to a specified column span of the input array. That
+is, no data is copied. Therefore, any modifications of the submatrix will affect the original array.
+If you need to copy the columns, use cvCloneMat. cvGetCol(arr, submat, col) is a shortcut for
+cvGetCols(arr, submat, col, col+1).
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param start_col Zero-based index of the starting column (inclusive) of the span
+@param end_col Zero-based index of the ending column (exclusive) of the span
+ */
+CVAPI(CvMat*) cvGetCols( const CvArr* arr, CvMat* submat,
+                        int start_col, int end_col );
+
+/** @overload
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param col Zero-based index of the selected column
+*/
+CV_INLINE  CvMat*  cvGetCol( const CvArr* arr, CvMat* submat, int col )
+{
+    return cvGetCols( arr, submat, col, col + 1 );
+}
+
+/** @brief Returns one of array diagonals.
+
+The function returns the header, corresponding to a specified diagonal of the input array.
+@param arr Input array
+@param submat Pointer to the resulting sub-array header
+@param diag Index of the array diagonal. Zero value corresponds to the main diagonal, -1
+corresponds to the diagonal above the main, 1 corresponds to the diagonal below the main, and so
+forth.
+ */
+CVAPI(CvMat*) cvGetDiag( const CvArr* arr, CvMat* submat,
+                            int diag CV_DEFAULT(0));
+
+/** low-level scalar <-> raw data conversion functions */
+CVAPI(void) cvScalarToRawData( const CvScalar* scalar, void* data, int type,
+                              int extend_to_12 CV_DEFAULT(0) );
+
+CVAPI(void) cvRawDataToScalar( const void* data, int type, CvScalar* scalar );
+
+/** @brief Creates a new matrix header but does not allocate the matrix data.
+
+The function allocates a header for a multi-dimensional dense array. The array data can further be
+allocated using cvCreateData or set explicitly to user-allocated data via cvSetData.
+@param dims Number of array dimensions
+@param sizes Array of dimension sizes
+@param type Type of array elements, see cvCreateMat
+ */
+CVAPI(CvMatND*)  cvCreateMatNDHeader( int dims, const int* sizes, int type );
+
+/** @brief Creates the header and allocates the data for a multi-dimensional dense array.
+
+This function call is equivalent to the following code:
+@code
+    CvMatND* mat = cvCreateMatNDHeader(dims, sizes, type);
+    cvCreateData(mat);
+@endcode
+@param dims Number of array dimensions. This must not exceed CV_MAX_DIM (32 by default, but can be
+changed at build time).
+@param sizes Array of dimension sizes.
+@param type Type of array elements, see cvCreateMat .
+ */
+CVAPI(CvMatND*)  cvCreateMatND( int dims, const int* sizes, int type );
+
+/** @brief Initializes a pre-allocated multi-dimensional array header.
+
+@param mat A pointer to the array header to be initialized
+@param dims The number of array dimensions
+@param sizes An array of dimension sizes
+@param type Type of array elements, see cvCreateMat
+@param data Optional data pointer assigned to the matrix header
+ */
+CVAPI(CvMatND*)  cvInitMatNDHeader( CvMatND* mat, int dims, const int* sizes,
+                                    int type, void* data CV_DEFAULT(NULL) );
+
+/** @brief Deallocates a multi-dimensional array.
+
+The function decrements the array data reference counter and releases the array header. If the
+reference counter reaches 0, it also deallocates the data. :
+@code
+    if(*mat )
+        cvDecRefData(*mat);
+    cvFree((void**)mat);
+@endcode
+@param mat Double pointer to the array
+ */
+CV_INLINE  void  cvReleaseMatND( CvMatND** mat )
+{
+    cvReleaseMat( (CvMat**)mat );
+}
+
+/** Creates a copy of CvMatND (except, possibly, steps) */
+CVAPI(CvMatND*) cvCloneMatND( const CvMatND* mat );
+
+/** @brief Creates sparse array.
+
+The function allocates a multi-dimensional sparse array. Initially the array contains no elements,
+so cvPtrND and other related functions will return 0 for every index.
+@param dims Number of array dimensions. In contrast to the dense matrix, the number of dimensions is
+practically unlimited (up to \f$2^{16}\f$ ).
+@param sizes Array of dimension sizes
+@param type Type of array elements. The same as for CvMat
+ */
+CVAPI(CvSparseMat*)  cvCreateSparseMat( int dims, const int* sizes, int type );
+
+/** @brief Deallocates sparse array.
+
+The function releases the sparse array and clears the array pointer upon exit.
+@param mat Double pointer to the array
+ */
+CVAPI(void)  cvReleaseSparseMat( CvSparseMat** mat );
+
+/** Creates a copy of CvSparseMat (except, possibly, zero items) */
+CVAPI(CvSparseMat*) cvCloneSparseMat( const CvSparseMat* mat );
+
+/** @brief Initializes sparse array elements iterator.
+
+The function initializes iterator of sparse array elements and returns pointer to the first element,
+or NULL if the array is empty.
+@param mat Input array
+@param mat_iterator Initialized iterator
+ */
+CVAPI(CvSparseNode*) cvInitSparseMatIterator( const CvSparseMat* mat,
+                                              CvSparseMatIterator* mat_iterator );
+
+/** @brief Returns the next sparse matrix element
+
+The function moves the iterator to the next sparse matrix element and returns a pointer to it. In the
+current version there is no particular order of the elements, because they are stored in the
+hash table. The sample below demonstrates how to iterate through the sparse matrix:
+@code
+    // print all the non-zero sparse matrix elements and compute their sum
+    double sum = 0;
+    int i, dims = cvGetDims(sparsemat);
+    CvSparseMatIterator it;
+    CvSparseNode* node = cvInitSparseMatIterator(sparsemat, &it);
+
+    for(; node != 0; node = cvGetNextSparseNode(&it))
+    {
+        int* idx = CV_NODE_IDX(sparsemat, node);
+        float val = *(float*)CV_NODE_VAL(sparsemat, node);
+        printf("M");
+        for(i = 0; i < dims; i++ )
+            printf("[%d]", idx[i]);
+        printf("=%g\n", val);
+
+        sum += val;
+    }
+
+    printf("nTotal sum = %g\n", sum);
+@endcode
+@param mat_iterator Sparse array iterator
+ */
+CV_INLINE CvSparseNode* cvGetNextSparseNode( CvSparseMatIterator* mat_iterator )
+{
+    if( mat_iterator->node->next )
+        return mat_iterator->node = mat_iterator->node->next;
+    else
+    {
+        int idx;
+        for( idx = ++mat_iterator->curidx; idx < mat_iterator->mat->hashsize; idx++ )
+        {
+            CvSparseNode* node = (CvSparseNode*)mat_iterator->mat->hashtable[idx];
+            if( node )
+            {
+                mat_iterator->curidx = idx;
+                return mat_iterator->node = node;
+            }
+        }
+        return NULL;
+    }
+}
+
+
+#define CV_MAX_ARR 10
+
+/** matrix iterator: used for n-ary operations on dense arrays */
+typedef struct CvNArrayIterator
+{
+    int count; /**< number of arrays */
+    int dims; /**< number of dimensions to iterate */
+    CvSize size; /**< maximal common linear size: { width = size, height = 1 } */
+    uchar* ptr[CV_MAX_ARR]; /**< pointers to the array slices */
+    int stack[CV_MAX_DIM]; /**< for internal use */
+    CvMatND* hdr[CV_MAX_ARR]; /**< pointers to the headers of the
+                                 matrices that are processed */
+}
+CvNArrayIterator;
+
+#define CV_NO_DEPTH_CHECK     1
+#define CV_NO_CN_CHECK        2
+#define CV_NO_SIZE_CHECK      4
+
+/** initializes an iterator that traverses several arrays simultaneously
+   (the function together with cvNextNArraySlice is used for
+    N-ary element-wise operations) */
+CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs,
+                                 const CvArr* mask, CvMatND* stubs,
+                                 CvNArrayIterator* array_iterator,
+                                 int flags CV_DEFAULT(0) );
+
+/** returns zero value if iteration is finished, non-zero (slice length) otherwise */
+CVAPI(int) cvNextNArraySlice( CvNArrayIterator* array_iterator );
+
+
+/** @brief Returns type of array elements.
+
+The function returns type of the array elements. In the case of IplImage the type is converted to
+CvMat-like representation. For example, if the image has been created as:
+@code
+    IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3);
+@endcode
+The code cvGetElemType(img) will return CV_8UC3.
+@param arr Input array
+ */
+CVAPI(int) cvGetElemType( const CvArr* arr );
+
+/** @brief Return number of array dimensions
+
+The function returns the array dimensionality and the array of dimension sizes. In the case of
+IplImage or CvMat it always returns 2 regardless of number of image/matrix rows. For example, the
+following code calculates the total number of array elements:
+@code
+    int sizes[CV_MAX_DIM];
+    int i, total = 1;
+    int dims = cvGetDims(arr, sizes);
+    for(i = 0; i < dims; i++ )
+        total *= sizes[i];
+@endcode
+@param arr Input array
+@param sizes Optional output vector of the array dimension sizes. For 2d arrays the number of rows
+(height) goes first, number of columns (width) next.
+ */
+CVAPI(int) cvGetDims( const CvArr* arr, int* sizes CV_DEFAULT(NULL) );
+
+
+/** @brief Returns array size along the specified dimension.
+
+@param arr Input array
+@param index Zero-based dimension index (for matrices 0 means number of rows, 1 means number of
+columns; for images 0 means height, 1 means width)
+ */
+CVAPI(int) cvGetDimSize( const CvArr* arr, int index );
+
+
+/** @brief Return pointer to a particular array element.
+
+The functions return a pointer to a specific array element. The number of array dimensions should
+match the number of indices passed to the function, except for the cvPtr1D function, which can be
+used for sequential access to 1D, 2D or nD dense arrays.
+
+The functions can be used for sparse arrays as well - if the requested node does not exist they
+create it and set it to zero.
+
+All these as well as other functions accessing array elements ( cvGetND , cvGetRealND , cvSet
+, cvSetND , cvSetRealND ) raise an error if the element index is out of range.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param type Optional output parameter: type of matrix elements
+ */
+CVAPI(uchar*) cvPtr1D( const CvArr* arr, int idx0, int* type CV_DEFAULT(NULL));
+/** @overload */
+CVAPI(uchar*) cvPtr2D( const CvArr* arr, int idx0, int idx1, int* type CV_DEFAULT(NULL) );
+/** @overload */
+CVAPI(uchar*) cvPtr3D( const CvArr* arr, int idx0, int idx1, int idx2,
+                      int* type CV_DEFAULT(NULL));
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param type Optional output parameter: type of matrix elements
+@param create_node Optional input parameter for sparse matrices. Non-zero value of the parameter
+means that the requested element is created if it does not exist already.
+@param precalc_hashval Optional input parameter for sparse matrices. If the pointer is not NULL,
+the function does not recalculate the node hash value, but takes it from the specified location.
+It is useful for speeding up pair-wise operations (TODO: provide an example)
+*/
+CVAPI(uchar*) cvPtrND( const CvArr* arr, const int* idx, int* type CV_DEFAULT(NULL),
+                      int create_node CV_DEFAULT(1),
+                      unsigned* precalc_hashval CV_DEFAULT(NULL));
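+
+/* Usage sketch (not part of the original header): direct element access through cvPtr2D;
+   the returned pointer addresses the first channel of the pixel.
+@code
+    IplImage* img = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 3);
+    uchar* p = cvPtr2D(img, 120, 160);   // row 120, column 160
+    p[0] = 255; p[1] = 0; p[2] = 0;      // set the pixel to blue (BGR)
+    cvReleaseImage(&img);
+@endcode
+*/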
+
+/** @brief Return a specific array element.
+
+The functions return a specific array element. In the case of a sparse array the functions return 0
+if the requested node does not exist (no new node is created by the functions).
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+ */
+CVAPI(CvScalar) cvGet1D( const CvArr* arr, int idx0 );
+/** @overload */
+CVAPI(CvScalar) cvGet2D( const CvArr* arr, int idx0, int idx1 );
+/** @overload */
+CVAPI(CvScalar) cvGet3D( const CvArr* arr, int idx0, int idx1, int idx2 );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+*/
+CVAPI(CvScalar) cvGetND( const CvArr* arr, const int* idx );
+
+/** @brief Return a specific element of single-channel 1D, 2D, 3D or nD array.
+
+Returns a specific element of a single-channel array. If the array has multiple channels, a runtime
+error is raised. Note that the Get?D functions can be used safely for both single-channel and
+multiple-channel arrays, though they are a bit slower.
+
+In the case of a sparse array the functions return 0 if the requested node does not exist (no new
+node is created by the functions).
+@param arr Input array. Must have a single channel.
+@param idx0 The first zero-based component of the element index
+ */
+CVAPI(double) cvGetReal1D( const CvArr* arr, int idx0 );
+/** @overload */
+CVAPI(double) cvGetReal2D( const CvArr* arr, int idx0, int idx1 );
+/** @overload */
+CVAPI(double) cvGetReal3D( const CvArr* arr, int idx0, int idx1, int idx2 );
+/** @overload
+@param arr Input array. Must have a single channel.
+@param idx Array of the element indices
+*/
+CVAPI(double) cvGetRealND( const CvArr* arr, const int* idx );
+
+/** @brief Change the particular array element.
+
+The functions assign the new value to a particular array element. In the case of a sparse array the
+functions create the node if it does not exist yet.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param value The assigned value
+ */
+CVAPI(void) cvSet1D( CvArr* arr, int idx0, CvScalar value );
+/** @overload */
+CVAPI(void) cvSet2D( CvArr* arr, int idx0, int idx1, CvScalar value );
+/** @overload */
+CVAPI(void) cvSet3D( CvArr* arr, int idx0, int idx1, int idx2, CvScalar value );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param value The assigned value
+*/
+CVAPI(void) cvSetND( CvArr* arr, const int* idx, CvScalar value );
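+
+/* A minimal sketch of element access with the cvGet*D/cvSet*D family;
+   the matrix size and values here are arbitrary:
+@code
+    CvMat* m = cvCreateMat(3, 3, CV_32FC1);
+    cvSetZero(m);
+    cvSet2D(m, 1, 2, cvRealScalar(42));        // row 1, column 2
+    CvScalar s = cvGet2D(m, 1, 2);             // s.val[0] == 42
+    double   d = cvGetReal2D(m, 1, 2);         // single-channel shortcut
+    cvReleaseMat(&m);
+@endcode
+*/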
+
+/** @brief Change a specific array element.
+
+The functions assign a new value to a specific element of a single-channel array. If the array has
+multiple channels, a runtime error is raised. Note that the Set\*D functions can be used safely for
+both single-channel and multiple-channel arrays, though they are a bit slower.
+
+In the case of a sparse array the functions create the node if it does not yet exist.
+@param arr Input array
+@param idx0 The first zero-based component of the element index
+@param value The assigned value
+ */
+CVAPI(void) cvSetReal1D( CvArr* arr, int idx0, double value );
+/** @overload */
+CVAPI(void) cvSetReal2D( CvArr* arr, int idx0, int idx1, double value );
+/** @overload */
+CVAPI(void) cvSetReal3D( CvArr* arr, int idx0,
+                        int idx1, int idx2, double value );
+/** @overload
+@param arr Input array
+@param idx Array of the element indices
+@param value The assigned value
+*/
+CVAPI(void) cvSetRealND( CvArr* arr, const int* idx, double value );
+
+/** Clears an element of an ND dense array;
+   in the case of sparse arrays it deletes the specified node */
+CVAPI(void) cvClearND( CvArr* arr, const int* idx );
+
+/** @brief Returns matrix header for arbitrary array.
+
+The function returns a matrix header for the input array that can be a matrix - CvMat, an image -
+IplImage, or a multi-dimensional dense array - CvMatND (the third option is allowed only if
+allowND != 0) . In the case of matrix the function simply returns the input pointer. In the case of
+IplImage\* or CvMatND it initializes the header structure with parameters of the current image ROI
+and returns &header. Because COI is not supported by CvMat, it is returned separately.
+
+The function provides an easy way to handle both types of arrays - IplImage and CvMat using the same
+code. Input array must have non-zero data pointer, otherwise the function will report an error.
+
+@note If the input array is IplImage with planar data layout and COI set, the function returns the
+pointer to the selected plane and COI == 0. This feature allows the user to process IplImage structures
+with planar data layout, even though OpenCV does not support such images.
+@param arr Input array
+@param header Pointer to CvMat structure used as a temporary buffer
+@param coi Optional output parameter for storing COI
+@param allowND If non-zero, the function accepts multi-dimensional dense arrays (CvMatND\*) and
+returns 2D matrix (if CvMatND has two dimensions) or 1D matrix (when CvMatND has 1 dimension or
+more than 2 dimensions). The CvMatND array must be continuous.
+@sa cvGetImage, cvarrToMat.
+ */
+CVAPI(CvMat*) cvGetMat( const CvArr* arr, CvMat* header,
+                       int* coi CV_DEFAULT(NULL),
+                       int allowND CV_DEFAULT(0));
+
+/** @brief Returns image header for arbitrary array.
+
+The function returns the image header for the input array that can be a matrix (CvMat) or image
+(IplImage). In the case of an image the function simply returns the input pointer. In the case of
+CvMat it initializes an image_header structure with the parameters of the input matrix. Note that
+if we transform IplImage to CvMat using cvGetMat and then transform CvMat back to IplImage using
+this function, we will get different headers if the ROI is set in the original image.
+@param arr Input array
+@param image_header Pointer to IplImage structure used as a temporary buffer
+ */
+CVAPI(IplImage*) cvGetImage( const CvArr* arr, IplImage* image_header );
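+
+/* A minimal sketch of switching between IplImage and CvMat views of the
+   same data with cvGetMat/cvGetImage (the image size is arbitrary):
+@code
+    IplImage* img = cvCreateImage(cvSize(320, 240), IPL_DEPTH_8U, 1);
+    CvMat mat_hdr;
+    CvMat* mat = cvGetMat(img, &mat_hdr, NULL, 0);   // matrix view of img
+    IplImage img_hdr;
+    IplImage* img2 = cvGetImage(mat, &img_hdr);      // image view again
+    // note: mat and img2 only reference img's data
+    cvReleaseImage(&img);
+@endcode
+*/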
+
+
+/** @brief Changes the shape of a multi-dimensional array without copying the data.
+
+The function is an advanced version of cvReshape that can work with multi-dimensional arrays as
+well (though it can also work with ordinary images and matrices) and change the number of dimensions.
+
+Below are the two samples from the cvReshape description rewritten using cvReshapeMatND:
+@code
+    IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3);
+    IplImage gray_img_hdr, *gray_img;
+    gray_img = (IplImage*)cvReshapeMatND(color_img, sizeof(gray_img_hdr), &gray_img_hdr, 1, 0, 0);
+    ...
+    int size[] = { 2, 2, 2 };
+    CvMatND* mat = cvCreateMatND(3, size, CV_32F);
+    CvMat row_header, *row;
+    row = (CvMat*)cvReshapeMatND(mat, sizeof(row_header), &row_header, 0, 1, 0);
+@endcode
+In C, the header file for this function includes a convenient macro cvReshapeND that does away with
+the sizeof_header parameter. So, the lines containing the call to cvReshapeMatND in the examples
+may be replaced as follows:
+@code
+    gray_img = (IplImage*)cvReshapeND(color_img, &gray_img_hdr, 1, 0, 0);
+    ...
+    row = (CvMat*)cvReshapeND(mat, &row_header, 0, 1, 0);
+@endcode
+@param arr Input array
+@param sizeof_header Size of output header to distinguish between IplImage, CvMat and CvMatND
+output headers
+@param header Output header to be filled
+@param new_cn New number of channels. new_cn = 0 means that the number of channels remains
+unchanged.
+@param new_dims New number of dimensions. new_dims = 0 means that the number of dimensions
+remains the same.
+@param new_sizes Array of new dimension sizes. Only new_dims-1 values are used, because the
+total number of elements must remain the same. Thus, if new_dims = 1, new_sizes array is not
+used.
+ */
+CVAPI(CvArr*) cvReshapeMatND( const CvArr* arr,
+                             int sizeof_header, CvArr* header,
+                             int new_cn, int new_dims, int* new_sizes );
+
+#define cvReshapeND( arr, header, new_cn, new_dims, new_sizes )   \
+      cvReshapeMatND( (arr), sizeof(*(header)), (header),         \
+                      (new_cn), (new_dims), (new_sizes))
+
+/** @brief Changes shape of matrix/image without copying data.
+
+The function initializes the CvMat header so that it points to the same data as the original array
+but has a different shape - different number of channels, different number of rows, or both.
+
+The following example code creates one image buffer and two image headers, the first is for a
+320x240x3 image and the second is for a 960x240x1 image:
+@code
+    IplImage* color_img = cvCreateImage(cvSize(320,240), IPL_DEPTH_8U, 3);
+    CvMat gray_mat_hdr;
+    IplImage gray_img_hdr, *gray_img;
+    cvReshape(color_img, &gray_mat_hdr, 1);
+    gray_img = cvGetImage(&gray_mat_hdr, &gray_img_hdr);
+@endcode
+And the next example converts a 3x3 matrix to a single 1x9 vector:
+@code
+    CvMat* mat = cvCreateMat(3, 3, CV_32F);
+    CvMat row_header, *row;
+    row = cvReshape(mat, &row_header, 0, 1);
+@endcode
+@param arr Input array
+@param header Output header to be filled
+@param new_cn New number of channels. 'new_cn = 0' means that the number of channels remains
+unchanged.
+@param new_rows New number of rows. 'new_rows = 0' means that the number of rows remains
+unchanged unless it needs to be changed according to new_cn value.
+*/
+CVAPI(CvMat*) cvReshape( const CvArr* arr, CvMat* header,
+                        int new_cn, int new_rows CV_DEFAULT(0) );
+
+/** Repeats the source 2d array several times in both horizontal and
+   vertical directions to fill the destination array */
+CVAPI(void) cvRepeat( const CvArr* src, CvArr* dst );
+
+/** @brief Allocates array data
+
+The function allocates image, matrix or multi-dimensional dense array data. Note that in the case of
+matrix types OpenCV allocation functions are used. In the case of IplImage they are used unless
+CV_TURN_ON_IPL_COMPATIBILITY() has been called before. In the latter case IPL functions are used
+to allocate the data.
+@param arr Array header
+ */
+CVAPI(void)  cvCreateData( CvArr* arr );
+
+/** @brief Releases array data.
+
+The function releases the array data. In the case of CvMat or CvMatND it simply calls
+cvDecRefData(); that is, the function cannot deallocate external data. See also the note to
+cvCreateData .
+@param arr Array header
+ */
+CVAPI(void)  cvReleaseData( CvArr* arr );
+
+/** @brief Assigns user data to the array header.
+
+The function assigns user data to the array header. The header should be initialized beforehand
+with cvCreateMatHeader, cvCreateImageHeader, cvCreateMatNDHeader, cvInitMatHeader,
+cvInitImageHeader or cvInitMatNDHeader.
+@param arr Array header
+@param data User data
+@param step Full row length in bytes
+ */
+CVAPI(void)  cvSetData( CvArr* arr, void* data, int step );
+
+/** @brief Retrieves low-level information about the array.
+
+The function fills output variables with low-level information about the array data. All output
+parameters are optional, so some of the pointers may be set to NULL. If the array is IplImage with
+ROI set, the parameters of ROI are returned.
+
+The following example shows how to get access to array elements. It computes absolute values of the
+array elements:
+@code
+    float* data;
+    int step;
+    CvSize size;
+
+    cvGetRawData(array, (uchar**)&data, &step, &size);
+    step /= sizeof(data[0]);
+
+    for(int y = 0; y < size.height; y++, data += step )
+        for(int x = 0; x < size.width; x++ )
+            data[x] = (float)fabs(data[x]);
+@endcode
+@param arr Array header
+@param data Output pointer to the whole image origin or ROI origin if ROI is set
+@param step Output full row length in bytes
+@param roi_size Output ROI size
+ */
+CVAPI(void) cvGetRawData( const CvArr* arr, uchar** data,
+                         int* step CV_DEFAULT(NULL),
+                         CvSize* roi_size CV_DEFAULT(NULL));
+
+/** @brief Returns size of matrix or image ROI.
+
+The function returns the number of rows (CvSize::height) and number of columns (CvSize::width) of the
+input matrix or image. In the case of an image the size of the ROI is returned.
+@param arr array header
+ */
+CVAPI(CvSize) cvGetSize( const CvArr* arr );
+
+/** @brief Copies one array to another.
+
+The function copies selected elements from an input array to an output array:
+
+\f[\texttt{dst} (I)= \texttt{src} (I)  \quad \text{if} \quad \texttt{mask} (I)  \ne 0.\f]
+
+If any of the passed arrays is of IplImage type, then its ROI and COI fields are used. Both arrays
+must have the same type, the same number of dimensions, and the same size. The function can also
+copy sparse arrays (mask is not supported in this case).
+@param src The source array
+@param dst The destination array
+@param mask Operation mask, 8-bit single channel array; specifies elements of the destination array
+to be changed
+ */
+CVAPI(void)  cvCopy( const CvArr* src, CvArr* dst,
+                     const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @brief Sets every element of an array to a given value.
+
+The function copies the scalar value to every selected element of the destination array:
+\f[\texttt{arr} (I)= \texttt{value} \quad \text{if} \quad \texttt{mask} (I)  \ne 0\f]
+If the array arr is of IplImage type, then its ROI is used, but COI must not be set.
+@param arr The destination array
+@param value Fill value
+@param mask Operation mask, 8-bit single channel array; specifies elements of the destination
+array to be changed
+ */
+CVAPI(void)  cvSet( CvArr* arr, CvScalar value,
+                    const CvArr* mask CV_DEFAULT(NULL) );
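+
+/* A minimal sketch of masked cvSet/cvCopy; sizes and values are arbitrary:
+@code
+    IplImage* src  = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    IplImage* dst  = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    IplImage* mask = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    cvSet(src, cvScalarAll(200), NULL);
+    cvSetZero(dst);
+    cvSetZero(mask);
+    cvSetImageROI(mask, cvRect(16, 16, 32, 32));
+    cvSet(mask, cvScalarAll(255), NULL);    // mark the central block
+    cvResetImageROI(mask);
+    cvCopy(src, dst, mask);                 // copies only where mask != 0
+    cvReleaseImage(&src); cvReleaseImage(&dst); cvReleaseImage(&mask);
+@endcode
+*/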
+
+/** @brief Clears the array.
+
+The function clears the array. In the case of dense arrays (CvMat, CvMatND or IplImage),
+cvZero(array) is equivalent to cvSet(array,cvScalarAll(0),0). In the case of sparse arrays all the
+elements are removed.
+@param arr Array to be cleared
+ */
+CVAPI(void)  cvSetZero( CvArr* arr );
+#define cvZero  cvSetZero
+
+
+/** Splits a multi-channel array into the set of single-channel arrays or
+   extracts particular [color] plane */
+CVAPI(void)  cvSplit( const CvArr* src, CvArr* dst0, CvArr* dst1,
+                      CvArr* dst2, CvArr* dst3 );
+
+/** Merges a set of single-channel arrays into the single multi-channel array
+   or inserts one particular [color] plane to the array */
+CVAPI(void)  cvMerge( const CvArr* src0, const CvArr* src1,
+                      const CvArr* src2, const CvArr* src3,
+                      CvArr* dst );
+
+/** Copies several channels from input arrays to
+   certain channels of output arrays */
+CVAPI(void)  cvMixChannels( const CvArr** src, int src_count,
+                            CvArr** dst, int dst_count,
+                            const int* from_to, int pair_count );
+
+/** @brief Converts one array to another with optional linear transformation.
+
+The function has several different purposes, and thus has several different names. It copies one
+array to another with optional scaling, which is performed first, and/or optional type conversion,
+performed after:
+
+\f[\texttt{dst} (I) =  \texttt{scale} \cdot \texttt{src} (I) + ( \texttt{shift} _0, \texttt{shift} _1,...)\f]
+
+All the channels of multi-channel arrays are processed independently.
+
+The conversion is done with rounding and saturation; that is, if the result of scaling +
+conversion cannot be represented exactly by a value of the destination array element type, it is
+set to the nearest representable value on the real axis.
+@param src Source array
+@param dst Destination array
+@param scale Scale factor
+@param shift Value added to the scaled source array elements
+ */
+CVAPI(void)  cvConvertScale( const CvArr* src, CvArr* dst,
+                             double scale CV_DEFAULT(1),
+                             double shift CV_DEFAULT(0) );
+#define cvCvtScale cvConvertScale
+#define cvScale  cvConvertScale
+#define cvConvert( src, dst )  cvConvertScale( (src), (dst), 1, 0 )
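+
+/* A minimal sketch of type conversion with scaling via cvConvertScale,
+   mapping 8-bit [0,255] to float [0,1] (sizes and values are arbitrary):
+@code
+    IplImage* u8  = cvCreateImage(cvSize(32, 32), IPL_DEPTH_8U,  1);
+    IplImage* f32 = cvCreateImage(cvSize(32, 32), IPL_DEPTH_32F, 1);
+    cvSet(u8, cvScalarAll(128), NULL);
+    cvConvertScale(u8, f32, 1.0 / 255.0, 0);   // f32(I) = u8(I)/255
+    cvReleaseImage(&u8); cvReleaseImage(&f32);
+@endcode
+*/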
+
+
+/** Performs a linear transformation on every source array element and
+   stores the absolute value of the result:
+   dst(x,y,c) = abs(scale*src(x,y,c)+shift).
+   The destination array must have 8u type.
+   In other cases one may use cvConvertScale + cvAbsDiffS */
+CVAPI(void)  cvConvertScaleAbs( const CvArr* src, CvArr* dst,
+                                double scale CV_DEFAULT(1),
+                                double shift CV_DEFAULT(0) );
+#define cvCvtScaleAbs  cvConvertScaleAbs
+
+
+/** checks termination criteria validity and
+   sets eps to default_eps (if it is not set),
+   max_iter to default_max_iters (if it is not set)
+*/
+CVAPI(CvTermCriteria) cvCheckTermCriteria( CvTermCriteria criteria,
+                                           double default_eps,
+                                           int default_max_iters );
+
+/****************************************************************************************\
+*                   Arithmetic, logic and comparison operations                          *
+\****************************************************************************************/
+
+/** dst(mask) = src1(mask) + src2(mask) */
+CVAPI(void)  cvAdd( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                    const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src(mask) + value */
+CVAPI(void)  cvAddS( const CvArr* src, CvScalar value, CvArr* dst,
+                     const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src1(mask) - src2(mask) */
+CVAPI(void)  cvSub( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                    const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(mask) = src(mask) - value = src(mask) + (-value) */
+CV_INLINE  void  cvSubS( const CvArr* src, CvScalar value, CvArr* dst,
+                         const CvArr* mask CV_DEFAULT(NULL))
+{
+    cvAddS( src, cvScalar( -value.val[0], -value.val[1], -value.val[2], -value.val[3]),
+            dst, mask );
+}
+
+/** dst(mask) = value - src(mask) */
+CVAPI(void)  cvSubRS( const CvArr* src, CvScalar value, CvArr* dst,
+                      const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) * src2(idx) * scale
+   (scaled element-wise multiplication of 2 arrays) */
+CVAPI(void)  cvMul( const CvArr* src1, const CvArr* src2,
+                    CvArr* dst, double scale CV_DEFAULT(1) );
+
+/** element-wise division/inversion with scaling:
+    dst(idx) = src1(idx) * scale / src2(idx)
+    or dst(idx) = scale / src2(idx) if src1 == 0 */
+CVAPI(void)  cvDiv( const CvArr* src1, const CvArr* src2,
+                    CvArr* dst, double scale CV_DEFAULT(1));
+
+/** dst = src1 * scale + src2 */
+CVAPI(void)  cvScaleAdd( const CvArr* src1, CvScalar scale,
+                         const CvArr* src2, CvArr* dst );
+#define cvAXPY( A, real_scalar, B, C ) cvScaleAdd(A, cvRealScalar(real_scalar), B, C)
+
+/** dst = src1 * alpha + src2 * beta + gamma */
+CVAPI(void)  cvAddWeighted( const CvArr* src1, double alpha,
+                            const CvArr* src2, double beta,
+                            double gamma, CvArr* dst );
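+
+/* A minimal sketch of alpha blending with cvAddWeighted; the weights
+   and sizes are arbitrary:
+@code
+    IplImage* a   = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 3);
+    IplImage* b   = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 3);
+    IplImage* dst = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 3);
+    cvSet(a, cvScalarAll(50),  NULL);
+    cvSet(b, cvScalarAll(250), NULL);
+    cvAddWeighted(a, 0.7, b, 0.3, 0, dst);   // dst = 0.7*a + 0.3*b
+    cvReleaseImage(&a); cvReleaseImage(&b); cvReleaseImage(&dst);
+@endcode
+*/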
+
+/** @brief Calculates the dot product of two arrays in Euclidean metrics.
+
+The function calculates and returns the Euclidean dot product of two arrays.
+
+\f[src1  \bullet src2 =  \sum _I ( \texttt{src1} (I)  \texttt{src2} (I))\f]
+
+In the case of multiple channel arrays, the results for all channels are accumulated. In particular,
+cvDotProduct(a,a) where a is a complex vector, will return \f$||\texttt{a}||^2\f$. The function can
+process multi-dimensional arrays, row by row, layer by layer, and so on.
+@param src1 The first source array
+@param src2 The second source array
+ */
+CVAPI(double)  cvDotProduct( const CvArr* src1, const CvArr* src2 );
+
+/** dst(idx) = src1(idx) & src2(idx) */
+CVAPI(void) cvAnd( const CvArr* src1, const CvArr* src2,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) & value */
+CVAPI(void) cvAndS( const CvArr* src, CvScalar value,
+                   CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) | src2(idx) */
+CVAPI(void) cvOr( const CvArr* src1, const CvArr* src2,
+                 CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) | value */
+CVAPI(void) cvOrS( const CvArr* src, CvScalar value,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src1(idx) ^ src2(idx) */
+CVAPI(void) cvXor( const CvArr* src1, const CvArr* src2,
+                  CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = src(idx) ^ value */
+CVAPI(void) cvXorS( const CvArr* src, CvScalar value,
+                   CvArr* dst, const CvArr* mask CV_DEFAULT(NULL));
+
+/** dst(idx) = ~src(idx) */
+CVAPI(void) cvNot( const CvArr* src, CvArr* dst );
+
+/** dst(idx) = lower(idx) <= src(idx) < upper(idx) */
+CVAPI(void) cvInRange( const CvArr* src, const CvArr* lower,
+                      const CvArr* upper, CvArr* dst );
+
+/** dst(idx) = lower <= src(idx) < upper */
+CVAPI(void) cvInRangeS( const CvArr* src, CvScalar lower,
+                       CvScalar upper, CvArr* dst );
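+
+/* A minimal sketch of band thresholding with cvInRangeS: the destination
+   becomes 255 where lower <= src < upper and 0 elsewhere (the bounds and
+   sizes are arbitrary):
+@code
+    IplImage* gray = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    IplImage* bin  = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    cvSet(gray, cvScalarAll(150), NULL);
+    cvInRangeS(gray, cvScalarAll(100), cvScalarAll(200), bin);
+    cvReleaseImage(&gray); cvReleaseImage(&bin);
+@endcode
+*/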
+
+#define CV_CMP_EQ   0
+#define CV_CMP_GT   1
+#define CV_CMP_GE   2
+#define CV_CMP_LT   3
+#define CV_CMP_LE   4
+#define CV_CMP_NE   5
+
+/** The comparison operations support single-channel arrays only.
+   The destination image should be 8uC1 or 8sC1 */
+
+/** dst(idx) = src1(idx) _cmp_op_ src2(idx) */
+CVAPI(void) cvCmp( const CvArr* src1, const CvArr* src2, CvArr* dst, int cmp_op );
+
+/** dst(idx) = src1(idx) _cmp_op_ value */
+CVAPI(void) cvCmpS( const CvArr* src, double value, CvArr* dst, int cmp_op );
+
+/** dst(idx) = min(src1(idx),src2(idx)) */
+CVAPI(void) cvMin( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(idx) = max(src1(idx),src2(idx)) */
+CVAPI(void) cvMax( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(idx) = min(src(idx),value) */
+CVAPI(void) cvMinS( const CvArr* src, double value, CvArr* dst );
+
+/** dst(idx) = max(src(idx),value) */
+CVAPI(void) cvMaxS( const CvArr* src, double value, CvArr* dst );
+
+/** dst(x,y,c) = abs(src1(x,y,c) - src2(x,y,c)) */
+CVAPI(void) cvAbsDiff( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** dst(x,y,c) = abs(src(x,y,c) - value(c)) */
+CVAPI(void) cvAbsDiffS( const CvArr* src, CvArr* dst, CvScalar value );
+#define cvAbs( src, dst ) cvAbsDiffS( (src), (dst), cvScalarAll(0))
+
+/****************************************************************************************\
+*                                Math operations                                         *
+\****************************************************************************************/
+
+/** Does cartesian->polar coordinates conversion.
+   Either of output components (magnitude or angle) is optional */
+CVAPI(void)  cvCartToPolar( const CvArr* x, const CvArr* y,
+                            CvArr* magnitude, CvArr* angle CV_DEFAULT(NULL),
+                            int angle_in_degrees CV_DEFAULT(0));
+
+/** Does polar->cartesian coordinates conversion.
+   Either of output components (magnitude or angle) is optional.
+   If magnitude is missing it is assumed to be all 1's */
+CVAPI(void)  cvPolarToCart( const CvArr* magnitude, const CvArr* angle,
+                            CvArr* x, CvArr* y,
+                            int angle_in_degrees CV_DEFAULT(0));
+
+/** Does powering: dst(idx) = src(idx)^power */
+CVAPI(void)  cvPow( const CvArr* src, CvArr* dst, double power );
+
+/** Does exponentiation: dst(idx) = exp(src(idx)).
+   Overflow is not handled yet. Underflow is handled.
+   Maximal relative error is ~7e-6 for single-precision input */
+CVAPI(void)  cvExp( const CvArr* src, CvArr* dst );
+
+/** Calculates natural logarithms: dst(idx) = log(abs(src(idx))).
+   Logarithm of 0 gives a large negative number (~-700).
+   Maximal relative error is ~3e-7 for single-precision output
+*/
+CVAPI(void)  cvLog( const CvArr* src, CvArr* dst );
+
+/** Fast arctangent calculation */
+CVAPI(float) cvFastArctan( float y, float x );
+
+/** Fast cubic root calculation */
+CVAPI(float)  cvCbrt( float value );
+
+#define  CV_CHECK_RANGE    1
+#define  CV_CHECK_QUIET    2
+/** Checks array values for NaNs, Infs or simply for too large numbers
+   (if CV_CHECK_RANGE is set). If CV_CHECK_QUIET is set,
+   no runtime error is raised (the function returns zero if "bad" values are found).
+   Otherwise cvError is called */
+CVAPI(int)  cvCheckArr( const CvArr* arr, int flags CV_DEFAULT(0),
+                        double min_val CV_DEFAULT(0), double max_val CV_DEFAULT(0));
+#define cvCheckArray cvCheckArr
+
+#define CV_RAND_UNI      0
+#define CV_RAND_NORMAL   1
+
+/** @brief Fills an array with random numbers and updates the RNG state.
+
+The function fills the destination array with uniformly or normally distributed random numbers.
+@param rng CvRNG state initialized by cvRNG
+@param arr The destination array
+@param dist_type Distribution type
+> -   **CV_RAND_UNI** uniform distribution
+> -   **CV_RAND_NORMAL** normal or Gaussian distribution
+@param param1 The first parameter of the distribution. In the case of a uniform distribution it is
+the inclusive lower boundary of the random numbers range. In the case of a normal distribution it
+is the mean value of the random numbers.
+@param param2 The second parameter of the distribution. In the case of a uniform distribution it
+is the exclusive upper boundary of the random numbers range. In the case of a normal distribution
+it is the standard deviation of the random numbers.
+@sa randu, randn, RNG::fill.
+ */
+CVAPI(void) cvRandArr( CvRNG* rng, CvArr* arr, int dist_type,
+                      CvScalar param1, CvScalar param2 );
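+
+/* A minimal sketch of filling a matrix with uniform random numbers;
+   the seed and range are arbitrary:
+@code
+    CvRNG rng = cvRNG(0x12345);
+    CvMat* m = cvCreateMat(4, 4, CV_32FC1);
+    cvRandArr(&rng, m, CV_RAND_UNI, cvRealScalar(0), cvRealScalar(1));
+    cvReleaseMat(&m);
+@endcode
+*/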
+
+CVAPI(void) cvRandShuffle( CvArr* mat, CvRNG* rng,
+                           double iter_factor CV_DEFAULT(1.));
+
+#define CV_SORT_EVERY_ROW 0
+#define CV_SORT_EVERY_COLUMN 1
+#define CV_SORT_ASCENDING 0
+#define CV_SORT_DESCENDING 16
+
+CVAPI(void) cvSort( const CvArr* src, CvArr* dst CV_DEFAULT(NULL),
+                    CvArr* idxmat CV_DEFAULT(NULL),
+                    int flags CV_DEFAULT(0));
+
+/** Finds real roots of a cubic equation */
+CVAPI(int) cvSolveCubic( const CvMat* coeffs, CvMat* roots );
+
+/** Finds all real and complex roots of a polynomial equation */
+CVAPI(void) cvSolvePoly(const CvMat* coeffs, CvMat *roots2,
+      int maxiter CV_DEFAULT(20), int fig CV_DEFAULT(100));
+
+/****************************************************************************************\
+*                                Matrix operations                                       *
+\****************************************************************************************/
+
+/** @brief Calculates the cross product of two 3D vectors.
+
+The function calculates the cross product of two 3D vectors:
+\f[\texttt{dst} =  \texttt{src1} \times \texttt{src2}\f]
+or:
+\f[\begin{array}{l} \texttt{dst} _1 =  \texttt{src1} _2  \texttt{src2} _3 -  \texttt{src1} _3  \texttt{src2} _2 \\ \texttt{dst} _2 =  \texttt{src1} _3  \texttt{src2} _1 -  \texttt{src1} _1  \texttt{src2} _3 \\ \texttt{dst} _3 =  \texttt{src1} _1  \texttt{src2} _2 -  \texttt{src1} _2  \texttt{src2} _1 \end{array}\f]
+@param src1 The first source vector
+@param src2 The second source vector
+@param dst The destination vector
+ */
+CVAPI(void)  cvCrossProduct( const CvArr* src1, const CvArr* src2, CvArr* dst );
+
+/** Matrix transform: dst = A*B + C, C is optional */
+#define cvMatMulAdd( src1, src2, src3, dst ) cvGEMM( (src1), (src2), 1., (src3), 1., (dst), 0 )
+#define cvMatMul( src1, src2, dst )  cvMatMulAdd( (src1), (src2), NULL, (dst))
+
+#define CV_GEMM_A_T 1
+#define CV_GEMM_B_T 2
+#define CV_GEMM_C_T 4
+/** Extended matrix transform:
+   dst = alpha*op(A)*op(B) + beta*op(C), where op(X) is X or X^T */
+CVAPI(void)  cvGEMM( const CvArr* src1, const CvArr* src2, double alpha,
+                     const CvArr* src3, double beta, CvArr* dst,
+                     int tABC CV_DEFAULT(0));
+#define cvMatMulAddEx cvGEMM
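+
+/* A minimal sketch of matrix multiplication via the cvMatMul macro and
+   cvGEMM; the matrix contents are arbitrary:
+@code
+    CvMat* A = cvCreateMat(2, 3, CV_32FC1);
+    CvMat* B = cvCreateMat(3, 2, CV_32FC1);
+    CvMat* C = cvCreateMat(2, 2, CV_32FC1);
+    cvSet(A, cvRealScalar(1), NULL);
+    cvSet(B, cvRealScalar(2), NULL);
+    cvMatMul(A, B, C);                    // C = A*B
+    cvGEMM(A, B, 2.0, NULL, 0.0, C, 0);   // C = 2*A*B (src3 may be NULL)
+    cvReleaseMat(&A); cvReleaseMat(&B); cvReleaseMat(&C);
+@endcode
+*/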
+
+/** Transforms each element of source array and stores
+   resultant vectors in destination array */
+CVAPI(void)  cvTransform( const CvArr* src, CvArr* dst,
+                          const CvMat* transmat,
+                          const CvMat* shiftvec CV_DEFAULT(NULL));
+#define cvMatMulAddS cvTransform
+
+/** Does perspective transform on every element of input array */
+CVAPI(void)  cvPerspectiveTransform( const CvArr* src, CvArr* dst,
+                                     const CvMat* mat );
+
+/** Calculates (A-delta)*(A-delta)^T (order=0) or (A-delta)^T*(A-delta) (order=1) */
+CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order,
+                             const CvArr* delta CV_DEFAULT(NULL),
+                             double scale CV_DEFAULT(1.) );
+
+/** Transposes matrix. Square matrices can be transposed in-place */
+CVAPI(void)  cvTranspose( const CvArr* src, CvArr* dst );
+#define cvT cvTranspose
+
+/** Completes the symmetric matrix from the lower (LtoR=0) or from the upper (LtoR!=0) part */
+CVAPI(void)  cvCompleteSymm( CvMat* matrix, int LtoR CV_DEFAULT(0) );
+
+/** Mirrors array data around the horizontal (flip=0),
+   vertical (flip=1) or both (flip=-1) axes:
+   cvFlip(src) flips images vertically and sequences horizontally (in place) */
+CVAPI(void)  cvFlip( const CvArr* src, CvArr* dst CV_DEFAULT(NULL),
+                     int flip_mode CV_DEFAULT(0));
+#define cvMirror cvFlip
+
+
+#define CV_SVD_MODIFY_A   1
+#define CV_SVD_U_T        2
+#define CV_SVD_V_T        4
+
+/** Performs Singular Value Decomposition of a matrix */
+CVAPI(void)   cvSVD( CvArr* A, CvArr* W, CvArr* U CV_DEFAULT(NULL),
+                     CvArr* V CV_DEFAULT(NULL), int flags CV_DEFAULT(0));
+
+/** Performs Singular Value Back Substitution (solves A*X = B):
+   flags must be the same as in cvSVD */
+CVAPI(void)   cvSVBkSb( const CvArr* W, const CvArr* U,
+                        const CvArr* V, const CvArr* B,
+                        CvArr* X, int flags );
+
+#define CV_LU  0
+#define CV_SVD 1
+#define CV_SVD_SYM 2
+#define CV_CHOLESKY 3
+#define CV_QR  4
+#define CV_NORMAL 16
+
+/** Inverts matrix */
+CVAPI(double)  cvInvert( const CvArr* src, CvArr* dst,
+                         int method CV_DEFAULT(CV_LU));
+#define cvInv cvInvert
+
+/** Solves the linear system (src1)*(dst) = (src2)
+   (returns 0 if src1 is singular and the CV_LU method is used) */
+CVAPI(int)  cvSolve( const CvArr* src1, const CvArr* src2, CvArr* dst,
+                     int method CV_DEFAULT(CV_LU));
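+
+/* A minimal sketch of solving a 2x2 linear system A*x = b with cvSolve;
+   the coefficients are arbitrary:
+@code
+    float a_data[] = { 2, 1,
+                       1, 3 };
+    float b_data[] = { 3, 5 };
+    float x_data[2];
+    CvMat A = cvMat(2, 2, CV_32FC1, a_data);
+    CvMat b = cvMat(2, 1, CV_32FC1, b_data);
+    CvMat x = cvMat(2, 1, CV_32FC1, x_data);
+    cvSolve(&A, &b, &x, CV_LU);           // x ~ (0.8, 1.4)
+@endcode
+*/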
+
+/** Calculates determinant of input matrix */
+CVAPI(double) cvDet( const CvArr* mat );
+
+/** Calculates trace of the matrix (sum of elements on the main diagonal) */
+CVAPI(CvScalar) cvTrace( const CvArr* mat );
+
+/** Finds eigenvalues and eigenvectors of a symmetric matrix */
+CVAPI(void)  cvEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+                        double eps CV_DEFAULT(0),
+                        int lowindex CV_DEFAULT(-1),
+                        int highindex CV_DEFAULT(-1));
+
+///* Finds selected eigen values and vectors of a symmetric matrix */
+//CVAPI(void)  cvSelectedEigenVV( CvArr* mat, CvArr* evects, CvArr* evals,
+//                                int lowindex, int highindex );
+
+/** Makes an identity matrix (mat_ij = i == j) */
+CVAPI(void)  cvSetIdentity( CvArr* mat, CvScalar value CV_DEFAULT(cvRealScalar(1)) );
+
+/** Fills matrix with given range of numbers */
+CVAPI(CvArr*)  cvRange( CvArr* mat, double start, double end );
+
+/**   @anchor core_c_CovarFlags
+@name Flags for cvCalcCovarMatrix
+@see cvCalcCovarMatrix
+  @{
+*/
+
+/** flag for cvCalcCovarMatrix, transpose([v1-avg, v2-avg,...]) * [v1-avg,v2-avg,...] */
+#define CV_COVAR_SCRAMBLED 0
+
+/** flag for cvCalcCovarMatrix, [v1-avg, v2-avg,...] * transpose([v1-avg,v2-avg,...]) */
+#define CV_COVAR_NORMAL    1
+
+/** flag for cvCalcCovarMatrix, do not calculate the average (i.e. mean vector) - use the input vector instead
+   (useful for calculating the covariance matrix by parts) */
+#define CV_COVAR_USE_AVG   2
+
+/** flag for cvCalcCovarMatrix, scale the covariance matrix coefficients by the number of vectors */
+#define CV_COVAR_SCALE     4
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its rows */
+#define CV_COVAR_ROWS      8
+
+/** flag for cvCalcCovarMatrix, all the input vectors are stored in a single matrix, as its columns */
+#define CV_COVAR_COLS     16
+
+/** @} */
+
+/** Calculates the covariance matrix for a set of vectors
+@see @ref core_c_CovarFlags "flags"
+*/
+CVAPI(void)  cvCalcCovarMatrix( const CvArr** vects, int count,
+                                CvArr* cov_mat, CvArr* avg, int flags );
+
+#define CV_PCA_DATA_AS_ROW 0
+#define CV_PCA_DATA_AS_COL 1
+#define CV_PCA_USE_AVG 2
+CVAPI(void)  cvCalcPCA( const CvArr* data, CvArr* mean,
+                        CvArr* eigenvals, CvArr* eigenvects, int flags );
+
+CVAPI(void)  cvProjectPCA( const CvArr* data, const CvArr* mean,
+                           const CvArr* eigenvects, CvArr* result );
+
+CVAPI(void)  cvBackProjectPCA( const CvArr* proj, const CvArr* mean,
+                               const CvArr* eigenvects, CvArr* result );
+
+/** Calculates Mahalanobis(weighted) distance */
+CVAPI(double)  cvMahalanobis( const CvArr* vec1, const CvArr* vec2, const CvArr* mat );
+#define cvMahalonobis  cvMahalanobis
+
+/****************************************************************************************\
+*                                    Array Statistics                                    *
+\****************************************************************************************/
+
+/** Finds sum of array elements */
+CVAPI(CvScalar)  cvSum( const CvArr* arr );
+
+/** Calculates number of non-zero pixels */
+CVAPI(int)  cvCountNonZero( const CvArr* arr );
+
+/** Calculates mean value of array elements */
+CVAPI(CvScalar)  cvAvg( const CvArr* arr, const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Calculates mean and standard deviation of pixel values */
+CVAPI(void)  cvAvgSdv( const CvArr* arr, CvScalar* mean, CvScalar* std_dev,
+                       const CvArr* mask CV_DEFAULT(NULL) );
+
+/** Finds global minimum, maximum and their positions */
+CVAPI(void)  cvMinMaxLoc( const CvArr* arr, double* min_val, double* max_val,
+                          CvPoint* min_loc CV_DEFAULT(NULL),
+                          CvPoint* max_loc CV_DEFAULT(NULL),
+                          const CvArr* mask CV_DEFAULT(NULL) );
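+
+/* A minimal sketch of locating extrema with cvMinMaxLoc; any of the
+   output pointers may be passed as NULL if not needed:
+@code
+    IplImage* img = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    double minv, maxv;
+    CvPoint minloc, maxloc;
+    cvSetZero(img);
+    cvMinMaxLoc(img, &minv, &maxv, &minloc, &maxloc, NULL);
+    cvReleaseImage(&img);
+@endcode
+*/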
+
+/** @anchor core_c_NormFlags
+  @name Flags for cvNorm and cvNormalize
+  @{
+*/
+#define CV_C            1
+#define CV_L1           2
+#define CV_L2           4
+#define CV_NORM_MASK    7
+#define CV_RELATIVE     8
+#define CV_DIFF         16
+#define CV_MINMAX       32
+
+#define CV_DIFF_C       (CV_DIFF | CV_C)
+#define CV_DIFF_L1      (CV_DIFF | CV_L1)
+#define CV_DIFF_L2      (CV_DIFF | CV_L2)
+#define CV_RELATIVE_C   (CV_RELATIVE | CV_C)
+#define CV_RELATIVE_L1  (CV_RELATIVE | CV_L1)
+#define CV_RELATIVE_L2  (CV_RELATIVE | CV_L2)
+/** @} */
+
+/** Finds norm, difference norm or relative difference norm for an array (or two arrays)
+@see @ref core_c_NormFlags "flags"
+*/
+CVAPI(double)  cvNorm( const CvArr* arr1, const CvArr* arr2 CV_DEFAULT(NULL),
+                       int norm_type CV_DEFAULT(CV_L2),
+                       const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @see @ref core_c_NormFlags "flags" */
+CVAPI(void)  cvNormalize( const CvArr* src, CvArr* dst,
+                          double a CV_DEFAULT(1.), double b CV_DEFAULT(0.),
+                          int norm_type CV_DEFAULT(CV_L2),
+                          const CvArr* mask CV_DEFAULT(NULL) );
+
+/** @anchor core_c_ReduceFlags
+  @name Flags for cvReduce
+  @{
+*/
+#define CV_REDUCE_SUM 0
+#define CV_REDUCE_AVG 1
+#define CV_REDUCE_MAX 2
+#define CV_REDUCE_MIN 3
+/** @} */
+
+/** @see @ref core_c_ReduceFlags "flags" */
+CVAPI(void)  cvReduce( const CvArr* src, CvArr* dst, int dim CV_DEFAULT(-1),
+                       int op CV_DEFAULT(CV_REDUCE_SUM) );
+
+/****************************************************************************************\
+*                      Discrete Linear Transforms and Related Functions                  *
+\****************************************************************************************/
+
+/** @anchor core_c_DftFlags
+  @name Flags for cvDFT, cvDCT and cvMulSpectrums
+  @{
+  */
+#define CV_DXT_FORWARD  0
+#define CV_DXT_INVERSE  1
+#define CV_DXT_SCALE    2 /**< divide result by size of array */
+#define CV_DXT_INV_SCALE (CV_DXT_INVERSE + CV_DXT_SCALE)
+#define CV_DXT_INVERSE_SCALE CV_DXT_INV_SCALE
+#define CV_DXT_ROWS     4 /**< transform each row individually */
+#define CV_DXT_MUL_CONJ 8 /**< conjugate the second argument of cvMulSpectrums */
+/** @} */
+
+/** Discrete Fourier Transform:
+    complex->complex,
+    real->ccs (forward),
+    ccs->real (inverse)
+@see @ref core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvDFT( const CvArr* src, CvArr* dst, int flags,
+                    int nonzero_rows CV_DEFAULT(0) );
+#define cvFFT cvDFT
+
+/** Multiply results of DFTs: DFT(X)*DFT(Y) or DFT(X)*conj(DFT(Y))
+@see @ref core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvMulSpectrums( const CvArr* src1, const CvArr* src2,
+                             CvArr* dst, int flags );
+
+/** Finds optimal DFT vector size >= size0 */
+CVAPI(int)  cvGetOptimalDFTSize( int size0 );
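+
+/* A minimal sketch of a forward/inverse 1D DFT round trip, padding the
+   length with cvGetOptimalDFTSize; the signal length is arbitrary:
+@code
+    int n = cvGetOptimalDFTSize(1000);     // smallest 2^p*3^q*5^r >= 1000
+    CvMat* sig  = cvCreateMat(1, n, CV_32FC1);
+    CvMat* spec = cvCreateMat(1, n, CV_32FC1);
+    cvSetZero(sig);
+    cvDFT(sig, spec, CV_DXT_FORWARD, 0);   // real -> packed (CCS) spectrum
+    cvDFT(spec, sig, CV_DXT_INV_SCALE, 0); // back to the original signal
+    cvReleaseMat(&sig); cvReleaseMat(&spec);
+@endcode
+*/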
+
+/** Discrete Cosine Transform
+@see @ref core_c_DftFlags "flags"
+*/
+CVAPI(void)  cvDCT( const CvArr* src, CvArr* dst, int flags );
+
+/****************************************************************************************\
+*                              Dynamic data structures                                   *
+\****************************************************************************************/
+
+/** Calculates length of sequence slice (with support of negative indices). */
+CVAPI(int) cvSliceLength( CvSlice slice, const CvSeq* seq );
+
+
+/** Creates new memory storage.
+   block_size == 0 means that default,
+   somewhat optimal size, is used (currently, it is 64K) */
+CVAPI(CvMemStorage*)  cvCreateMemStorage( int block_size CV_DEFAULT(0));
+
+
+/** Creates a memory storage that will borrow memory blocks from parent storage */
+CVAPI(CvMemStorage*)  cvCreateChildMemStorage( CvMemStorage* parent );
+
+
+/** Releases memory storage. All the children of a parent must be released before
+   the parent. A child storage returns all the blocks to parent when it is released */
+CVAPI(void)  cvReleaseMemStorage( CvMemStorage** storage );
+
+
+/** Clears memory storage. This is the only way (besides cvRestoreMemStoragePos)
+   to reuse memory allocated for the storage: cvClearSeq, cvClearSet etc.
+   do not free any memory.
+   A child storage returns all the blocks to the parent when it is cleared */
+CVAPI(void)  cvClearMemStorage( CvMemStorage* storage );
+
+/** Remember a storage "free memory" position */
+CVAPI(void)  cvSaveMemStoragePos( const CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Restore a storage "free memory" position */
+CVAPI(void)  cvRestoreMemStoragePos( CvMemStorage* storage, CvMemStoragePos* pos );
+
+/** Allocates continuous buffer of the specified size in the storage */
+CVAPI(void*) cvMemStorageAlloc( CvMemStorage* storage, size_t size );
+
+/** Allocates string in memory storage */
+CVAPI(CvString) cvMemStorageAllocString( CvMemStorage* storage, const char* ptr,
+                                         int len CV_DEFAULT(-1) );
+
+/** Creates new empty sequence that will reside in the specified storage */
+CVAPI(CvSeq*)  cvCreateSeq( int seq_flags, size_t header_size,
+                            size_t elem_size, CvMemStorage* storage );
+
+/** Changes default size (granularity) of sequence blocks.
+   The default size is ~1Kbyte */
+CVAPI(void)  cvSetSeqBlockSize( CvSeq* seq, int delta_elems );
+
+
+/** Adds new element to the end of sequence. Returns pointer to the element */
+CVAPI(schar*)  cvSeqPush( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Adds new element to the beginning of sequence. Returns pointer to it */
+CVAPI(schar*)  cvSeqPushFront( CvSeq* seq, const void* element CV_DEFAULT(NULL));
+
+
+/** Removes the last element from sequence and optionally saves it */
+CVAPI(void)  cvSeqPop( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+/** Removes the first element from sequence and optionally saves it */
+CVAPI(void)  cvSeqPopFront( CvSeq* seq, void* element CV_DEFAULT(NULL));
+
+
+#define CV_FRONT 1
+#define CV_BACK 0
+/** Adds several new elements to the end of sequence */
+CVAPI(void)  cvSeqPushMulti( CvSeq* seq, const void* elements,
+                             int count, int in_front CV_DEFAULT(0) );
+
+/** Removes several elements from the end of sequence and optionally saves them */
+CVAPI(void)  cvSeqPopMulti( CvSeq* seq, void* elements,
+                            int count, int in_front CV_DEFAULT(0) );
+
+/** Inserts a new element in the middle of sequence.
+   cvSeqInsert(seq,0,elem) == cvSeqPushFront(seq,elem) */
+CVAPI(schar*)  cvSeqInsert( CvSeq* seq, int before_index,
+                            const void* element CV_DEFAULT(NULL));
+
+/** Removes specified sequence element */
+CVAPI(void)  cvSeqRemove( CvSeq* seq, int index );
+
+
+/** Removes all the elements from the sequence. The freed memory
+   can be reused later only by the same sequence unless cvClearMemStorage
+   or cvRestoreMemStoragePos is called */
+CVAPI(void)  cvClearSeq( CvSeq* seq );
+
+
+/** Retrieves pointer to specified sequence element.
+   Negative indices are supported and mean counting from the end
+   (e.g -1 means the last sequence element) */
+CVAPI(schar*)  cvGetSeqElem( const CvSeq* seq, int index );
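+
+/* A minimal sketch of a growable sequence of ints: create, push, and
+   random access (the element type and count are arbitrary):
+@code
+    CvMemStorage* storage = cvCreateMemStorage(0);
+    CvSeq* seq = cvCreateSeq(CV_32SC1, sizeof(CvSeq), sizeof(int), storage);
+    int i;
+    for (i = 0; i < 10; i++)
+        cvSeqPush(seq, &i);
+    int* fifth = (int*)cvGetSeqElem(seq, 4);    // *fifth == 4
+    int* last  = (int*)cvGetSeqElem(seq, -1);   // negative index: last element
+    cvReleaseMemStorage(&storage);              // frees the sequence too
+@endcode
+*/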
+
+/** Calculates index of the specified sequence element.
+   Returns -1 if element does not belong to the sequence */
+CVAPI(int)  cvSeqElemIdx( const CvSeq* seq, const void* element,
+                         CvSeqBlock** block CV_DEFAULT(NULL) );
+
+/** Initializes sequence writer. The new elements will be added to the end of sequence */
+CVAPI(void)  cvStartAppendToSeq( CvSeq* seq, CvSeqWriter* writer );
+
+
+/** Combination of cvCreateSeq and cvStartAppendToSeq */
+CVAPI(void)  cvStartWriteSeq( int seq_flags, int header_size,
+                              int elem_size, CvMemStorage* storage,
+                              CvSeqWriter* writer );
+
+/** Closes sequence writer, updates sequence header and returns pointer
+   to the resultant sequence
+   (which may be useful if the sequence was created using cvStartWriteSeq)
+*/
+CVAPI(CvSeq*)  cvEndWriteSeq( CvSeqWriter* writer );
+
+
+/** Updates sequence header. May be useful to get access to some of the previously
+   written elements via cvGetSeqElem or sequence reader */
+CVAPI(void)   cvFlushSeqWriter( CvSeqWriter* writer );
+
+
+/** Initializes sequence reader.
+   The sequence can be read in forward or backward direction */
+CVAPI(void) cvStartReadSeq( const CvSeq* seq, CvSeqReader* reader,
+                           int reverse CV_DEFAULT(0) );
+
+
+/** Returns current sequence reader position (currently observed sequence element) */
+CVAPI(int)  cvGetSeqReaderPos( CvSeqReader* reader );
+
+
+/** Changes sequence reader position. It may seek to an absolute position or
+   relative to the current position */
+CVAPI(void)   cvSetSeqReaderPos( CvSeqReader* reader, int index,
+                                 int is_relative CV_DEFAULT(0));
+
+/** Copies sequence content to a continuous piece of memory */
+CVAPI(void*)  cvCvtSeqToArray( const CvSeq* seq, void* elements,
+                               CvSlice slice CV_DEFAULT(CV_WHOLE_SEQ) );
+
+/** Creates sequence header for array.
+   After that all the operations on sequences that do not alter the content
+   can be applied to the resultant sequence */
+CVAPI(CvSeq*) cvMakeSeqHeaderForArray( int seq_type, int header_size,
+                                       int elem_size, void* elements, int total,
+                                       CvSeq* seq, CvSeqBlock* block );
+
+/** Extracts sequence slice (with or without copying sequence elements) */
+CVAPI(CvSeq*) cvSeqSlice( const CvSeq* seq, CvSlice slice,
+                         CvMemStorage* storage CV_DEFAULT(NULL),
+                         int copy_data CV_DEFAULT(0));
+
+CV_INLINE CvSeq* cvCloneSeq( const CvSeq* seq, CvMemStorage* storage CV_DEFAULT(NULL))
+{
+    return cvSeqSlice( seq, CV_WHOLE_SEQ, storage, 1 );
+}
+
+/** Removes sequence slice */
+CVAPI(void)  cvSeqRemoveSlice( CvSeq* seq, CvSlice slice );
+
+/** Inserts a sequence or array into another sequence */
+CVAPI(void)  cvSeqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+/** a < b ? -1 : a > b ? 1 : 0 */
+typedef int (CV_CDECL* CvCmpFunc)(const void* a, const void* b, void* userdata );
+
+/** Sorts sequence in-place given element comparison function */
+CVAPI(void) cvSeqSort( CvSeq* seq, CvCmpFunc func, void* userdata CV_DEFAULT(NULL) );
+
+/** Finds element in a [sorted] sequence */
+CVAPI(schar*) cvSeqSearch( CvSeq* seq, const void* elem, CvCmpFunc func,
+                           int is_sorted, int* elem_idx,
+                           void* userdata CV_DEFAULT(NULL) );
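+
+/* A minimal sketch of sorting and searching a sequence with a user
+   comparison function; cmp_int is a hypothetical helper and seq is a
+   sequence of ints as in the earlier sketch:
+@code
+    static int cmp_int(const void* a, const void* b, void* userdata)
+    {
+        (void)userdata;                         // unused here
+        return *(const int*)a - *(const int*)b;
+    }
+    ...
+    cvSeqSort(seq, cmp_int, NULL);              // ascending order
+    int key = 7, idx = -1;
+    int* found = (int*)cvSeqSearch(seq, &key, cmp_int, 1 /*sorted*/, &idx, NULL);
+@endcode
+*/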
+
+/** Reverses order of sequence elements in-place */
+CVAPI(void) cvSeqInvert( CvSeq* seq );
+
+/** Splits sequence into one or more equivalence classes using the specified criteria */
+CVAPI(int)  cvSeqPartition( const CvSeq* seq, CvMemStorage* storage,
+                            CvSeq** labels, CvCmpFunc is_equal, void* userdata );
+
+/************ Internal sequence functions ************/
+CVAPI(void)  cvChangeSeqBlock( void* reader, int direction );
+CVAPI(void)  cvCreateSeqBlock( CvSeqWriter* writer );
+
+
+/** Creates a new set */
+CVAPI(CvSet*)  cvCreateSet( int set_flags, int header_size,
+                            int elem_size, CvMemStorage* storage );
+
+/** Adds a new element to the set and returns its index;
+   a pointer to the element is optionally returned via inserted_elem */
+CVAPI(int)  cvSetAdd( CvSet* set_header, CvSetElem* elem CV_DEFAULT(NULL),
+                      CvSetElem** inserted_elem CV_DEFAULT(NULL) );
+
+/** Fast variant of cvSetAdd */
+CV_INLINE  CvSetElem* cvSetNew( CvSet* set_header )
+{
+    CvSetElem* elem = set_header->free_elems;
+    if( elem )
+    {
+        set_header->free_elems = elem->next_free;
+        elem->flags = elem->flags & CV_SET_ELEM_IDX_MASK;
+        set_header->active_count++;
+    }
+    else
+        cvSetAdd( set_header, NULL, &elem );
+    return elem;
+}
+
+/** Removes set element given its pointer */
+CV_INLINE  void cvSetRemoveByPtr( CvSet* set_header, void* elem )
+{
+    CvSetElem* _elem = (CvSetElem*)elem;
+    assert( _elem->flags >= 0 /*&& (elem->flags & CV_SET_ELEM_IDX_MASK) < set_header->total*/ );
+    _elem->next_free = set_header->free_elems;
+    _elem->flags = (_elem->flags & CV_SET_ELEM_IDX_MASK) | CV_SET_ELEM_FREE_FLAG;
+    set_header->free_elems = _elem;
+    set_header->active_count--;
+}
+
+/** Removes element from the set by its index  */
+CVAPI(void)   cvSetRemove( CvSet* set_header, int index );
+
+/** Returns a set element by index. If the element doesn't belong to the set,
+   NULL is returned */
+CV_INLINE CvSetElem* cvGetSetElem( const CvSet* set_header, int idx )
+{
+    CvSetElem* elem = (CvSetElem*)(void *)cvGetSeqElem( (CvSeq*)set_header, idx );
+    return elem && CV_IS_SET_ELEM( elem ) ? elem : 0;
+}
+
+/** Removes all the elements from the set */
+CVAPI(void)  cvClearSet( CvSet* set_header );
+
+/** Creates new graph */
+CVAPI(CvGraph*)  cvCreateGraph( int graph_flags, int header_size,
+                                int vtx_size, int edge_size,
+                                CvMemStorage* storage );
+
+/** Adds new vertex to the graph */
+CVAPI(int)  cvGraphAddVtx( CvGraph* graph, const CvGraphVtx* vtx CV_DEFAULT(NULL),
+                           CvGraphVtx** inserted_vtx CV_DEFAULT(NULL) );
+
+
+/** Removes vertex from the graph together with all incident edges */
+CVAPI(int)  cvGraphRemoveVtx( CvGraph* graph, int index );
+CVAPI(int)  cvGraphRemoveVtxByPtr( CvGraph* graph, CvGraphVtx* vtx );
+
+
+/** Links two vertices, specified by indices or pointers, if they
+   are not yet connected, or returns a pointer to the already existing
+   edge connecting the vertices.
+   The functions return 1 if a new edge was created, 0 otherwise */
+CVAPI(int)  cvGraphAddEdge( CvGraph* graph,
+                            int start_idx, int end_idx,
+                            const CvGraphEdge* edge CV_DEFAULT(NULL),
+                            CvGraphEdge** inserted_edge CV_DEFAULT(NULL) );
+
+CVAPI(int)  cvGraphAddEdgeByPtr( CvGraph* graph,
+                               CvGraphVtx* start_vtx, CvGraphVtx* end_vtx,
+                               const CvGraphEdge* edge CV_DEFAULT(NULL),
+                               CvGraphEdge** inserted_edge CV_DEFAULT(NULL) );
+
+/** Removes the edge connecting two vertices */
+CVAPI(void)  cvGraphRemoveEdge( CvGraph* graph, int start_idx, int end_idx );
+CVAPI(void)  cvGraphRemoveEdgeByPtr( CvGraph* graph, CvGraphVtx* start_vtx,
+                                     CvGraphVtx* end_vtx );
+
+/** Finds the edge connecting two vertices */
+CVAPI(CvGraphEdge*)  cvFindGraphEdge( const CvGraph* graph, int start_idx, int end_idx );
+CVAPI(CvGraphEdge*)  cvFindGraphEdgeByPtr( const CvGraph* graph,
+                                           const CvGraphVtx* start_vtx,
+                                           const CvGraphVtx* end_vtx );
+#define cvGraphFindEdge cvFindGraphEdge
+#define cvGraphFindEdgeByPtr cvFindGraphEdgeByPtr
+
+/** Removes all vertices and edges from the graph */
+CVAPI(void)  cvClearGraph( CvGraph* graph );
+
+
+/** Counts the number of edges incident to the vertex */
+CVAPI(int)  cvGraphVtxDegree( const CvGraph* graph, int vtx_idx );
+CVAPI(int)  cvGraphVtxDegreeByPtr( const CvGraph* graph, const CvGraphVtx* vtx );
+
+
+/** Retrieves graph vertex by given index */
+#define cvGetGraphVtx( graph, idx ) (CvGraphVtx*)cvGetSetElem((CvSet*)(graph), (idx))
+
+/** Retrieves index of a graph vertex given its pointer */
+#define cvGraphVtxIdx( graph, vtx ) ((vtx)->flags & CV_SET_ELEM_IDX_MASK)
+
+/** Retrieves index of a graph edge given its pointer */
+#define cvGraphEdgeIdx( graph, edge ) ((edge)->flags & CV_SET_ELEM_IDX_MASK)
+
+#define cvGraphGetVtxCount( graph ) ((graph)->active_count)
+#define cvGraphGetEdgeCount( graph ) ((graph)->edges->active_count)
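+
+/* A minimal sketch of building a small oriented graph; the vertex and
+   edge payloads are left at their minimal sizes, and CV_ORIENTED_GRAPH
+   comes from the types header:
+@code
+    CvMemStorage* storage = cvCreateMemStorage(0);
+    CvGraph* g = cvCreateGraph(CV_ORIENTED_GRAPH, sizeof(CvGraph),
+                               sizeof(CvGraphVtx), sizeof(CvGraphEdge), storage);
+    int v0 = cvGraphAddVtx(g, NULL, NULL);
+    int v1 = cvGraphAddVtx(g, NULL, NULL);
+    cvGraphAddEdge(g, v0, v1, NULL, NULL);      // returns 1: a new edge
+    int nv = cvGraphGetVtxCount(g);             // 2
+    int ne = cvGraphGetEdgeCount(g);            // 1
+    cvReleaseMemStorage(&storage);
+@endcode
+*/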
+
+#define  CV_GRAPH_VERTEX        1
+#define  CV_GRAPH_TREE_EDGE     2
+#define  CV_GRAPH_BACK_EDGE     4
+#define  CV_GRAPH_FORWARD_EDGE  8
+#define  CV_GRAPH_CROSS_EDGE    16
+#define  CV_GRAPH_ANY_EDGE      30
+#define  CV_GRAPH_NEW_TREE      32
+#define  CV_GRAPH_BACKTRACKING  64
+#define  CV_GRAPH_OVER          -1
+
+#define  CV_GRAPH_ALL_ITEMS    -1
+
+/** flags for graph vertices and edges */
+#define  CV_GRAPH_ITEM_VISITED_FLAG  (1 << 30)
+#define  CV_IS_GRAPH_VERTEX_VISITED(vtx) \
+    (((CvGraphVtx*)(vtx))->flags & CV_GRAPH_ITEM_VISITED_FLAG)
+#define  CV_IS_GRAPH_EDGE_VISITED(edge) \
+    (((CvGraphEdge*)(edge))->flags & CV_GRAPH_ITEM_VISITED_FLAG)
+#define  CV_GRAPH_SEARCH_TREE_NODE_FLAG   (1 << 29)
+#define  CV_GRAPH_FORWARD_EDGE_FLAG       (1 << 28)
+
+typedef struct CvGraphScanner
+{
+    CvGraphVtx* vtx;       /* current graph vertex (or current edge origin) */
+    CvGraphVtx* dst;       /* current graph edge destination vertex */
+    CvGraphEdge* edge;     /* current edge */
+
+    CvGraph* graph;        /* the graph */
+    CvSeq*   stack;        /* the graph vertex stack */
+    int      index;        /* the lower bound of certainly visited vertices */
+    int      mask;         /* event mask */
+}
+CvGraphScanner;
+
+/** Creates new graph scanner. */
+CVAPI(CvGraphScanner*)  cvCreateGraphScanner( CvGraph* graph,
+                                             CvGraphVtx* vtx CV_DEFAULT(NULL),
+                                             int mask CV_DEFAULT(CV_GRAPH_ALL_ITEMS));
+
+/** Releases graph scanner. */
+CVAPI(void) cvReleaseGraphScanner( CvGraphScanner** scanner );
+
+/** Get next graph element */
+CVAPI(int)  cvNextGraphItem( CvGraphScanner* scanner );
+
+/** Creates a copy of graph */
+CVAPI(CvGraph*) cvCloneGraph( const CvGraph* graph, CvMemStorage* storage );
+
+
+/** Performs a look-up table transformation. Elements of the source array
+   (which should be 8uC1 or 8sC1) are used as indices into the 256-element table lut */
+CVAPI(void) cvLUT( const CvArr* src, CvArr* dst, const CvArr* lut );
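+
+/* A minimal sketch of cvLUT, inverting an 8-bit image through a
+   256-entry table; the image size is arbitrary:
+@code
+    IplImage* src = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    IplImage* dst = cvCreateImage(cvSize(64, 64), IPL_DEPTH_8U, 1);
+    uchar lut_data[256];
+    int i;
+    for (i = 0; i < 256; i++)
+        lut_data[i] = (uchar)(255 - i);          // inversion table
+    CvMat lut = cvMat(1, 256, CV_8UC1, lut_data);
+    cvSetZero(src);
+    cvLUT(src, dst, &lut);                       // dst(I) = lut[src(I)]
+    cvReleaseImage(&src); cvReleaseImage(&dst);
+@endcode
+*/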
+
+
+/******************* Iteration through the sequence tree *****************/
+typedef struct CvTreeNodeIterator
+{
+    const void* node;
+    int level;
+    int max_level;
+}
+CvTreeNodeIterator;
+
+CVAPI(void) cvInitTreeNodeIterator( CvTreeNodeIterator* tree_iterator,
+                                   const void* first, int max_level );
+CVAPI(void*) cvNextTreeNode( CvTreeNodeIterator* tree_iterator );
+CVAPI(void*) cvPrevTreeNode( CvTreeNodeIterator* tree_iterator );
+
+/** Inserts sequence into tree with specified "parent" sequence.
+   If parent is equal to frame (e.g. the most external contour),
+   then the added contour will have a null parent pointer. */
+CVAPI(void) cvInsertNodeIntoTree( void* node, void* parent, void* frame );
+
+/** Removes contour from tree (together with the contour children). */
+CVAPI(void) cvRemoveNodeFromTree( void* node, void* frame );
+
+/** Gathers pointers to all the sequences accessible from `first`
+   into a single sequence */
+CVAPI(CvSeq*) cvTreeToNodeSeq( const void* first, int header_size,
+                              CvMemStorage* storage );
+
+/** The function implements the K-means algorithm for clustering an array of sample
+   vectors in a specified number of classes */
+#define CV_KMEANS_USE_INITIAL_LABELS    1
+CVAPI(int) cvKMeans2( const CvArr* samples, int cluster_count, CvArr* labels,
+                      CvTermCriteria termcrit, int attempts CV_DEFAULT(1),
+                      CvRNG* rng CV_DEFAULT(0), int flags CV_DEFAULT(0),
+                      CvArr* _centers CV_DEFAULT(0), double* compactness CV_DEFAULT(0) );
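+
+/* A minimal sketch of clustering random 2-D samples into 3 classes with
+   cvKMeans2; the sample count, seed and termination criteria are
+   arbitrary, and all defaulted parameters are spelled out for plain C:
+@code
+    CvMat* samples = cvCreateMat(100, 2, CV_32FC1);   // one sample per row
+    CvMat* labels  = cvCreateMat(100, 1, CV_32SC1);
+    CvRNG rng = cvRNG(42);
+    cvRandArr(&rng, samples, CV_RAND_UNI, cvRealScalar(0), cvRealScalar(10));
+    cvKMeans2(samples, 3, labels,
+              cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0),
+              1, NULL, 0, NULL, NULL);
+    cvReleaseMat(&samples); cvReleaseMat(&labels);
+@endcode
+*/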
+
+/****************************************************************************************\
+*                                    System functions                                    *
+\****************************************************************************************/
+
+/** Loads optimized functions from IPP, MKL etc. or switches back to pure C code */
+CVAPI(int)  cvUseOptimized( int on_off );
+
+typedef IplImage* (CV_STDCALL* Cv_iplCreateImageHeader)
+                            (int,int,int,char*,char*,int,int,int,int,int,
+                            IplROI*,IplImage*,void*,IplTileInfo*);
+typedef void (CV_STDCALL* Cv_iplAllocateImageData)(IplImage*,int,int);
+typedef void (CV_STDCALL* Cv_iplDeallocate)(IplImage*,int);
+typedef IplROI* (CV_STDCALL* Cv_iplCreateROI)(int,int,int,int,int);
+typedef IplImage* (CV_STDCALL* Cv_iplCloneImage)(const IplImage*);
+
+/** @brief Makes OpenCV use IPL functions for allocating IplImage and IplROI structures.
+
+Normally, the function is not called directly. Instead, a simple macro
+CV_TURN_ON_IPL_COMPATIBILITY() is used that calls cvSetIPLAllocators and passes it pointers
+to the IPL allocation functions:
+@code
+    ...
+    CV_TURN_ON_IPL_COMPATIBILITY()
+    ...
+@endcode
+@param create_header pointer to a function creating an IPL image header.
+@param allocate_data pointer to a function allocating IPL image data.
+@param deallocate pointer to a function deallocating an IPL image.
+@param create_roi pointer to a function creating an IPL image ROI (i.e. Region of Interest).
+@param clone_image pointer to a function cloning an IPL image.
+ */
+CVAPI(void) cvSetIPLAllocators( Cv_iplCreateImageHeader create_header,
+                               Cv_iplAllocateImageData allocate_data,
+                               Cv_iplDeallocate deallocate,
+                               Cv_iplCreateROI create_roi,
+                               Cv_iplCloneImage clone_image );
+
+#define CV_TURN_ON_IPL_COMPATIBILITY()                                  \
+    cvSetIPLAllocators( iplCreateImageHeader, iplAllocateImage,         \
+                        iplDeallocate, iplCreateROI, iplCloneImage )
+
+/****************************************************************************************\
+*                                    Data Persistence                                    *
+\****************************************************************************************/
+
+/********************************** High-level functions ********************************/
+
+/** @brief Opens file storage for reading or writing data.
+
+The function opens file storage for reading or writing data. In the latter case, a new file is
+created or an existing file is rewritten. The type of the read or written file is determined by the
+filename extension: .xml for XML, .yml or .yaml for YAML and .json for JSON.
+
+At the same time, it also supports adding parameters like "example.xml?base64".
+
+The function returns a pointer to the CvFileStorage structure.
+If the file cannot be opened then the function returns NULL.
+@param filename Name of the file associated with the storage
+@param memstorage Memory storage used for temporary data and for
+    storing dynamic structures, such as CvSeq or CvGraph. If it is NULL, a temporary memory
+    storage is created and used.
+@param flags Can be one of the following:
+> -   **CV_STORAGE_READ** the storage is open for reading
+> -   **CV_STORAGE_WRITE** the storage is open for writing
+      (use **CV_STORAGE_WRITE | CV_STORAGE_WRITE_BASE64** to write rawdata in Base64)
+@param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported
+    currently, so an 8-bit encoding should be used instead.
+ */
+CVAPI(CvFileStorage*)  cvOpenFileStorage( const char* filename, CvMemStorage* memstorage,
+                                          int flags, const char* encoding CV_DEFAULT(NULL) );
+
+/** @brief Releases file storage.
+
+The function closes the file associated with the storage and releases all the temporary structures.
+It must be called after all I/O operations with the storage are finished.
+@param fs Double pointer to the released file storage
+ */
+CVAPI(void) cvReleaseFileStorage( CvFileStorage** fs );
+
+/** returns attribute value or 0 (NULL) if there is no such attribute */
+CVAPI(const char*) cvAttrValue( const CvAttrList* attr, const char* attr_name );
+
+/** @brief Starts writing a new structure.
+
+The function starts writing a compound structure (collection) that can be a sequence or a map. After
+all the structure fields, which can be scalars or structures, are written, cvEndWriteStruct should
+be called. The function can be used to group some objects or to implement the write function for
+some user object (see CvTypeInfo).
+@param fs File storage
+@param name Name of the written structure. The structure can be accessed by this name when the
+storage is read.
+@param struct_flags A combination of the following values:
+-   **CV_NODE_SEQ** the written structure is a sequence (see discussion of CvFileStorage ),
+    that is, its elements do not have a name.
+-   **CV_NODE_MAP** the written structure is a map (see discussion of CvFileStorage ), that
+    is, all its elements have names.
+One and only one of the two flags above must be specified.
+-   **CV_NODE_FLOW** the optional flag that makes sense only for YAML streams. It means that
+     the structure is written as a flow (not as a block), which is more compact. It is
+     recommended to use this flag for structures or arrays whose elements are all scalars.
+@param type_name Optional parameter - the object type name. In
+    case of XML it is written as a type_id attribute of the structure opening tag. In the case of
+    YAML it is written after a colon following the structure name (see the example in
+    CvFileStorage description). In case of JSON it is written as a name/value pair.
+    Mainly it is used with user objects. When the storage is read, the
+    encoded type name is used to determine the object type (see CvTypeInfo and cvFindType ).
+@param attributes This parameter is not used in the current implementation
+ */
+CVAPI(void) cvStartWriteStruct( CvFileStorage* fs, const char* name,
+                                int struct_flags, const char* type_name CV_DEFAULT(NULL),
+                                CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Finishes writing to a file node collection.
+@param fs File storage
+@sa cvStartWriteStruct.
+ */
+CVAPI(void) cvEndWriteStruct( CvFileStorage* fs );
+
+/** @brief Writes an integer value.
+
+The function writes a single integer value (with or without a name) to the file storage.
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+ */
+CVAPI(void) cvWriteInt( CvFileStorage* fs, const char* name, int value );
+
+/** @brief Writes a floating-point value.
+
+The function writes a single floating-point value (with or without a name) to file storage. Special
+values are encoded as follows: NaN (Not A Number) as .NaN, infinity as +.Inf or -.Inf.
+
+The following example shows how to use the low-level writing functions to store custom structures,
+such as termination criteria, without registering a new type:
+@code
+    void write_termcriteria( CvFileStorage* fs, const char* struct_name,
+                             CvTermCriteria* termcrit )
+    {
+        cvStartWriteStruct( fs, struct_name, CV_NODE_MAP, NULL, cvAttrList(0,0));
+        cvWriteComment( fs, "termination criteria", 1 ); // just a description
+        if( termcrit->type & CV_TERMCRIT_ITER )
+            cvWriteInt( fs, "max_iterations", termcrit->max_iter );
+        if( termcrit->type & CV_TERMCRIT_EPS )
+            cvWriteReal( fs, "accuracy", termcrit->epsilon );
+        cvEndWriteStruct( fs );
+    }
+@endcode
+@param fs File storage
+@param name Name of the written value. Should be NULL if and only if the parent structure is a
+sequence.
+@param value The written value
+*/
+CVAPI(void) cvWriteReal( CvFileStorage* fs, const char* name, double value );
+
+/** @brief Writes a text string.
+
+The function writes a text string to file storage.
+@param fs File storage
+@param name Name of the written string . Should be NULL if and only if the parent structure is a
+sequence.
+@param str The written text string
+@param quote If non-zero, the written string is put in quotes, regardless of whether they are
+required. Otherwise, if the flag is zero, quotes are used only when they are required (e.g. when
+the string starts with a digit or contains spaces).
+ */
+CVAPI(void) cvWriteString( CvFileStorage* fs, const char* name,
+                           const char* str, int quote CV_DEFAULT(0) );
+
+/** @brief Writes a comment.
+
+The function writes a comment into file storage. The comments are skipped when the storage is read.
+@param fs File storage
+@param comment The written comment, single-line or multi-line
+@param eol_comment If non-zero, the function tries to put the comment at the end of current line.
+If the flag is zero, or if the comment is multi-line or does not fit at the end of the current
+line, the comment starts a new line.
+ */
+CVAPI(void) cvWriteComment( CvFileStorage* fs, const char* comment,
+                            int eol_comment );
+
+/** @brief Writes an object to file storage.
+
+The function writes an object to file storage. First, the appropriate type info is found using
+cvTypeOf. Then, the write method associated with the type info is called.
+
+Attributes are used to customize the writing procedure. The standard types support the following
+attributes (all the dt attributes have the same format as in cvWriteRawData):
+
+-# CvSeq
+    -   **header_dt** description of user fields of the sequence header that follow CvSeq, or
+        CvChain (if the sequence is a Freeman chain) or CvContour (if the sequence is a contour or
+        point sequence)
+    -   **dt** description of the sequence elements.
+    -   **recursive** if the attribute is present and is not equal to "0" or "false", the whole
+        tree of sequences (contours) is stored.
+-# CvGraph
+    -   **header_dt** description of user fields of the graph header that follows CvGraph;
+    -   **vertex_dt** description of user fields of graph vertices
+    -   **edge_dt** description of user fields of graph edges (note that the edge weight is
+        always written, so there is no need to specify it explicitly)
+
+Below is the code that creates the YAML file shown in the CvFileStorage description:
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvMat* mat = cvCreateMat( 3, 3, CV_32F );
+        CvFileStorage* fs = cvOpenFileStorage( "example.yml", 0, CV_STORAGE_WRITE );
+
+        cvSetIdentity( mat );
+        cvWrite( fs, "A", mat, cvAttrList(0,0) );
+
+        cvReleaseFileStorage( &fs );
+        cvReleaseMat( &mat );
+        return 0;
+    }
+@endcode
+@param fs File storage
+@param name Name of the written object. Should be NULL if and only if the parent structure is a
+sequence.
+@param ptr Pointer to the object
+@param attributes The attributes of the object. They are specific for each particular type (see
+the discussion below).
+ */
+CVAPI(void) cvWrite( CvFileStorage* fs, const char* name, const void* ptr,
+                         CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Starts the next stream.
+
+The function finishes the currently written stream and starts the next stream. In the case of XML
+the file with multiple streams looks like this:
+@code{.xml}
+    <opencv_storage>
+    <!-- stream #1 data -->
+    </opencv_storage>
+    <opencv_storage>
+    <!-- stream #2 data -->
+    </opencv_storage>
+    ...
+@endcode
+The YAML file will look like this:
+@code{.yaml}
+    %YAML 1.0
+    # stream #1 data
+    ...
+    ---
+    # stream #2 data
+@endcode
+This is useful for concatenating files or for resuming the writing process.
+@param fs File storage
+ */
+CVAPI(void) cvStartNextStream( CvFileStorage* fs );
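+
+/* A sketch of writing two streams into one storage (hypothetical file and node names):
+@code
+    CvFileStorage* fs = cvOpenFileStorage( "streams.yml", 0, CV_STORAGE_WRITE );
+    cvWriteInt( fs, "stream1_value", 1 );
+    cvStartNextStream( fs );
+    cvWriteInt( fs, "stream2_value", 2 );
+    cvReleaseFileStorage( &fs );
+@endcode
+*/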
+
+/** @brief Writes multiple numbers.
+
+The function writes an array whose elements consist of single or multiple numbers. The function
+call can be replaced with a loop containing a few cvWriteInt and cvWriteReal calls, but a single
+call is more efficient. Note that because none of the elements have a name, they should be written
+to a sequence rather than a map.
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+ */
+CVAPI(void) cvWriteRawData( CvFileStorage* fs, const void* src,
+                                int len, const char* dt );
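+
+/* A sketch of writing an array of 2D points as a compact numeric sequence; `fs` is assumed
+   to be a storage already open for writing, and dt "2i" means two integers per element:
+@code
+    CvPoint pts[] = { {1,1}, {2,2}, {3,3} };
+    cvStartWriteStruct( fs, "points", CV_NODE_SEQ | CV_NODE_FLOW );
+    cvWriteRawData( fs, pts, 3, "2i" );
+    cvEndWriteStruct( fs );
+@endcode
+*/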
+
+/** @brief Writes multiple numbers in Base64.
+
+If the storage was opened with CV_STORAGE_WRITE_BASE64 (or cv::FileStorage::WRITE_BASE64),
+this function behaves the same as cvWriteRawData, since all raw data is then written in
+Base64 anyway. Otherwise, the difference is that this function outputs the sequence in
+Base64 encoding rather than in plain text.
+
+This function can only be used to write a sequence with a type "binary".
+
+@param fs File storage
+@param src Pointer to the written array
+@param len Number of the array elements to write
+@param dt Specification of each array element, see @ref format_spec "format specification"
+*/
+CVAPI(void) cvWriteRawDataBase64( CvFileStorage* fs, const void* src,
+                                 int len, const char* dt );
+
+/** @brief Returns a unique pointer for a given name.
+
+The function returns a unique pointer for each particular file node name. This pointer can then be
+passed to the cvGetFileNode function that is faster than cvGetFileNodeByName because it compares
+text strings by comparing pointers rather than the strings' content.
+
+Consider the following example where an array of points is encoded as a sequence of 2-entry maps:
+@code
+    points:
+      - { x: 10, y: 10 }
+      - { x: 20, y: 20 }
+      - { x: 30, y: 30 }
+      # ...
+@endcode
+Then, it is possible to get hashed "x" and "y" pointers to speed up decoding of the points. :
+@code
+    #include "cxcore.h"
+
+    int main( int argc, char** argv )
+    {
+        CvFileStorage* fs = cvOpenFileStorage( "points.yml", 0, CV_STORAGE_READ );
+        CvStringHashNode* x_key = cvGetHashedKey( fs, "x", -1, 1 );
+        CvStringHashNode* y_key = cvGetHashedKey( fs, "y", -1, 1 );
+        CvFileNode* points = cvGetFileNodeByName( fs, 0, "points" );
+
+        if( CV_NODE_IS_SEQ(points->tag) )
+        {
+            CvSeq* seq = points->data.seq;
+            int i, total = seq->total;
+            CvSeqReader reader;
+            cvStartReadSeq( seq, &reader, 0 );
+            for( i = 0; i < total; i++ )
+            {
+                CvFileNode* pt = (CvFileNode*)reader.ptr;
+    #if 1 // faster variant
+                CvFileNode* xnode = cvGetFileNode( fs, pt, x_key, 0 );
+                CvFileNode* ynode = cvGetFileNode( fs, pt, y_key, 0 );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+    #elif 1 // slower variant; does not use x_key & y_key
+                CvFileNode* xnode = cvGetFileNodeByName( fs, pt, "x" );
+                CvFileNode* ynode = cvGetFileNodeByName( fs, pt, "y" );
+                assert( xnode && CV_NODE_IS_INT(xnode->tag) &&
+                        ynode && CV_NODE_IS_INT(ynode->tag));
+                int x = xnode->data.i; // or x = cvReadInt( xnode, 0 );
+                int y = ynode->data.i; // or y = cvReadInt( ynode, 0 );
+    #else // the slowest yet the easiest to use variant
+                int x = cvReadIntByName( fs, pt, "x", 0 );
+                int y = cvReadIntByName( fs, pt, "y", 0 );
+    #endif
+                CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
+                printf("
+            }
+        }
+        cvReleaseFileStorage( &fs );
+        return 0;
+    }
+@endcode
+Please note that whichever method of accessing a map you use, it is still much slower than
+using plain sequences; for example, in the above example, it is more efficient to encode the points
+as pairs of integers in a single numeric sequence.
+@param fs File storage
+@param name Literal node name
+@param len Length of the name (if it is known a priori), or -1 if it needs to be calculated
+@param create_missing Flag that specifies, whether an absent key should be added into the hash table
+*/
+CVAPI(CvStringHashNode*) cvGetHashedKey( CvFileStorage* fs, const char* name,
+                                        int len CV_DEFAULT(-1),
+                                        int create_missing CV_DEFAULT(0));
+
+/** @brief Retrieves one of the top-level nodes of the file storage.
+
+The function returns one of the top-level file nodes. The top-level nodes do not have a name; they
+correspond to the streams that are stored one after another in the file storage. If the index is out
+of range, the function returns a NULL pointer, so all the top-level nodes can be iterated by
+subsequent calls to the function with stream_index=0,1,..., until the NULL pointer is returned.
+This function can be used as a base for recursive traversal of the file storage.
+@param fs File storage
+@param stream_index Zero-based index of the stream. See cvStartNextStream . In most cases,
+there is only one stream in the file; however, there can be several.
+ */
+CVAPI(CvFileNode*) cvGetRootFileNode( const CvFileStorage* fs,
+                                     int stream_index CV_DEFAULT(0) );
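+
+/* A sketch of iterating over all top-level streams until NULL is returned:
+@code
+    int i;
+    CvFileNode* root;
+    for( i = 0; (root = cvGetRootFileNode( fs, i )) != 0; i++ )
+    {
+        // process the i-th stream, starting from its root node
+    }
+@endcode
+*/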
+
+/** @brief Finds a node in a map or file storage.
+
+The function finds a file node. It is a faster version of cvGetFileNodeByName (see
+cvGetHashedKey discussion). Also, the function can insert a new node, if it is not in the map yet.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node. If both map and
+key are NULLs, the function returns the root file node - a map that contains top-level nodes.
+@param key Unique pointer to the node name, retrieved with cvGetHashedKey
+@param create_missing Flag that specifies whether an absent node should be added to the map
+ */
+CVAPI(CvFileNode*) cvGetFileNode( CvFileStorage* fs, CvFileNode* map,
+                                 const CvStringHashNode* key,
+                                 int create_missing CV_DEFAULT(0) );
+
+/** @brief Finds a node in a map or file storage.
+
+The function finds a file node by name. The node is searched either in map or, if the pointer is
+NULL, among the top-level file storage nodes. Using this function for maps and cvGetSeqElem (or
+sequence reader) for sequences, it is possible to navigate through the file storage. To speed up
+multiple queries for a certain key (e.g., in the case of an array of structures) one may use a
+combination of cvGetHashedKey and cvGetFileNode.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches in all the top-level nodes
+(streams), starting with the first one.
+@param name The file node name
+ */
+CVAPI(CvFileNode*) cvGetFileNodeByName( const CvFileStorage* fs,
+                                       const CvFileNode* map,
+                                       const char* name );
+
+/** @brief Retrieves an integer value from a file node.
+
+The function returns an integer that is represented by the file node. If the file node is NULL, the
+default_value is returned (thus, it is convenient to call the function right after cvGetFileNode
+without checking for a NULL pointer). If the file node has type CV_NODE_INT, then node-\>data.i is
+returned. If the file node has type CV_NODE_REAL, then node-\>data.f is converted to an integer
+and returned. Otherwise an error is reported.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE int cvReadInt( const CvFileNode* node, int default_value CV_DEFAULT(0) )
+{
+    return !node ? default_value :
+        CV_NODE_IS_INT(node->tag) ? node->data.i :
+        CV_NODE_IS_REAL(node->tag) ? cvRound(node->data.f) : 0x7fffffff;
+}
+
+/** @brief Finds a file node and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadInt.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE int cvReadIntByName( const CvFileStorage* fs, const CvFileNode* map,
+                         const char* name, int default_value CV_DEFAULT(0) )
+{
+    return cvReadInt( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a floating-point value from a file node.
+
+The function returns a floating-point value that is represented by the file node. If the file node
+is NULL, the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_REAL ,
+then node-\>data.f is returned. If the file node has type CV_NODE_INT, then node-\>data.i
+is converted to floating-point and returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE double cvReadReal( const CvFileNode* node, double default_value CV_DEFAULT(0.) )
+{
+    return !node ? default_value :
+        CV_NODE_IS_INT(node->tag) ? (double)node->data.i :
+        CV_NODE_IS_REAL(node->tag) ? node->data.f : 1e300;
+}
+
+/** @brief Finds a file node and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadReal .
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE double cvReadRealByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, double default_value CV_DEFAULT(0.) )
+{
+    return cvReadReal( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+/** @brief Retrieves a text string from a file node.
+
+The function returns a text string that is represented by the file node. If the file node is NULL,
+the default_value is returned (thus, it is convenient to call the function right after
+cvGetFileNode without checking for a NULL pointer). If the file node has type CV_NODE_STR , then
+node-\>data.str.ptr is returned. Otherwise the result is not determined.
+@param node File node
+@param default_value The value that is returned if node is NULL
+ */
+CV_INLINE const char* cvReadString( const CvFileNode* node,
+                        const char* default_value CV_DEFAULT(NULL) )
+{
+    return !node ? default_value : CV_NODE_IS_STRING(node->tag) ? node->data.str.ptr : 0;
+}
+
+/** @brief Finds a file node by its name and returns its value.
+
+The function is a simple superposition of cvGetFileNodeByName and cvReadString .
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param default_value The value that is returned if the file node is not found
+ */
+CV_INLINE const char* cvReadStringByName( const CvFileStorage* fs, const CvFileNode* map,
+                        const char* name, const char* default_value CV_DEFAULT(NULL) )
+{
+    return cvReadString( cvGetFileNodeByName( fs, map, name ), default_value );
+}
+
+
+/** @brief Decodes an object and returns a pointer to it.
+
+The function decodes a user object (creates an object in a native representation from the file
+storage subtree) and returns it. The object to be decoded must be an instance of a registered type
+that supports the read method (see CvTypeInfo). The type of the object is determined by the type
+name that is encoded in the file. If the object is a dynamic structure, it is created either in
+the memory storage that was passed to cvOpenFileStorage or, if a NULL pointer was passed, in temporary
+memory storage, which is released when cvReleaseFileStorage is called. Otherwise, if the object is
+not a dynamic structure, it is created in a heap and should be released with a specialized function
+or by using the generic cvRelease.
+@param fs File storage
+@param node The root object node
+@param attributes Unused parameter
+ */
+CVAPI(void*) cvRead( CvFileStorage* fs, CvFileNode* node,
+                        CvAttrList* attributes CV_DEFAULT(NULL));
+
+/** @brief Finds an object by name and decodes it.
+
+The function is a simple superposition of cvGetFileNodeByName and cvRead.
+@param fs File storage
+@param map The parent map. If it is NULL, the function searches a top-level node.
+@param name The node name
+@param attributes Unused parameter
+ */
+CV_INLINE void* cvReadByName( CvFileStorage* fs, const CvFileNode* map,
+                              const char* name, CvAttrList* attributes CV_DEFAULT(NULL) )
+{
+    return cvRead( fs, cvGetFileNodeByName( fs, map, name ), attributes );
+}
+
+
+/** @brief Initializes the file node sequence reader.
+
+The function initializes the sequence reader to read data from a file node. The initialized reader
+can be then passed to cvReadRawDataSlice.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param reader Pointer to the sequence reader
+ */
+CVAPI(void) cvStartReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                               CvSeqReader* reader );
+
+/** @brief Reads one or more elements from a file node sequence.
+
+The function reads one or more elements from the file node, representing a sequence, to a
+user-specified array. The total number of read sequence elements is a product of total and the
+number of components in each array element. For example, if dt=2if, the function will read total\*3
+sequence elements. As with any sequence, some parts of the file node sequence can be skipped or read
+repeatedly by repositioning the reader using cvSetSeqReaderPos.
+@param fs File storage
+@param reader The sequence reader. Initialize it with cvStartReadRawData .
+@param count The number of elements to read
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData .
+ */
+CVAPI(void) cvReadRawDataSlice( const CvFileStorage* fs, CvSeqReader* reader,
+                               int count, void* dst, const char* dt );
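+
+/* A sketch of reading the first three "2f" elements of a numeric sequence node; the node
+   name "data" is hypothetical, and the node is assumed to have been written with a
+   matching dt:
+@code
+    CvFileNode* node = cvGetFileNodeByName( fs, 0, "data" );
+    CvSeqReader reader;
+    float buf[6];
+    cvStartReadRawData( fs, node, &reader );
+    cvReadRawDataSlice( fs, &reader, 3, buf, "2f" );  // 3 elements x 2 floats each
+@endcode
+*/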
+
+/** @brief Reads multiple numbers.
+
+The function reads elements from a file node that represents a sequence of scalars.
+@param fs File storage
+@param src The file node (a sequence) to read numbers from
+@param dst Pointer to the destination array
+@param dt Specification of each array element. It has the same format as in cvWriteRawData .
+ */
+CVAPI(void) cvReadRawData( const CvFileStorage* fs, const CvFileNode* src,
+                          void* dst, const char* dt );
+
+/** @brief Writes a file node to another file storage.
+
+The function writes a copy of a file node to file storage. Possible applications of the function are
+merging several file storages into one and conversion between XML, YAML and JSON formats.
+@param fs Destination file storage
+@param new_node_name New name of the file node in the destination file storage. To keep the
+existing name, use cvGetFileNodeName
+@param node The written node
+@param embed If the written node is a collection and this parameter is not zero, no extra level of
+hierarchy is created. Instead, all the elements of node are written into the currently written
+structure. Of course, map elements can only be embedded into another map, and sequence elements
+can only be embedded into another sequence.
+ */
+CVAPI(void) cvWriteFileNode( CvFileStorage* fs, const char* new_node_name,
+                            const CvFileNode* node, int embed );
+
+/** @brief Returns the name of a file node.
+
+The function returns the name of a file node or NULL, if the file node does not have a name or if
+node is NULL.
+@param node File node
+ */
+CVAPI(const char*) cvGetFileNodeName( const CvFileNode* node );
+
+/*********************************** Adding own types ***********************************/
+
+/** @brief Registers a new type.
+
+The function registers a new type, which is described by info. The function creates a copy of the
+structure, so the user should delete it after calling the function.
+@param info Type info structure
+ */
+CVAPI(void) cvRegisterType( const CvTypeInfo* info );
+
+/** @brief Unregisters the type.
+
+The function unregisters a type with a specified name. If the name is unknown, it is possible to
+locate the type info by an instance of the type using cvTypeOf or by iterating the type list,
+starting from cvFirstType, and then calling cvUnregisterType(info-\>typeName).
+@param type_name Name of an unregistered type
+ */
+CVAPI(void) cvUnregisterType( const char* type_name );
+
+/** @brief Returns the beginning of a type list.
+
+The function returns the first type in the list of registered types. Navigation through the list can
+be done via the prev and next fields of the CvTypeInfo structure.
+ */
+CVAPI(CvTypeInfo*) cvFirstType(void);
+
+/** @brief Finds a type by its name.
+
+The function finds a registered type by its name. It returns NULL if there is no type with the
+specified name.
+@param type_name Type name
+ */
+CVAPI(CvTypeInfo*) cvFindType( const char* type_name );
+
+/** @brief Returns the type of an object.
+
+The function finds the type of a given object. It iterates through the list of registered types and
+calls the is_instance function/method for every type info structure with that object until one of
+them returns non-zero or until the whole list has been traversed. In the latter case, the function
+returns NULL.
+@param struct_ptr The object pointer
+ */
+CVAPI(CvTypeInfo*) cvTypeOf( const void* struct_ptr );
+
+/** @brief Releases an object.
+
+The function finds the type of a given object and calls release with the double pointer.
+@param struct_ptr Double pointer to the object
+ */
+CVAPI(void) cvRelease( void** struct_ptr );
+
+/** @brief Makes a clone of an object.
+
+The function finds the type of a given object and calls clone with the passed object. Of course, if
+you know the object type, for example, struct_ptr is CvMat\*, it is faster to call the specific
+function, like cvCloneMat.
+@param struct_ptr The object to clone
+ */
+CVAPI(void*) cvClone( const void* struct_ptr );
+
+/** @brief Saves an object to a file.
+
+The function saves an object to a file. It provides a simple interface to cvWrite .
+@param filename File name
+@param struct_ptr Object to save
+@param name Optional object name. If it is NULL, the name will be formed from filename .
+@param comment Optional comment to put in the beginning of the file
+@param attributes Optional attributes passed to cvWrite
+ */
+CVAPI(void) cvSave( const char* filename, const void* struct_ptr,
+                    const char* name CV_DEFAULT(NULL),
+                    const char* comment CV_DEFAULT(NULL),
+                    CvAttrList attributes CV_DEFAULT(cvAttrList()));
+
+/** @brief Loads an object from a file.
+
+The function loads an object from a file. It basically reads the specified file, finds the first
+top-level node and calls cvRead for that node. If the file node does not have type information or
+the type information cannot be found by the type name, the function returns NULL. After the object
+is loaded, the file storage is closed and all the temporary buffers are deleted. Thus, to load a
+dynamic structure, such as a sequence, contour, or graph, one should pass a valid memory storage
+destination to the function.
+@param filename File name
+@param memstorage Memory storage for dynamic structures, such as CvSeq or CvGraph . It is not used
+for matrices or images.
+@param name Optional object name. If it is NULL, the first top-level object in the storage will be
+loaded.
+@param real_name Optional output parameter that will contain the name of the loaded object
+(useful if name=NULL )
+ */
+CVAPI(void*) cvLoad( const char* filename,
+                     CvMemStorage* memstorage CV_DEFAULT(NULL),
+                     const char* name CV_DEFAULT(NULL),
+                     const char** real_name CV_DEFAULT(NULL) );
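+
+/* A sketch of a save/load round trip for a matrix (the file name is arbitrary):
+@code
+    CvMat* A = cvCreateMat( 3, 3, CV_32F );
+    cvSetIdentity( A );
+    cvSave( "A.xml", A );
+    CvMat* B = (CvMat*)cvLoad( "A.xml" );
+    cvReleaseMat( &B );
+    cvReleaseMat( &A );
+@endcode
+*/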
+
+/*********************************** Measuring Execution Time ***************************/
+
+/** helper functions for RNG initialization and accurate time measurement:
+   they use the internal clock counter on x86 */
+CVAPI(int64)  cvGetTickCount( void );
+CVAPI(double) cvGetTickFrequency( void );
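+
+/* A sketch of measuring elapsed time in milliseconds; note that in this C API
+   cvGetTickFrequency() returns the number of ticks per microsecond:
+@code
+    int64 t0 = cvGetTickCount();
+    // ... code to measure ...
+    double ms = (double)(cvGetTickCount() - t0) / (cvGetTickFrequency() * 1000.);
+@endcode
+*/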
+
+/*********************************** CPU capabilities ***********************************/
+
+CVAPI(int) cvCheckHardwareSupport(int feature);
+
+/*********************************** Multi-Threading ************************************/
+
+/** retrieve/set the number of threads used in OpenMP implementations */
+CVAPI(int)  cvGetNumThreads( void );
+CVAPI(void) cvSetNumThreads( int threads CV_DEFAULT(0) );
+/** get index of the thread being executed */
+CVAPI(int)  cvGetThreadNum( void );
+
+
+/********************************** Error Handling **************************************/
+
+/** Get current OpenCV error status */
+CVAPI(int) cvGetErrStatus( void );
+
+/** Sets error status silently */
+CVAPI(void) cvSetErrStatus( int status );
+
+#define CV_ErrModeLeaf     0   /* Print error and exit program */
+#define CV_ErrModeParent   1   /* Print error and continue */
+#define CV_ErrModeSilent   2   /* Don't print and continue */
+
+/** Retrieves current error processing mode */
+CVAPI(int)  cvGetErrMode( void );
+
+/** Sets error processing mode, returns previously used mode */
+CVAPI(int) cvSetErrMode( int mode );
+
+/** Sets error status and performs some additional actions (displaying message box,
+ writing message to stderr, terminating application etc.)
+ depending on the current error mode */
+CVAPI(void) cvError( int status, const char* func_name,
+                    const char* err_msg, const char* file_name, int line );
+
+/** Retrieves textual description of the error given its code */
+CVAPI(const char*) cvErrorStr( int status );
+
+/** Retrieves detailed information about the last error that occurred */
+CVAPI(int) cvGetErrInfo( const char** errcode_desc, const char** description,
+                        const char** filename, int* line );
+
+/** Maps IPP error codes to the counterparts from OpenCV */
+CVAPI(int) cvErrorFromIppStatus( int ipp_status );
+
+typedef int (CV_CDECL *CvErrorCallback)( int status, const char* func_name,
+                                        const char* err_msg, const char* file_name, int line, void* userdata );
+
+/** Assigns a new error-handling function */
+CVAPI(CvErrorCallback) cvRedirectError( CvErrorCallback error_handler,
+                                       void* userdata CV_DEFAULT(NULL),
+                                       void** prev_userdata CV_DEFAULT(NULL) );
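+
+/* A sketch of a custom error handler that logs to stderr; the handler name is
+   hypothetical, and returning 0 follows the convention of the built-in handlers:
+@code
+    static int CV_CDECL myErrorHandler( int status, const char* func_name,
+                                        const char* err_msg, const char* file_name,
+                                        int line, void* userdata )
+    {
+        fprintf( stderr, "OpenCV error %d (%s) in %s, %s:%d\n",
+                 status, err_msg, func_name, file_name, line );
+        return 0;
+    }
+    ...
+    cvRedirectError( myErrorHandler );
+@endcode
+*/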
+
+/** Output nothing */
+CVAPI(int) cvNulDevReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+/** Output to console (fprintf(stderr, ...)) */
+CVAPI(int) cvStdErrReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+/** Output to MessageBox (WIN32) */
+CVAPI(int) cvGuiBoxReport( int status, const char* func_name, const char* err_msg,
+                          const char* file_name, int line, void* userdata );
+
+#define OPENCV_ERROR(status,func,context)                           \
+    cvError((status),(func),(context),__FILE__,__LINE__)
+
+#define OPENCV_ASSERT(expr,func,context)                            \
+{                                                                   \
+    if (!(expr))                                                    \
+        {OPENCV_ERROR(CV_StsInternal,(func),(context));}            \
+}
+
+#define OPENCV_CALL( Func )                                         \
+{                                                                   \
+    Func;                                                           \
+}
+
+
+/** CV_FUNCNAME macro defines the cvFuncName constant which is used by the CV_ERROR macro */
+#ifdef CV_NO_FUNC_NAMES
+#define CV_FUNCNAME( Name )
+#define cvFuncName ""
+#else
+#define CV_FUNCNAME( Name )  \
+    static char cvFuncName[] = Name
+#endif
+
+
+/**
+ CV_ERROR macro unconditionally raises error with passed code and message.
+ After raising error, control will be transferred to the exit label.
+ */
+#define CV_ERROR( Code, Msg )                                       \
+{                                                                   \
+    cvError( (Code), cvFuncName, Msg, __FILE__, __LINE__ );        \
+    __CV_EXIT__;                                                   \
+}
+
+/**
+ CV_CHECK macro checks error status after CV (or IPL)
+ function call. If error detected, control will be transferred to the exit
+ label.
+ */
+#define CV_CHECK()                                                  \
+{                                                                   \
+    if( cvGetErrStatus() < 0 )                                      \
+        CV_ERROR( CV_StsBackTrace, "Inner function failed." );      \
+}
+
+
+/**
+ CV_CALL macro calls a CV (or IPL) function, checks the error status and
+ signals an error if the function failed. Useful in the "parent node"
+ error processing mode.
+ */
+#define CV_CALL( Func )                                             \
+{                                                                   \
+    Func;                                                           \
+    CV_CHECK();                                                     \
+}
+
+
+/** Runtime assertion macro */
+#define CV_ASSERT( Condition )                                          \
+{                                                                       \
+    if( !(Condition) )                                                  \
+        CV_ERROR( CV_StsInternal, "Assertion: " #Condition " failed" ); \
+}
+
+#define __CV_BEGIN__       {
+#define __CV_END__         goto exit; exit: ; }
+#define __CV_EXIT__        goto exit
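+
+/* A sketch of the classic error-handling pattern built from these macros
+   (the function itself is hypothetical):
+@code
+    void my_function( CvArr* arr )
+    {
+        CV_FUNCNAME( "my_function" );
+        __CV_BEGIN__
+        if( !arr )
+            CV_ERROR( CV_StsNullPtr, "NULL array pointer" );
+        // ... normal processing ...
+        __CV_END__
+    }
+@endcode
+*/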
+
+/** @} core_c */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#ifdef __cplusplus
+
+//! @addtogroup core_c_glue
+//! @{
+
+//! class for automatic module/RTTI data registration/unregistration
+struct CV_EXPORTS CvType
+{
+    CvType( const char* type_name,
+            CvIsInstanceFunc is_instance, CvReleaseFunc release=0,
+            CvReadFunc read=0, CvWriteFunc write=0, CvCloneFunc clone=0 );
+    ~CvType();
+    CvTypeInfo* info;
+
+    static CvTypeInfo* first;
+    static CvTypeInfo* last;
+};
+
+//! @}
+
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_c_glue
+//! @{
+
+/////////////////////////////////////////// glue ///////////////////////////////////////////
+
+//! converts array (CvMat or IplImage) to cv::Mat
+CV_EXPORTS Mat cvarrToMat(const CvArr* arr, bool copyData=false,
+                          bool allowND=true, int coiMode=0,
+                          AutoBuffer<double>* buf=0);
+
+static inline Mat cvarrToMatND(const CvArr* arr, bool copyData=false, int coiMode=0)
+{
+    return cvarrToMat(arr, copyData, true, coiMode);
+}
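+
+/* A sketch of bridging the C and C++ APIs with cvarrToMat; by default no data is copied:
+@code
+    CvMat* cmat = cvCreateMat( 3, 3, CV_32F );
+    cvSetIdentity( cmat );
+    cv::Mat view = cv::cvarrToMat( cmat );        // header over the same data
+    cv::Mat copy = cv::cvarrToMat( cmat, true );  // deep copy, owns its own data
+    cvReleaseMat( &cmat );                        // `view` becomes invalid, `copy` stays valid
+@endcode
+*/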
+
+
+//! extracts Channel of Interest from CvMat or IplImage and makes cv::Mat out of it.
+CV_EXPORTS void extractImageCOI(const CvArr* arr, OutputArray coiimg, int coi=-1);
+//! inserts single-channel cv::Mat into a multi-channel CvMat or IplImage
+CV_EXPORTS void insertImageCOI(InputArray coiimg, CvArr* arr, int coi=-1);
+
+
+
+////// specialized implementations of DefaultDeleter::operator() for classic OpenCV types //////
+
+template<> CV_EXPORTS void DefaultDeleter<CvMat>::operator ()(CvMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<IplImage>::operator ()(IplImage* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const;
+template<> CV_EXPORTS void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const;
+
+////////////// convenient wrappers for operating old-style dynamic structures //////////////
+
+template<typename _Tp> class SeqIterator;
+
+typedef Ptr<CvMemStorage> MemStorage;
+
+/*!
+ Template Sequence Class derived from CvSeq
+
+ The class provides more convenient access to sequence elements,
+ STL-style operations and iterators.
+
+ \note The class is targeted for simple data types,
+    i.e. no constructors or destructors
+    are called for the sequence elements.
+*/
+template<typename _Tp> class Seq
+{
+public:
+    typedef SeqIterator<_Tp> iterator;
+    typedef SeqIterator<_Tp> const_iterator;
+
+    //! the default constructor
+    Seq();
+    //! the constructor for wrapping CvSeq structure. The real element type in CvSeq should match _Tp.
+    Seq(const CvSeq* seq);
+    //! creates the empty sequence that resides in the specified storage
+    Seq(MemStorage& storage, int headerSize = sizeof(CvSeq));
+    //! returns read-write reference to the specified element
+    _Tp& operator [](int idx);
+    //! returns read-only reference to the specified element
+    const _Tp& operator[](int idx) const;
+    //! returns iterator pointing to the beginning of the sequence
+    SeqIterator<_Tp> begin() const;
+    //! returns iterator pointing to the element following the last sequence element
+    SeqIterator<_Tp> end() const;
+    //! returns the number of elements in the sequence
+    size_t size() const;
+    //! returns the type of sequence elements (CV_8UC1 ... CV_64FC(CV_CN_MAX) ...)
+    int type() const;
+    //! returns the depth of sequence elements (CV_8U ... CV_64F)
+    int depth() const;
+    //! returns the number of channels in each sequence element
+    int channels() const;
+    //! returns the size of each sequence element
+    size_t elemSize() const;
+    //! returns index of the specified sequence element
+    size_t index(const _Tp& elem) const;
+    //! appends the specified element to the end of the sequence
+    void push_back(const _Tp& elem);
+    //! appends the specified element to the front of the sequence
+    void push_front(const _Tp& elem);
+    //! appends zero or more elements to the end of the sequence
+    void push_back(const _Tp* elems, size_t count);
+    //! appends zero or more elements to the front of the sequence
+    void push_front(const _Tp* elems, size_t count);
+    //! inserts the specified element to the specified position
+    void insert(int idx, const _Tp& elem);
+    //! inserts zero or more elements to the specified position
+    void insert(int idx, const _Tp* elems, size_t count);
+    //! removes element at the specified position
+    void remove(int idx);
+    //! removes the specified subsequence
+    void remove(const Range& r);
+
+    //! returns reference to the first sequence element
+    _Tp& front();
+    //! returns read-only reference to the first sequence element
+    const _Tp& front() const;
+    //! returns reference to the last sequence element
+    _Tp& back();
+    //! returns read-only reference to the last sequence element
+    const _Tp& back() const;
+    //! returns true iff the sequence contains no elements
+    bool empty() const;
+
+    //! removes all the elements from the sequence
+    void clear();
+    //! removes the first element from the sequence
+    void pop_front();
+    //! removes the last element from the sequence
+    void pop_back();
+    //! removes zero or more elements from the beginning of the sequence
+    void pop_front(_Tp* elems, size_t count);
+    //! removes zero or more elements from the end of the sequence
+    void pop_back(_Tp* elems, size_t count);
+
+    //! copies the whole sequence or the sequence slice to the specified vector
+    void copyTo(std::vector<_Tp>& vec, const Range& range=Range::all()) const;
+    //! returns the vector containing all the sequence elements
+    operator std::vector<_Tp>() const;
+
+    CvSeq* seq;
+};
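+
+/* A sketch of using Seq<int> on top of an old-style memory storage:
+@code
+    cv::MemStorage storage( cvCreateMemStorage(0) );
+    cv::Seq<int> s( storage );
+    s.push_back( 1 );
+    s.push_back( 2 );
+    std::vector<int> v = s;  // copies all the elements into a vector
+@endcode
+*/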
+
+
+/*!
+ STL-style Sequence Iterator inherited from the CvSeqReader structure
+*/
+template<typename _Tp> class SeqIterator : public CvSeqReader
+{
+public:
+    //! the default constructor
+    SeqIterator();
+    //! the constructor setting the iterator to the beginning or to the end of the sequence
+    SeqIterator(const Seq<_Tp>& seq, bool seekEnd=false);
+    //! positions the iterator within the sequence
+    void seek(size_t pos);
+    //! reports the current iterator position
+    size_t tell() const;
+    //! returns reference to the current sequence element
+    _Tp& operator *();
+    //! returns read-only reference to the current sequence element
+    const _Tp& operator *() const;
+    //! moves iterator to the next sequence element
+    SeqIterator& operator ++();
+    //! moves iterator to the next sequence element
+    SeqIterator operator ++(int) const;
+    //! moves iterator to the previous sequence element
+    SeqIterator& operator --();
+    //! moves iterator to the previous sequence element
+    SeqIterator operator --(int) const;
+
+    //! moves iterator forward by the specified offset (possibly negative)
+    SeqIterator& operator +=(int);
+    //! moves iterator backward by the specified offset (possibly negative)
+    SeqIterator& operator -=(int);
+
+    // this is the index of the current element modulo seq->total*2
+    // (to distinguish between 0 and seq->total)
+    int index;
+};
+
+
+
+// bridge C++ => C Seq API
+CV_EXPORTS schar*  seqPush( CvSeq* seq, const void* element=0);
+CV_EXPORTS schar*  seqPushFront( CvSeq* seq, const void* element=0);
+CV_EXPORTS void  seqPop( CvSeq* seq, void* element=0);
+CV_EXPORTS void  seqPopFront( CvSeq* seq, void* element=0);
+CV_EXPORTS void  seqPopMulti( CvSeq* seq, void* elements,
+                              int count, int in_front=0 );
+CV_EXPORTS void  seqRemove( CvSeq* seq, int index );
+CV_EXPORTS void  clearSeq( CvSeq* seq );
+CV_EXPORTS schar*  getSeqElem( const CvSeq* seq, int index );
+CV_EXPORTS void  seqRemoveSlice( CvSeq* seq, CvSlice slice );
+CV_EXPORTS void  seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr );
+
+template<typename _Tp> inline Seq<_Tp>::Seq() : seq(0) {}
+template<typename _Tp> inline Seq<_Tp>::Seq( const CvSeq* _seq ) : seq((CvSeq*)_seq)
+{
+    CV_Assert(!_seq || _seq->elem_size == sizeof(_Tp));
+}
+
+template<typename _Tp> inline Seq<_Tp>::Seq( MemStorage& storage,
+                                             int headerSize )
+{
+    CV_Assert(headerSize >= (int)sizeof(CvSeq));
+    seq = cvCreateSeq(DataType<_Tp>::type, headerSize, sizeof(_Tp), storage);
+}
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::operator [](int idx)
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::operator [](int idx) const
+{ return *(_Tp*)getSeqElem(seq, idx); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::begin() const
+{ return SeqIterator<_Tp>(*this); }
+
+template<typename _Tp> inline SeqIterator<_Tp> Seq<_Tp>::end() const
+{ return SeqIterator<_Tp>(*this, true); }
+
+template<typename _Tp> inline size_t Seq<_Tp>::size() const
+{ return seq ? seq->total : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::type() const
+{ return seq ? CV_MAT_TYPE(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::depth() const
+{ return seq ? CV_MAT_DEPTH(seq->flags) : 0; }
+
+template<typename _Tp> inline int Seq<_Tp>::channels() const
+{ return seq ? CV_MAT_CN(seq->flags) : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::elemSize() const
+{ return seq ? seq->elem_size : 0; }
+
+template<typename _Tp> inline size_t Seq<_Tp>::index(const _Tp& elem) const
+{ return cvSeqElemIdx(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp& elem)
+{ cvSeqPush(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp& elem)
+{ cvSeqPushFront(seq, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_back(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::push_front(const _Tp* elem, size_t count)
+{ cvSeqPushMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::back()
+{ return *(_Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::back() const
+{ return *(const _Tp*)getSeqElem(seq, -1); }
+
+template<typename _Tp> inline _Tp& Seq<_Tp>::front()
+{ return *(_Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline const _Tp& Seq<_Tp>::front() const
+{ return *(const _Tp*)getSeqElem(seq, 0); }
+
+template<typename _Tp> inline bool Seq<_Tp>::empty() const
+{ return !seq || seq->total == 0; }
+
+template<typename _Tp> inline void Seq<_Tp>::clear()
+{ if(seq) clearSeq(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back()
+{ seqPop(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front()
+{ seqPopFront(seq); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_back(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 0); }
+
+template<typename _Tp> inline void Seq<_Tp>::pop_front(_Tp* elem, size_t count)
+{ seqPopMulti(seq, elem, (int)count, 1); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp& elem)
+{ seqInsert(seq, idx, &elem); }
+
+template<typename _Tp> inline void Seq<_Tp>::insert(int idx, const _Tp* elems, size_t count)
+{
+    CvMat m = cvMat(1, count, DataType<_Tp>::type, elems);
+    seqInsertSlice(seq, idx, &m);
+}
+
+template<typename _Tp> inline void Seq<_Tp>::remove(int idx)
+{ seqRemove(seq, idx); }
+
+template<typename _Tp> inline void Seq<_Tp>::remove(const Range& r)
+{ seqRemoveSlice(seq, cvSlice(r.start, r.end)); }
+
+template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const Range& range) const
+{
+    size_t len = !seq ? 0 : range == Range::all() ? seq->total : range.end - range.start;
+    vec.resize(len);
+    if( seq && len )
+        cvCvtSeqToArray(seq, &vec[0], range);
+}
+
+template<typename _Tp> inline Seq<_Tp>::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> vec;
+    copyTo(vec);
+    return vec;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator()
+{ memset(this, 0, sizeof(*this)); }
+
+template<typename _Tp> inline SeqIterator<_Tp>::SeqIterator(const Seq<_Tp>& _seq, bool seekEnd)
+{
+    cvStartReadSeq(_seq.seq, this);
+    index = seekEnd ? _seq.seq->total : 0;
+}
+
+template<typename _Tp> inline void SeqIterator<_Tp>::seek(size_t pos)
+{
+    cvSetSeqReaderPos(this, (int)pos, false);
+    index = pos;
+}
+
+template<typename _Tp> inline size_t SeqIterator<_Tp>::tell() const
+{ return index; }
+
+template<typename _Tp> inline _Tp& SeqIterator<_Tp>::operator *()
+{ return *(_Tp*)ptr; }
+
+template<typename _Tp> inline const _Tp& SeqIterator<_Tp>::operator *() const
+{ return *(const _Tp*)ptr; }
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator ++()
+{
+    CV_NEXT_SEQ_ELEM(sizeof(_Tp), *this);
+    if( ++index >= seq->total*2 )
+        index = 0;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator ++(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    ++*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator --()
+{
+    CV_PREV_SEQ_ELEM(sizeof(_Tp), *this);
+    if( --index < 0 )
+        index = seq->total*2-1;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp> SeqIterator<_Tp>::operator --(int) const
+{
+    SeqIterator<_Tp> it = *this;
+    --*this;
+    return it;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator +=(int delta)
+{
+    cvSetSeqReaderPos(this, delta, 1);
+    index += delta;
+    int n = seq->total*2;
+    if( index < 0 )
+        index += n;
+    if( index >= n )
+        index -= n;
+    return *this;
+}
+
+template<typename _Tp> inline SeqIterator<_Tp>& SeqIterator<_Tp>::operator -=(int delta)
+{
+    return (*this += -delta);
+}
+
+template<typename _Tp> inline ptrdiff_t operator - (const SeqIterator<_Tp>& a,
+                                                    const SeqIterator<_Tp>& b)
+{
+    ptrdiff_t delta = a.index - b.index, n = a.seq->total;
+    if( delta > n || delta < -n )
+        delta += delta < 0 ? n : -n;
+    return delta;
+}
+
+template<typename _Tp> inline bool operator == (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return a.seq == b.seq && a.index == b.index;
+}
+
+template<typename _Tp> inline bool operator != (const SeqIterator<_Tp>& a,
+                                                const SeqIterator<_Tp>& b)
+{
+    return !(a == b);
+}
+
+//! @}
+
+} // cv
+
+#endif
+
+#endif

+ 1049 - 0
ThresholdTools/include/opencv2/core/cuda.hpp

@@ -0,0 +1,1049 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_HPP
+#define OPENCV_CORE_CUDA_HPP
+
+#ifndef __cplusplus
+#  error cuda.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/cuda_types.hpp"
+
+/**
+  @defgroup cuda CUDA-accelerated Computer Vision
+  @{
+    @defgroup cudacore Core part
+    @{
+      @defgroup cudacore_init Initialization and Information
+      @defgroup cudacore_struct Data Structures
+    @}
+  @}
+ */
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+/** @brief Base storage class for GPU memory with reference counting.
+
+Its interface matches the Mat interface with the following limitations:
+
+-   no arbitrary dimensions support (only 2D)
+-   no functions that return references to their data (because references on GPU are not valid for
+    CPU)
+-   no expression templates technique support
+
+Beware that the latter limitation may lead to overloaded matrix operators that cause memory
+allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
+passed directly to the kernel.
+
+@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
+aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
+
+@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
+on their destructors. The destruction order of such variables and of the CUDA context is undefined.
+The GPU memory release function returns an error if the CUDA context has been destroyed beforehand.
+
+Some member functions are described as a "Blocking Call" while some are described as a
+"Non-Blocking Call". Blocking functions are synchronous to host. It is guaranteed that the GPU
+operation is finished when the function returns. However, non-blocking functions are asynchronous to
+host. Those functions may return even if the GPU operation is not finished.
+
+Compared to their blocking counterpart, non-blocking functions accept Stream as an additional
+argument. If a non-default stream is passed, the GPU operation may overlap with operations in other
+streams.
+
+@sa Mat
+ */
+class CV_EXPORTS GpuMat
+{
+public:
+    class CV_EXPORTS Allocator
+    {
+    public:
+        virtual ~Allocator() {}
+
+        // allocator must fill data, step and refcount fields
+        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+        virtual void free(GpuMat* mat) = 0;
+    };
+
+    //! default allocator
+    static Allocator* defaultAllocator();
+    static void setDefaultAllocator(Allocator* allocator);
+
+    //! default constructor
+    explicit GpuMat(Allocator* allocator = defaultAllocator());
+
+    //! constructs GpuMat of the specified size and type
+    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
+
+    //! constructs GpuMat and fills it with the specified value s
+    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
+
+    //! copy constructor
+    GpuMat(const GpuMat& m);
+
+    //! constructor for GpuMat headers pointing to user-allocated data
+    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
+    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
+
+    //! creates a GpuMat header for a part of the bigger matrix
+    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
+    GpuMat(const GpuMat& m, Rect roi);
+
+    //! builds GpuMat from host memory (Blocking call)
+    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
+
+    //! destructor - calls release()
+    ~GpuMat();
+
+    //! assignment operators
+    GpuMat& operator =(const GpuMat& m);
+
+    //! allocates new GpuMat data unless the GpuMat already has specified size and type
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);
+
+    //! decreases the reference counter; deallocates the data when the reference counter reaches 0
+    void release();
+
+    //! swaps with other smart pointer
+    void swap(GpuMat& mat);
+
+    /** @brief Performs data upload to GpuMat (Blocking call)
+
+    This function copies data from host memory to device memory. Since this is a blocking call, it
+    is guaranteed that the copy operation is finished when this function returns.
+    */
+    void upload(InputArray arr);
+
+    /** @brief Performs data upload to GpuMat (Non-Blocking call)
+
+    This function copies data from host memory to device memory. Since this is a non-blocking call,
+    it may return even if the copy operation is not finished.
+
+    The copy operation may be overlapped with operations in other non-default streams if \p stream is
+    not the default stream and \p arr is HostMem allocated with the HostMem::PAGE_LOCKED option.
+    */
+    void upload(InputArray arr, Stream& stream);
+
+    /** @brief Performs data download from GpuMat (Blocking call)
+
+    This function copies data from device memory to host memory. Since this is a blocking call, it
+    is guaranteed that the copy operation is finished when this function returns.
+    */
+    void download(OutputArray dst) const;
+
+    /** @brief Performs data download from GpuMat (Non-Blocking call)
+
+    This function copies data from device memory to host memory. Since this is a non-blocking call,
+    it may return even if the copy operation is not finished.
+
+    The copy operation may be overlapped with operations in other non-default streams if \p stream is
+    not the default stream and \p dst is HostMem allocated with the HostMem::PAGE_LOCKED option.
+    */
+    void download(OutputArray dst, Stream& stream) const;
+
+    //! returns deep copy of the GpuMat, i.e. the data is copied
+    GpuMat clone() const;
+
+    //! copies the GpuMat content to device memory (Blocking call)
+    void copyTo(OutputArray dst) const;
+
+    //! copies the GpuMat content to device memory (Non-Blocking call)
+    void copyTo(OutputArray dst, Stream& stream) const;
+
+    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
+    void copyTo(OutputArray dst, InputArray mask) const;
+
+    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
+    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
+
+    //! sets all of the GpuMat elements to s (Blocking call)
+    GpuMat& setTo(Scalar s);
+
+    //! sets all of the GpuMat elements to s (Non-Blocking call)
+    GpuMat& setTo(Scalar s, Stream& stream);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
+    GpuMat& setTo(Scalar s, InputArray mask);
+
+    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
+    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
+
+    //! converts GpuMat to another datatype (Blocking call)
+    void convertTo(OutputArray dst, int rtype) const;
+
+    //! converts GpuMat to another datatype (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
+
+    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
+    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
+
+    void assignTo(GpuMat& m, int type=-1) const;
+
+    //! returns pointer to y-th row
+    uchar* ptr(int y = 0);
+    const uchar* ptr(int y = 0) const;
+
+    //! template version of the above method
+    template<typename _Tp> _Tp* ptr(int y = 0);
+    template<typename _Tp> const _Tp* ptr(int y = 0) const;
+
+    template <typename _Tp> operator PtrStepSz<_Tp>() const;
+    template <typename _Tp> operator PtrStep<_Tp>() const;
+
+    //! returns a new GpuMat header for the specified row
+    GpuMat row(int y) const;
+
+    //! returns a new GpuMat header for the specified column
+    GpuMat col(int x) const;
+
+    //! ... for the specified row span
+    GpuMat rowRange(int startrow, int endrow) const;
+    GpuMat rowRange(Range r) const;
+
+    //! ... for the specified column span
+    GpuMat colRange(int startcol, int endcol) const;
+    GpuMat colRange(Range r) const;
+
+    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
+    GpuMat operator ()(Range rowRange, Range colRange) const;
+    GpuMat operator ()(Rect roi) const;
+
+    //! creates alternative GpuMat header for the same data, with different
+    //! number of channels and/or different number of rows
+    GpuMat reshape(int cn, int rows = 0) const;
+
+    //! locates GpuMat header within a parent GpuMat
+    void locateROI(Size& wholeSize, Point& ofs) const;
+
+    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
+    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
+
+    //! returns true iff the GpuMat data is continuous
+    //! (i.e. when there are no gaps between successive rows)
+    bool isContinuous() const;
+
+    //! returns element size in bytes
+    size_t elemSize() const;
+
+    //! returns the size of element channel in bytes
+    size_t elemSize1() const;
+
+    //! returns element type
+    int type() const;
+
+    //! returns element depth
+    int depth() const;
+
+    //! returns number of channels
+    int channels() const;
+
+    //! returns step/elemSize1()
+    size_t step1() const;
+
+    //! returns GpuMat size : width == number of columns, height == number of rows
+    Size size() const;
+
+    //! returns true if GpuMat data is NULL
+    bool empty() const;
+
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
+    /*! includes several bit-fields:
+    - the magic signature
+    - continuity flag
+    - depth
+    - number of channels
+    */
+    int flags;
+
+    //! the number of rows and columns
+    int rows, cols;
+
+    //! a distance between successive rows in bytes; includes the gap if any
+    size_t step;
+
+    //! pointer to the data
+    uchar* data;
+
+    //! pointer to the reference counter;
+    //! when GpuMat points to user-allocated data, the pointer is NULL
+    int* refcount;
+
+    //! helper fields used in locateROI and adjustROI
+    uchar* datastart;
+    const uchar* dataend;
+
+    //! allocator
+    Allocator* allocator;
+};
+
+/** @brief Creates a continuous matrix.
+
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param arr Destination matrix. This parameter is reallocated only if it does not already have the
+proper type and area ( \f$\texttt{rows} \times \texttt{cols}\f$ ).
+
+A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
+end of each row.
+ */
+CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
+
+/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
+
+@param rows Minimum desired number of rows.
+@param cols Minimum desired number of columns.
+@param type Desired matrix type.
+@param arr Destination matrix.
+
+The function does not reallocate memory if the matrix has proper attributes already.
+ */
+CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+
+/** @brief BufferPool for use with CUDA streams
+
+BufferPool utilizes the Stream's allocator to create new buffers for GpuMat objects. It is
+only useful when enabled with #setBufferPoolUsage.
+
+@code
+    setBufferPoolUsage(true);
+@endcode
+
+@note #setBufferPoolUsage must be called \em before any Stream declaration.
+
+Users may specify a custom allocator for a Stream and may implement their own stream-based
+functions utilizing the same underlying GPU memory management.
+
+If a custom allocator is not specified, BufferPool utilizes StackAllocator by
+default. StackAllocator allocates a chunk of GPU device memory beforehand,
+and when a GpuMat is declared later on, it is given the pre-allocated memory.
+This strategy reduces the number of calls to memory allocation APIs
+such as cudaMalloc or cudaMallocPitch.
+
+Below is an example that utilizes BufferPool with StackAllocator:
+
+@code
+    #include <opencv2/opencv.hpp>
+
+    using namespace cv;
+    using namespace cv::cuda;
+
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2;                                // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
+        GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full
+
+        GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
+        GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB
+
+        cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
+        cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
+    }
+@endcode
+
+If we allocate another GpuMat on pool1 in the above example, it will be carried out by
+the DefaultAllocator since the stack for pool1 is full.
+
+@code
+    GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1);   // Stack for pool1 is full, memory is allocated with DefaultAllocator
+@endcode
+
+If a third stream is declared in the above example, allocating with #getBuffer
+within that stream will also be carried out by the DefaultAllocator because we've run out of
+stacks.
+
+@code
+    Stream stream3;                                         // Only 2 stacks were allocated, we've run out of stacks
+    BufferPool pool3(stream3);
+    GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1);   // Memory is allocated with DefaultAllocator
+@endcode
+
+@warning When utilizing StackAllocator, deallocation order is important.
+
+Just like a stack, deallocation must be done in LIFO order. Below is an example of
+erroneous usage that violates the LIFO rule. If OpenCV is compiled in Debug mode, this
+sample code will trigger a CV_Assert error.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        Stream stream;                                          // A default size (10 MB) stack is allocated to this stream
+        BufferPool pool(stream);
+
+        GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat1 (1MB)
+        GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1);      // Allocate mat2 (1MB)
+
+        mat1.release();                                         // erroneous usage : mat2 must be deallocated before mat1
+    }
+@endcode
+
+Since C++ local variables are destroyed in the reverse order of construction,
+the code sample below satisfies the LIFO rule. Local GpuMat objects are deallocated
+and the corresponding memory is automatically returned to the pool for later use.
+
+@code
+    int main()
+    {
+        setBufferPoolUsage(true);                               // Tell OpenCV that we are going to utilize BufferPool
+        setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2);  // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
+
+        Stream stream1, stream2;                                // Each stream uses 1 stack
+        BufferPool pool1(stream1), pool2(stream2);
+
+        for (int i = 0; i < 10; i++)
+        {
+            GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1);   // 16MB
+            GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3);   // 48MB, pool1 is now full
+
+            GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1);   // 1MB
+            GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3);   // 3MB
+
+            d_src1.setTo(Scalar(i), stream1);
+            d_src2.setTo(Scalar(i), stream2);
+
+            cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
+            cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
+                                                                    // The order of destruction of the local variables is:
+                                                                    //   d_dst2 => d_src2 => d_dst1 => d_src1
+                                                                    // LIFO rule is satisfied, this code runs without error
+        }
+    }
+@endcode
+ */
+class CV_EXPORTS BufferPool
+{
+public:
+
+    //! Gets the BufferPool for the given stream.
+    explicit BufferPool(Stream& stream);
+
+    //! Allocates a new GpuMat of given size and type.
+    GpuMat getBuffer(int rows, int cols, int type);
+
+    //! Allocates a new GpuMat of given size and type.
+    GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    //! Returns the allocator associated with the stream.
+    Ptr<GpuMat::Allocator> getAllocator() const { return allocator_; }
+
+private:
+    Ptr<GpuMat::Allocator> allocator_;
+};
+
+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+/** @brief Reference-counted class that wraps the special memory type allocation functions from CUDA.
+
+Its interface is also Mat-like but with additional memory type parameters.
+
+-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
+    uploading/downloading data from/to GPU.
+-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
+    address space, if supported.
+-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
+    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
+    utilization.
+
+@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
+Pinned Memory APIs* document or *CUDA C Programming Guide*.
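+
+A minimal sketch of the typical PAGE_LOCKED use (hypothetical image size):
+@code
+    cv::cuda::HostMem h_buf(1080, 1920, CV_8UC3);   // PAGE_LOCKED by default
+    cv::Mat h_header = h_buf.createMatHeader();     // fill via the regular Mat API
+    cv::cuda::GpuMat d_img;
+    cv::cuda::Stream stream;
+    d_img.upload(h_buf, stream);                    // truly asynchronous upload
+@endcode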
+ */
+class CV_EXPORTS HostMem
+{
+public:
+    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
+
+    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
+
+    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
+
+    HostMem(const HostMem& m);
+
+    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
+    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
+
+    //! creates from host memory, copying the data
+    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
+
+    ~HostMem();
+
+    HostMem& operator =(const HostMem& m);
+
+    //! swaps with other smart pointer
+    void swap(HostMem& b);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    HostMem clone() const;
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    void create(int rows, int cols, int type);
+    void create(Size size, int type);
+
+    //! creates alternative HostMem header for the same data, with different
+    //! number of channels and/or different number of rows
+    HostMem reshape(int cn, int rows = 0) const;
+
+    //! decrements the reference counter and releases the memory if needed.
+    void release();
+
+    //! returns a Mat header for the HostMem data with reference counting disabled.
+    Mat createMatHeader() const;
+
+    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
+    for it.
+
+    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
+    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
+    eliminates an extra copy.
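+
+    A minimal sketch (assumes the device reports canMapHostMemory() == true):
+    @code
+        cv::cuda::HostMem shared(512, 512, CV_8UC1, cv::cuda::HostMem::SHARED);
+        cv::cuda::GpuMat d_view = shared.createGpuMatHeader(); // same memory, no copy
+    @endcode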
+     */
+    GpuMat createGpuMatHeader() const;
+
+    // Please see cv::Mat for descriptions
+    bool isContinuous() const;
+    size_t elemSize() const;
+    size_t elemSize1() const;
+    int type() const;
+    int depth() const;
+    int channels() const;
+    size_t step1() const;
+    Size size() const;
+    bool empty() const;
+
+    // Please see cv::Mat for descriptions
+    int flags;
+    int rows, cols;
+    size_t step;
+
+    uchar* data;
+    int* refcount;
+
+    uchar* datastart;
+    const uchar* dataend;
+
+    AllocType alloc_type;
+};
+
+/** @brief Page-locks the memory of a matrix and maps it for the device(s).
+
+@param m Input matrix.
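+
+A minimal sketch:
+@code
+    cv::Mat frame(480, 640, CV_8UC3);
+    cv::cuda::registerPageLocked(frame);    // pins the existing allocation
+    // asynchronous uploads from `frame` are now truly non-blocking
+    cv::cuda::unregisterPageLocked(frame);  // unpin before the Mat is destroyed
+@endcode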
+ */
+CV_EXPORTS void registerPageLocked(Mat& m);
+
+/** @brief Unmaps the memory of a matrix and makes it pageable again.
+
+@param m Input matrix.
+ */
+CV_EXPORTS void unregisterPageLocked(Mat& m);
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+/** @brief This class encapsulates a queue of asynchronous calls.
+
+@note Currently, you may face problems if an operation is enqueued twice with different data. Some
+functions use constant GPU memory, and the next call may update that memory before the previous call
+has finished. But calling different operations asynchronously is safe because each operation
+has its own constant buffer. Memory copy/upload/download/set operations on the buffers you hold are
+also safe.
+
+@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
+
+@code
+void thread1()
+{
+    cv::cuda::Stream stream1;
+    cv::cuda::func1(..., stream1);
+}
+
+void thread2()
+{
+    cv::cuda::Stream stream2;
+    cv::cuda::func2(..., stream2);
+}
+@endcode
+
+@note By default, all CUDA routines are launched in the Stream::Null() object if no stream is specified by the user.
+In a multi-threaded environment, stream objects must be passed explicitly (see the previous note).
+ */
+class CV_EXPORTS Stream
+{
+    typedef void (Stream::*bool_type)() const;
+    void this_type_does_not_support_comparisons() const {}
+
+public:
+    typedef void (*StreamCallback)(int status, void* userData);
+
+    //! creates a new asynchronous stream
+    Stream();
+
+    //! creates a new asynchronous stream with custom allocator
+    Stream(const Ptr<GpuMat::Allocator>& allocator);
+
+    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
+    */
+    bool queryIfComplete() const;
+
+    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
+    */
+    void waitForCompletion();
+
+    /** @brief Makes a compute stream wait on an event.
+    */
+    void waitEvent(const Event& event);
+
+    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
+    completed.
+
+    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
+    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
+    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
+    serialized.
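+
+    A minimal sketch (a hypothetical `onStreamDone` callback):
+    @code
+        void onStreamDone(int /*status*/, void* userData)
+        {
+            // runs on a driver thread; no CUDA API calls allowed here
+        }
+        // ...
+        stream.enqueueHostCallback(onStreamDone, 0);
+    @endcode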
+     */
+    void enqueueHostCallback(StreamCallback callback, void* userData);
+
+    //! returns the Stream object for the default CUDA stream
+    static Stream& Null();
+
+    //! returns true if the stream object is not the default stream (!= 0)
+    operator bool_type() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Stream(const Ptr<Impl>& impl);
+
+    friend struct StreamAccessor;
+    friend class BufferPool;
+    friend class DefaultDeviceInitializer;
+};
+
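+/** @brief This class encapsulates a CUDA event (a wrapper around cudaEvent_t).
+
+A minimal timing sketch:
+@code
+    cv::cuda::Stream stream;
+    cv::cuda::Event start, stop;
+    start.record(stream);
+    // enqueue asynchronous work on the stream here
+    stop.record(stream);
+    stop.waitForCompletion();
+    float ms = cv::cuda::Event::elapsedTime(start, stop);
+@endcode
+ */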
+class CV_EXPORTS Event
+{
+public:
+    enum CreateFlags
+    {
+        DEFAULT        = 0x00,  /**< Default event flag */
+        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
+        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
+        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DISABLE_TIMING must be set */
+    };
+
+    explicit Event(CreateFlags flags = DEFAULT);
+
+    //! records an event
+    void record(Stream& stream = Stream::Null());
+
+    //! queries an event's status
+    bool queryIfComplete() const;
+
+    //! waits for an event to complete
+    void waitForCompletion();
+
+    //! computes the elapsed time between events
+    static float elapsedTime(const Event& start, const Event& end);
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    Event(const Ptr<Impl>& impl);
+
+    friend struct EventAccessor;
+};
+
+//! @} cudacore_struct
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+//! @addtogroup cudacore_init
+//! @{
+
+/** @brief Returns the number of installed CUDA-enabled devices.
+
+Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
+this function returns 0. If the CUDA driver is not installed, or is incompatible, this function
+returns -1.
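+
+A minimal device-enumeration sketch:
+@code
+    int n = cv::cuda::getCudaEnabledDeviceCount();
+    // n == 0: OpenCV built without CUDA; n == -1: driver missing or incompatible
+    for (int i = 0; i < n; ++i)
+    {
+        cv::cuda::DeviceInfo info(i);
+        if (info.isCompatible())
+            cv::cuda::setDevice(i);   // select the first usable device
+    }
+@endcode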
+ */
+CV_EXPORTS int getCudaEnabledDeviceCount();
+
+/** @brief Sets a device and initializes it for the current thread.
+
+@param device System index of a CUDA device starting with 0.
+
+If the call of this function is omitted, a default device is initialized at the first CUDA usage.
+ */
+CV_EXPORTS void setDevice(int device);
+
+/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
+ */
+CV_EXPORTS int getDevice();
+
+/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
+process.
+
+Any subsequent API call to this device will reinitialize the device.
+ */
+CV_EXPORTS void resetDevice();
+
+/** @brief Enumeration providing CUDA computing features.
+ */
+enum FeatureSet
+{
+    FEATURE_SET_COMPUTE_10 = 10,
+    FEATURE_SET_COMPUTE_11 = 11,
+    FEATURE_SET_COMPUTE_12 = 12,
+    FEATURE_SET_COMPUTE_13 = 13,
+    FEATURE_SET_COMPUTE_20 = 20,
+    FEATURE_SET_COMPUTE_21 = 21,
+    FEATURE_SET_COMPUTE_30 = 30,
+    FEATURE_SET_COMPUTE_32 = 32,
+    FEATURE_SET_COMPUTE_35 = 35,
+    FEATURE_SET_COMPUTE_50 = 50,
+
+    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
+    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
+    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
+    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
+    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
+};
+
+//! checks whether current device supports the given feature
+CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
+
+/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
+built for.
+
+According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
+capability can always be compiled to binary code of greater or equal compute capability".
+ */
+class CV_EXPORTS TargetArchs
+{
+public:
+    /** @brief The following method checks whether the module was built with the support of the given feature:
+
+    @param feature_set Features to be checked. See cuda::FeatureSet.
+     */
+    static bool builtWith(FeatureSet feature_set);
+
+    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
+    code for the given architecture(s):
+
+    @param major Major compute capability version.
+    @param minor Minor compute capability version.
+     */
+    static bool has(int major, int minor);
+    static bool hasPtx(int major, int minor);
+    static bool hasBin(int major, int minor);
+
+    static bool hasEqualOrLessPtx(int major, int minor);
+    static bool hasEqualOrGreater(int major, int minor);
+    static bool hasEqualOrGreaterPtx(int major, int minor);
+    static bool hasEqualOrGreaterBin(int major, int minor);
+};
+
+/** @brief Class providing functionality for querying the specified GPU properties.
+ */
+class CV_EXPORTS DeviceInfo
+{
+public:
+    //! creates DeviceInfo object for the current GPU
+    DeviceInfo();
+
+    /** @brief The constructors.
+
+    @param device_id System index of the CUDA device starting with 0.
+
+    Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted,
+    it constructs an object for the current device.
+     */
+    DeviceInfo(int device_id);
+
+    /** @brief Returns system index of the CUDA device starting with 0.
+    */
+    int deviceID() const;
+
+    //! ASCII string identifying device
+    const char* name() const;
+
+    //! global memory available on device in bytes
+    size_t totalGlobalMem() const;
+
+    //! shared memory available per block in bytes
+    size_t sharedMemPerBlock() const;
+
+    //! 32-bit registers available per block
+    int regsPerBlock() const;
+
+    //! warp size in threads
+    int warpSize() const;
+
+    //! maximum pitch in bytes allowed by memory copies
+    size_t memPitch() const;
+
+    //! maximum number of threads per block
+    int maxThreadsPerBlock() const;
+
+    //! maximum size of each dimension of a block
+    Vec3i maxThreadsDim() const;
+
+    //! maximum size of each dimension of a grid
+    Vec3i maxGridSize() const;
+
+    //! clock frequency in kilohertz
+    int clockRate() const;
+
+    //! constant memory available on device in bytes
+    size_t totalConstMem() const;
+
+    //! major compute capability
+    int majorVersion() const;
+
+    //! minor compute capability
+    int minorVersion() const;
+
+    //! alignment requirement for textures
+    size_t textureAlignment() const;
+
+    //! pitch alignment requirement for texture references bound to pitched memory
+    size_t texturePitchAlignment() const;
+
+    //! number of multiprocessors on device
+    int multiProcessorCount() const;
+
+    //! specifies whether there is a run-time limit on kernels
+    bool kernelExecTimeoutEnabled() const;
+
+    //! device is integrated as opposed to discrete
+    bool integrated() const;
+
+    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    bool canMapHostMemory() const;
+
+    enum ComputeMode
+    {
+        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
+        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
+        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
+        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
+    };
+
+    //! compute mode
+    ComputeMode computeMode() const;
+
+    //! maximum 1D texture size
+    int maxTexture1D() const;
+
+    //! maximum 1D mipmapped texture size
+    int maxTexture1DMipmap() const;
+
+    //! maximum size for 1D textures bound to linear memory
+    int maxTexture1DLinear() const;
+
+    //! maximum 2D texture dimensions
+    Vec2i maxTexture2D() const;
+
+    //! maximum 2D mipmapped texture dimensions
+    Vec2i maxTexture2DMipmap() const;
+
+    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+    Vec3i maxTexture2DLinear() const;
+
+    //! maximum 2D texture dimensions if texture gather operations have to be performed
+    Vec2i maxTexture2DGather() const;
+
+    //! maximum 3D texture dimensions
+    Vec3i maxTexture3D() const;
+
+    //! maximum Cubemap texture dimensions
+    int maxTextureCubemap() const;
+
+    //! maximum 1D layered texture dimensions
+    Vec2i maxTexture1DLayered() const;
+
+    //! maximum 2D layered texture dimensions
+    Vec3i maxTexture2DLayered() const;
+
+    //! maximum Cubemap layered texture dimensions
+    Vec2i maxTextureCubemapLayered() const;
+
+    //! maximum 1D surface size
+    int maxSurface1D() const;
+
+    //! maximum 2D surface dimensions
+    Vec2i maxSurface2D() const;
+
+    //! maximum 3D surface dimensions
+    Vec3i maxSurface3D() const;
+
+    //! maximum 1D layered surface dimensions
+    Vec2i maxSurface1DLayered() const;
+
+    //! maximum 2D layered surface dimensions
+    Vec3i maxSurface2DLayered() const;
+
+    //! maximum Cubemap surface dimensions
+    int maxSurfaceCubemap() const;
+
+    //! maximum Cubemap layered surface dimensions
+    Vec2i maxSurfaceCubemapLayered() const;
+
+    //! alignment requirements for surfaces
+    size_t surfaceAlignment() const;
+
+    //! device can possibly execute multiple kernels concurrently
+    bool concurrentKernels() const;
+
+    //! device has ECC support enabled
+    bool ECCEnabled() const;
+
+    //! PCI bus ID of the device
+    int pciBusID() const;
+
+    //! PCI device ID of the device
+    int pciDeviceID() const;
+
+    //! PCI domain ID of the device
+    int pciDomainID() const;
+
+    //! true if device is a Tesla device using TCC driver, false otherwise
+    bool tccDriver() const;
+
+    //! number of asynchronous engines
+    int asyncEngineCount() const;
+
+    //! device shares a unified address space with the host
+    bool unifiedAddressing() const;
+
+    //! peak memory clock frequency in kilohertz
+    int memoryClockRate() const;
+
+    //! global memory bus width in bits
+    int memoryBusWidth() const;
+
+    //! size of L2 cache in bytes
+    int l2CacheSize() const;
+
+    //! maximum resident threads per multiprocessor
+    int maxThreadsPerMultiProcessor() const;
+
+    //! gets free and total device memory
+    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
+    size_t freeMemory() const;
+    size_t totalMemory() const;
+
+    /** @brief Provides information on CUDA feature support.
+
+    @param feature_set Features to be checked. See cuda::FeatureSet.
+
+    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
+     */
+    bool supports(FeatureSet feature_set) const;
+
+    /** @brief Checks the CUDA module and device compatibility.
+
+    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
+    returns false.
+     */
+    bool isCompatible() const;
+
+private:
+    int device_id_;
+};
+
+CV_EXPORTS void printCudaDeviceInfo(int device);
+CV_EXPORTS void printShortCudaDeviceInfo(int device);
+
+/** @brief Converts an array to half-precision floating point numbers.
+
+@param _src input array.
+@param _dst output array.
+@param stream Stream for the asynchronous version.
+@sa convertFp16
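+
+A minimal round-trip sketch (assumption: the half-precision result is stored as CV_16S):
+@code
+    cv::cuda::GpuMat d_f32(64, 64, CV_32FC1), d_f16, d_back;
+    cv::cuda::convertFp16(d_f32, d_f16);   // CV_32F -> half precision
+    cv::cuda::convertFp16(d_f16, d_back);  // half precision -> CV_32F
+@endcode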
+*/
+CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
+
+//! @} cudacore_init
+
+}} // namespace cv { namespace cuda {
+
+
+#include "opencv2/core/cuda.inl.hpp"
+
+#endif /* OPENCV_CORE_CUDA_HPP */

+ 631 - 0
ThresholdTools/include/opencv2/core/cuda.inl.hpp

@@ -0,0 +1,631 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDAINL_HPP
+#define OPENCV_CORE_CUDAINL_HPP
+
+#include "opencv2/core/cuda.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda {
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+inline
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{}
+
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (rows_ > 0 && cols_ > 0)
+        create(rows_, cols_, type_);
+}
+
+inline
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (size_.height > 0 && size_.width > 0)
+        create(size_.height, size_.width, type_);
+}
+
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (rows_ > 0 && cols_ > 0)
+    {
+        create(rows_, cols_, type_);
+        setTo(s_);
+    }
+}
+
+inline
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    if (size_.height > 0 && size_.width > 0)
+    {
+        create(size_.height, size_.width, type_);
+        setTo(s_);
+    }
+}
+
+inline
+GpuMat::GpuMat(const GpuMat& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
+{
+    if (refcount)
+        CV_XADD(refcount, 1);
+}
+
+inline
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
+    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
+{
+    upload(arr);
+}
+
+inline
+GpuMat::~GpuMat()
+{
+    release();
+}
+
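+// Assignment uses the copy-and-swap idiom: the temporary bumps the reference
+// count, and its destructor releases whatever *this previously held.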
+inline
+GpuMat& GpuMat::operator =(const GpuMat& m)
+{
+    if (this != &m)
+    {
+        GpuMat temp(m);
+        swap(temp);
+    }
+
+    return *this;
+}
+
+inline
+void GpuMat::create(Size size_, int type_)
+{
+    create(size_.height, size_.width, type_);
+}
+
+inline
+void GpuMat::swap(GpuMat& b)
+{
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+    std::swap(refcount, b.refcount);
+    std::swap(allocator, b.allocator);
+}
+
+inline
+GpuMat GpuMat::clone() const
+{
+    GpuMat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void GpuMat::copyTo(OutputArray dst, InputArray mask) const
+{
+    copyTo(dst, mask, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s)
+{
+    return setTo(s, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
+{
+    return setTo(s, mask, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype) const
+{
+    convertTo(dst, rtype, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
+{
+    convertTo(dst, rtype, alpha, beta, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
+{
+    convertTo(dst, rtype, alpha, 0.0, stream);
+}
+
+inline
+void GpuMat::assignTo(GpuMat& m, int _type) const
+{
+    if (_type < 0)
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+uchar* GpuMat::ptr(int y)
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step * y;
+}
+
+inline
+const uchar* GpuMat::ptr(int y) const
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step * y;
+}
+
+template<typename _Tp> inline
+_Tp* GpuMat::ptr(int y)
+{
+    return (_Tp*)ptr(y);
+}
+
+template<typename _Tp> inline
+const _Tp* GpuMat::ptr(int y) const
+{
+    return (const _Tp*)ptr(y);
+}
+
+template <class T> inline
+GpuMat::operator PtrStepSz<T>() const
+{
+    return PtrStepSz<T>(rows, cols, (T*)data, step);
+}
+
+template <class T> inline
+GpuMat::operator PtrStep<T>() const
+{
+    return PtrStep<T>((T*)data, step);
+}
+
+inline
+GpuMat GpuMat::row(int y) const
+{
+    return GpuMat(*this, Range(y, y+1), Range::all());
+}
+
+inline
+GpuMat GpuMat::col(int x) const
+{
+    return GpuMat(*this, Range::all(), Range(x, x+1));
+}
+
+inline
+GpuMat GpuMat::rowRange(int startrow, int endrow) const
+{
+    return GpuMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+GpuMat GpuMat::rowRange(Range r) const
+{
+    return GpuMat(*this, r, Range::all());
+}
+
+inline
+GpuMat GpuMat::colRange(int startcol, int endcol) const
+{
+    return GpuMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+GpuMat GpuMat::colRange(Range r) const
+{
+    return GpuMat(*this, Range::all(), r);
+}
+
+inline
+GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
+{
+    return GpuMat(*this, rowRange_, colRange_);
+}
+
+inline
+GpuMat GpuMat::operator ()(Rect roi) const
+{
+    return GpuMat(*this, roi);
+}
+
+inline
+bool GpuMat::isContinuous() const
+{
+    return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+size_t GpuMat::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t GpuMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int GpuMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int GpuMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int GpuMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t GpuMat::step1() const
+{
+    return step / elemSize1();
+}
+
+inline
+Size GpuMat::size() const
+{
+    return Size(cols, rows);
+}
+
+inline
+bool GpuMat::empty() const
+{
+    return data == 0;
+}
+
+static inline
+GpuMat createContinuous(int rows, int cols, int type)
+{
+    GpuMat m;
+    createContinuous(rows, cols, type, m);
+    return m;
+}
+
+static inline
+void createContinuous(Size size, int type, OutputArray arr)
+{
+    createContinuous(size.height, size.width, type, arr);
+}
+
+static inline
+GpuMat createContinuous(Size size, int type)
+{
+    GpuMat m;
+    createContinuous(size, type, m);
+    return m;
+}
+
+static inline
+void ensureSizeIsEnough(Size size, int type, OutputArray arr)
+{
+    ensureSizeIsEnough(size.height, size.width, type, arr);
+}
+
+static inline
+void swap(GpuMat& a, GpuMat& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+inline
+HostMem::HostMem(AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+}
+
+inline
+HostMem::HostMem(const HostMem& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
+{
+    if( refcount )
+        CV_XADD(refcount, 1);
+}
+
+inline
+HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (rows_ > 0 && cols_ > 0)
+        create(rows_, cols_, type_);
+}
+
+inline
+HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    if (size_.height > 0 && size_.width > 0)
+        create(size_.height, size_.width, type_);
+}
+
+inline
+HostMem::HostMem(InputArray arr, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
+{
+    arr.getMat().copyTo(*this);
+}
+
+inline
+HostMem::~HostMem()
+{
+    release();
+}
+
+inline
+HostMem& HostMem::operator =(const HostMem& m)
+{
+    if (this != &m)
+    {
+        HostMem temp(m);
+        swap(temp);
+    }
+
+    return *this;
+}
+
+inline
+void HostMem::swap(HostMem& b)
+{
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+    std::swap(refcount, b.refcount);
+    std::swap(alloc_type, b.alloc_type);
+}
+
+inline
+HostMem HostMem::clone() const
+{
+    HostMem m(size(), type(), alloc_type);
+    createMatHeader().copyTo(m);
+    return m;
+}
+
+inline
+void HostMem::create(Size size_, int type_)
+{
+    create(size_.height, size_.width, type_);
+}
+
+inline
+Mat HostMem::createMatHeader() const
+{
+    return Mat(size(), type(), data, step);
+}
+
+inline
+bool HostMem::isContinuous() const
+{
+    return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+size_t HostMem::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t HostMem::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int HostMem::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int HostMem::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int HostMem::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t HostMem::step1() const
+{
+    return step / elemSize1();
+}
+
+inline
+Size HostMem::size() const
+{
+    return Size(cols, rows);
+}
+
+inline
+bool HostMem::empty() const
+{
+    return data == 0;
+}
+
+static inline
+void swap(HostMem& a, HostMem& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+inline
+Stream::Stream(const Ptr<Impl>& impl)
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Event
+//===================================================================================
+
+inline
+Event::Event(const Ptr<Impl>& impl)
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+inline
+bool TargetArchs::has(int major, int minor)
+{
+    return hasPtx(major, minor) || hasBin(major, minor);
+}
+
+inline
+bool TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
+}
+
+inline
+DeviceInfo::DeviceInfo()
+{
+    device_id_ = getDevice();
+}
+
+inline
+DeviceInfo::DeviceInfo(int device_id)
+{
+    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
+    device_id_ = device_id;
+}
+
+inline
+int DeviceInfo::deviceID() const
+{
+    return device_id_;
+}
+
+inline
+size_t DeviceInfo::freeMemory() const
+{
+    size_t _totalMemory = 0, _freeMemory = 0;
+    queryMemory(_totalMemory, _freeMemory);
+    return _freeMemory;
+}
+
+inline
+size_t DeviceInfo::totalMemory() const
+{
+    size_t _totalMemory = 0, _freeMemory = 0;
+    queryMemory(_totalMemory, _freeMemory);
+    return _totalMemory;
+}
+
+inline
+bool DeviceInfo::supports(FeatureSet feature_set) const
+{
+    int version = majorVersion() * 10 + minorVersion();
+    return version >= feature_set;
+}
+
+
+}} // namespace cv { namespace cuda {
+
+//===================================================================================
+// Mat
+//===================================================================================
+
+namespace cv {
+
+inline
+Mat::Mat(const cuda::GpuMat& m)
+    : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
+{
+    m.download(*this);
+}
+
+}
+
+//! @endcond
+
+#endif // OPENCV_CORE_CUDAINL_HPP

+ 86 - 0
ThresholdTools/include/opencv2/core/cuda_stream_accessor.hpp

@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+
+#ifndef __cplusplus
+#  error cuda_stream_accessor.hpp header must be compiled as C++
+#endif
+
+/** @file cuda_stream_accessor.hpp
+ * This is the only header file that depends on the CUDA Runtime API. All other headers are independent.
+ */
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda.hpp"
+
+namespace cv
+{
+    namespace cuda
+    {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+        /** @brief Class that enables getting cudaStream_t from cuda::Stream
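+         *
+         * A minimal interop sketch:
+         * @code
+         *     cv::cuda::Stream stream;
+         *     cudaStream_t raw = cv::cuda::StreamAccessor::getStream(stream);
+         *     // `raw` can be handed to custom kernels or third-party libraries
+         * @endcode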
+         */
+        struct StreamAccessor
+        {
+            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+            CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
+        };
+
+        /** @brief Class that enables getting cudaEvent_t from cuda::Event
+         */
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+            CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
+        };
+
+//! @}
+
+    }
+}
+
+#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */

+ 142 - 0
ThresholdTools/include/opencv2/core/cuda_types.hpp

@@ -0,0 +1,142 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_TYPES_HPP
+#define OPENCV_CORE_CUDA_TYPES_HPP
+
+#ifndef __cplusplus
+#  error cuda_types.hpp header must be compiled as C++
+#endif
+
+#if defined(__OPENCV_BUILD) && defined(__clang__)
+#pragma clang diagnostic ignored "-Winconsistent-missing-override"
+#endif
+#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifdef __CUDACC__
+    #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_CUDA_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace cuda
+    {
+
+        // Simple lightweight structures that encapsulate information about an image on the device.
+        // They are intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.
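+        //
+        // A minimal kernel sketch (assumption: compiled with nvcc):
+        //
+        //   __global__ void fillKernel(cv::cuda::PtrStepSzb img, uchar value)
+        //   {
+        //       const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        //       const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        //       if (x < img.cols && y < img.rows)
+        //           img(y, x) = value;   // operator() resolves the pitched address
+        //   }
+        //
+        // A GpuMat converts implicitly to PtrStepSzb at the call site:
+        //   fillKernel<<<grid, block>>>(d_img, 255);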
+
+        template <typename T> struct DevPtr
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_CUDA_HOST_DEVICE__ operator       T*()       { return data; }
+            __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template <typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template <typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            size_t step;
+
+            __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
+            __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
+
+            __CV_CUDA_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
+            __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            template <typename U>
+            explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
+
+            int cols;
+            int rows;
+        };
+
+        typedef PtrStepSz<unsigned char> PtrStepSzb;
+        typedef PtrStepSz<float> PtrStepSzf;
+        typedef PtrStepSz<int> PtrStepSzi;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+    }
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_CUDA_TYPES_HPP */

+ 239 - 0
ThresholdTools/include/opencv2/core/cv_cpu_dispatch.h

@@ -0,0 +1,239 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#if defined __OPENCV_BUILD \
+
+#include "cv_cpu_config.h"
+#include "cv_cpu_helper.h"
+
+#ifdef CV_CPU_DISPATCH_MODE
+#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#else
+#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#endif
+
+
+#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done */
+#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
+#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
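+
+// A sketch of how OpenCV sources typically invoke the dispatcher (the function
+// name and argument list here are hypothetical):
+//   CV_CPU_DISPATCH(resizeBilinear, (src, dst, scale), CV_CPU_DISPATCH_MODES_ALL);
+// Each enabled mode's chain macro tests CPU support at run time and, on a match,
+// calls into the corresponding opt_<MODE> namespace, falling back to
+// cpu_baseline when no dispatch mode matches.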
+
+
+#if defined CV_ENABLE_INTRINSICS \
+    && !defined CV_DISABLE_OPTIMIZATION \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
+
+#ifdef CV_CPU_COMPILE_SSE2
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE3
+#  include <pmmintrin.h>
+#  define CV_SSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSSE3
+#  include <tmmintrin.h>
+#  define CV_SSSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_1
+#  include <smmintrin.h>
+#  define CV_SSE4_1 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_2
+#  include <nmmintrin.h>
+#  define CV_SSE4_2 1
+#endif
+#ifdef CV_CPU_COMPILE_POPCNT
+#  ifdef _MSC_VER
+#    include <nmmintrin.h>
+#    if defined(_M_X64)
+#      define CV_POPCNT_U64 _mm_popcnt_u64
+#    endif
+#    define CV_POPCNT_U32 _mm_popcnt_u32
+#  else
+#    include <popcntintrin.h>
+#    if defined(__x86_64__)
+#      define CV_POPCNT_U64 __builtin_popcountll
+#    endif
+#    define CV_POPCNT_U32 __builtin_popcount
+#  endif
+#  define CV_POPCNT 1
+#endif
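CV_POPCNT_U32/CV_POPCNT_U64 wrap whichever population-count intrinsic the compiler provides. A small hedged sketch with a portable fallback (the function name is ours):

    static inline int countBits32(unsigned v)
    {
    #if defined CV_POPCNT_U32
        return (int)CV_POPCNT_U32(v);   // hardware popcnt
    #else
        int n = 0;
        for (; v; v &= v - 1) ++n;      // Kernighan's bit-clearing loop
        return n;
    #endif
    }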
+#ifdef CV_CPU_COMPILE_AVX
+#  include <immintrin.h>
+#  define CV_AVX 1
+#endif
+#ifdef CV_CPU_COMPILE_FP16
+#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
+#    include <arm_neon.h>
+#  else
+#    include <immintrin.h>
+#  endif
+#  define CV_FP16 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX2
+#  include <immintrin.h>
+#  define CV_AVX2 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX_512F
+#  include <immintrin.h>
+#  define CV_AVX_512F 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_SKX
+#  include <immintrin.h>
+#  define CV_AVX512_SKX 1
+#endif
+#ifdef CV_CPU_COMPILE_FMA3
+#  define CV_FMA3 1
+#endif
+
+#if defined _WIN32 && defined(_M_ARM)
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+#  include <arm_neon.h>
+#endif
+
+#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
+
+#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
+struct VZeroUpperGuard {
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
+};
+#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; (void)__vzeroupper_guard;
+#endif
+
+#ifdef __CV_AVX_GUARD
+#define CV_AVX_GUARD __CV_AVX_GUARD
+#else
+#define CV_AVX_GUARD
+#endif
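VZeroUpperGuard exists because mixing 256-bit AVX code with legacy SSE code without an intervening vzeroupper incurs large state-transition penalties. Placing CV_AVX_GUARD at the top of an AVX-compiled function runs _mm256_zeroupper() on every exit path via the guard's destructor. A hedged sketch (the function body is hypothetical):

    void addRows_avx(const float* src, float* dst, int n)
    {
        CV_AVX_GUARD  // scoped guard; destructor issues _mm256_zeroupper()
        for (int i = 0; i < n; i++)
            dst[i] += src[i];  // a real AVX build would use _mm256_* intrinsics
    }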
+
+#endif // __OPENCV_BUILD
+
+
+
+#if !defined __OPENCV_BUILD /* Compatibility code */ \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#elif defined _WIN32 && defined(_M_ARM)
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
+
+
+
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
+#ifndef CV_SSE
+#  define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+#  define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+#  define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+#  define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+#  define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+#  define CV_SSE4_2 0
+#endif
+#ifndef CV_POPCNT
+#  define CV_POPCNT 0
+#endif
+#ifndef CV_AVX
+#  define CV_AVX 0
+#endif
+#ifndef CV_FP16
+#  define CV_FP16 0
+#endif
+#ifndef CV_AVX2
+#  define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA512
+#  define CV_AVX_512IFMA512 0
+#endif
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+#ifndef CV_AVX512_SKX
+#  define CV_AVX512_SKX 0
+#endif
+
+#ifndef CV_NEON
+#  define CV_NEON 0
+#endif
+
+#ifndef CV_VSX
+#  define CV_VSX 0
+#endif

+ 319 - 0
ThresholdTools/include/opencv2/core/cv_cpu_helper.h

@@ -0,0 +1,319 @@
+// AUTOGENERATED, DO NOT EDIT
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 1
+#  define CV_CPU_HAS_SUPPORT_SSE 1
+#  define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
+#  define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
+#  define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
+#else
+#  define CV_TRY_SSE 0
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE 0
+#  define CV_CPU_CALL_SSE(fn, args)
+#  define CV_CPU_CALL_SSE_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...)  CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
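Every block in this generated header follows the same three-way pattern: if the feature is in the compile-time baseline, calls resolve statically into cpu_baseline; if it is compiled as a dispatched variant, calls are guarded by a runtime cv::checkHardwareSupport() test; otherwise the CV_CPU_CALL_* macro is a no-op and the chain falls through to the next tag. Roughly, for the SSE step (illustration only, not code from this commit):

    // With CV_CPU_DISPATCH_COMPILE_SSE defined, CV_CPU_CALL_SSE(fn, (a, b))
    // inside a dispatch chain behaves like:
    //
    //     if (cv::checkHardwareSupport(CV_CPU_SSE))
    //         return (opt_SSE::fn (a, b));
    //     // otherwise fall through to the next, less demanding step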
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 1
+#  define CV_CPU_HAS_SUPPORT_SSE2 1
+#  define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
+#  define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
+#  define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
+#else
+#  define CV_TRY_SSE2 0
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 0
+#  define CV_CPU_CALL_SSE2(fn, args)
+#  define CV_CPU_CALL_SSE2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...)  CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSE3 1
+#  define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
+#  define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
+#  define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
+#else
+#  define CV_TRY_SSE3 0
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 0
+#  define CV_CPU_CALL_SSE3(fn, args)
+#  define CV_CPU_CALL_SSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...)  CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSSE3 1
+#  define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
+#  define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
+#  define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
+#else
+#  define CV_TRY_SSSE3 0
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 0
+#  define CV_CPU_CALL_SSSE3(fn, args)
+#  define CV_CPU_CALL_SSSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...)  CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 1
+#  define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
+#  define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
+#else
+#  define CV_TRY_SSE4_1 0
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 0
+#  define CV_CPU_CALL_SSE4_1(fn, args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...)  CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 1
+#  define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
+#  define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
+#else
+#  define CV_TRY_SSE4_2 0
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 0
+#  define CV_CPU_CALL_SSE4_2(fn, args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...)  CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 1
+#  define CV_CPU_HAS_SUPPORT_POPCNT 1
+#  define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
+#  define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
+#  define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
+#else
+#  define CV_TRY_POPCNT 0
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT 0
+#  define CV_CPU_CALL_POPCNT(fn, args)
+#  define CV_CPU_CALL_POPCNT_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...)  CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 1
+#  define CV_CPU_HAS_SUPPORT_AVX 1
+#  define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
+#  define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
+#  define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
+#else
+#  define CV_TRY_AVX 0
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX 0
+#  define CV_CPU_CALL_AVX(fn, args)
+#  define CV_CPU_CALL_AVX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...)  CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 1
+#  define CV_CPU_HAS_SUPPORT_FP16 1
+#  define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
+#  define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
+#  define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
+#else
+#  define CV_TRY_FP16 0
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 0
+#  define CV_CPU_CALL_FP16(fn, args)
+#  define CV_CPU_CALL_FP16_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...)  CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 1
+#  define CV_CPU_HAS_SUPPORT_AVX2 1
+#  define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
+#  define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
+#  define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
+#else
+#  define CV_TRY_AVX2 0
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 0
+#  define CV_CPU_CALL_AVX2(fn, args)
+#  define CV_CPU_CALL_AVX2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...)  CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 1
+#  define CV_CPU_HAS_SUPPORT_FMA3 1
+#  define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
+#  define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
+#  define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
+#else
+#  define CV_TRY_FMA3 0
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 0
+#  define CV_CPU_CALL_FMA3(fn, args)
+#  define CV_CPU_CALL_FMA3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...)  CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 1
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 1
+#  define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
+#  define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
+#else
+#  define CV_TRY_AVX_512F 0
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 0
+#  define CV_CPU_CALL_AVX_512F(fn, args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...)  CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
+#else
+#  define CV_TRY_AVX512_SKX 0
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
+#  define CV_CPU_CALL_AVX512_SKX(fn, args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 1
+#  define CV_CPU_HAS_SUPPORT_NEON 1
+#  define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
+#  define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
+#  define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
+#else
+#  define CV_TRY_NEON 0
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON 0
+#  define CV_CPU_CALL_NEON(fn, args)
+#  define CV_CPU_CALL_NEON_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...)  CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 1
+#  define CV_CPU_HAS_SUPPORT_VSX 1
+#  define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
+#  define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
+#  define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#  define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#else
+#  define CV_TRY_VSX 0
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX 0
+#  define CV_CPU_CALL_VSX(fn, args)
+#  define CV_CPU_CALL_VSX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...)  CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
+
+#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
+#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...)  CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */

+ 531 - 0
ThresholdTools/include/opencv2/core/cvdef.h

@@ -0,0 +1,531 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVDEF_H
+#define OPENCV_CORE_CVDEF_H
+
+//! @addtogroup core_utils
+//! @{
+
+#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
+#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
+    (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
+// Guard to prevent the use of binary-incompatible binaries / runtimes
+// https://github.com/opencv/opencv/pull/9161
+#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
+#define CV__DEBUG_NS_END }
+namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
+#endif
+#endif
+
+#ifndef CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_END
+#endif
+
+
+#ifdef __OPENCV_BUILD
+#include "cvconfig.h"
+#endif
+
+#ifndef __CV_EXPAND
+#define __CV_EXPAND(x) x
+#endif
+
+#ifndef __CV_CAT
+#define __CV_CAT__(x, y) x ## y
+#define __CV_CAT_(x, y) __CV_CAT__(x, y)
+#define __CV_CAT(x, y) __CV_CAT_(x, y)
+#endif
+
+
+// undef problematic defines sometimes defined by system headers (windows.h in particular)
+#undef small
+#undef min
+#undef max
+#undef abs
+#undef Complex
+
+#include <limits.h>
+#include "opencv2/core/hal/interface.h"
+
+#if defined __ICL
+#  define CV_ICC   __ICL
+#elif defined __ICC
+#  define CV_ICC   __ICC
+#elif defined __ECL
+#  define CV_ICC   __ECL
+#elif defined __ECC
+#  define CV_ICC   __ECC
+#elif defined __INTEL_COMPILER
+#  define CV_ICC   __INTEL_COMPILER
+#endif
+
+#ifndef CV_INLINE
+#  if defined __cplusplus
+#    define CV_INLINE static inline
+#  elif defined _MSC_VER
+#    define CV_INLINE __inline
+#  else
+#    define CV_INLINE static
+#  endif
+#endif
+
+#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
+#  define CV_ENABLE_UNROLLED 0
+#else
+#  define CV_ENABLE_UNROLLED 1
+#endif
+
+#ifdef __GNUC__
+#  define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined _MSC_VER
+#  define CV_DECL_ALIGNED(x) __declspec(align(x))
+#else
+#  define CV_DECL_ALIGNED(x)
+#endif
+
+/* CPU features and intrinsics support */
+#define CV_CPU_NONE             0
+#define CV_CPU_MMX              1
+#define CV_CPU_SSE              2
+#define CV_CPU_SSE2             3
+#define CV_CPU_SSE3             4
+#define CV_CPU_SSSE3            5
+#define CV_CPU_SSE4_1           6
+#define CV_CPU_SSE4_2           7
+#define CV_CPU_POPCNT           8
+#define CV_CPU_FP16             9
+#define CV_CPU_AVX              10
+#define CV_CPU_AVX2             11
+#define CV_CPU_FMA3             12
+
+#define CV_CPU_AVX_512F         13
+#define CV_CPU_AVX_512BW        14
+#define CV_CPU_AVX_512CD        15
+#define CV_CPU_AVX_512DQ        16
+#define CV_CPU_AVX_512ER        17
+#define CV_CPU_AVX_512IFMA512   18 // deprecated
+#define CV_CPU_AVX_512IFMA      18
+#define CV_CPU_AVX_512PF        19
+#define CV_CPU_AVX_512VBMI      20
+#define CV_CPU_AVX_512VL        21
+
+#define CV_CPU_NEON   100
+
+#define CV_CPU_VSX 200
+
+// CPU features groups
+#define CV_CPU_AVX512_SKX       256
+
+// when adding to this list remember to update the following enum
+#define CV_HARDWARE_MAX_FEATURE 512
+
+/** @brief Available CPU features.
+*/
+enum CpuFeatures {
+    CPU_MMX             = 1,
+    CPU_SSE             = 2,
+    CPU_SSE2            = 3,
+    CPU_SSE3            = 4,
+    CPU_SSSE3           = 5,
+    CPU_SSE4_1          = 6,
+    CPU_SSE4_2          = 7,
+    CPU_POPCNT          = 8,
+    CPU_FP16            = 9,
+    CPU_AVX             = 10,
+    CPU_AVX2            = 11,
+    CPU_FMA3            = 12,
+
+    CPU_AVX_512F        = 13,
+    CPU_AVX_512BW       = 14,
+    CPU_AVX_512CD       = 15,
+    CPU_AVX_512DQ       = 16,
+    CPU_AVX_512ER       = 17,
+    CPU_AVX_512IFMA512  = 18, // deprecated
+    CPU_AVX_512IFMA     = 18,
+    CPU_AVX_512PF       = 19,
+    CPU_AVX_512VBMI     = 20,
+    CPU_AVX_512VL       = 21,
+
+    CPU_NEON            = 100,
+
+    CPU_VSX             = 200,
+
+    CPU_AVX512_SKX      = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
+
+    CPU_MAX_FEATURE     = 512  // see CV_HARDWARE_MAX_FEATURE
+};
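These IDs are what cv::checkHardwareSupport() (declared in opencv2/core/utility.hpp) accepts at runtime. A short hedged sketch:

    #include <cstdio>
    #include "opencv2/core/utility.hpp"

    void reportSimd()
    {
        // each call returns true only if the CPU supports the feature
        std::printf("SSE2: %d\n", (int)cv::checkHardwareSupport(CV_CPU_SSE2));
        std::printf("AVX2: %d\n", (int)cv::checkHardwareSupport(CV_CPU_AVX2));
        std::printf("NEON: %d\n", (int)cv::checkHardwareSupport(CV_CPU_NEON));
    }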
+
+
+#include "cv_cpu_dispatch.h"
+
+
+/* fundamental constants */
+#define CV_PI   3.1415926535897932384626433832795
+#define CV_2PI  6.283185307179586476925286766559
+#define CV_LOG2 0.69314718055994530941723212145818
+
+#if defined __ARM_FP16_FORMAT_IEEE \
+    && !defined __CUDACC__
+#  define CV_FP16_TYPE 1
+#else
+#  define CV_FP16_TYPE 0
+#endif
+
+typedef union Cv16suf
+{
+    short i;
+#if CV_FP16_TYPE
+    __fp16 h;
+#endif
+    struct _fp16Format
+    {
+        unsigned int significand : 10;
+        unsigned int exponent    : 5;
+        unsigned int sign        : 1;
+    } fmt;
+}
+Cv16suf;
+
+typedef union Cv32suf
+{
+    int i;
+    unsigned u;
+    float f;
+    struct _fp32Format
+    {
+        unsigned int significand : 23;
+        unsigned int exponent    : 8;
+        unsigned int sign        : 1;
+    } fmt;
+}
+Cv32suf;
+
+typedef union Cv64suf
+{
+    int64 i;
+    uint64 u;
+    double f;
+}
+Cv64suf;
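The Cv16suf/Cv32suf/Cv64suf unions are used for type punning between a floating-point value and its bit pattern, e.g. inspecting IEEE-754 fields. A sketch (note that bit-field layout is implementation-defined, so the fmt view assumes the usual little-endian layout):

    Cv32suf s;
    s.f = 1.5f;                                // 1.5 = 1.1b * 2^0
    int unbiased = (int)s.fmt.exponent - 127;  // 0 for 1.5f
    unsigned frac = s.fmt.significand;         // 0x400000: top fraction bit set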
+
+#define OPENCV_ABI_COMPATIBILITY 300
+
+#ifdef __OPENCV_BUILD
+#  define DISABLE_OPENCV_24_COMPATIBILITY
+#endif
+
+#ifdef CVAPI_EXPORTS
+# if (defined _WIN32 || defined WINCE || defined __CYGWIN__)
+#   define CV_EXPORTS __declspec(dllexport)
+# elif defined __GNUC__ && __GNUC__ >= 4
+#   define CV_EXPORTS __attribute__ ((visibility ("default")))
+# endif
+#endif
+
+#ifndef CV_EXPORTS
+# define CV_EXPORTS
+#endif
+
+#ifdef _MSC_VER
+#   define CV_EXPORTS_TEMPLATE
+#else
+#   define CV_EXPORTS_TEMPLATE CV_EXPORTS
+#endif
+
+#ifndef CV_DEPRECATED
+#  if defined(__GNUC__)
+#    define CV_DEPRECATED __attribute__ ((deprecated))
+#  elif defined(_MSC_VER)
+#    define CV_DEPRECATED __declspec(deprecated)
+#  else
+#    define CV_DEPRECATED
+#  endif
+#endif
+
+#ifndef CV_EXTERN_C
+#  ifdef __cplusplus
+#    define CV_EXTERN_C extern "C"
+#  else
+#    define CV_EXTERN_C
+#  endif
+#endif
+
+/* special informative macros for wrapper generators */
+#define CV_EXPORTS_W CV_EXPORTS
+#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
+#define CV_EXPORTS_AS(synonym) CV_EXPORTS
+#define CV_EXPORTS_W_MAP CV_EXPORTS
+#define CV_IN_OUT
+#define CV_OUT
+#define CV_PROP
+#define CV_PROP_RW
+#define CV_WRAP
+#define CV_WRAP_AS(synonym)
+
+/****************************************************************************************\
+*                                  Matrix type (Mat)                                     *
+\****************************************************************************************/
+
+#define CV_MAT_CN_MASK          ((CV_CN_MAX - 1) << CV_CN_SHIFT)
+#define CV_MAT_CN(flags)        ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
+#define CV_MAT_TYPE_MASK        (CV_DEPTH_MAX*CV_CN_MAX - 1)
+#define CV_MAT_TYPE(flags)      ((flags) & CV_MAT_TYPE_MASK)
+#define CV_MAT_CONT_FLAG_SHIFT  14
+#define CV_MAT_CONT_FLAG        (1 << CV_MAT_CONT_FLAG_SHIFT)
+#define CV_IS_MAT_CONT(flags)   ((flags) & CV_MAT_CONT_FLAG)
+#define CV_IS_CONT_MAT          CV_IS_MAT_CONT
+#define CV_SUBMAT_FLAG_SHIFT    15
+#define CV_SUBMAT_FLAG          (1 << CV_SUBMAT_FLAG_SHIFT)
+#define CV_IS_SUBMAT(flags)     ((flags) & CV_SUBMAT_FLAG)
+
+/** Size of each channel item,
+   0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
+#define CV_ELEM_SIZE1(type) \
+    ((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
+
+/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
+#define CV_ELEM_SIZE(type) \
+    (CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
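Both macros are bit-packed lookup tables indexed by CV_MAT_DEPTH: the first stores each depth's byte size in a 4-bit nibble, the second stores log2 of the size in 2-bit fields (with a sizeof(size_t)-dependent patch for the pointer-sized depth). A few hedged sanity checks (the function name is ours):

    #include <cassert>
    #include "opencv2/core/cvdef.h"

    void checkElemSizes()
    {
        assert(CV_ELEM_SIZE1(CV_16U)  == 2);   // (0x8442211 >> 2*4) & 15 == 2
        assert(CV_ELEM_SIZE(CV_8UC3)  == 3);   // 3 channels << log2(1) == 3
        assert(CV_ELEM_SIZE(CV_32FC2) == 8);   // 2 channels << log2(4) == 8
    }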
+
+#ifndef MIN
+#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
+#endif
+
+#ifndef MAX
+#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
+#endif
+
+/****************************************************************************************\
+*                                    static analysis                                     *
+\****************************************************************************************/
+
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definitions for them.
+#ifndef CV_STATIC_ANALYSIS
+# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
+#   define CV_STATIC_ANALYSIS
+# endif
+#endif
+
+/****************************************************************************************\
+*                                    Thread sanitizer                                    *
+\****************************************************************************************/
+#ifndef CV_THREAD_SANITIZER
+# if defined(__has_feature)
+#   if __has_feature(thread_sanitizer)
+#     define CV_THREAD_SANITIZER
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*          exchange-add operation for atomic operations on reference counters            *
+\****************************************************************************************/
+
+#ifdef CV_XADD
+  // allow to use user-defined macro
+#elif defined __GNUC__ || defined __clang__
+#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
+#    ifdef __ATOMIC_ACQ_REL
+#      define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
+#    endif
+#  else
+#    if defined __ATOMIC_ACQ_REL && !defined __clang__
+       // version for gcc >= 4.7
+#      define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
+#    endif
+#  endif
+#elif defined _MSC_VER && !defined RC_INVOKED
+#  include <intrin.h>
+#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
+#else
+   CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+#endif
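CV_XADD atomically adds delta to *addr and returns the previous value; it is the primitive behind the reference counting used by Ptr, Mat, and the String class below. A minimal hedged sketch of the usual pattern (the struct is hypothetical):

    #include "opencv2/core/cvdef.h"

    struct RefCounted
    {
        int refcount;
        RefCounted() : refcount(1) {}
        void addref()  { CV_XADD(&refcount, 1); }
        // old value 1 means this call dropped the last reference
        void release() { if (CV_XADD(&refcount, -1) == 1) delete this; }
    };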
+
+
+/****************************************************************************************\
+*                                  CV_NORETURN attribute                                 *
+\****************************************************************************************/
+
+#ifndef CV_NORETURN
+#  if defined(__GNUC__)
+#    define CV_NORETURN __attribute__((__noreturn__))
+#  elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+#    define CV_NORETURN __declspec(noreturn)
+#  else
+#    define CV_NORETURN /* nothing by default */
+#  endif
+#endif
+
+
+/****************************************************************************************\
+*                                    C++ 11                                              *
+\****************************************************************************************/
+#ifndef CV_CXX11
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
+#    define CV_CXX11 1
+#  endif
+#else
+#  if CV_CXX11 == 0
+#    undef CV_CXX11
+#  endif
+#endif
+
+
+/****************************************************************************************\
+*                                    C++ Move semantics                                  *
+\****************************************************************************************/
+
+#ifndef CV_CXX_MOVE_SEMANTICS
+#  if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
+#    define CV_CXX_MOVE_SEMANTICS 1
+#  elif defined(__clang__)
+#    if __has_feature(cxx_rvalue_references)
+#      define CV_CXX_MOVE_SEMANTICS 1
+#    endif
+#  endif
+#else
+#  if CV_CXX_MOVE_SEMANTICS == 0
+#    undef CV_CXX_MOVE_SEMANTICS
+#  endif
+#endif
+
+/****************************************************************************************\
+*                                    C++11 std::array                                    *
+\****************************************************************************************/
+
+#ifndef CV_CXX_STD_ARRAY
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_CXX_STD_ARRAY 1
+#    include <array>
+#  endif
+#else
+#  if CV_CXX_STD_ARRAY == 0
+#    undef CV_CXX_STD_ARRAY
+#  endif
+#endif
+
+
+/****************************************************************************************\
+*                                 C++11 override / final                                 *
+\****************************************************************************************/
+
+#ifndef CV_OVERRIDE
+#  ifdef CV_CXX11
+#    define CV_OVERRIDE override
+#  endif
+#endif
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE
+#endif
+
+#ifndef CV_FINAL
+#  ifdef CV_CXX11
+#    define CV_FINAL final
+#  endif
+#endif
+#ifndef CV_FINAL
+#  define CV_FINAL
+#endif
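CV_OVERRIDE and CV_FINAL degrade gracefully: they expand to the C++11 keywords when available and to nothing otherwise, so headers stay compilable with older compilers. Typical use (the types here are hypothetical):

    struct Base { virtual ~Base() {} virtual void run() = 0; };

    struct Impl CV_FINAL : public Base
    {
        void run() CV_OVERRIDE {}  // checked as 'override' on C++11 compilers
    };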
+
+
+
+// Integer type portability
+#ifdef OPENCV_STDINT_HEADER
+#include OPENCV_STDINT_HEADER
+#elif defined(__cplusplus)
+#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
+namespace cv {
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+}
+#elif defined(_MSC_VER) || __cplusplus >= 201103L
+#include <cstdint>
+namespace cv {
+using std::int8_t;
+using std::uint8_t;
+using std::int16_t;
+using std::uint16_t;
+using std::int32_t;
+using std::uint32_t;
+using std::int64_t;
+using std::uint64_t;
+}
+#else
+#include <stdint.h>
+namespace cv {
+typedef ::int8_t int8_t;
+typedef ::uint8_t uint8_t;
+typedef ::int16_t int16_t;
+typedef ::uint16_t uint16_t;
+typedef ::int32_t int32_t;
+typedef ::uint32_t uint32_t;
+typedef ::int64_t int64_t;
+typedef ::uint64_t uint64_t;
+}
+#endif
+#else // pure C
+#include <stdint.h>
+#endif
+
+
+//! @}
+
+#ifndef __cplusplus
+#include "opencv2/core/fast_math.hpp" // define cvRound(double)
+#endif
+
+#endif // OPENCV_CORE_CVDEF_H

+ 1040 - 0
ThresholdTools/include/opencv2/core/cvstd.hpp

@@ -0,0 +1,1040 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTD_HPP
+#define OPENCV_CORE_CVSTD_HPP
+
+#ifndef __cplusplus
+#  error cvstd.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include <cstddef>
+#include <cstring>
+#include <cctype>
+
+#include <string>
+
+// import useful primitives from stl
+#  include <algorithm>
+#  include <utility>
+#  include <cstdlib> //for abs(int)
+#  include <cmath>
+
+namespace cv
+{
+    static inline uchar abs(uchar a) { return a; }
+    static inline ushort abs(ushort a) { return a; }
+    static inline unsigned abs(unsigned a) { return a; }
+    static inline uint64 abs(uint64 a) { return a; }
+
+    using std::min;
+    using std::max;
+    using std::abs;
+    using std::swap;
+    using std::sqrt;
+    using std::exp;
+    using std::pow;
+    using std::log;
+}
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+//////////////////////////// memory management functions ////////////////////////////
+
+/** @brief Allocates an aligned memory buffer.
+
+The function allocates the buffer of the specified size and returns it. When the buffer size is 16
+bytes or more, the returned buffer is aligned to 16 bytes.
+@param bufSize Allocated buffer size.
+ */
+CV_EXPORTS void* fastMalloc(size_t bufSize);
+
+/** @brief Deallocates a memory buffer.
+
+The function deallocates the buffer allocated with fastMalloc. If a NULL pointer is passed, the
+function does nothing. The C version of the function clears the pointer *pptr* to avoid problems
+with double memory deallocation.
+@param ptr Pointer to the allocated buffer.
+ */
+CV_EXPORTS void fastFree(void* ptr);
+
+/*!
+  The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
+*/
+template<typename _Tp> class Allocator
+{
+public:
+    typedef _Tp value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    template<typename U> struct rebind { typedef Allocator<U> other; };
+
+    explicit Allocator() {}
+    ~Allocator() {}
+    explicit Allocator(Allocator const&) {}
+    template<typename U>
+    explicit Allocator(Allocator<U> const&) {}
+
+    // address
+    pointer address(reference r) { return &r; }
+    const_pointer address(const_reference r) { return &r; }
+
+    pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
+    void deallocate(pointer p, size_type) { fastFree(p); }
+
+    void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
+    void destroy(pointer p) { p->~_Tp(); }
+
+    size_type max_size() const { return cv::max(static_cast<size_type>(-1)/sizeof(_Tp), static_cast<size_type>(1)); }
+};
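Because it satisfies the C++03 allocator interface, this Allocator can be dropped into STL containers so that their storage comes from fastMalloc and is therefore 16-byte aligned. A sketch:

    #include <vector>

    // element storage is 16-byte aligned, suitable for aligned SIMD loads
    std::vector<float, cv::Allocator<float> > buf(1024);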
+
+//! @} core_utils
+
+//! @cond IGNORED
+
+namespace detail
+{
+
+// Metafunction to avoid taking a reference to void.
+template<typename T>
+struct RefOrVoid { typedef T& type; };
+
+template<>
+struct RefOrVoid<void>{ typedef void type; };
+
+template<>
+struct RefOrVoid<const void>{ typedef const void type; };
+
+template<>
+struct RefOrVoid<volatile void>{ typedef volatile void type; };
+
+template<>
+struct RefOrVoid<const volatile void>{ typedef const volatile void type; };
+
+// This class would be private to Ptr, if it didn't have to be a non-template.
+struct PtrOwner;
+
+}
+
+template<typename Y>
+struct DefaultDeleter
+{
+    void operator () (Y* p) const;
+};
+
+//! @endcond
+
+//! @addtogroup core_basic
+//! @{
+
+/** @brief Template class for smart pointers with shared ownership
+
+A Ptr\<T\> pretends to be a pointer to an object of type T. Unlike an ordinary pointer, however, the
+object will be automatically cleaned up once all Ptr instances pointing to it are destroyed.
+
+Ptr is similar to boost::shared_ptr that is part of the Boost library
+(<http://www.boost.org/doc/libs/release/libs/smart_ptr/shared_ptr.htm>) and std::shared_ptr from
+the [C++11](http://en.wikipedia.org/wiki/C++11) standard.
+
+This class provides the following advantages:
+-   Default constructor, copy constructor, and assignment operator for an arbitrary C++ class or C
+    structure. For some objects, like files, windows, mutexes, sockets, and others, a copy
+    constructor or an assignment operator are difficult to define. For some other objects, like
+    complex classifiers in OpenCV, copy constructors are absent and not easy to implement. Finally,
+    some complex OpenCV data structures, as well as your own, may be written in C. However, copy
+    constructors and default constructors can simplify programming a lot. Besides, they are often
+    required (for example, by STL containers). By using a Ptr to such an object instead of the
+    object itself, you automatically get all of the necessary constructors and the assignment
+    operator.
+-   *O(1)* complexity of the above-mentioned operations. While some structures, like std::vector,
+    provide a copy constructor and an assignment operator, the operations may take a considerable
+    amount of time if the data structures are large. But if the structures are put into a Ptr, the
+    overhead is small and independent of the data size.
+-   Automatic and customizable cleanup, even for C structures. See the example below with FILE\*.
+-   Heterogeneous collections of objects. The standard STL and most other C++ and OpenCV containers
+    can store only objects of the same type and the same size. The classical solution to store
+    objects of different types in the same container is to store pointers to the base class (Base\*)
+    instead but then you lose the automatic memory management. Again, by using Ptr\<Base\> instead
+    of raw pointers, you can solve the problem.
+
+A Ptr is said to *own* a pointer - that is, for each Ptr there is a pointer that will be deleted
+once all Ptr instances that own it are destroyed. The owned pointer may be null, in which case
+nothing is deleted. Each Ptr also *stores* a pointer. The stored pointer is the pointer the Ptr
+pretends to be; that is, the one you get when you use Ptr::get or the conversion to T\*. It's
+usually the same as the owned pointer, but if you use casts or the general shared-ownership
+constructor, the two may diverge: the Ptr will still own the original pointer, but will itself point
+to something else.
+
+The owned pointer is treated as a black box. The only thing Ptr needs to know about it is how to
+delete it. This knowledge is encapsulated in the *deleter* - an auxiliary object that is associated
+with the owned pointer and shared between all Ptr instances that own it. The default deleter is an
+instance of DefaultDeleter, which uses the standard C++ delete operator; as such it will work with
+any pointer allocated with the standard new operator.
+
+However, if the pointer must be deleted in a different way, you must specify a custom deleter upon
+Ptr construction. A deleter is simply a callable object that accepts the pointer as its sole
+argument. For example, if you want to wrap FILE, you may do so as follows:
+@code
+    Ptr<FILE> f(fopen("myfile.txt", "w"), fclose);
+    if(!f) throw ...;
+    fprintf(f, ....);
+    ...
+    // the file will be closed automatically by f's destructor.
+@endcode
+Alternatively, if you want all pointers of a particular type to be deleted the same way, you can
+specialize DefaultDeleter<T>::operator() for that type, like this:
+@code
+    namespace cv {
+    template<> void DefaultDeleter<FILE>::operator ()(FILE * obj) const
+    {
+        fclose(obj);
+    }
+    }
+@endcode
+For convenience, the following types from the OpenCV C API already have such a specialization that
+calls the appropriate release function:
+-   CvCapture
+-   CvFileStorage
+-   CvHaarClassifierCascade
+-   CvMat
+-   CvMatND
+-   CvMemStorage
+-   CvSparseMat
+-   CvVideoWriter
+-   IplImage
+@note The shared ownership mechanism is implemented with reference counting. As such, cyclic
+ownership (e.g. when object a contains a Ptr to object b, which contains a Ptr to object a) will
+lead to all involved objects never being cleaned up. Avoid such situations.
+@note It is safe to concurrently read (but not write) a Ptr instance from multiple threads and
+therefore it is normally safe to use it in multi-threaded applications. The same is true for Mat and
+other C++ OpenCV classes that use internal reference counts.
+*/
+template<typename T>
+struct Ptr
+{
+    /** Generic programming support. */
+    typedef T element_type;
+
+    /** The default constructor creates a null Ptr - one that owns and stores a null pointer.
+    */
+    Ptr();
+
+    /**
+    If p is null, these are equivalent to the default constructor.
+    Otherwise, these constructors assume ownership of p - that is, the created Ptr owns and stores p
+    and assumes it is the sole owner of it. Don't use them if p is already owned by another Ptr, or
+    else p will get deleted twice.
+    With the first constructor, DefaultDeleter\<Y\>() becomes the associated deleter (so p will
+    eventually be deleted with the standard delete operator). Y must be a complete type at the point
+    of invocation.
+    With the second constructor, d becomes the associated deleter.
+    Y\* must be convertible to T\*.
+    @param p Pointer to own.
+    @note It is often easier to use makePtr instead.
+     */
+    template<typename Y>
+#ifdef DISABLE_OPENCV_24_COMPATIBILITY
+    explicit
+#endif
+    Ptr(Y* p);
+
+    /** @overload
+    @param d Deleter to use for the owned pointer.
+    @param p Pointer to own.
+    */
+    template<typename Y, typename D>
+    Ptr(Y* p, D d);
+
+    /**
+    These constructors create a Ptr that shares ownership with another Ptr - that is, own the same
+    pointer as o.
+    With the first two, the same pointer is stored, as well; for the second, Y\* must be convertible
+    to T\*.
+    With the third, p is stored, and Y may be any type. This constructor makes it possible to have
+    completely unrelated owned and stored pointers, and should be used with care to avoid confusion.
+    A relatively benign use is to create a non-owning Ptr, like this:
+    @code
+        ptr = Ptr<T>(Ptr<T>(), dont_delete_me); // owns nothing; will not delete the pointer.
+    @endcode
+    @param o Ptr to share ownership with.
+    */
+    Ptr(const Ptr& o);
+
+    /** @overload
+    @param o Ptr to share ownership with.
+    */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o);
+
+    /** @overload
+    @param o Ptr to share ownership with.
+    @param p Pointer to store.
+    */
+    template<typename Y>
+    Ptr(const Ptr<Y>& o, T* p);
+
+    /** The destructor is equivalent to calling Ptr::release. */
+    ~Ptr();
+
+    /**
+    Assignment replaces the current Ptr instance with one that owns and stores the same pointers as
+    o and then destroys the old instance.
+    @param o Ptr to share ownership with.
+     */
+    Ptr& operator = (const Ptr& o);
+
+    /** @overload */
+    template<typename Y>
+    Ptr& operator = (const Ptr<Y>& o);
+
+    /** If no other Ptr instance owns the owned pointer, deletes it with the associated deleter. Then sets
+    both the owned and the stored pointers to NULL.
+    */
+    void release();
+
+    /**
+    `ptr.reset(...)` is equivalent to `ptr = Ptr<T>(...)`.
+    @param p Pointer to own.
+    */
+    template<typename Y>
+    void reset(Y* p);
+
+    /** @overload
+    @param d Deleter to use for the owned pointer.
+    @param p Pointer to own.
+    */
+    template<typename Y, typename D>
+    void reset(Y* p, D d);
+
+    /**
+    Swaps the owned and stored pointers (and deleters, if any) of this and o.
+    @param o Ptr to swap with.
+    */
+    void swap(Ptr& o);
+
+    /** Returns the stored pointer. */
+    T* get() const;
+
+    /** Ordinary pointer emulation. */
+    typename detail::RefOrVoid<T>::type operator * () const;
+
+    /** Ordinary pointer emulation. */
+    T* operator -> () const;
+
+    /** Equivalent to get(). */
+    operator T* () const;
+
+    /** ptr.empty() is equivalent to `!ptr.get()`. */
+    bool empty() const;
+
+    /** Returns a Ptr that owns the same pointer as this, and stores the same
+       pointer as this, except converted via static_cast to Y*.
+    */
+    template<typename Y>
+    Ptr<Y> staticCast() const;
+
+    /** Ditto for const_cast. */
+    template<typename Y>
+    Ptr<Y> constCast() const;
+
+    /** Ditto for dynamic_cast. */
+    template<typename Y>
+    Ptr<Y> dynamicCast() const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Ptr(Ptr&& o);
+    Ptr& operator = (Ptr&& o);
+#endif
+
+private:
+    detail::PtrOwner* owner;
+    T* stored;
+
+    template<typename Y>
+    friend struct Ptr; // have to do this for the cross-type copy constructor
+};
+
+/** Equivalent to ptr1.swap(ptr2). Provided to help write generic algorithms. */
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2);
+
+/** Return whether ptr1.get() and ptr2.get() are equal and not equal, respectively. */
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2);
+
+/** `makePtr<T>(...)` is equivalent to `Ptr<T>(new T(...))`. It is shorter than the latter, and it's
+marginally safer than using a constructor or Ptr::reset, since it ensures that the owned pointer
+is new and thus not owned by any other Ptr instance.
+Unfortunately, perfect forwarding is impossible to implement in C++03, and so makePtr is limited
+to constructors of T that have up to 10 arguments, none of which are non-const references.
+ */
+template<typename T>
+Ptr<T> makePtr();
+/** @overload */
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1);
+/** @overload */
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9);
+/** @overload */
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10);
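A short hedged usage sketch tying Ptr and makePtr together (the payload type is hypothetical):

    struct Edge { int a, b; Edge(int a_, int b_) : a(a_), b(b_) {} };

    void demoPtr()
    {
        cv::Ptr<Edge> p = cv::makePtr<Edge>(1, 2);  // like Ptr<Edge>(new Edge(1, 2))
        cv::Ptr<Edge> q = p;                        // shared ownership, O(1) copy
        // the Edge is deleted when the last of p/q goes out of scope
    }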
+
+//////////////////////////////// string class ////////////////////////////////
+
+class CV_EXPORTS FileNode; //for string constructor from FileNode
+
+class CV_EXPORTS String
+{
+public:
+    typedef char value_type;
+    typedef char& reference;
+    typedef const char& const_reference;
+    typedef char* pointer;
+    typedef const char* const_pointer;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef char* iterator;
+    typedef const char* const_iterator;
+
+    static const size_t npos = size_t(-1);
+
+    String();
+    String(const String& str);
+    String(const String& str, size_t pos, size_t len = npos);
+    String(const char* s);
+    String(const char* s, size_t n);
+    String(size_t n, char c);
+    String(const char* first, const char* last);
+    template<typename Iterator> String(Iterator first, Iterator last);
+    explicit String(const FileNode& fn);
+    ~String();
+
+    String& operator=(const String& str);
+    String& operator=(const char* s);
+    String& operator=(char c);
+
+    String& operator+=(const String& str);
+    String& operator+=(const char* s);
+    String& operator+=(char c);
+
+    size_t size() const;
+    size_t length() const;
+
+    char operator[](size_t idx) const;
+    char operator[](int idx) const;
+
+    const char* begin() const;
+    const char* end() const;
+
+    const char* c_str() const;
+
+    bool empty() const;
+    void clear();
+
+    int compare(const char* s) const;
+    int compare(const String& str) const;
+
+    void swap(String& str);
+    String substr(size_t pos = 0, size_t len = npos) const;
+
+    size_t find(const char* s, size_t pos, size_t n) const;
+    size_t find(char c, size_t pos = 0) const;
+    size_t find(const String& str, size_t pos = 0) const;
+    size_t find(const char* s, size_t pos = 0) const;
+
+    size_t rfind(const char* s, size_t pos, size_t n) const;
+    size_t rfind(char c, size_t pos = npos) const;
+    size_t rfind(const String& str, size_t pos = npos) const;
+    size_t rfind(const char* s, size_t pos = npos) const;
+
+    size_t find_first_of(const char* s, size_t pos, size_t n) const;
+    size_t find_first_of(char c, size_t pos = 0) const;
+    size_t find_first_of(const String& str, size_t pos = 0) const;
+    size_t find_first_of(const char* s, size_t pos = 0) const;
+
+    size_t find_last_of(const char* s, size_t pos, size_t n) const;
+    size_t find_last_of(char c, size_t pos = npos) const;
+    size_t find_last_of(const String& str, size_t pos = npos) const;
+    size_t find_last_of(const char* s, size_t pos = npos) const;
+
+    friend String operator+ (const String& lhs, const String& rhs);
+    friend String operator+ (const String& lhs, const char*   rhs);
+    friend String operator+ (const char*   lhs, const String& rhs);
+    friend String operator+ (const String& lhs, char          rhs);
+    friend String operator+ (char          lhs, const String& rhs);
+
+    String toLowerCase() const;
+
+    String(const std::string& str);
+    String(const std::string& str, size_t pos, size_t len = npos);
+    String& operator=(const std::string& str);
+    String& operator+=(const std::string& str);
+    operator std::string() const;
+
+    friend String operator+ (const String& lhs, const std::string& rhs);
+    friend String operator+ (const std::string& lhs, const String& rhs);
+
+private:
+    char*  cstr_;
+    size_t len_;
+
+    char* allocate(size_t len); // len without trailing 0
+    void deallocate();
+
+    String(int); // disabled and invalid; catches invalid usages such as the commandLineParser.has(0) problem
+};
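+
+// Usage sketch (illustrative only): cv::String is reference-counted, so copies share
+// one buffer until one of them is reassigned:
+//
+//     cv::String a("threshold");
+//     cv::String b = a;            // shares a's buffer and bumps the refcount
+//     size_t pos = a.find("hold"); // pos == 5
+//     std::string s = b;           // implicit conversion via operator std::string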
+
+//! @} core_basic
+
+////////////////////////// cv::String implementation /////////////////////////
+
+//! @cond IGNORED
+
+inline
+String::String()
+    : cstr_(0), len_(0)
+{}
+
+inline
+String::String(const String& str)
+    : cstr_(str.cstr_), len_(str.len_)
+{
+    if (cstr_)
+        CV_XADD(((int*)cstr_)-1, 1);
+}
+
+inline
+String::String(const String& str, size_t pos, size_t len)
+    : cstr_(0), len_(0)
+{
+    pos = min(pos, str.len_);
+    len = min(str.len_ - pos, len);
+    if (!len) return;
+    if (len == str.len_)
+    {
+        CV_XADD(((int*)str.cstr_)-1, 1);
+        cstr_ = str.cstr_;
+        len_ = str.len_;
+        return;
+    }
+    memcpy(allocate(len), str.cstr_ + pos, len);
+}
+
+inline
+String::String(const char* s)
+    : cstr_(0), len_(0)
+{
+    if (!s) return;
+    size_t len = strlen(s);
+    if (!len) return;
+    memcpy(allocate(len), s, len);
+}
+
+inline
+String::String(const char* s, size_t n)
+    : cstr_(0), len_(0)
+{
+    if (!n) return;
+    if (!s) return;
+    memcpy(allocate(n), s, n);
+}
+
+inline
+String::String(size_t n, char c)
+    : cstr_(0), len_(0)
+{
+    if (!n) return;
+    memset(allocate(n), c, n);
+}
+
+inline
+String::String(const char* first, const char* last)
+    : cstr_(0), len_(0)
+{
+    size_t len = (size_t)(last - first);
+    if (!len) return;
+    memcpy(allocate(len), first, len);
+}
+
+template<typename Iterator> inline
+String::String(Iterator first, Iterator last)
+    : cstr_(0), len_(0)
+{
+    size_t len = (size_t)(last - first);
+    if (!len) return;
+    char* str = allocate(len);
+    while (first != last)
+    {
+        *str++ = *first;
+        ++first;
+    }
+}
+
+inline
+String::~String()
+{
+    deallocate();
+}
+
+inline
+String& String::operator=(const String& str)
+{
+    if (&str == this) return *this;
+
+    deallocate();
+    if (str.cstr_) CV_XADD(((int*)str.cstr_)-1, 1);
+    cstr_ = str.cstr_;
+    len_ = str.len_;
+    return *this;
+}
+
+inline
+String& String::operator=(const char* s)
+{
+    deallocate();
+    if (!s) return *this;
+    size_t len = strlen(s);
+    if (len) memcpy(allocate(len), s, len);
+    return *this;
+}
+
+inline
+String& String::operator=(char c)
+{
+    deallocate();
+    allocate(1)[0] = c;
+    return *this;
+}
+
+inline
+String& String::operator+=(const String& str)
+{
+    *this = *this + str;
+    return *this;
+}
+
+inline
+String& String::operator+=(const char* s)
+{
+    *this = *this + s;
+    return *this;
+}
+
+inline
+String& String::operator+=(char c)
+{
+    *this = *this + c;
+    return *this;
+}
+
+inline
+size_t String::size() const
+{
+    return len_;
+}
+
+inline
+size_t String::length() const
+{
+    return len_;
+}
+
+inline
+char String::operator[](size_t idx) const
+{
+    return cstr_[idx];
+}
+
+inline
+char String::operator[](int idx) const
+{
+    return cstr_[idx];
+}
+
+inline
+const char* String::begin() const
+{
+    return cstr_;
+}
+
+inline
+const char* String::end() const
+{
+    return len_ ? cstr_ + len_ : NULL;
+}
+
+inline
+bool String::empty() const
+{
+    return len_ == 0;
+}
+
+inline
+const char* String::c_str() const
+{
+    return cstr_ ? cstr_ : "";
+}
+
+inline
+void String::swap(String& str)
+{
+    cv::swap(cstr_, str.cstr_);
+    cv::swap(len_, str.len_);
+}
+
+inline
+void String::clear()
+{
+    deallocate();
+}
+
+inline
+int String::compare(const char* s) const
+{
+    if (cstr_ == s) return 0;
+    return strcmp(c_str(), s);
+}
+
+inline
+int String::compare(const String& str) const
+{
+    if (cstr_ == str.cstr_) return 0;
+    return strcmp(c_str(), str.c_str());
+}
+
+inline
+String String::substr(size_t pos, size_t len) const
+{
+    return String(*this, pos, len);
+}
+
+inline
+size_t String::find(const char* s, size_t pos, size_t n) const
+{
+    if (n == 0 || pos + n > len_) return npos;
+    const char* lmax = cstr_ + len_ - n;
+    for (const char* i = cstr_ + pos; i <= lmax; ++i)
+    {
+        size_t j = 0;
+        while (j < n && s[j] == i[j]) ++j;
+        if (j == n) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find(char c, size_t pos) const
+{
+    return find(&c, pos, 1);
+}
+
+inline
+size_t String::find(const String& str, size_t pos) const
+{
+    return find(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find(const char* s, size_t pos) const
+{
+    if (pos >= len_ || !s[0]) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        size_t j = 0;
+        while (s[j] && s[j] == i[j])
+        {
+            if (i + j >= lmax) return npos;
+            ++j;
+        }
+        if (!s[j]) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::rfind(const char* s, size_t pos, size_t n) const
+{
+    if (n > len_) return npos;
+    if (pos > len_ - n) pos = len_ - n;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        size_t j = 0;
+        while (j < n && s[j] == i[j]) ++j;
+        if (j == n) return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::rfind(char c, size_t pos) const
+{
+    return rfind(&c, pos, 1);
+}
+
+inline
+size_t String::rfind(const String& str, size_t pos) const
+{
+    return rfind(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::rfind(const char* s, size_t pos) const
+{
+    return rfind(s, pos, strlen(s));
+}
+
+inline
+size_t String::find_first_of(const char* s, size_t pos, size_t n) const
+{
+    if (n == 0 || pos + n > len_) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        for (size_t j = 0; j < n; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_first_of(char c, size_t pos) const
+{
+    return find_first_of(&c, pos, 1);
+}
+
+inline
+size_t String::find_first_of(const String& str, size_t pos) const
+{
+    return find_first_of(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find_first_of(const char* s, size_t pos) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_ || !s[0]) return npos;
+    const char* lmax = cstr_ + len_;
+    for (const char* i = cstr_ + pos; i < lmax; ++i)
+    {
+        for (size_t j = 0; s[j]; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_last_of(const char* s, size_t pos, size_t n) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_) pos = len_ - 1;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        for (size_t j = 0; j < n; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+size_t String::find_last_of(char c, size_t pos) const
+{
+    return find_last_of(&c, pos, 1);
+}
+
+inline
+size_t String::find_last_of(const String& str, size_t pos) const
+{
+    return find_last_of(str.c_str(), pos, str.len_);
+}
+
+inline
+size_t String::find_last_of(const char* s, size_t pos) const
+{
+    if (len_ == 0) return npos;
+    if (pos >= len_) pos = len_ - 1;
+    for (const char* i = cstr_ + pos; i >= cstr_; --i)
+    {
+        for (size_t j = 0; s[j]; ++j)
+            if (s[j] == *i)
+                return (size_t)(i - cstr_);
+    }
+    return npos;
+}
+
+inline
+String String::toLowerCase() const
+{
+    if (!cstr_)
+        return String();
+    String res(cstr_, len_);
+    for (size_t i = 0; i < len_; ++i)
+        res.cstr_[i] = (char) ::tolower(cstr_[i]);
+
+    return res;
+}
+
+//! @endcond
+
+// ************************* cv::String non-member functions *************************
+
+//! @relates cv::String
+//! @{
+
+inline
+String operator + (const String& lhs, const String& rhs)
+{
+    String s;
+    s.allocate(lhs.len_ + rhs.len_);
+    if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    if (rhs.len_) memcpy(s.cstr_ + lhs.len_, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+String operator + (const String& lhs, const char* rhs)
+{
+    String s;
+    size_t rhslen = strlen(rhs);
+    s.allocate(lhs.len_ + rhslen);
+    if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    if (rhslen) memcpy(s.cstr_ + lhs.len_, rhs, rhslen);
+    return s;
+}
+
+inline
+String operator + (const char* lhs, const String& rhs)
+{
+    String s;
+    size_t lhslen = strlen(lhs);
+    s.allocate(lhslen + rhs.len_);
+    if (lhslen) memcpy(s.cstr_, lhs, lhslen);
+    if (rhs.len_) memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+String operator + (const String& lhs, char rhs)
+{
+    String s;
+    s.allocate(lhs.len_ + 1);
+    if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    s.cstr_[lhs.len_] = rhs;
+    return s;
+}
+
+inline
+String operator + (char lhs, const String& rhs)
+{
+    String s;
+    s.allocate(rhs.len_ + 1);
+    s.cstr_[0] = lhs;
+    if (rhs.len_) memcpy(s.cstr_ + 1, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+static inline bool operator== (const String& lhs, const String& rhs) { return 0 == lhs.compare(rhs); }
+static inline bool operator== (const char*   lhs, const String& rhs) { return 0 == rhs.compare(lhs); }
+static inline bool operator== (const String& lhs, const char*   rhs) { return 0 == lhs.compare(rhs); }
+static inline bool operator!= (const String& lhs, const String& rhs) { return 0 != lhs.compare(rhs); }
+static inline bool operator!= (const char*   lhs, const String& rhs) { return 0 != rhs.compare(lhs); }
+static inline bool operator!= (const String& lhs, const char*   rhs) { return 0 != lhs.compare(rhs); }
+static inline bool operator<  (const String& lhs, const String& rhs) { return lhs.compare(rhs) <  0; }
+static inline bool operator<  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >  0; }
+static inline bool operator<  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <  0; }
+static inline bool operator<= (const String& lhs, const String& rhs) { return lhs.compare(rhs) <= 0; }
+static inline bool operator<= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) >= 0; }
+static inline bool operator<= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) <= 0; }
+static inline bool operator>  (const String& lhs, const String& rhs) { return lhs.compare(rhs) >  0; }
+static inline bool operator>  (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <  0; }
+static inline bool operator>  (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >  0; }
+static inline bool operator>= (const String& lhs, const String& rhs) { return lhs.compare(rhs) >= 0; }
+static inline bool operator>= (const char*   lhs, const String& rhs) { return rhs.compare(lhs) <= 0; }
+static inline bool operator>= (const String& lhs, const char*   rhs) { return lhs.compare(rhs) >= 0; }
+
+//! @} relates cv::String
+
+} // cv
+
+namespace std
+{
+    static inline void swap(cv::String& a, cv::String& b) { a.swap(b); }
+}
+
+#include "opencv2/core/ptr.inl.hpp"
+
+#endif //OPENCV_CORE_CVSTD_HPP

+ 280 - 0
ThresholdTools/include/opencv2/core/cvstd.inl.hpp

@@ -0,0 +1,280 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTDINL_HPP
+#define OPENCV_CORE_CVSTDINL_HPP
+
+#include <complex>
+#include <ostream>
+
+//! @cond IGNORED
+
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
+namespace cv
+{
+
+template<typename _Tp> class DataType< std::complex<_Tp> >
+{
+public:
+    typedef std::complex<_Tp>  value_type;
+    typedef value_type         work_type;
+    typedef _Tp                channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels) };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+inline
+String::String(const std::string& str)
+    : cstr_(0), len_(0)
+{
+    size_t len = str.size();
+    if (len) memcpy(allocate(len), str.c_str(), len);
+}
+
+inline
+String::String(const std::string& str, size_t pos, size_t len)
+    : cstr_(0), len_(0)
+{
+    size_t strlen = str.size();
+    pos = min(pos, strlen);
+    len = min(strlen - pos, len);
+    if (!len) return;
+    memcpy(allocate(len), str.c_str() + pos, len);
+}
+
+inline
+String& String::operator = (const std::string& str)
+{
+    deallocate();
+    size_t len = str.size();
+    if (len) memcpy(allocate(len), str.c_str(), len);
+    return *this;
+}
+
+inline
+String& String::operator += (const std::string& str)
+{
+    *this = *this + str;
+    return *this;
+}
+
+inline
+String::operator std::string() const
+{
+    return std::string(cstr_, len_);
+}
+
+inline
+String operator + (const String& lhs, const std::string& rhs)
+{
+    String s;
+    size_t rhslen = rhs.size();
+    s.allocate(lhs.len_ + rhslen);
+    if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
+    if (rhslen) memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
+    return s;
+}
+
+inline
+String operator + (const std::string& lhs, const String& rhs)
+{
+    String s;
+    size_t lhslen = lhs.size();
+    s.allocate(lhslen + rhs.len_);
+    if (lhslen) memcpy(s.cstr_, lhs.c_str(), lhslen);
+    if (rhs.len_) memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
+    return s;
+}
+
+inline
+FileNode::operator std::string() const
+{
+    String value;
+    read(*this, value, value);
+    return value;
+}
+
+template<> inline
+void operator >> (const FileNode& n, std::string& value)
+{
+    read(n, value, std::string());
+}
+
+template<> inline
+FileStorage& operator << (FileStorage& fs, const std::string& value)
+{
+    return fs << cv::String(value);
+}
+
+static inline
+std::ostream& operator << (std::ostream& os, const String& str)
+{
+    return os << str.c_str();
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        out << str;
+    return out;
+}
+
+static inline
+std::ostream& operator << (std::ostream& out, const Mat& mtx)
+{
+    return out << Formatter::get()->format(mtx);
+}
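+
+// Usage sketch (illustrative only): this overload is what lets a Mat be streamed
+// directly, e.g. `std::cout << mtx << std::endl;` prints the elements row by row
+// using the default Formatter.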
+
+static inline
+std::ostream& operator << (std::ostream& out, const UMat& m)
+{
+    return out << m.getMat(ACCESS_READ);
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
+{
+    return out << "(" << c.re << "," << c.im << ")";
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
+{
+    return out << Formatter::get()->format(Mat(vec));
+}
+
+
+template<typename _Tp, int m, int n> static inline
+std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
+{
+    return out << Formatter::get()->format(Mat(matx));
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << "]";
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
+{
+    out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
+    return out;
+}
+
+template<typename _Tp, int n> static inline
+std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
+{
+    out << "[";
+    if (cv::traits::Depth<_Tp>::value <= CV_32S)
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << (int)vec[i] << ", ";
+        }
+        out << (int)vec[n-1] << "]";
+    }
+    else
+    {
+        for (int i = 0; i < n - 1; ++i) {
+            out << vec[i] << ", ";
+        }
+        out << vec[n-1] << "]";
+    }
+
+    return out;
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
+{
+    return out << "[" << size.width << " x " << size.height << "]";
+}
+
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
+{
+    return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
+}
+
+static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
+{
+    int i, dims = msize.dims();
+    for( i = 0; i < dims; i++ )
+    {
+        out << msize[i];
+        if( i < dims-1 )
+            out << " x ";
+    }
+    return out;
+}
+
+} // cv
+
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+//! @endcond
+
+#endif // OPENCV_CORE_CVSTDINL_HPP

+ 184 - 0
ThresholdTools/include/opencv2/core/directx.hpp

@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_DIRECTX_HPP
+#define OPENCV_CORE_DIRECTX_HPP
+
+#include "mat.hpp"
+#include "ocl.hpp"
+
+#if !defined(__d3d11_h__)
+struct ID3D11Device;
+struct ID3D11Texture2D;
+#endif
+
+#if !defined(__d3d10_h__)
+struct ID3D10Device;
+struct ID3D10Texture2D;
+#endif
+
+#if !defined(_D3D9_H_)
+struct IDirect3DDevice9;
+struct IDirect3DDevice9Ex;
+struct IDirect3DSurface9;
+#endif
+
+
+namespace cv { namespace directx {
+
+namespace ocl {
+using namespace cv::ocl;
+
+//! @addtogroup core_directx
+// This section describes OpenCL and DirectX interoperability.
+//
+// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON. Note that DirectX is
+// supported only on Windows.
+//
+// To use the OpenCL functionality you should first initialize the OpenCL context from a DirectX resource.
+//
+//! @{
+
+// TODO static functions in the Context class
+//! @brief Creates OpenCL context from D3D11 device
+//
+//! @param pD3D11Device - pointer to D3D11 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
+
+//! @brief Creates OpenCL context from D3D10 device
+//
+//! @param pD3D10Device - pointer to D3D10 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
+
+//! @brief Creates OpenCL context from Direct3DDevice9Ex device
+//
+//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
+
+//! @brief Creates OpenCL context from Direct3DDevice9 device
+//
+//! @param pDirect3DDevice9 - pointer to Direct3DDevice9 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
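+
+// Usage sketch (illustrative only; pDevice stands for an already-created
+// ID3D11Device* owned by the application):
+//
+//     cv::directx::ocl::Context& ctx =
+//         cv::directx::ocl::initializeContextFromD3D11Device(pDevice);
+//     // from here on, the convertTo/FromD3D11Texture2D functions declared below
+//     // can share GPU memory with that device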
+
+//! @}
+
+} // namespace cv::directx::ocl
+
+//! @addtogroup core_directx
+//! @{
+
+//! @brief Converts InputArray to ID3D11Texture2D. If the destination texture format is DXGI_FORMAT_NV12 then
+//!        the input UMat is expected to be in BGR format and the data will be downsampled and color-converted to NV12.
+//
+//! @note The destination texture must be allocated by the application. The function performs a memory copy from src to
+//!       pD3D11Texture2D.
+//
+//! @param src - source InputArray
+//! @param pD3D11Texture2D - destination D3D11 texture
+CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
+
+//! @brief Converts ID3D11Texture2D to OutputArray. If the input texture format is DXGI_FORMAT_NV12 then
+//!        the data will be upsampled and color-converted to BGR format.
+//
+//! @note The destination matrix is re-allocated if it does not have enough memory to match the texture size.
+//!       The function performs a memory copy from pD3D11Texture2D to dst.
+//
+//! @param pD3D11Texture2D - source D3D11 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
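+
+// Usage sketch (illustrative only; pTex stands for an application-allocated
+// ID3D11Texture2D* whose size and format match the matrix):
+//
+//     cv::UMat frame(720, 1280, CV_8UC4);                 // BGRA image
+//     cv::directx::convertToD3D11Texture2D(frame, pTex);  // copy into the texture
+//     cv::UMat back;
+//     cv::directx::convertFromD3D11Texture2D(pTex, back); // and copy out again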
+
+//! @brief Converts InputArray to ID3D10Texture2D
+//
+//! @note The function performs a memory copy from src to
+//!       pD3D10Texture2D.
+//
+//! @param src             - source InputArray
+//! @param pD3D10Texture2D - destination D3D10 texture
+CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
+
+//! @brief Converts ID3D10Texture2D to OutputArray
+//
+//! @note The function performs a memory copy from pD3D10Texture2D
+//!       to dst.
+//
+//! @param pD3D10Texture2D - source D3D10 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to IDirect3DSurface9
+//
+//! @note The function performs a memory copy from src to
+//!       pDirect3DSurface9.
+//
+//! @param src                 - source InputArray
+//! @param pDirect3DSurface9   - destination D3D9 surface
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
+
+//! @brief Converts IDirect3DSurface9 to OutputArray
+//
+//! @note The function performs a memory copy from pDirect3DSurface9
+//!       to dst.
+//
+//! @param pDirect3DSurface9   - source D3D9 surface
+//! @param dst                 - destination OutputArray
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iD3DFORMAT - enum D3DTYPE for D3D9
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
+
+//! @}
+
+} } // namespace cv::directx
+
+#endif // OPENCV_CORE_DIRECTX_HPP

+ 280 - 0
ThresholdTools/include/opencv2/core/eigen.hpp

@@ -0,0 +1,280 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef OPENCV_CORE_EIGEN_HPP
+#define OPENCV_CORE_EIGEN_HPP
+
+#include "opencv2/core.hpp"
+
+#if defined _MSC_VER && _MSC_VER >= 1200
+#pragma warning( disable: 4714 ) //__forceinline is not inlined
+#pragma warning( disable: 4127 ) //conditional expression is constant
+#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core_eigen
+//! @{
+
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
+{
+    if( !(src.Flags & Eigen::RowMajorBit) )
+    {
+        Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value,
+              (void*)src.data(), src.outerStride()*sizeof(_Tp));
+        transpose(_src, dst);
+    }
+    else
+    {
+        Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value,
+                 (void*)src.data(), src.outerStride()*sizeof(_Tp));
+        _src.copyTo(dst);
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
+               Matx<_Tp, _rows, _cols>& dst )
+{
+    if( !(src.Flags & Eigen::RowMajorBit) )
+    {
+        dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
+    }
+    else
+    {
+        dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
+    }
+}
+
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    CV_DbgAssert(src.rows == _rows && src.cols == _cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else if( src.cols == src.rows )
+        {
+            src.convertTo(_dst, _dst.type());
+            transpose(_dst, _dst);
+        }
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
+
+template<typename _Tp>  static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(src.rows, src.cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+             dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else if( src.cols == src.rows )
+        {
+            src.convertTo(_dst, _dst.type());
+            transpose(_dst, _dst);
+        }
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows, int _cols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(_rows, _cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
+             dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
+
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    CV_Assert(src.cols == 1);
+    dst.resize(src.rows);
+
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+// Matx case
+template<typename _Tp, int _rows> static inline
+void cv2eigen( const Matx<_Tp, _rows, 1>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    dst.resize(_rows);
+
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(1, _rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        src.copyTo(_dst);
+    }
+}
+
+
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    CV_Assert(src.rows == 1);
+    dst.resize(src.cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        if( src.type() == _dst.type() )
+            transpose(src, _dst);
+        else
+            Mat(src.t()).convertTo(_dst, _dst.type());
+    }
+    else
+    {
+        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        src.convertTo(_dst, _dst.type());
+    }
+}
+
+//Matx
+template<typename _Tp, int _cols> static inline
+void cv2eigen( const Matx<_Tp, 1, _cols>& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    dst.resize(_cols);
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        const Mat _dst(_cols, 1, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(1, _cols, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        Mat(src).copyTo(_dst);
+    }
+}
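+
+// Usage sketch (illustrative only): both directions take care of the Eigen
+// column-major vs. cv::Mat row-major storage order internally:
+//
+//     Eigen::Matrix3d e = Eigen::Matrix3d::Identity();
+//     cv::Mat m;
+//     cv::eigen2cv(e, m); // m becomes a 3x3 CV_64F matrix
+//     cv::cv2eigen(m, e); // and back again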
+
+//! @}
+
+} // cv
+
+#endif

+ 271 - 0
ThresholdTools/include/opencv2/core/fast_math.hpp

@@ -0,0 +1,271 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_FAST_MATH_HPP
+#define OPENCV_CORE_FAST_MATH_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
+#include <emmintrin.h>
+#endif
+
+
+//! @addtogroup core_utils
+//! @{
+
+/****************************************************************************************\
+*                                      fast math                                         *
+\****************************************************************************************/
+
+#ifdef __cplusplus
+#  include <cmath>
+#else
+#  ifdef __BORLANDC__
+#    include <fastmath.h>
+#  else
+#    include <math.h>
+#  endif
+#endif
+
+#ifdef HAVE_TEGRA_OPTIMIZATION
+#  include "tegra_round.hpp"
+#endif
+
+#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
+    // 1. general scheme
+    #define ARM_ROUND(_value, _asm_string) \
+        int res; \
+        float temp; \
+        (void)temp; \
+        __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
+        return res
+    // 2. version for double
+    #ifdef __clang__
+        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
+    #else
+        #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
+    #endif
+    // 3. version for float
+    #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
+#endif
+
+/** @brief Rounds floating-point number to the nearest integer
+
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int
+cvRound( double value )
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
+    __m128d t = _mm_set_sd( value );
+    return _mm_cvtsd_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_DBL(value);
+#elif defined CV_ICC || defined __GNUC__
+# if defined ARM_ROUND_DBL
+    ARM_ROUND_DBL(value);
+# else
+    return (int)lrint(value);
+# endif
+#else
+    /* it's ok if round does not comply with IEEE754 standard;
+       the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5 : -0.5));
+#endif
+}
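+
+// Behaviour sketch (illustrative only): the SSE2 path uses the CPU rounding mode,
+// which defaults to round-to-nearest-even, so ties go to the even integer:
+//
+//     cvRound(2.5);  // -> 2
+//     cvRound(3.5);  // -> 4
+//     cvRound(-0.5); // -> 0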
+
+
+/** @brief Rounds floating-point number to the nearest integer not larger than the original.
+
+ The function computes an integer i such that:
+ \f[i \le \texttt{value} < i+1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvFloor( double value )
+{
+    int i = (int)value;
+    return i - (i > value);
+}
+
+/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
+
+ The function computes an integer i such that:
+ \f[i - 1 < \texttt{value} \le i\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvCeil( double value )
+{
+    int i = (int)value;
+    return i + (i < value);
+}
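+
+// Behaviour sketch (illustrative only): unlike a plain (int) cast, which truncates
+// toward zero, these round toward -infinity and +infinity respectively:
+//
+//     cvFloor(-1.5); // -> -2  ((int)-1.5 would give -1)
+//     cvCeil(-1.5);  // -> -1
+//     cvCeil(1.2);   // ->  2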
+
+/** @brief Determines if the argument is Not A Number.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
+ otherwise. */
+CV_INLINE int cvIsNaN( double value )
+{
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
+           ((unsigned)ieee754.u != 0) > 0x7ff00000;
+}
+
+/** @brief Determines if the argument is Infinity.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
+ and 0 otherwise. */
+CV_INLINE int cvIsInf( double value )
+{
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
+            (unsigned)ieee754.u == 0;
+}
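+
+// Behaviour sketch (illustrative only): both checks inspect the raw IEEE754 bits;
+// NaN has an all-ones exponent and a non-zero mantissa, infinity an all-ones
+// exponent and a zero mantissa:
+//
+//     cvIsNaN(std::numeric_limits<double>::quiet_NaN()); // -> 1
+//     cvIsInf(std::numeric_limits<double>::infinity());  // -> 1
+//     cvIsInf(42.0);                                     // -> 0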
+
+#ifdef __cplusplus
+
+/** @overload */
+CV_INLINE int cvRound(float value)
+{
+#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
+    __m128 t = _mm_set_ss( value );
+    return _mm_cvtss_si32(t);
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
+        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
+    TEGRA_ROUND_FLT(value);
+#elif defined CV_ICC || defined __GNUC__
+# if defined ARM_ROUND_FLT
+    ARM_ROUND_FLT(value);
+# else
+    return (int)lrintf(value);
+# endif
+#else
+    /* it's ok if round does not comply with IEEE754 standard;
+     the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5f : -0.5f));
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvRound( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvFloor( float value )
+{
+    int i = (int)value;
+    return i - (i > value);
+}
+
+/** @overload */
+CV_INLINE int cvFloor( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvCeil( float value )
+{
+    int i = (int)value;
+    return i + (i < value);
+}
+
+/** @overload */
+CV_INLINE int cvCeil( int value )
+{
+    return value;
+}
+
+/** @overload */
+CV_INLINE int cvIsNaN( float value )
+{
+    Cv32suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffff) > 0x7f800000;
+}
+
+/** @overload */
+CV_INLINE int cvIsInf( float value )
+{
+    Cv32suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffff) == 0x7f800000;
+}
+
+#endif // __cplusplus
+
+//! @} core_utils
+
+#endif

+ 250 - 0
ThresholdTools/include/opencv2/core/hal/hal.hpp

@@ -0,0 +1,250 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_HPP
+#define OPENCV_HAL_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/hal/interface.h"
+
+namespace cv { namespace hal {
+
+//! @addtogroup core_hal_functions
+//! @{
+
+CV_EXPORTS int normHamming(const uchar* a, int n);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
+
+CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
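+
+// Usage sketch (illustrative only): the one-array form counts set bits over n bytes;
+// the two-array form counts set bits of a XOR b, i.e. the Hamming distance used when
+// matching binary descriptors such as ORB:
+//
+//     uchar d1[32], d2[32]; // two 256-bit binary descriptors, filled elsewhere
+//     int dist = cv::hal::normHamming(d1, d2, 32);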
+
+CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
+CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
+
+CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+
+CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
+CV_EXPORTS float normL1_(const float* a, const float* b, int n);
+CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
+
+CV_EXPORTS void exp32f(const float* src, float* dst, int n);
+CV_EXPORTS void exp64f(const double* src, double* dst, int n);
+CV_EXPORTS void log32f(const float* src, float* dst, int n);
+CV_EXPORTS void log64f(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
+
+CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
+CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
+CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
+CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
+
+CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
+CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
+CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
+CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
+
+CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+
+CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
+CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
+
+struct CV_EXPORTS DFT1D
+{
+    static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
+    virtual void apply(const uchar *src, uchar *dst) = 0;
+    virtual ~DFT1D() {}
+};
+
+struct CV_EXPORTS DFT2D
+{
+    static Ptr<DFT2D> create(int width, int height, int depth,
+                             int src_channels, int dst_channels,
+                             int flags, int nonzero_rows = 0);
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
+    virtual ~DFT2D() {}
+};
+
+struct CV_EXPORTS DCT2D
+{
+    static Ptr<DCT2D> create(int width, int height, int depth, int flags);
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
+    virtual ~DCT2D() {}
+};
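+
+// A usage sketch for the replaceable DFT context above. This is an
+// assumption-laden example, not part of the API: it presumes a CV_32F
+// real-to-complex forward 2D transform and caller-allocated buffers with
+// valid steps.
+//
+//   Ptr<DFT2D> dft = DFT2D::create(width, height, CV_32F,
+//                                  /*src_channels=*/1, /*dst_channels=*/2,
+//                                  CV_HAL_DFT_COMPLEX_OUTPUT);
+//   dft->apply(src_data, src_step, dst_data, dst_step);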
+
+//! @} core_hal
+
+//=============================================================================
+// for binary compatibility with 3.0
+
+//! @cond IGNORED
+
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+CV_EXPORTS void exp(const float* src, float* dst, int n);
+CV_EXPORTS void exp(const double* src, double* dst, int n);
+CV_EXPORTS void log(const float* src, float* dst, int n);
+CV_EXPORTS void log(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
+
+//! @endcond
+
+}} //cv::hal
+
+#endif //OPENCV_HAL_HPP

+ 182 - 0
ThresholdTools/include/opencv2/core/hal/interface.h

@@ -0,0 +1,182 @@
+#ifndef OPENCV_CORE_HAL_INTERFACE_H
+#define OPENCV_CORE_HAL_INTERFACE_H
+
+//! @addtogroup core_hal_interface
+//! @{
+
+//! @name Return codes
+//! @{
+#define CV_HAL_ERROR_OK 0
+#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
+#define CV_HAL_ERROR_UNKNOWN -1
+//! @}
+
+#ifdef __cplusplus
+#include <cstddef>
+#else
+#include <stddef.h>
+#include <stdbool.h>
+#endif
+
+//! @name Data types
+//! primitive types
+//! - schar  - signed 1 byte integer
+//! - uchar  - unsigned 1 byte integer
+//! - short  - signed 2 byte integer
+//! - ushort - unsigned 2 byte integer
+//! - int    - signed 4 byte integer
+//! - uint   - unsigned 4 byte integer
+//! - int64  - signed 8 byte integer
+//! - uint64 - unsigned 8 byte integer
+//! @{
+#if !defined _MSC_VER && !defined __BORLANDC__
+#  if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
+#    include <cstdint>
+#    ifdef __NEWLIB__
+        typedef unsigned int uint;
+#    else
+        typedef std::uint32_t uint;
+#    endif
+#  else
+#    include <stdint.h>
+     typedef uint32_t uint;
+#  endif
+#else
+   typedef unsigned uint;
+#endif
+
+typedef signed char schar;
+
+#ifndef __IPL_H__
+   typedef unsigned char uchar;
+   typedef unsigned short ushort;
+#endif
+
+#if defined _MSC_VER || defined __BORLANDC__
+   typedef __int64 int64;
+   typedef unsigned __int64 uint64;
+#  define CV_BIG_INT(n)   n##I64
+#  define CV_BIG_UINT(n)  n##UI64
+#else
+   typedef int64_t int64;
+   typedef uint64_t uint64;
+#  define CV_BIG_INT(n)   n##LL
+#  define CV_BIG_UINT(n)  n##ULL
+#endif
+
+#define CV_CN_MAX     512
+#define CV_CN_SHIFT   3
+#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT)
+
+#define CV_8U   0
+#define CV_8S   1
+#define CV_16U  2
+#define CV_16S  3
+#define CV_32S  4
+#define CV_32F  5
+#define CV_64F  6
+#define CV_USRTYPE1 7
+
+#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1)
+#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK)
+
+#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
+#define CV_MAKE_TYPE CV_MAKETYPE
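+
+/* Worked example: CV_MAKETYPE(CV_8U,3) == 0 + ((3-1) << CV_CN_SHIFT) == 16,
+   i.e. CV_8UC3. The low bits encode the depth, the higher bits encode
+   (channels - 1). */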
+
+#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
+#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
+#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
+#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
+#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
+
+#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
+#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
+#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
+#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
+#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
+
+#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
+#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
+#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
+#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
+#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
+
+#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
+#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
+#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
+#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
+#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
+
+#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
+#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
+#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
+#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
+#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
+
+#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
+#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
+#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
+#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
+
+#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
+#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
+#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
+#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
+#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
+//! @}
+
+//! @name Comparison operation
+//! @sa cv::CmpTypes
+//! @{
+#define CV_HAL_CMP_EQ 0
+#define CV_HAL_CMP_GT 1
+#define CV_HAL_CMP_GE 2
+#define CV_HAL_CMP_LT 3
+#define CV_HAL_CMP_LE 4
+#define CV_HAL_CMP_NE 5
+//! @}
+
+//! @name Border processing modes
+//! @sa cv::BorderTypes
+//! @{
+#define CV_HAL_BORDER_CONSTANT 0
+#define CV_HAL_BORDER_REPLICATE 1
+#define CV_HAL_BORDER_REFLECT 2
+#define CV_HAL_BORDER_WRAP 3
+#define CV_HAL_BORDER_REFLECT_101 4
+#define CV_HAL_BORDER_TRANSPARENT 5
+#define CV_HAL_BORDER_ISOLATED 16
+//! @}
+
+//! @name DFT flags
+//! @{
+#define CV_HAL_DFT_INVERSE        1
+#define CV_HAL_DFT_SCALE          2
+#define CV_HAL_DFT_ROWS           4
+#define CV_HAL_DFT_COMPLEX_OUTPUT 16
+#define CV_HAL_DFT_REAL_OUTPUT    32
+#define CV_HAL_DFT_TWO_STAGE      64
+#define CV_HAL_DFT_STAGE_COLS    128
+#define CV_HAL_DFT_IS_CONTINUOUS 512
+#define CV_HAL_DFT_IS_INPLACE 1024
+//! @}
+
+//! @name SVD flags
+//! @{
+#define CV_HAL_SVD_NO_UV    1
+#define CV_HAL_SVD_SHORT_UV 2
+#define CV_HAL_SVD_MODIFY_A 4
+#define CV_HAL_SVD_FULL_UV  8
+//! @}
+
+//! @name Gemm flags
+//! @{
+#define CV_HAL_GEMM_1_T 1
+#define CV_HAL_GEMM_2_T 2
+#define CV_HAL_GEMM_3_T 4
+//! @}
+
+//! @}
+
+#endif

+ 472 - 0
ThresholdTools/include/opencv2/core/hal/intrin.hpp

@@ -0,0 +1,472 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_HPP
+#define OPENCV_HAL_INTRIN_HPP
+
+#include <cmath>
+#include <float.h>
+#include <stdlib.h>
+#include "opencv2/core/cvdef.h"
+
+#define OPENCV_HAL_ADD(a, b) ((a) + (b))
+#define OPENCV_HAL_AND(a, b) ((a) & (b))
+#define OPENCV_HAL_NOP(a) (a)
+#define OPENCV_HAL_1ST(a, b) (a)
+
+// Unlike the HAL API, which lives in cv::hal, the intrinsics are placed in
+// the cv namespace so they are easier to use from within OpenCV code.
+namespace cv {
+
+#ifndef CV_DOXYGEN
+
+#ifdef CV_CPU_DISPATCH_MODE
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#else
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
+#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#endif
+
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
+//! @addtogroup core_hal_intrin
+//! @{
+
+//! @cond IGNORED
+template<typename _Tp> struct V_TypeTraits
+{
+    typedef _Tp int_type;
+    typedef _Tp uint_type;
+    typedef _Tp abs_type;
+    typedef _Tp sum_type;
+
+    enum { delta = 0, shift = 0 };
+
+    static int_type reinterpret_int(_Tp x) { return x; }
+    static uint_type reinterpret_uint(_Tp x) { return x; }
+    static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
+};
+
+template<> struct V_TypeTraits<uchar>
+{
+    typedef uchar value_type;
+    typedef schar int_type;
+    typedef uchar uint_type;
+    typedef uchar abs_type;
+    typedef int sum_type;
+
+    typedef ushort w_type;
+    typedef unsigned q_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<schar>
+{
+    typedef schar value_type;
+    typedef schar int_type;
+    typedef uchar uint_type;
+    typedef uchar abs_type;
+    typedef int sum_type;
+
+    typedef short w_type;
+    typedef int q_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<ushort>
+{
+    typedef ushort value_type;
+    typedef short int_type;
+    typedef ushort uint_type;
+    typedef ushort abs_type;
+    typedef int sum_type;
+
+    typedef unsigned w_type;
+    typedef uchar nu_type;
+
+    enum { delta = 32768, shift = 16 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<short>
+{
+    typedef short value_type;
+    typedef short int_type;
+    typedef ushort uint_type;
+    typedef ushort abs_type;
+    typedef int sum_type;
+
+    typedef int w_type;
+    typedef uchar nu_type;
+    typedef schar n_type;
+
+    enum { delta = 128, shift = 8 };
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<unsigned>
+{
+    typedef unsigned value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef unsigned abs_type;
+    typedef unsigned sum_type;
+
+    typedef uint64 w_type;
+    typedef ushort nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<int>
+{
+    typedef int value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef unsigned abs_type;
+    typedef int sum_type;
+
+    typedef int64 w_type;
+    typedef short n_type;
+    typedef ushort nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<uint64>
+{
+    typedef uint64 value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef uint64 abs_type;
+    typedef uint64 sum_type;
+
+    typedef unsigned nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+template<> struct V_TypeTraits<int64>
+{
+    typedef int64 value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef uint64 abs_type;
+    typedef int64 sum_type;
+
+    typedef int nu_type;
+
+    static int_type reinterpret_int(value_type x) { return (int_type)x; }
+    static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
+    static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
+};
+
+
+template<> struct V_TypeTraits<float>
+{
+    typedef float value_type;
+    typedef int int_type;
+    typedef unsigned uint_type;
+    typedef float abs_type;
+    typedef float sum_type;
+
+    typedef double w_type;
+
+    static int_type reinterpret_int(value_type x)
+    {
+        Cv32suf u;
+        u.f = x;
+        return u.i;
+    }
+    static uint_type reinterpret_uint(value_type x)
+    {
+        Cv32suf u;
+        u.f = x;
+        return u.u;
+    }
+    static value_type reinterpret_from_int(int_type x)
+    {
+        Cv32suf u;
+        u.i = x;
+        return u.f;
+    }
+};
+
+template<> struct V_TypeTraits<double>
+{
+    typedef double value_type;
+    typedef int64 int_type;
+    typedef uint64 uint_type;
+    typedef double abs_type;
+    typedef double sum_type;
+    static int_type reinterpret_int(value_type x)
+    {
+        Cv64suf u;
+        u.f = x;
+        return u.i;
+    }
+    static uint_type reinterpret_uint(value_type x)
+    {
+        Cv64suf u;
+        u.f = x;
+        return u.u;
+    }
+    static value_type reinterpret_from_int(int_type x)
+    {
+        Cv64suf u;
+        u.i = x;
+        return u.f;
+    }
+};
+
+template <typename T> struct V_SIMD128Traits
+{
+    enum { nlanes = 16 / sizeof(T) };
+};
+
+//! @endcond
+
+//! @}
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+}
+
+#ifdef CV_DOXYGEN
+#   undef CV_SSE2
+#   undef CV_NEON
+#   undef CV_VSX
+#endif
+
+#if CV_SSE2
+
+#include "opencv2/core/hal/intrin_sse.hpp"
+
+#elif CV_NEON
+
+#include "opencv2/core/hal/intrin_neon.hpp"
+
+#elif CV_VSX
+
+#include "opencv2/core/hal/intrin_vsx.hpp"
+
+#else
+
+#include "opencv2/core/hal/intrin_cpp.hpp"
+
+#endif
+
+//! @addtogroup core_hal_intrin
+//! @{
+
+#ifndef CV_SIMD128
+//! Set to 1 if the current compiler supports vector extensions (NEON or SSE is enabled)
+#define CV_SIMD128 0
+#endif
+
+#ifndef CV_SIMD128_64F
+//! Set to 1 if the current intrinsics implementation supports 64-bit float vectors
+#define CV_SIMD128_64F 0
+#endif
+
+//! @}
+
+//==================================================================================================
+
+//! @cond IGNORED
+
+namespace cv {
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
+template <typename R> struct V_RegTrait128;
+
+template <> struct V_RegTrait128<uchar> {
+    typedef v_uint8x16 reg;
+    typedef v_uint16x8 w_reg;
+    typedef v_uint32x4 q_reg;
+    typedef v_uint8x16 u_reg;
+    static v_uint8x16 zero() { return v_setzero_u8(); }
+    static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
+};
+
+template <> struct V_RegTrait128<schar> {
+    typedef v_int8x16 reg;
+    typedef v_int16x8 w_reg;
+    typedef v_int32x4 q_reg;
+    typedef v_uint8x16 u_reg;
+    static v_int8x16 zero() { return v_setzero_s8(); }
+    static v_int8x16 all(schar val) { return v_setall_s8(val); }
+};
+
+template <> struct V_RegTrait128<ushort> {
+    typedef v_uint16x8 reg;
+    typedef v_uint32x4 w_reg;
+    typedef v_int16x8 int_reg;
+    typedef v_uint16x8 u_reg;
+    static v_uint16x8 zero() { return v_setzero_u16(); }
+    static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
+};
+
+template <> struct V_RegTrait128<short> {
+    typedef v_int16x8 reg;
+    typedef v_int32x4 w_reg;
+    typedef v_uint16x8 u_reg;
+    static v_int16x8 zero() { return v_setzero_s16(); }
+    static v_int16x8 all(short val) { return v_setall_s16(val); }
+};
+
+template <> struct V_RegTrait128<unsigned> {
+    typedef v_uint32x4 reg;
+    typedef v_uint64x2 w_reg;
+    typedef v_int32x4 int_reg;
+    typedef v_uint32x4 u_reg;
+    static v_uint32x4 zero() { return v_setzero_u32(); }
+    static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
+};
+
+template <> struct V_RegTrait128<int> {
+    typedef v_int32x4 reg;
+    typedef v_int64x2 w_reg;
+    typedef v_uint32x4 u_reg;
+    static v_int32x4 zero() { return v_setzero_s32(); }
+    static v_int32x4 all(int val) { return v_setall_s32(val); }
+};
+
+template <> struct V_RegTrait128<uint64> {
+    typedef v_uint64x2 reg;
+    static v_uint64x2 zero() { return v_setzero_u64(); }
+    static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
+};
+
+template <> struct V_RegTrait128<int64> {
+    typedef v_int64x2 reg;
+    static v_int64x2 zero() { return v_setzero_s64(); }
+    static v_int64x2 all(int64 val) { return v_setall_s64(val); }
+};
+
+template <> struct V_RegTrait128<float> {
+    typedef v_float32x4 reg;
+    typedef v_int32x4 int_reg;
+    typedef v_float32x4 u_reg;
+    static v_float32x4 zero() { return v_setzero_f32(); }
+    static v_float32x4 all(float val) { return v_setall_f32(val); }
+};
+
+#if CV_SIMD128_64F
+template <> struct V_RegTrait128<double> {
+    typedef v_float64x2 reg;
+    typedef v_int32x4 int_reg;
+    typedef v_float64x2 u_reg;
+    static v_float64x2 zero() { return v_setzero_f64(); }
+    static v_float64x2 all(double val) { return v_setall_f64(val); }
+};
+#endif
+
+inline unsigned int trailingZeros32(unsigned int value) {
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1700) || defined(_M_ARM)
+    unsigned long index = 0;
+    _BitScanForward(&index, value);
+    return (unsigned int)index;
+#else
+    return _tzcnt_u32(value);
+#endif
+#elif defined(__GNUC__) || defined(__GNUG__)
+    return __builtin_ctz(value);
+#elif defined(__ICC) || defined(__INTEL_COMPILER)
+    return _bit_scan_forward(value);
+#elif defined(__clang__)
+    return __builtin_ctz(value);
+#else
+    static const int MultiplyDeBruijnBitPosition[32] = {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
+    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
+#endif
+}
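+
+// Behavior sketch (not part of the original header): trailingZeros32(0x10) == 4
+// and trailingZeros32(1) == 0; the result for value == 0 is unspecified on
+// several branches (e.g. __builtin_ctz(0) is undefined), so callers should
+// guarantee value != 0.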
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+
+} // cv::
+
+//! @endcond
+
+#endif

+ 1988 - 0
ThresholdTools/include/opencv2/core/hal/intrin_cpp.hpp

@@ -0,0 +1,1988 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_CPP_HPP
+#define OPENCV_HAL_INTRIN_CPP_HPP
+
+#include <limits>
+#include <cstring>
+#include <algorithm>
+#include "opencv2/core/saturate.hpp"
+
+namespace cv
+{
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
+/** @addtogroup core_hal_intrin
+
+"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
+different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86
+architectures and __NEON__ on ARM architectures, both allow working with 128 bit registers
+containing packed values of different types. In case when there is no SIMD extension available
+during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as
+expected although it could be slower.
+
+### Types
+
+There are several types representing a 128-bit register as a vector of packed values; each type is
+implemented as a structure based on a single SIMD register.
+
+- cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x4: four 32-bit floating point values (signed) - float
+- cv::v_float64x2: two 64-bit floating point values (signed) - double
+
+@note
+cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget
+to check the CV_SIMD128_64F preprocessor definition:
+@code
+#if CV_SIMD128_64F
+//...
+#endif
+@endcode
+
+### Load and store operations
+
+These operations set the contents of a register explicitly, load them from a memory block, and
+store the contents of a register back to a memory block (a minimal round trip is sketched after
+the list below).
+
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
+- Other create methods:
+@ref v_setall_s8, @ref v_setall_u8, ...,
+@ref v_setzero_u8, @ref v_setzero_s8, ...
+- Memory operations:
+@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
+@ref v_store, @ref v_store_aligned,
+@ref v_store_high, @ref v_store_low
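+
+A minimal load/compute/store round trip (a sketch using the float type above):
+@code{.cpp}
+float buf[4] = {1.f, 2.f, 3.f, 4.f};
+v_float32x4 r = v_load(buf);  // four packed floats
+v_store(buf, r + r);          // buf becomes {2, 4, 6, 8}
+@endcode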
+
+### Value reordering
+
+These operations reorder or recombine the elements of one or more vectors (a deinterleave sketch
+follows the list).
+
+- Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
+- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
+- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
+@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
+- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
+- Extract: @ref v_extract
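+
+A deinterleave sketch (assumes packed 3-channel 8-bit data such as BGR pixels):
+@code{.cpp}
+uchar bgr[48];                      // 16 interleaved BGR pixels
+v_uint8x16 b, g, r;
+v_load_deinterleave(bgr, b, g, r);  // one register per channel
+@endcode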
+
+
+### Arithmetic, bitwise and comparison operations
+
+Element-wise binary and unary operations.
+
+- Arithmetic:
+@ref operator +(const v_reg &a, const v_reg &b) "+",
+@ref operator -(const v_reg &a, const v_reg &b) "-",
+@ref operator *(const v_reg &a, const v_reg &b) "*",
+@ref operator /(const v_reg &a, const v_reg &b) "/",
+@ref v_mul_expand
+
+- Non-saturating arithmetic: @ref v_add_wrap, @ref v_sub_wrap
+
+- Bitwise shifts:
+@ref operator <<(const v_reg &a, int s) "<<",
+@ref operator >>(const v_reg &a, int s) ">>",
+@ref v_shl, @ref v_shr
+
+- Bitwise logic:
+@ref operator&(const v_reg &a, const v_reg &b) "&",
+@ref operator |(const v_reg &a, const v_reg &b) "|",
+@ref operator ^(const v_reg &a, const v_reg &b) "^",
+@ref operator ~(const v_reg &a) "~"
+
+- Comparison:
+@ref operator >(const v_reg &a, const v_reg &b) ">",
+@ref operator >=(const v_reg &a, const v_reg &b) ">=",
+@ref operator <(const v_reg &a, const v_reg &b) "<",
+@ref operator <=(const v_reg &a, const v_reg &b) "<=",
+@ref operator==(const v_reg &a, const v_reg &b) "==",
+@ref operator !=(const v_reg &a, const v_reg &b) "!="
+
+- min/max: @ref v_min, @ref v_max
+
+### Reduce and mask
+
+Most of these operations return only one value (a mask/select sketch follows the list).
+
+- Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum, @ref v_popcount
+- Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
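+
+A mask/select sketch (lane-wise comparison results act as full-lane masks):
+@code{.cpp}
+v_float32x4 a = v_setall_f32(2.f), b = v_setall_f32(3.f);
+v_float32x4 mask = a < b;              // all bits set in each lane where true
+v_float32x4 r = v_select(mask, a, b);  // takes a's lanes where mask is set
+@endcode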
+
+### Other math
+
+- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
+- Absolute values: @ref v_abs, @ref v_absdiff
+
+### Conversions
+
+Different type conversions and casts:
+
+- Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc,
+- To float: @ref v_cvt_f32, @ref v_cvt_f64
+- Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ...
+
+### Matrix operations
+
+In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4
+
+### Usability
+
+Most operations are implemented only for a subset of the available types; the following matrices
+show which operations apply to which types.
+
+Regular integers:
+
+| Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
+|-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
+|load, store        | x | x | x | x | x | x |
+|interleave         | x | x | x | x | x | x |
+|expand             | x | x | x | x | x | x |
+|expand_q           | x | x |   |   |   |   |
+|add, sub           | x | x | x | x | x | x |
+|add_wrap, sub_wrap | x | x | x | x |   |   |
+|mul                |   |   | x | x | x | x |
+|mul_expand         |   |   | x | x | x |   |
+|compare            | x | x | x | x | x | x |
+|shift              |   |   | x | x | x | x |
+|dotprod            |   |   |   | x |   |   |
+|logical            | x | x | x | x | x | x |
+|min, max           | x | x | x | x | x | x |
+|absdiff            | x | x | x | x | x | x |
+|reduce             |   |   |   |   | x | x |
+|mask               | x | x | x | x | x | x |
+|pack               | x | x | x | x | x | x |
+|pack_u             | x |   | x |   |   |   |
+|unpack             | x | x | x | x | x | x |
+|extract            | x | x | x | x | x | x |
+|rotate (lanes)     | x | x | x | x | x | x |
+|cvt_flt32          |   |   |   |   |   | x |
+|cvt_flt64          |   |   |   |   |   | x |
+|transpose4x4       |   |   |   |   | x | x |
+
+Big integers:
+
+| Operations\\Types | uint 64x2 | int 64x2 |
+|-------------------|:-:|:-:|
+|load, store        | x | x |
+|add, sub           | x | x |
+|shift              | x | x |
+|logical            | x | x |
+|extract            | x | x |
+|rotate (lanes)     | x | x |
+
+Floating point:
+
+| Operations\\Types | float 32x4 | float 64x2 |
+|-------------------|:-:|:-:|
+|load, store        | x | x |
+|interleave         | x |   |
+|add, sub           | x | x |
+|mul                | x | x |
+|div                | x | x |
+|compare            | x | x |
+|min, max           | x | x |
+|absdiff            | x | x |
+|reduce             | x |   |
+|mask               | x | x |
+|unpack             | x | x |
+|cvt_flt32          |   | x |
+|cvt_flt64          | x |   |
+|sqrt, abs          | x | x |
+|float math         | x | x |
+|transpose4x4       | x |   |
+|extract            | x | x |
+|rotate (lanes)     | x | x |
+
+ @{ */
+
+template<typename _Tp, int n> struct v_reg
+{
+//! @cond IGNORED
+    typedef _Tp lane_type;
+    typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec;
+    typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec;
+    enum { nlanes = n };
+//! @endcond
+
+    /** @brief Constructor
+
+    Initializes register with data from memory
+    @param ptr pointer to memory block with data for register */
+    explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
+
+    /** @brief Constructor
+
+    Initializes register with two 64-bit values */
+    v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
+
+    /** @brief Constructor
+
+    Initializes register with four 32-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
+
+    /** @brief Constructor
+
+    Initializes register with eight 16-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+    }
+
+    /** @brief Constructor
+
+    Initializes register with sixteen 8-bit values */
+    v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
+           _Tp s4, _Tp s5, _Tp s6, _Tp s7,
+           _Tp s8, _Tp s9, _Tp s10, _Tp s11,
+           _Tp s12, _Tp s13, _Tp s14, _Tp s15)
+    {
+        s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
+        s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
+        s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
+        s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
+    }
+
+    /** @brief Default constructor
+
+    Does not initialize anything*/
+    v_reg() {}
+
+    /** @brief Copy constructor */
+    v_reg(const v_reg<_Tp, n> & r)
+    {
+        for( int i = 0; i < n; i++ )
+            s[i] = r.s[i];
+    }
+    /** @brief Access first value
+
+    Returns value of the first lane according to register type, for example:
+    @code{.cpp}
+    v_int32x4 r(1, 2, 3, 4);
+    int v = r.get0(); // returns 1
+    v_uint64x2 r2(1, 2);
+    uint64 v2 = r2.get0(); // returns 1
+    @endcode
+    */
+    _Tp get0() const { return s[0]; }
+
+//! @cond IGNORED
+    _Tp get(const int i) const { return s[i]; }
+    v_reg<_Tp, n> high() const
+    {
+        v_reg<_Tp, n> c;
+        int i;
+        for( i = 0; i < n/2; i++ )
+        {
+            c.s[i] = s[i+(n/2)];
+            c.s[i+(n/2)] = 0;
+        }
+        return c;
+    }
+
+    static v_reg<_Tp, n> zero()
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = (_Tp)0;
+        return c;
+    }
+
+    static v_reg<_Tp, n> all(_Tp s)
+    {
+        v_reg<_Tp, n> c;
+        for( int i = 0; i < n; i++ )
+            c.s[i] = s;
+        return c;
+    }
+
+    template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const
+    {
+        size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n);
+        v_reg<_Tp2, n2> c;
+        std::memcpy(&c.s[0], &s[0], bytes);
+        return c;
+    }
+
+    _Tp s[n];
+//! @endcond
+};
+
+/** @brief Sixteen 8-bit unsigned integer values */
+typedef v_reg<uchar, 16> v_uint8x16;
+/** @brief Sixteen 8-bit signed integer values */
+typedef v_reg<schar, 16> v_int8x16;
+/** @brief Eight 16-bit unsigned integer values */
+typedef v_reg<ushort, 8> v_uint16x8;
+/** @brief Eight 16-bit signed integer values */
+typedef v_reg<short, 8> v_int16x8;
+/** @brief Four 32-bit unsigned integer values */
+typedef v_reg<unsigned, 4> v_uint32x4;
+/** @brief Four 32-bit signed integer values */
+typedef v_reg<int, 4> v_int32x4;
+/** @brief Four 32-bit floating point values (single precision) */
+typedef v_reg<float, 4> v_float32x4;
+/** @brief Two 64-bit floating point values (double precision) */
+typedef v_reg<double, 2> v_float64x2;
+/** @brief Two 64-bit unsigned integer values */
+typedef v_reg<uint64, 2> v_uint64x2;
+/** @brief Two 64-bit signed integer values */
+typedef v_reg<int64, 2> v_int64x2;
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> \
+    operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return c; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& \
+    operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
+    return a; \
+}
+
+/** @brief Add values
+
+For all types. */
+OPENCV_HAL_IMPL_BIN_OP(+)
+
+/** @brief Subtract values
+
+For all types. */
+OPENCV_HAL_IMPL_BIN_OP(-)
+
+/** @brief Multiply values
+
+For 16- and 32-bit integer types and floating types. */
+OPENCV_HAL_IMPL_BIN_OP(*)
+
+/** @brief Divide values
+
+For floating types only. */
+OPENCV_HAL_IMPL_BIN_OP(/)
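+
+// Saturation sketch: the fallback lanes go through saturate_cast, so for
+// v_uint8x16 the sum v_setall_u8(200) + v_setall_u8(100) yields lanes of 255
+// (saturated), not the wrapped value 44.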
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
+    (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return c; \
+} \
+template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
+    bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    for( int i = 0; i < n; i++ ) \
+        a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
+                                                        V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
+    return a; \
+}
+
+/** @brief Bitwise AND
+
+Only for integer types. */
+OPENCV_HAL_IMPL_BIT_OP(&)
+
+/** @brief Bitwise OR
+
+Only for integer types. */
+OPENCV_HAL_IMPL_BIT_OP(|)
+
+/** @brief Bitwise XOR
+
+Only for integer types.*/
+OPENCV_HAL_IMPL_BIT_OP(^)
+
+/** @brief Bitwise NOT
+
+Only for integer types.*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i]));
+    }
+    return c;
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
+template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp2, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i]); \
+    return c; \
+}
+
+/** @brief Square root of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
+
+//! @cond IGNORED
+OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
+OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
+//! @endcond
+
+/** @brief Absolute value of elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
+                          typename V_TypeTraits<_Tp>::abs_type)
+
+/** @brief Round elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int)
+
+/** @brief Floor elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int)
+
+/** @brief Ceil elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int)
+
+/** @brief Truncate elements
+
+Only for floating point types.*/
+OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cfunc(a.s[i], b.s[i]); \
+    return c; \
+}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
+template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
+{ \
+    _Tp c = a.s[0]; \
+    for( int i = 1; i < n; i++ ) \
+        c = cfunc(c, a.s[i]); \
+    return c; \
+}
+
+/** @brief Choose min values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{min(A1,B1) min(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
+
+/** @brief Choose max values for each pair
+
+Scheme:
+@code
+{A1 A2 ...}
+{B1 B2 ...}
+--------------
+{max(A1,B1) max(A2,B2) ...}
+@endcode
+For all types except 64-bit integer. */
+OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
+
+/** @brief Find one min value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => min(A1,A2,A3,...)
+@endcode
+For 32-bit integer and 32-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
+
+/** @brief Find one max value
+
+Scheme:
+@code
+{A1 A2 A3 ...} => max(A1,A2,A3,...)
+@endcode
+For 32-bit integer and 32-bit floating point types. */
+OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
+
+static const unsigned char popCountTable[] =
+{
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
+/** @brief Count the 1 bits in the vector and return 4 values
+
+Scheme:
+@code
+{A1 A2 A3 ...} => popcount(A1)
+@endcode
+Works for any input type, but the result is always a v_uint32x4. */
+template<typename _Tp, int n> inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a)
+{
+    v_uint8x16 b;
+    b = v_reinterpret_as_u8(a);
+    for( int i = 0; i < v_uint8x16::nlanes; i++ )
+    {
+        b.s[i] = popCountTable[b.s[i]];
+    }
+    v_uint32x4 c;
+    for( int i = 0; i < v_uint32x4::nlanes; i++ )
+    {
+        c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3];
+    }
+    return c;
+}
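+
+// Sketch: v_popcount(v_setall_u8((uchar)255)).s[0] == 32, because each 32-bit
+// result lane sums the popcounts of four all-ones bytes (4 * 8).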
+
+
+//! @cond IGNORED
+template<typename _Tp, int n>
+inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
+{
+    for( int i = 0; i < n; i++ )
+    {
+        minval.s[i] = std::min(a.s[i], b.s[i]);
+        maxval.s[i] = std::max(a.s[i], b.s[i]);
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef typename V_TypeTraits<_Tp>::int_type itype; \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
+    return c; \
+}
+
+/** @brief Less-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<)
+
+/** @brief Greater-than comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>)
+
+/** @brief Less-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(<=)
+
+/** @brief Greater-than or equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(>=)
+
+/** @brief Equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(==)
+
+/** @brief Not equal comparison
+
+For all types except 64-bit integer values. */
+OPENCV_HAL_IMPL_CMP_OP(!=)
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
+template<typename _Tp, int n> \
+inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    typedef _Tp2 rtype; \
+    v_reg<rtype, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
+    return c; \
+}
+
+/** @brief Add values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
+
+/** @brief Subtract values without saturation
+
+For 8- and 16-bit integer values. */
+OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
+
+//! @cond IGNORED
+template<typename T> inline T _absdiff(T a, T b)
+{
+    return a > b ? a - b : b - a;
+}
+//! @endcond
+
+/** @brief Absolute difference
+
+Returns \f$ |a - b| \f$ converted to the corresponding unsigned type.
+Example:
+@code{.cpp}
+v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
+v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
+@endcode
+For 8-, 16-, 32-bit integer source types. */
+template<typename _Tp, int n>
+inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
+{
+    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
+    v_reg<rtype, n> c;
+    const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
+    for( int i = 0; i < n; i++ )
+    {
+        rtype ua = a.s[i] ^ mask;
+        rtype ub = b.s[i] ^ mask;
+        c.s[i] = _absdiff(ua, ub);
+    }
+    return c;
+}
+
+/** @overload
+
+For 32-bit floating point values */
+inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+{
+    v_float32x4 c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @overload
+
+For 64-bit floating point values */
+inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+{
+    v_float64x2 c;
+    for( int i = 0; i < c.nlanes; i++ )
+        c.s[i] = _absdiff(a.s[i], b.s[i]);
+    return c;
+}
+
+/** @brief Inverse square root
+
+Returns \f$ 1/sqrt(a) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = 1.f/std::sqrt(a.s[i]);
+    return c;
+}
+
+/** @brief Magnitude
+
+Returns \f$ sqrt(a^2 + b^2) \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
+    return c;
+}
+
+/** @brief Square of the magnitude
+
+Returns \f$ a^2 + b^2 \f$
+For floating point types only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
+    return c;
+}
+
+/** @brief Multiply and add
+
+Returns \f$ a*b + c \f$
+For floating point types and signed 32-bit int only. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                              const v_reg<_Tp, n>& c)
+{
+    v_reg<_Tp, n> d;
+    for( int i = 0; i < n; i++ )
+        d.s[i] = a.s[i]*b.s[i] + c.s[i];
+    return d;
+}
+
+/** @brief Dot product of elements
+
+Multiply values in two registers and sum adjacent result pairs.
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+{A1B1+A2B2 ...} // 32-bit
+@endcode
+Implemented only for 16-bit signed source type (v_int16x8).
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, n/2> c;
+    for( int i = 0; i < (n/2); i++ )
+        c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
+    return c;
+}
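+
+// Sketch: with v_int16x8 a = {1,2,3,4,5,6,7,8} and b filled with ones,
+// v_dotprod(a, b) yields the v_int32x4 {3, 7, 11, 15}.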
+
+/** @brief Dot product of elements
+
+Same as cv::v_dotprod, but adds a third element to the sum of each adjacent pair.
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+  {A1B1+A2B2+C1 ...} // 32-bit
+
+@endcode
+Implemented only for 16-bit signed source type (v_int16x8).
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+    v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, n/2> s;
+    for( int i = 0; i < (n/2); i++ )
+        s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
+    return s;
+}
+
+/** @brief Multiply and expand
+
+Multiply the values of two registers and store the results in two registers with a wider pack type.
+Scheme:
+@code
+  {A B C D} // 32-bit
+x {E F G H} // 32-bit
+---------------
+{AE BF}         // 64-bit
+        {CG DH} // 64-bit
+@endcode
+Example:
+@code{.cpp}
+v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
+v_uint64x2 c, d; // results
+v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
+@endcode
+Implemented only for 16-bit and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
+*/
+template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
+                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i]*b.s[i];
+        d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
+    }
+}
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
+                                                 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
+    }
+}
+//! @endcond
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
+template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
+{ \
+    v_reg<_Tp, n> c; \
+    for( int i = 0; i < n; i++ ) \
+        c.s[i] = (_Tp)(a.s[i] shift_op imm); \
+    return c; \
+}
+
+/** @brief Bitwise shift left
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(<< )
+
+/** @brief Bitwise shift right
+
+For 16-, 32- and 64-bit integer values. */
+OPENCV_HAL_IMPL_SHIFT_OP(>> )
+
+/** @brief Element shift/rotate within the vector
+
+For all types */
+#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
+{ \
+    v_reg<_Tp, n> b; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int sIndex = i opA imm; \
+        if (0 <= sIndex && sIndex < n) \
+        { \
+            b.s[i] = a.s[sIndex]; \
+        } \
+        else \
+        { \
+            b.s[i] = 0; \
+        } \
+    } \
+    return b; \
+} \
+template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
+{ \
+    v_reg<_Tp, n> c; \
+    for (int i = 0; i < n; i++) \
+    { \
+        int aIndex = i opA imm; \
+        int bIndex = i opA imm opB n; \
+        if (0 <= bIndex && bIndex < n) \
+        { \
+            c.s[i] = b.s[bIndex]; \
+        } \
+        else if (0 <= aIndex && aIndex < n) \
+        { \
+            c.s[i] = a.s[aIndex]; \
+        } \
+        else \
+        { \
+            c.s[i] = 0; \
+        } \
+    } \
+    return c; \
+}
+
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left,  -, +)
+OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
+
+/** @brief Sum packed values
+
+Scheme:
+@code
+{A1 A2 A3 ...} => sum{A1,A2,A3,...}
+@endcode
+For 32-bit integer and 32-bit floating point types.*/
+template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
+{
+    typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
+    for( int i = 1; i < n; i++ )
+        c += a.s[i];
+    return c;
+}
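+
+// Usage sketch (illustrative only; lane values are assumed):
+//   v_float32x4 a(1.f, 2.f, 3.f, 4.f);
+//   float s = v_reduce_sum(a); // s = 10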
+
+/** @brief Sums all elements of each input vector, returns the vector of sums
+
+ Scheme:
+ @code
+ result[0] = a[0] + a[1] + a[2] + a[3]
+ result[1] = b[0] + b[1] + b[2] + b[3]
+ result[2] = c[0] + c[1] + c[2] + c[3]
+ result[3] = d[0] + d[1] + d[2] + d[3]
+ @endcode
+*/
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    v_float32x4 r;
+    r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3];
+    r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3];
+    r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3];
+    r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3];
+    return r;
+}
+
+/** @brief Get negative values mask
+
+The returned value is a bit mask with bits set to 1 at positions corresponding to negative packed values.
+Example:
+@code{.cpp}
+v_int32x4 r; // set to {-1, -1, 1, 1}
+int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
+{
+    int mask = 0;
+    for( int i = 0; i < n; i++ )
+        mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
+    return mask;
+}
+
+/** @brief Check if all packed values are less than zero
+
+Unsigned values will be cast to signed: `uchar 254 => char -2`.
+For all types except 64-bit. */
+template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
+            return false;
+    return true;
+}
+
+/** @brief Check if any of packed values is less than zero
+
+Unsigned values will be cast to signed: `uchar 254 => char -2`.
+For all types except 64-bit. */
+template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
+            return true;
+    return false;
+}
+
+/** @brief Per-element select (blend operation)
+
+Return value will be built by combining values _a_ and _b_ using the following scheme:
+    result[i] = mask[i] ? a[i] : b[i];
+
+@note _mask_ element values are restricted to the following:
+- 0: select element from _b_
+- 0xff/0xffff/etc: select element from _a_
+(fully compatible with bitwise-based operator)
+*/
+template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
+                                                           const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    typedef V_TypeTraits<_Tp> Traits;
+    typedef typename Traits::int_type int_type;
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < n; i++ )
+    {
+        int_type m = Traits::reinterpret_int(mask.s[i]);
+        CV_DbgAssert(m == 0 || m == (~(int_type)0));  // restrict mask values: 0 or 0xff/0xffff/etc
+        c.s[i] = m ? a.s[i] : b.s[i];
+    }
+    return c;
+}
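+
+// Usage sketch (illustrative only; lane values are assumed): the mask selects
+// per lane between the two sources, as with a bitwise blend.
+//   v_int32x4 mask(-1, 0, -1, 0); // -1 == all bits set
+//   v_int32x4 a = v_setall_s32(7), b = v_setall_s32(9);
+//   v_int32x4 r = v_select(mask, a, b); // r = {7, 9, 7, 9}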
+
+/** @brief Expand values to the wider pack type
+
+Copy contents of register to two registers with 2x wider pack type.
+Scheme:
+@code
+ int32x4     int64x2 int64x2
+{A B C D} ==> {A B} , {C D}
+@endcode */
+template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
+                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
+{
+    for( int i = 0; i < (n/2); i++ )
+    {
+        b0.s[i] = a.s[i];
+        b1.s[i] = a.s[i+(n/2)];
+    }
+}
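+
+// Usage sketch (illustrative only; lane values are assumed):
+//   v_uint16x8 a(1, 2, 3, 4, 5, 6, 7, 8);
+//   v_uint32x4 lo, hi;
+//   v_expand(a, lo, hi); // lo = {1, 2, 3, 4}, hi = {5, 6, 7, 8}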
+
+//! @cond IGNORED
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
+    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
+    return c;
+}
+
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
+    v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
+{
+    v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
+    return c;
+}
+//! @endcond
+
+/** @brief Interleave two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A1 B1 A2 B2} and {A3 B3 A4 B4}
+@endcode
+For all types except 64-bit.
+*/
+template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
+                                               v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
+{
+    int i;
+    for( i = 0; i < n/2; i++ )
+    {
+        b0.s[i*2] = a0.s[i];
+        b0.s[i*2+1] = a1.s[i];
+    }
+    for( ; i < n; i++ )
+    {
+        b1.s[i*2-n] = a0.s[i];
+        b1.s[i*2-n+1] = a1.s[i];
+    }
+}
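+
+// Usage sketch (illustrative only; lane values are assumed):
+//   v_int32x4 a(1, 2, 3, 4), b(5, 6, 7, 8), lo, hi;
+//   v_zip(a, b, lo, hi); // lo = {1, 5, 2, 6}, hi = {3, 7, 4, 8}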
+
+/** @brief Load register contents from memory
+
+@param ptr pointer to memory block with data
+@return register object
+
+@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+}
+
+/** @brief Load register contents from memory (aligned)
+
+Similar to cv::v_load, but the source memory block should be aligned (to a 16-byte boundary).
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
+{
+    return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
+}
+
+/** @brief Load 64 bits of data into the lower part (the high part is undefined).
+
+@param ptr memory block containing data for first half (0..n/2)
+
+@code{.cpp}
+int lo[2] = { 1, 2 };
+v_int32x4 r = v_load_low(lo);
+@endcode
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_low(const _Tp* ptr)
+{
+    v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
+    for( int i = 0; i < c.nlanes/2; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+
+/** @brief Load register contents from two memory blocks
+
+@param loptr memory block containing data for first half (0..n/2)
+@param hiptr memory block containing data for second half (n/2..n)
+
+@code{.cpp}
+int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
+v_int32x4 r = v_load_halves(lo, hi);
+@endcode
+ */
+template<typename _Tp>
+inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
+{
+    v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
+    for( int i = 0; i < c.nlanes/2; i++ )
+    {
+        c.s[i] = loptr[i];
+        c.s[i+c.nlanes/2] = hiptr[i];
+    }
+    return c;
+}
+
+/** @brief Load register contents from memory with double expand
+
+Same as cv::v_load, but the result pack type will be 2x wider than the memory type.
+
+@code{.cpp}
+short buf[4] = {1, 2, 3, 4}; // type is int16
+v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
+@endcode
+For 8-, 16-, 32-bit integer source types. */
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2>
+v_load_expand(const _Tp* ptr)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c;
+    for( int i = 0; i < c.nlanes; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+
+/** @brief Load register contents from memory with quad expand
+
+Same as cv::v_load_expand, but the result type is 4 times wider than the source type.
+@code{.cpp}
+char buf[4] = {1, 2, 3, 4}; // type is int8
+v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
+@endcode
+For 8-bit integer source types. */
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4>
+v_load_expand_q(const _Tp* ptr)
+{
+    typedef typename V_TypeTraits<_Tp>::q_type q_type;
+    v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c;
+    for( int i = 0; i < c.nlanes; i++ )
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+
+/** @brief Load and deinterleave (2 channels)
+
+Load data from memory, deinterleave, and store into 2 registers.
+Scheme:
+@code
+{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b)
+{
+    int i, i2;
+    for( i = i2 = 0; i < n; i++, i2 += 2 )
+    {
+        a.s[i] = ptr[i2];
+        b.s[i] = ptr[i2+1];
+    }
+}
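+
+// Usage sketch (illustrative only; buffer contents are assumed):
+//   int buf[8] = {1, 10, 2, 20, 3, 30, 4, 40}; // interleaved x,y pairs
+//   v_int32x4 x, y;
+//   v_load_deinterleave(buf, x, y); // x = {1, 2, 3, 4}, y = {10, 20, 30, 40}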
+
+/** @brief Load and deinterleave (3 channels)
+
+Load data from memory, deinterleave, and store into 3 registers.
+Scheme:
+@code
+{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                                            v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
+{
+    int i, i3;
+    for( i = i3 = 0; i < n; i++, i3 += 3 )
+    {
+        a.s[i] = ptr[i3];
+        b.s[i] = ptr[i3+1];
+        c.s[i] = ptr[i3+2];
+    }
+}
+
+/** @brief Load and deinterleave (4 channels)
+
+Load data from memory, deinterleave, and store into 4 registers.
+Scheme:
+@code
+{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
+                                v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
+                                v_reg<_Tp, n>& d)
+{
+    int i, i4;
+    for( i = i4 = 0; i < n; i++, i4 += 4 )
+    {
+        a.s[i] = ptr[i4];
+        b.s[i] = ptr[i4+1];
+        c.s[i] = ptr[i4+2];
+        d.s[i] = ptr[i4+3];
+    }
+}
+
+/** @brief Interleave and store (2 channels)
+
+Interleave and store data from 2 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                               const v_reg<_Tp, n>& b)
+{
+    int i, i2;
+    for( i = i2 = 0; i < n; i++, i2 += 2 )
+    {
+        ptr[i2] = a.s[i];
+        ptr[i2+1] = b.s[i];
+    }
+}
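+
+// Usage sketch (illustrative only; the inverse of the deinterleaving load above):
+//   v_int32x4 x(1, 2, 3, 4), y(10, 20, 30, 40);
+//   int buf[8];
+//   v_store_interleave(buf, x, y); // buf = {1, 10, 2, 20, 3, 30, 4, 40}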
+
+/** @brief Interleave and store (3 channels)
+
+Interleave and store data from 3 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
+{
+    int i, i3;
+    for( i = i3 = 0; i < n; i++, i3 += 3 )
+    {
+        ptr[i3] = a.s[i];
+        ptr[i3+1] = b.s[i];
+        ptr[i3+2] = c.s[i];
+    }
+}
+
+/** @brief Interleave and store (4 channels)
+
+Interleave and store data from 4 registers to memory.
+Scheme:
+@code
+{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
+                                                            const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
+                                                            const v_reg<_Tp, n>& d)
+{
+    int i, i4;
+    for( i = i4 = 0; i < n; i++, i4 += 4 )
+    {
+        ptr[i4] = a.s[i];
+        ptr[i4+1] = b.s[i];
+        ptr[i4+2] = c.s[i];
+        ptr[i4+3] = d.s[i];
+    }
+}
+
+/** @brief Store data to memory
+
+Store register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B C D}
+@endcode
+Pointer can be unaligned. */
+template<typename _Tp, int n>
+inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Store data to memory (lower half)
+
+Store lower half of register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B}
+@endcode */
+template<typename _Tp, int n>
+inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < (n/2); i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Store data to memory (higher half)
+
+Store higher half of register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {C D}
+@endcode */
+template<typename _Tp, int n>
+inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < (n/2); i++ )
+        ptr[i] = a.s[i+(n/2)];
+}
+
+/** @brief Store data to memory (aligned)
+
+Store register contents to memory.
+Scheme:
+@code
+  REG {A B C D} ==> MEM {A B C D}
+@endcode
+Pointer __should__ be aligned by 16-byte boundary. */
+template<typename _Tp, int n>
+inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
+{
+    for( int i = 0; i < n; i++ )
+        ptr[i] = a.s[i];
+}
+
+/** @brief Combine vector from first elements of two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A1 A2 B1 B2}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = a.s[i];
+        c.s[i+(n/2)] = b.s[i];
+    }
+    return c;
+}
+
+/** @brief Combine vector from last elements of two vectors
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+---------------
+  {A3 A4 B3 B4}
+@endcode
+For all types except 64-bit. */
+template<typename _Tp, int n>
+inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> c;
+    for( int i = 0; i < (n/2); i++ )
+    {
+        c.s[i] = a.s[i+(n/2)];
+        c.s[i+(n/2)] = b.s[i+(n/2)];
+    }
+    return c;
+}
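+
+// Usage sketch covering both combines (illustrative only; lane values assumed):
+//   v_int32x4 a(1, 2, 3, 4), b(5, 6, 7, 8);
+//   v_int32x4 lo = v_combine_low(a, b);  // lo = {1, 2, 5, 6}
+//   v_int32x4 hi = v_combine_high(a, b); // hi = {3, 4, 7, 8}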
+
+/** @brief Combine two vectors from lower and higher parts of two other vectors
+
+@code{.cpp}
+low = cv::v_combine_low(a, b);
+high = cv::v_combine_high(a, b);
+@endcode */
+template<typename _Tp, int n>
+inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
+                        v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
+{
+    for( int i = 0; i < (n/2); i++ )
+    {
+        low.s[i] = a.s[i];
+        low.s[i+(n/2)] = b.s[i];
+        high.s[i] = a.s[i+(n/2)];
+        high.s[i+(n/2)] = b.s[i+(n/2)];
+    }
+}
+
+/** @brief Vector extract
+
+Scheme:
+@code
+  {A1 A2 A3 A4}
+  {B1 B2 B3 B4}
+========================
+shift = 1  {A2 A3 A4 B1}
+shift = 2  {A3 A4 B1 B2}
+shift = 3  {A4 B1 B2 B3}
+@endcode
+Restriction: 0 <= shift < nlanes
+
+Usage:
+@code
+v_int32x4 a, b, c;
+c = v_extract<2>(a, b);
+@endcode
+For all types. */
+template<int s, typename _Tp, int n>
+inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
+{
+    v_reg<_Tp, n> r;
+    const int shift = n - s;
+    int i = 0;
+    for (; i < shift; ++i)
+        r.s[i] = a.s[i+s];
+    for (; i < n; ++i)
+        r.s[i] = b.s[i-shift];
+    return r;
+}
+
+/** @brief Round
+
+Rounds each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvRound(a.s[i]);
+    return c;
+}
+
+/** @brief Floor
+
+Floor each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvFloor(a.s[i]);
+    return c;
+}
+
+/** @brief Ceil
+
+Ceil each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = cvCeil(a.s[i]);
+    return c;
+}
+
+/** @brief Trunc
+
+Truncate each value. Input type is float vector ==> output type is int vector.*/
+template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
+{
+    v_reg<int, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (int)(a.s[i]);
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvRound(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvFloor(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = cvCeil(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @overload */
+template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
+{
+    v_reg<int, n*2> c;
+    for( int i = 0; i < n; i++ )
+    {
+        c.s[i] = (int)(a.s[i]);
+        c.s[i+n] = 0;
+    }
+    return c;
+}
+
+/** @brief Convert to float
+
+Supported input type is cv::v_int32x4. */
+template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
+{
+    v_reg<float, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (float)a.s[i];
+    return c;
+}
+
+/** @brief Convert to double
+
+Supported input type is cv::v_int32x4. */
+template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<int, n*2>& a)
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i];
+    return c;
+}
+
+/** @brief Convert to double
+
+Supported input type is cv::v_float32x4. */
+template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a)
+{
+    v_reg<double, n> c;
+    for( int i = 0; i < n; i++ )
+        c.s[i] = (double)a.s[i];
+    return c;
+}
+
+/** @brief Transpose 4x4 matrix
+
+Scheme:
+@code
+a0  {A1 A2 A3 A4}
+a1  {B1 B2 B3 B4}
+a2  {C1 C2 C3 C4}
+a3  {D1 D2 D3 D4}
+===============
+b0  {A1 B1 C1 D1}
+b1  {A2 B2 C2 D2}
+b2  {A3 B3 C3 D3}
+b3  {A4 B4 C4 D4}
+@endcode
+*/
+template<typename _Tp>
+inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1,
+                            const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3,
+                            v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1,
+                            v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 )
+{
+    b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]);
+    b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]);
+    b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]);
+    b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]);
+}
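+
+// Usage sketch (illustrative only; lane values are assumed): transposes four
+// row vectors into four column vectors.
+//   v_float32x4 r0(1.f, 2.f, 3.f, 4.f), r1(5.f, 6.f, 7.f, 8.f),
+//               r2(9.f, 10.f, 11.f, 12.f), r3(13.f, 14.f, 15.f, 16.f);
+//   v_float32x4 c0, c1, c2, c3;
+//   v_transpose4x4(r0, r1, r2, r3, c0, c1, c2, c3); // c0 = {1, 5, 9, 13}, ...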
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); }
+
+//! @name Init with zero
+//! @{
+//! @brief Create a new vector with all elements set to zero
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
+
+//! @name Init with value
+//! @{
+//! @brief Create new vector with elements set to a specific value
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \
+template<typename _Tp0, int n0> inline _Tpvec \
+    v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
+{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); }
+
+//! @name Reinterpret
+//! @{
+//! @brief Convert vector to different type without modifying underlying data.
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return a << n; }
+
+//! @name Left shift
+//! @{
+//! @brief Shift left
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return a >> n; }
+
+//! @name Right shift
+//! @{
+//! @brief Shift right
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ \
+    _Tpvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+    return c; \
+}
+
+//! @name Rounding shift
+//! @{
+//! @brief Rounding shift right
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
+//! @}
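+
+// Usage sketch (illustrative only): v_rshr adds the rounding constant
+// 1 << (n-1) before shifting, so values are rounded to nearest rather than
+// truncated toward zero.
+//   v_uint32x4 a(7, 8, 9, 10);
+//   v_uint32x4 r = v_rshr<3>(a); // r = {1, 1, 1, 1}: (7+4)>>3, (8+4)>>3, ...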
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \
+inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = cast<_Tpn>(a.s[i]); \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \
+    } \
+    return c; \
+}
+
+//! @name Pack
+//! @{
+//! @brief Pack values from two vectors to one
+//!
+//! The return vector type has twice as many elements as the input vector types. The variant with the
+//! _u_ suffix also converts to the corresponding unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast)
+//! @}
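+
+// Usage sketch (illustrative only; lane values are assumed): packing uses
+// saturation, so out-of-range values clamp instead of wrapping.
+//   v_int32x4 a(1, 2, 40000, -40000), b = v_setall_s32(3);
+//   v_int16x8 r = v_pack(a, b); // r = {1, 2, 32767, -32768, 3, 3, 3, 3}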
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
+template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpnvec c; \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+    { \
+        c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+        c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+    } \
+    return c; \
+}
+
+//! @name Pack with rounding shift
+//! @{
+//! @brief Pack values from two vectors to one with rounding shift
+//!
+//! Values from the input vectors will be shifted right by _n_ bits with rounding, converted to a narrower
+//! type and returned in the result vector. The variant with the _u_ suffix converts to the unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
+inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = cast<_Tpn>(a.s[i]); \
+}
+
+//! @name Pack and store
+//! @{
+//! @brief Store values from the input vector into memory with pack
+//!
+//! Values will be stored into memory with conversion to a narrower type.
+//! The variant with the _u_ suffix converts to the corresponding unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+//! @}
+
+//! @brief Helper macro
+//! @ingroup core_hal_intrin_impl
+#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
+template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
+{ \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
+}
+
+//! @name Pack and store with rounding shift
+//! @{
+//! @brief Store values from the input vector into memory with pack
+//!
+//! Values will be shifted right by _n_ bits with rounding, converted to a narrower type and stored into
+//! memory. The variant with the _u_ suffix converts to the unsigned type.
+//!
+//! - pack: for 16-, 32- and 64-bit integer input types
+//! - pack_u: for 16- and 32-bit signed integer input types
+//!
+//! @note All variants except 64-bit use saturation.
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
+OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
+//! @}
+
+/** @brief Matrix multiplication
+
+Scheme:
+@code
+{A0 A1 A2 A3}   |V0|
+{B0 B1 B2 B3}   |V1|
+{C0 C1 C2 C3}   |V2|
+{D0 D1 D2 D3} x |V3|
+====================
+{R0 R1 R2 R3}, where:
+R0 = A0V0 + A1V1 + A2V2 + A3V3,
+R1 = B0V0 + B1V1 + B2V2 + B3V3
+...
+@endcode
+*/
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0],
+                       v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1],
+                       v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2],
+                       v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]);
+}
+
+/** @brief Matrix multiplication and add
+
+Scheme:
+@code
+{A0 A1 A2   }   |V0|   |D0|
+{B0 B1 B2   }   |V1|   |D1|
+{C0 C1 C2   } x |V2| + |D2|
+====================
+{R0 R1 R2 R3}, where:
+R0 = A0V0 + A1V1 + A2V2 + D0,
+R1 = B0V0 + B1V1 + B2V2 + D1
+...
+@endcode
+*/
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& m3)
+{
+    return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0],
+                       v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1],
+                       v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2],
+                       v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]);
+}
+
+//! @}
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check whether the CPU supports SIMD operations
+static inline bool hasSIMD128()
+{
+    return false;
+}
+
+//! @}
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+}
+
+#endif

+ 1345 - 0
ThresholdTools/include/opencv2/core/hal/intrin_neon.hpp

@@ -0,0 +1,1345 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_NEON_HPP
+#define OPENCV_HAL_INTRIN_NEON_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+#define CV_SIMD128 1
+#if defined(__aarch64__)
+#define CV_SIMD128_64F 1
+#else
+#define CV_SIMD128_64F 0
+#endif
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
+template <typename T> static inline \
+_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
+template <typename T> static inline \
+float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint8x16_t, u8)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int8x16_t, s8)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint16x8_t, u16)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int16x8_t, s16)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint32x4_t, u32)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int32x4_t, s32)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(uint64x2_t, u64)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(int64x2_t, s64)
+OPENCV_HAL_IMPL_NEON_REINTERPRET(float32x4_t, f32)
+#endif
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() {}
+    explicit v_uint8x16(uint8x16_t v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+        val = vld1q_u8(v);
+    }
+    uchar get0() const
+    {
+        return vgetq_lane_u8(val, 0);
+    }
+
+    uint8x16_t val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() {}
+    explicit v_int8x16(int8x16_t v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+               schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
+        val = vld1q_s8(v);
+    }
+    schar get0() const
+    {
+        return vgetq_lane_s8(val, 0);
+    }
+
+    int8x16_t val;
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() {}
+    explicit v_uint16x8(uint16x8_t v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+        val = vld1q_u16(v);
+    }
+    ushort get0() const
+    {
+        return vgetq_lane_u16(val, 0);
+    }
+
+    uint16x8_t val;
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() {}
+    explicit v_int16x8(int16x8_t v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
+        val = vld1q_s16(v);
+    }
+    short get0() const
+    {
+        return vgetq_lane_s16(val, 0);
+    }
+
+    int16x8_t val;
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() {}
+    explicit v_uint32x4(uint32x4_t v) : val(v) {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+    {
+        unsigned v[] = {v0, v1, v2, v3};
+        val = vld1q_u32(v);
+    }
+    unsigned get0() const
+    {
+        return vgetq_lane_u32(val, 0);
+    }
+
+    uint32x4_t val;
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() {}
+    explicit v_int32x4(int32x4_t v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        int v[] = {v0, v1, v2, v3};
+        val = vld1q_s32(v);
+    }
+    int get0() const
+    {
+        return vgetq_lane_s32(val, 0);
+    }
+    int32x4_t val;
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() {}
+    explicit v_float32x4(float32x4_t v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        float v[] = {v0, v1, v2, v3};
+        val = vld1q_f32(v);
+    }
+    float get0() const
+    {
+        return vgetq_lane_f32(val, 0);
+    }
+    float32x4_t val;
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+
+    v_uint64x2() {}
+    explicit v_uint64x2(uint64x2_t v) : val(v) {}
+    v_uint64x2(uint64 v0, uint64 v1)
+    {
+        uint64 v[] = {v0, v1};
+        val = vld1q_u64(v);
+    }
+    uint64 get0() const
+    {
+        return vgetq_lane_u64(val, 0);
+    }
+    uint64x2_t val;
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+
+    v_int64x2() {}
+    explicit v_int64x2(int64x2_t v) : val(v) {}
+    v_int64x2(int64 v0, int64 v1)
+    {
+        int64 v[] = {v0, v1};
+        val = vld1q_s64(v);
+    }
+    int64 get0() const
+    {
+        return vgetq_lane_s64(val, 0);
+    }
+    int64x2_t val;
+};
+
+#if CV_SIMD128_64F
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() {}
+    explicit v_float64x2(float64x2_t v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        double v[] = {v0, v1};
+        val = vld1q_f64(v);
+    }
+    double get0() const
+    {
+        return vgetq_lane_f64(val, 0);
+    }
+    float64x2_t val;
+};
+#endif
+
+#if CV_FP16
+// Workaround for old compilers
+template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
+{ return (int16x4_t)a; }
+template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
+{ return (float16x4_t)a; }
+template <typename T> static inline float16x4_t cv_vld1_f16(const T* ptr)
+{
+#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
+    return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
+#else
+    return vld1_f16((const __fp16*)ptr);
+#endif
+}
+template <typename T> static inline void cv_vst1_f16(T* ptr, float16x4_t a)
+{
+#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
+    vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
+#else
+    vst1_f16((__fp16*)ptr, a);
+#endif
+}
+
+struct v_float16x4
+{
+    typedef short lane_type;
+    enum { nlanes = 4 };
+
+    v_float16x4() {}
+    explicit v_float16x4(float16x4_t v) : val(v) {}
+    v_float16x4(short v0, short v1, short v2, short v3)
+    {
+        short v[] = {v0, v1, v2, v3};
+        val = cv_vld1_f16(v);
+    }
+    short get0() const
+    {
+        return vget_lane_s16(vreinterpret_s16_f16(val), 0);
+    }
+    float16x4_t val;
+};
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
+inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
+inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
+inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \
+inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \
+inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
+inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
+inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
+inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
+inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
+inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
+inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
+inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }
+
+OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
+inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
+OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
+OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = mov(a.val), b1 = mov(b.val); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = mov(a.val); \
+    vst1_##suffix(ptr, a1); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    hreg b1 = rshr(b.val, n); \
+    return _Tpvec(vcombine_##suffix(a1, b1)); \
+} \
+template<int n> inline \
+void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
+{ \
+    hreg a1 = rshr(a.val, n); \
+    vst1_##suffix(ptr, a1); \
+}
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16)
+OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32)
+OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64)
+OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64)
+
+OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
+OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val);
+    float32x4_t res = vmulq_lane_f32(m0.val, vl, 0);
+    res = vmlaq_lane_f32(res, m1.val, vl, 1);
+    res = vmlaq_lane_f32(res, m2.val, vh, 0);
+    res = vmlaq_lane_f32(res, m3.val, vh, 1);
+    return v_float32x4(res);
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val);
+    float32x4_t res = vmulq_lane_f32(m0.val, vl, 0);
+    res = vmlaq_lane_f32(res, m1.val, vl, 1);
+    res = vmlaq_lane_f32(res, m2.val, vh, 0);
+    res = vaddq_f32(res, a.val);
+    return v_float32x4(res);
+}
+
+#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+} \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+{ \
+    a.val = intrin(a.val, b.val); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64)
+#else
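+// AArch32 NEON has no vector divide: approximate 1/b with vrecpeq_f32 and
+// refine it with two Newton-Raphson steps (vrecpsq_f32 yields the 2 - b*x
+// correction factor), then multiply by a.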
+inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
+{
+    float32x4_t reciprocal = vrecpeq_f32(b.val);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    return v_float32x4(vmulq_f32(a.val, reciprocal));
+}
+inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
+{
+    float32x4_t reciprocal = vrecpeq_f32(b.val);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
+    a.val = vmulq_f32(a.val, reciprocal);
+    return a;
+}
+#endif
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
+    d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
+    d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
+}
+
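+// v_dotprod on NEON: widen the 16-bit products into two int32x4 halves with
+// vmull, de-interleave even/odd products with vuzpq_s32, and add them so each
+// 32-bit lane holds the sum of one adjacent pair.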
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
+    int32x4x2_t cd = vuzpq_s32(c, d);
+    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    v_int32x4 s = v_dotprod(a, b);
+    return v_int32x4(vaddq_s32(s.val , c.val));
+}
+
+#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
+    OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \
+    inline _Tpvec operator ~ (const _Tpvec& a) \
+    { \
+        return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \
+    }
+
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64)
+
+#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \
+inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
+{ \
+    return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \
+} \
+inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
+{ \
+    a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32)
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32)
+OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32)
+
+inline v_float32x4 operator ~ (const v_float32x4& a)
+{
+    return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val))));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+    return v_float32x4(vsqrtq_f32(x.val));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    v_float32x4 one = v_setall_f32(1.0f);
+    return one / v_sqrt(x);
+}
+#else
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{
+    float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN));
+    float32x4_t e = vrsqrteq_f32(x1);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    return v_float32x4(vmulq_f32(x.val, e));
+}
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    float32x4_t e = vrsqrteq_f32(x.val);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
+    return v_float32x4(e);
+}
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
+
+OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
+OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)
+
+inline v_float32x4 v_abs(v_float32x4 x)
+{ return v_float32x4(vabsq_f32(x.val)); }
+
+#if CV_SIMD128_64F
+#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
+inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
+{ \
+    return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
+} \
+inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
+{ \
+    a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
+    return a; \
+}
+
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64)
+OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64)
+
+inline v_float64x2 operator ~ (const v_float64x2& a)
+{
+    return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val))));
+}
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{
+    return v_float64x2(vsqrtq_f64(x.val));
+}
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+    v_float64x2 one = v_setall_f64(1.0);
+    return one / v_sqrt(x);
+}
+
+inline v_float64x2 v_abs(v_float64x2 x)
+{ return v_float64x2(vabsq_f64(x.val)); }
+#endif
+
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
+#endif
+
+#if CV_SIMD128_64F
+inline int64x2_t vmvnq_s64(int64x2_t a)
+{
+    int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return veorq_s64(a, vx);
+}
+inline uint64x2_t vmvnq_u64(uint64x2_t a)
+{
+    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return veorq_u64(a, vx);
+}
+#endif
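+// NEON has no 64-bit vmvnq intrinsic, so bitwise NOT is emulated above by
+// XOR-ing with an all-ones vector; the comparison and check_all/check_any
+// macros below rely on it for the 64-bit lane types.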
+#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }
+
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
+OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
+#endif
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
+
+// absdiff: the signed-integer variants are defined below via
+// OPENCV_HAL_IMPL_NEON_BIN_FUNC2 and return unsigned vectors
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec2(cast(intrin(a.val, b.val))); \
+}
+
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16)
+OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32)
+
+inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
+    return v_sqrt(x);
+}
+
+inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
+{
+    return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
+}
+
+inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
+}
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_int32x4(vmlaq_s32(c.val, a.val, b.val));
+}
+
+#if CV_SIMD128_64F
+inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
+    return v_sqrt(x);
+}
+
+inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
+{
+    return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
+}
+
+inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    return v_float64x2(vaddq_f64(c.val, vmulq_f64(a.val, b.val)));
+}
+#endif
+
+// trade efficiency for convenience
+#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
+inline _Tpvec operator << (const _Tpvec& a, int n) \
+{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
+inline _Tpvec operator >> (const _Tpvec& a, int n) \
+{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \
+template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
+{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
+{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
+template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
+{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); }
+
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
+OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
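+// NEON has no variable right-shift intrinsic: vshlq_* shifts left by a
+// per-lane signed amount, so operator >> shifts by -n instead. Among the
+// immediate forms, v_shr truncates (vshrq_n) while v_rshr applies a
+// rounding right shift (vrshrq_n).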
+
+#define OPENCV_HAL_IMPL_NEON_ROTATE_OP(_Tpvec, suffix) \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
+{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
+{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
+{ return a; } \
+template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
+template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
+template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
+{ CV_UNUSED(b); return a; }
+
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int16x8, s16)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int32x4, s32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float32x4, f32)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
+#endif
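+// The rotates are built on vextq, which extracts a lane window from the
+// concatenation of two vectors: with u32 lanes a = {a0,a1,a2,a3} and
+// b = {b0,b1,b2,b3}, v_rotate_right<1>(a, b) yields {a1, a2, a3, b0}.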
+
+#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(vld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(vld1q_##suffix(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ vst1q_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ vst1q_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); }
+
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
+#endif
+
+#if CV_FP16
+// Workaround for old compilers
+inline v_float16x4 v_load_f16(const short* ptr)
+{ return v_float16x4(cv_vld1_f16(ptr)); }
+inline void v_store_f16(short* ptr, v_float16x4& a)
+{ cv_vst1_f16(ptr, a.val); }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    a0 = vp##vectorfunc##_##suffix(a0, a0); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
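+// The 8-lane reductions fold pairwise: vpadd/vpmax/vpmin combine adjacent
+// lanes, so three rounds collapse eight lanes into lane 0. For example,
+// v_reduce_sum on {1,2,3,4,5,6,7,8} goes {3,7,11,15} -> {10,26,...} -> 36.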
+
+#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
+    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
+}
+
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
+OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    float32x4x2_t ab = vtrnq_f32(a.val, b.val);
+    float32x4x2_t cd = vtrnq_f32(c.val, d.val);
+
+    float32x4_t u0 = vaddq_f32(ab.val[0], ab.val[1]); // a0+a1 b0+b1 a2+a3 b2+b3
+    float32x4_t u1 = vaddq_f32(cd.val[0], cd.val[1]); // c0+c1 d0+d1 c2+c3 d2+d3
+
+    float32x4_t v0 = vcombine_f32(vget_low_f32(u0), vget_low_f32(u1));
+    float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1));
+
+    return v_float32x4(vaddq_f32(v0, v1));
+}
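+// v_reduce_sum4 transposes lane pairs with vtrnq so that each vaddq_f32 adds
+// corresponding lanes; result lane i is the horizontal sum of input i, i.e.
+// {sum(a), sum(b), sum(c), sum(d)}.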
+
+#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast) \
+inline v_uint32x4 v_popcount(const _Tpvec& a) \
+{ \
+    uint8x16_t t = vcntq_u8(cast(a.val)); \
+    uint16x8_t t0 = vpaddlq_u8(t);  /* 16 -> 8 */ \
+    uint32x4_t t1 = vpaddlq_u16(t0); /* 8 -> 4 */ \
+    return v_uint32x4(t1); \
+}
+
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint8x16, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint16x8, vreinterpretq_u8_u16)
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint32x4, vreinterpretq_u8_u32)
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int8x16, vreinterpretq_u8_s8)
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int16x8, vreinterpretq_u8_s16)
+OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int32x4, vreinterpretq_u8_s32)
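+// v_popcount counts bits per byte with vcntq_u8, then widens with the
+// pairwise-add ladder u8 -> u16 -> u32, so each 32-bit lane ends up holding
+// the popcount of its four source bytes.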
+
+inline int v_signmask(const v_uint8x16& a)
+{
+    int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
+    uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_uint16x8& a)
+{
+    int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
+    uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
+}
+inline int v_signmask(const v_int16x8& a)
+{ return v_signmask(v_reinterpret_as_u16(a)); }
+
+inline int v_signmask(const v_uint32x4& a)
+{
+    int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
+    uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
+    uint64x2_t v1 = vpaddlq_u32(v0);
+    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
+}
+inline int v_signmask(const v_int32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+#if CV_SIMD128_64F
+inline int v_signmask(const v_uint64x2& a)
+{
+    int64x1_t m0 = vdup_n_s64(0);
+    uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
+    return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
+}
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#endif
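+// v_signmask isolates each lane's sign bit (vshrq_n by lane_bits - 1),
+// shifts lane i's bit into position i with a per-lane vshlq, and folds each
+// 64-bit half with pairwise adds; the final (hi << k) merges the halves,
+// k being the lane count of one half (8 for u8, 4 for u16, 2 for u32).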
+
+#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
+inline bool v_check_all(const v_##_Tpvec& a) \
+{ \
+    _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
+    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
+} \
+inline bool v_check_any(const v_##_Tpvec& a) \
+{ \
+    _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
+    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
+    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
+}
+
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
+#endif
+
+inline bool v_check_all(const v_int8x16& a)
+{ return v_check_all(v_reinterpret_as_u8(a)); }
+inline bool v_check_all(const v_int16x8& a)
+{ return v_check_all(v_reinterpret_as_u16(a)); }
+inline bool v_check_all(const v_int32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_u32(a)); }
+
+inline bool v_check_any(const v_int8x16& a)
+{ return v_check_any(v_reinterpret_as_u8(a)); }
+inline bool v_check_any(const v_int16x8& a)
+{ return v_check_any(v_reinterpret_as_u16(a)); }
+inline bool v_check_any(const v_int32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_u32(a)); }
+
+#if CV_SIMD128_64F
+inline bool v_check_all(const v_int64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_int64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_u64(a)); }
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8)
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
+OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32)
+OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32)
+OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+{ \
+    b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
+    b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
+} \
+inline _Tpwvec v_load_expand(const _Tp* ptr) \
+{ \
+    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
+}
+
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32)
+OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr);
+    uint16x4_t v1 = vget_low_u16(vmovl_u8(v0));
+    return v_uint32x4(vmovl_u16(v1));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    int8x8_t v0 = vcreate_s8(*(unsigned*)ptr);
+    int16x4_t v1 = vget_low_s16(vmovl_s8(v0));
+    return v_int32x4(vmovl_s16(v1));
+}
+
+#if defined(__aarch64__)
+#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    b0.val = vzip1q_##suffix(a0.val, a1.val); \
+    b1.val = vzip2q_##suffix(a0.val, a1.val); \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
+    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
+}
+#else
+#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
+{ \
+    _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \
+    b0.val = p.val[0]; \
+    b1.val = p.val[1]; \
+} \
+inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
+} \
+inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
+} \
+inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
+    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
+}
+#endif
+
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
+template <int s> \
+inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
+}
+
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
+OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
+OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
+#endif
+
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    float32x4_t a_ = a.val;
+    int32x4_t result;
+    __asm__ ("fcvtns %0.4s, %1.4s"
+             : "=w"(result)
+             : "w"(a_)
+             : /* No clobbers */);
+    return v_int32x4(result);
+}
+#else
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    static const int32x4_t v_sign = vdupq_n_s32((int)0x80000000U), // sign-bit mask; avoids the UB of 1 << 31
+        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
+
+    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
+    return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
+}
+#endif
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    int32x4_t a1 = vcvtq_s32_f32(a.val);
+    uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
+    return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    int32x4_t a1 = vcvtq_s32_f32(a.val);
+    uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1));
+    return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask)));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(vcvtq_s32_f32(a.val)); }
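+// v_floor/v_ceil start from the truncated value and repair it with a
+// comparison mask: vcgtq_f32 yields all-ones (-1 when reinterpreted as
+// signed) in lanes where truncation overshot, so adding the mask (floor)
+// or subtracting it (ceil) adjusts exactly those lanes by one.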
+
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val);
+    uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val);
+    a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask));
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    int64x2_t a1 = vcvtq_s64_f64(a.val);
+    uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1));
+    a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask));
+    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    static const int32x2_t zero = vdup_n_s32(0);
+    // vcvtq_s64_f64 rounds toward zero, as truncation requires
+    return v_int32x4(vcombine_s32(vmovn_s64(vcvtq_s64_f64(a.val)), zero));
+}
+#endif
+
+#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
+inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
+                         const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
+                         v_##_Tpvec& b0, v_##_Tpvec& b1, \
+                         v_##_Tpvec& b2, v_##_Tpvec& b3) \
+{ \
+    /* m00 m01 m02 m03 */ \
+    /* m10 m11 m12 m13 */ \
+    /* m20 m21 m22 m23 */ \
+    /* m30 m31 m32 m33 */ \
+    _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \
+    _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \
+    /* m00 m10 m02 m12 */ \
+    /* m01 m11 m03 m13 */ \
+    /* m20 m30 m22 m32 */ \
+    /* m21 m31 m23 m33 */ \
+    b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \
+    b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \
+    b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \
+    b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \
+}
+
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
+OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
+
+#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
+{ \
+    _Tpvec##x2_t v = vld2q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
+{ \
+    _Tpvec##x3_t v = vld3q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+    c.val = v.val[2]; \
+} \
+inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
+                                v_##_Tpvec& c, v_##_Tpvec& d) \
+{ \
+    _Tpvec##x4_t v = vld4q_##suffix(ptr); \
+    a.val = v.val[0]; \
+    b.val = v.val[1]; \
+    c.val = v.val[2]; \
+    d.val = v.val[3]; \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
+{ \
+    _Tpvec##x2_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    vst2q_##suffix(ptr, v); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
+{ \
+    _Tpvec##x3_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    v.val[2] = c.val; \
+    vst3q_##suffix(ptr, v); \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                               const v_##_Tpvec& c, const v_##_Tpvec& d) \
+{ \
+    _Tpvec##x4_t v; \
+    v.val[0] = a.val; \
+    v.val[1] = b.val; \
+    v.val[2] = c.val; \
+    v.val[3] = d.val; \
+    vst4q_##suffix(ptr, v); \
+}
+
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
+#endif
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+    return v_float32x4(vcvtq_f32_s32(a.val));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    float32x2_t zero = vdup_n_f32(0.0f);
+    return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val))));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val)));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
+}
+#endif
+
+#if CV_FP16
+inline v_float32x4 v_cvt_f32(const v_float16x4& a)
+{
+    return v_float32x4(vcvt_f32_f16(a.val));
+}
+
+inline v_float16x4 v_cvt_f16(const v_float32x4& a)
+{
+    return v_float16x4(vcvt_f16_f32(a.val));
+}
+#endif
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability of SIMD operation
+static inline bool hasSIMD128()
+{
+    return (CV_CPU_HAS_SUPPORT_NEON) ? true : false;
+}
+
+//! @}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif

ThresholdTools/include/opencv2/core/hal/intrin_sse.hpp  (+2234, -0)

@@ -0,0 +1,2234 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_SSE_HPP
+#define OPENCV_HAL_SSE_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 16 };
+
+    v_uint8x16() : val(_mm_setzero_si128()) {}
+    explicit v_uint8x16(__m128i v) : val(v) {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+    uchar get0() const
+    {
+        return (uchar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 16 };
+
+    v_int8x16() : val(_mm_setzero_si128()) {}
+    explicit v_int8x16(__m128i v) : val(v) {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+    {
+        val = _mm_setr_epi8((char)v0, (char)v1, (char)v2, (char)v3,
+                            (char)v4, (char)v5, (char)v6, (char)v7,
+                            (char)v8, (char)v9, (char)v10, (char)v11,
+                            (char)v12, (char)v13, (char)v14, (char)v15);
+    }
+    schar get0() const
+    {
+        return (schar)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 8 };
+
+    v_uint16x8() : val(_mm_setzero_si128()) {}
+    explicit v_uint16x8(__m128i v) : val(v) {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+    ushort get0() const
+    {
+        return (ushort)_mm_cvtsi128_si32(val);
+    }
+
+    __m128i val;
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 8 };
+
+    v_int16x8() : val(_mm_setzero_si128()) {}
+    explicit v_int16x8(__m128i v) : val(v) {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+    {
+        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
+                             (short)v4, (short)v5, (short)v6, (short)v7);
+    }
+    short get0() const
+    {
+        return (short)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 4 };
+
+    v_uint32x4() : val(_mm_setzero_si128()) {}
+    explicit v_uint32x4(__m128i v) : val(v) {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+    {
+        val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3);
+    }
+    unsigned get0() const
+    {
+        return (unsigned)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 4 };
+
+    v_int32x4() : val(_mm_setzero_si128()) {}
+    explicit v_int32x4(__m128i v) : val(v) {}
+    v_int32x4(int v0, int v1, int v2, int v3)
+    {
+        val = _mm_setr_epi32(v0, v1, v2, v3);
+    }
+    int get0() const
+    {
+        return _mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    typedef __m128 vector_type;
+    enum { nlanes = 4 };
+
+    v_float32x4() : val(_mm_setzero_ps()) {}
+    explicit v_float32x4(__m128 v) : val(v) {}
+    v_float32x4(float v0, float v1, float v2, float v3)
+    {
+        val = _mm_setr_ps(v0, v1, v2, v3);
+    }
+    float get0() const
+    {
+        return _mm_cvtss_f32(val);
+    }
+    __m128 val;
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 2 };
+
+    v_uint64x2() : val(_mm_setzero_si128()) {}
+    explicit v_uint64x2(__m128i v) : val(v) {}
+    v_uint64x2(uint64 v0, uint64 v1)
+    {
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+    }
+    uint64 get0() const
+    {
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (unsigned)a | ((uint64)(unsigned)b << 32);
+    }
+    __m128i val;
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 2 };
+
+    v_int64x2() : val(_mm_setzero_si128()) {}
+    explicit v_int64x2(__m128i v) : val(v) {}
+    v_int64x2(int64 v0, int64 v1)
+    {
+        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
+    }
+    int64 get0() const
+    {
+        int a = _mm_cvtsi128_si32(val);
+        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
+        return (int64)((unsigned)a | ((uint64)(unsigned)b << 32));
+    }
+    __m128i val;
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    typedef __m128d vector_type;
+    enum { nlanes = 2 };
+
+    v_float64x2() : val(_mm_setzero_pd()) {}
+    explicit v_float64x2(__m128d v) : val(v) {}
+    v_float64x2(double v0, double v1)
+    {
+        val = _mm_setr_pd(v0, v1);
+    }
+    double get0() const
+    {
+        return _mm_cvtsd_f64(val);
+    }
+    __m128d val;
+};
+
+#if CV_FP16
+struct v_float16x4
+{
+    typedef short lane_type;
+    typedef __m128i vector_type;
+    enum { nlanes = 4 };
+
+    v_float16x4() : val(_mm_setzero_si128()) {}
+    explicit v_float16x4(__m128i v) : val(v) {}
+    v_float16x4(short v0, short v1, short v2, short v3)
+    {
+        val = _mm_setr_epi16(v0, v1, v2, v3, 0, 0, 0, 0);
+    }
+    short get0() const
+    {
+        return (short)_mm_cvtsi128_si32(val);
+    }
+    __m128i val;
+};
+#endif
+
+namespace hal_sse_internal
+{
+    template <typename to_sse_type, typename from_sse_type>
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& val);
+
+#define OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(to_sse_type, from_sse_type, sse_cast_intrin) \
+    template<> inline \
+    to_sse_type v_sse_reinterpret_as(const from_sse_type& a) \
+    { return sse_cast_intrin(a); }
+
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128i, OPENCV_HAL_NOP);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128, _mm_castps_si128);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128i, __m128d, _mm_castpd_si128);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128i, _mm_castsi128_ps);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128, OPENCV_HAL_NOP);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128, __m128d, _mm_castpd_ps);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128i, _mm_castsi128_pd);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128, _mm_castps_pd);
+    OPENCV_HAL_IMPL_SSE_REINTERPRET_RAW(__m128d, __m128d, OPENCV_HAL_NOP);
+}
+
+#define OPENCV_HAL_IMPL_SSE_INITVEC(_Tpvec, _Tp, suffix, zsuffix, ssuffix, _Tps, cast) \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(_mm_setzero_##zsuffix()); } \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)v)); } \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
+{ return _Tpvec(cast(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_int32x4, int, s32, si128, epi32, int, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float32x4, float, f32, ps, ps, float, _mm_castsi128_ps)
+OPENCV_HAL_IMPL_SSE_INITVEC(v_float64x2, double, f64, pd, pd, double, _mm_castsi128_pd)
+
+inline v_uint64x2 v_setzero_u64() { return v_uint64x2(_mm_setzero_si128()); }
+inline v_int64x2 v_setzero_s64() { return v_int64x2(_mm_setzero_si128()); }
+inline v_uint64x2 v_setall_u64(uint64 val) { return v_uint64x2(val, val); }
+inline v_int64x2 v_setall_s64(int64 val) { return v_int64x2(val, val); }
+
+template<typename _Tpvec> inline
+v_uint64x2 v_reinterpret_as_u64(const _Tpvec& a) { return v_uint64x2(a.val); }
+template<typename _Tpvec> inline
+v_int64x2 v_reinterpret_as_s64(const _Tpvec& a) { return v_int64x2(a.val); }
+inline v_float32x4 v_reinterpret_as_f32(const v_uint64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float32x4 v_reinterpret_as_f32(const v_int64x2& a)
+{ return v_float32x4(_mm_castsi128_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_uint64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_int64x2& a)
+{ return v_float64x2(_mm_castsi128_pd(a.val)); }
+
+#define OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(_Tpvec, suffix) \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float32x4& a) \
+{ return _Tpvec(_mm_castps_si128(a.val)); } \
+inline _Tpvec v_reinterpret_as_##suffix(const v_float64x2& a) \
+{ return _Tpvec(_mm_castpd_si128(a.val)); }
+
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint8x16, u8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int8x16, s8)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint16x8, u16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int16x8, s16)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint32x4, u32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_uint64x2, u64)
+OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int64x2, s64)
+
+inline v_float32x4 v_reinterpret_as_f32(const v_float32x4& a) {return a; }
+inline v_float64x2 v_reinterpret_as_f64(const v_float64x2& a) {return a; }
+inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x4(_mm_castpd_ps(a.val)); }
+inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); }
+
+//////////////// PACK ///////////////
+inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    return v_uint8x16(_mm_packus_epi16(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta)),
+                                       _mm_subs_epu16(b.val, _mm_subs_epu16(b.val, delta))));
+}
+
+inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16(255);
+    __m128i a1 = _mm_subs_epu16(a.val, _mm_subs_epu16(a.val, delta));
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
+{ return v_uint8x16(_mm_packus_epi16(a.val, b.val)); }
+
+inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(a.val, delta), n),
+                                       _mm_srli_epi16(_mm_adds_epu16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srli_epi16(_mm_adds_epu16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+template<int n> inline
+v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_uint8x16(_mm_packus_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                       _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
+{
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packus_epi16(a1, a1));
+}
+
+inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
+{ return v_int8x16(_mm_packs_epi16(a.val, b.val)); }
+
+inline void v_pack_store(schar* ptr, const v_int16x8& a)
+{ _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a.val, a.val)); }
+
+template<int n> inline
+v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    return v_int8x16(_mm_packs_epi16(_mm_srai_epi16(_mm_adds_epi16(a.val, delta), n),
+                                     _mm_srai_epi16(_mm_adds_epi16(b.val, delta), n)));
+}
+template<int n> inline
+void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
+{
+    // we assume that n > 0, and so the shifted 16-bit values can be treated as signed numbers.
+    __m128i delta = _mm_set1_epi16((short)(1 << (n-1)));
+    __m128i a1 = _mm_srai_epi16(_mm_adds_epi16(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi16(a1, a1));
+}
+
+
+// byte-wise "mask ? a : b"
+inline __m128i v_select_si128(__m128i mask, __m128i a, __m128i b)
+{
+#if CV_SSE4_1
+    return _mm_blendv_epi8(b, a, mask);
+#else
+    return _mm_xor_si128(b, _mm_and_si128(_mm_xor_si128(a, b), mask));
+#endif
+}
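+// The SSE2 fallback is the classic XOR blend: b ^ ((a ^ b) & mask) selects
+// a where mask bits are set and b where they are clear, a branchless
+// byte-wise select on targets without SSE4.1's _mm_blendv_epi8.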
+
+inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
+    __m128i b1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, b.val), maxval32, b.val), delta32);
+    __m128i r = _mm_packs_epi32(a1, b1);
+    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
+
+inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    __m128i z = _mm_setzero_si128(), maxval32 = _mm_set1_epi32(65535), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(v_select_si128(_mm_cmpgt_epi32(z, a.val), maxval32, a.val), delta32);
+    __m128i r = _mm_packs_epi32(a1, a1);
+    _mm_storel_epi64((__m128i*)ptr, _mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
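+// SSE2 lacks an unsigned saturating 32->16 pack (_mm_packus_epi32 is
+// SSE4.1), so the u32 packs above first clamp lanes that are negative as
+// signed (i.e. >= 2^31 unsigned) to 65535, bias by -32768 into signed
+// range, pack with the signed _mm_packs_epi32, and finally undo the bias.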
+
+template<int n> inline
+v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i b1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(b.val, delta), n), delta32);
+    return v_uint16x8(_mm_sub_epi16(_mm_packs_epi32(a1, b1), _mm_set1_epi16(-32768)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srli_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta32 = _mm_set1_epi32(32768);
+    __m128i r = _mm_packs_epi32(_mm_sub_epi32(a.val, delta32), _mm_sub_epi32(b.val, delta32));
+    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
+}
+
+inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    __m128i delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(a.val, delta32);
+    __m128i r = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, r);
+}
+
+template<int n> inline
+v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    __m128i b1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(b.val, delta), n), delta32);
+    __m128i b2 = _mm_sub_epi16(_mm_packs_epi32(b1, b1), _mm_set1_epi16(-32768));
+    return v_uint16x8(_mm_unpacklo_epi64(a2, b2));
+}
+
+template<int n> inline
+void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1)), delta32 = _mm_set1_epi32(32768);
+    __m128i a1 = _mm_sub_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n), delta32);
+    __m128i a2 = _mm_sub_epi16(_mm_packs_epi32(a1, a1), _mm_set1_epi16(-32768));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
+{ return v_int16x8(_mm_packs_epi32(a.val, b.val)); }
+
+inline void v_pack_store(short* ptr, const v_int32x4& a)
+{
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a.val, a.val));
+}
+
+template<int n> inline
+v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    return v_int16x8(_mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(a.val, delta), n),
+                                     _mm_srai_epi32(_mm_add_epi32(b.val, delta), n)));
+}
+
+template<int n> inline
+void v_rshr_pack_store(short* ptr, const v_int32x4& a)
+{
+    __m128i delta = _mm_set1_epi32(1 << (n-1));
+    __m128i a1 = _mm_srai_epi32(_mm_add_epi32(a.val, delta), n);
+    _mm_storel_epi64((__m128i*)ptr, _mm_packs_epi32(a1, a1));
+}
+
+
+// [a0 0 | b0 0]  [a1 0 | b1 0]
+inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+// [a0 0 | b0 0]  [a1 0 | b1 0]
+inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    __m128i v0 = _mm_unpacklo_epi32(a.val, b.val); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a.val, b.val); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+inline void v_pack_store(int* ptr, const v_int64x2& a)
+{
+    __m128i a1 = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a1);
+}
+
+template<int n> inline
+v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = _mm_srli_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_uint32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
+{
+    uint64 delta = (uint64)1 << (n-1);
+    v_uint64x2 delta2(delta, delta);
+    __m128i a1 = _mm_srli_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline __m128i v_sign_epi64(__m128i a)
+{
+    return _mm_shuffle_epi32(_mm_srai_epi32(a, 31), _MM_SHUFFLE(3, 3, 1, 1)); // x m0 | x m1
+}
+
+inline __m128i v_srai_epi64(__m128i a, int imm)
+{
+    __m128i smask = v_sign_epi64(a);
+    return _mm_xor_si128(_mm_srli_epi64(_mm_xor_si128(a, smask), imm), smask);
+}
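+// SSE2 also lacks a 64-bit arithmetic right shift. v_srai_epi64 broadcasts
+// each lane's sign into smask and computes ((a ^ smask) >> n) ^ smask,
+// which reduces to a logical shift for non-negative lanes and sign-extends
+// negative ones.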
+
+template<int n> inline
+v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
+{
+    int64 delta = (int64)1 << (n-1);
+    v_int64x2 delta2(delta, delta);
+    __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i b1 = v_srai_epi64(_mm_add_epi64(b.val, delta2.val), n);
+    __m128i v0 = _mm_unpacklo_epi32(a1, b1); // a0 a1 0 0
+    __m128i v1 = _mm_unpackhi_epi32(a1, b1); // b0 b1 0 0
+    return v_int32x4(_mm_unpacklo_epi32(v0, v1));
+}
+
+template<int n> inline
+void v_rshr_pack_store(int* ptr, const v_int64x2& a)
+{
+    int64 delta = (int64)1 << (n-1);
+    v_int64x2 delta2(delta, delta);
+    __m128i a1 = v_srai_epi64(_mm_add_epi64(a.val, delta2.val), n);
+    __m128i a2 = _mm_shuffle_epi32(a1, _MM_SHUFFLE(0, 2, 2, 0));
+    _mm_storel_epi64((__m128i*)ptr, a2);
+}
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val);
+    __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val);
+    __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val);
+    __m128 v3 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(3, 3, 3, 3)), m3.val);
+
+    return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, v3)));
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    __m128 v0 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(0, 0, 0, 0)), m0.val);
+    __m128 v1 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(1, 1, 1, 1)), m1.val);
+    __m128 v2 = _mm_mul_ps(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE(2, 2, 2, 2)), m2.val);
+
+    return v_float32x4(_mm_add_ps(_mm_add_ps(v0, v1), _mm_add_ps(v2, a.val)));
+}
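+// Usage sketch (hypothetical code, not part of this header): transforming a
+// point by a 4x4 matrix stored as column vectors m0..m3:
+//   v_float32x4 p = v_load(pt);                   // {x, y, z, w}
+//   v_float32x4 q = v_matmul(p, m0, m1, m2, m3);  // x*m0 + y*m1 + z*m2 + w*m3
+// v_matmuladd is the three-column affine variant with a constant bias term.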
+
+#define OPENCV_HAL_IMPL_SSE_BIN_OP(bin_op, _Tpvec, intrin) \
+    inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+    { \
+        return _Tpvec(intrin(a.val, b.val)); \
+    } \
+    inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
+    { \
+        a.val = intrin(a.val, b.val); \
+        return a; \
+    }
+
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint8x16, _mm_adds_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint8x16, _mm_subs_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int8x16, _mm_adds_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int8x16, _mm_subs_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint16x8, _mm_adds_epu16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint16x8, _mm_subs_epu16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_uint16x8, _mm_mullo_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int16x8, _mm_adds_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int16x8, _mm_subs_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_int16x8, _mm_mullo_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint32x4, _mm_add_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int32x4, _mm_add_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int32x4, _mm_sub_epi32)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float32x4, _mm_add_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float32x4, _mm_sub_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float32x4, _mm_mul_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float32x4, _mm_div_ps)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_float64x2, _mm_add_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_float64x2, _mm_sub_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(*, v_float64x2, _mm_mul_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(/, v_float64x2, _mm_div_pd)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_uint64x2, _mm_add_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_uint64x2, _mm_sub_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(+, v_int64x2, _mm_add_epi64)
+OPENCV_HAL_IMPL_SSE_BIN_OP(-, v_int64x2, _mm_sub_epi64)
+
+inline v_uint32x4 operator * (const v_uint32x4& a, const v_uint32x4& b)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
+    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
+    return v_uint32x4(_mm_unpacklo_epi64(d0, d1));
+}
+inline v_int32x4 operator * (const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    __m128i d0 = _mm_unpacklo_epi32(c0, c1);
+    __m128i d1 = _mm_unpackhi_epi32(c0, c1);
+    return v_int32x4(_mm_unpacklo_epi64(d0, d1));
+}
+inline v_uint32x4& operator *= (v_uint32x4& a, const v_uint32x4& b)
+{
+    a = a * b;
+    return a;
+}
+inline v_int32x4& operator *= (v_int32x4& a, const v_int32x4& b)
+{
+    a = a * b;
+    return a;
+}
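+// _mm_mullo_epi32 is SSE4.1, so the 32-bit lane products above are built
+// from SSE2's _mm_mul_epu32 (32x32 -> 64 on the even lanes): multiply the
+// even and odd lane pairs separately, then interleave the low 32 bits of
+// each 64-bit product. Those low bits are identical for signed and unsigned
+// operands, so one routine serves both element types.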
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
+                         v_int32x4& c, v_int32x4& d)
+{
+    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
+    __m128i v1 = _mm_mulhi_epi16(a.val, b.val);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
+}
+
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
+                         v_uint32x4& c, v_uint32x4& d)
+{
+    __m128i v0 = _mm_mullo_epi16(a.val, b.val);
+    __m128i v1 = _mm_mulhi_epu16(a.val, b.val);
+    c.val = _mm_unpacklo_epi16(v0, v1);
+    d.val = _mm_unpackhi_epi16(v0, v1);
+}
+
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
+                         v_uint64x2& c, v_uint64x2& d)
+{
+    __m128i c0 = _mm_mul_epu32(a.val, b.val);
+    __m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
+    c.val = _mm_unpacklo_epi64(c0, c1);
+    d.val = _mm_unpackhi_epi64(c0, c1);
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    return v_int32x4(_mm_madd_epi16(a.val, b.val));
+}
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    return v_int32x4(_mm_add_epi32(_mm_madd_epi16(a.val, b.val), c.val));
+}
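+
+// Editor's note: _mm_madd_epi16 computes a[2i]*b[2i] + a[2i+1]*b[2i+1] per
+// 32-bit lane, which is exactly the pairwise dot product v_dotprod exposes;
+// the three-argument form merely folds in the accumulator c. A hedged usage
+// sketch, assuming the v_setall_s16 initializer defined earlier in this
+// header:
+//   v_int16x8 a = v_setall_s16(2), b = v_setall_s16(3);
+//   v_int32x4 s = v_dotprod(a, b); // every lane holds 2*3 + 2*3 = 12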
+
+#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
+    OPENCV_HAL_IMPL_SSE_BIN_OP(^, _Tpvec, _mm_xor_##suffix) \
+    inline _Tpvec operator ~ (const _Tpvec& a) \
+    { \
+        return _Tpvec(_mm_xor_##suffix(a.val, not_const)); \
+    }
+
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint8x16, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int8x16, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint16x8, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int16x8, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint32x4, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int32x4, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_uint64x2, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_int64x2, si128, _mm_set1_epi32(-1))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float32x4, ps, _mm_castsi128_ps(_mm_set1_epi32(-1)))
+OPENCV_HAL_IMPL_SSE_LOGIC_OP(v_float64x2, pd, _mm_castsi128_pd(_mm_set1_epi32(-1)))
+
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(_mm_sqrt_ps(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{
+    static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
+    __m128 t = x.val;
+    __m128 h = _mm_mul_ps(t, _0_5);
+    t = _mm_rsqrt_ps(t);
+    t = _mm_mul_ps(t, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t, t), h)));
+    return v_float32x4(t);
+}
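+
+// Editor's note: _mm_rsqrt_ps is only a ~12-bit approximation, so v_invsqrt
+// refines it with one Newton-Raphson step, t' = t*(1.5 - 0.5*x*t*t), which
+// roughly doubles the number of correct bits; the double-precision overload
+// below has no rsqrt instruction to start from and simply divides 1.0 by
+// the exact square root.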
+
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(_mm_sqrt_pd(x.val)); }
+
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{
+    static const __m128d v_1 = _mm_set1_pd(1.);
+    return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
+}
+
+#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \
+inline _Tpuvec v_abs(const _Tpsvec& x) \
+{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); }
+
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8)
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16)
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{
+    __m128i s = _mm_srli_epi32(x.val, 31);
+    __m128i f = _mm_srai_epi32(x.val, 31);
+    return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s));
+}
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); }
+inline v_float64x2 v_abs(const v_float64x2& x)
+{
+    return v_float64x2(_mm_and_pd(x.val,
+        _mm_castsi128_pd(_mm_srli_epi64(_mm_set1_epi32(-1), 1))));
+}
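+
+// Editor's note: the v_abs family above is branchless. For int32,
+// f = x >> 31 (arithmetic) is all-ones for negative lanes while
+// s = x >> 31 (logical) is 1, so (x ^ f) + s negates negative lanes in
+// two's complement and leaves the rest untouched. The float/double versions
+// clear the IEEE sign bit by masking with 0x7fffffff or
+// 0x7fffffffffffffff.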
+
+// TODO: exp, log, sin, cos
+
+#define OPENCV_HAL_IMPL_SSE_BIN_FUNC(_Tpvec, func, intrin) \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(intrin(a.val, b.val)); \
+}
+
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_min, _mm_min_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_max, _mm_max_epu8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_min, _mm_min_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_max, _mm_max_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_min, _mm_min_ps)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float32x4, v_max, _mm_max_ps)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_min, _mm_min_pd)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_float64x2, v_max, _mm_max_pd)
+
+inline v_int8x16 v_min(const v_int8x16& a, const v_int8x16& b)
+{
+#if CV_SSE4_1
+    return v_int8x16(_mm_min_epi8(a.val, b.val));
+#else
+    __m128i delta = _mm_set1_epi8((char)-128);
+    return v_int8x16(_mm_xor_si128(delta, _mm_min_epu8(_mm_xor_si128(a.val, delta),
+                                                       _mm_xor_si128(b.val, delta))));
+#endif
+}
+inline v_int8x16 v_max(const v_int8x16& a, const v_int8x16& b)
+{
+#if CV_SSE4_1
+    return v_int8x16(_mm_max_epi8(a.val, b.val));
+#else
+    __m128i delta = _mm_set1_epi8((char)-128);
+    return v_int8x16(_mm_xor_si128(delta, _mm_max_epu8(_mm_xor_si128(a.val, delta),
+                                                       _mm_xor_si128(b.val, delta))));
+#endif
+}
+inline v_uint16x8 v_min(const v_uint16x8& a, const v_uint16x8& b)
+{
+#if CV_SSE4_1
+    return v_uint16x8(_mm_min_epu16(a.val, b.val));
+#else
+    return v_uint16x8(_mm_subs_epu16(a.val, _mm_subs_epu16(a.val, b.val)));
+#endif
+}
+inline v_uint16x8 v_max(const v_uint16x8& a, const v_uint16x8& b)
+{
+#if CV_SSE4_1
+    return v_uint16x8(_mm_max_epu16(a.val, b.val));
+#else
+    return v_uint16x8(_mm_adds_epu16(_mm_subs_epu16(a.val, b.val), b.val));
+#endif
+}
+inline v_uint32x4 v_min(const v_uint32x4& a, const v_uint32x4& b)
+{
+#if CV_SSE4_1
+    return v_uint32x4(_mm_min_epu32(a.val, b.val));
+#else
+    __m128i delta = _mm_set1_epi32((int)0x80000000);
+    __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
+    return v_uint32x4(v_select_si128(mask, b.val, a.val));
+#endif
+}
+inline v_uint32x4 v_max(const v_uint32x4& a, const v_uint32x4& b)
+{
+#if CV_SSE4_1
+    return v_uint32x4(_mm_max_epu32(a.val, b.val));
+#else
+    __m128i delta = _mm_set1_epi32((int)0x80000000);
+    __m128i mask = _mm_cmpgt_epi32(_mm_xor_si128(a.val, delta), _mm_xor_si128(b.val, delta));
+    return v_uint32x4(v_select_si128(mask, a.val, b.val));
+#endif
+}
+inline v_int32x4 v_min(const v_int32x4& a, const v_int32x4& b)
+{
+#if CV_SSE4_1
+    return v_int32x4(_mm_min_epi32(a.val, b.val));
+#else
+    return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), b.val, a.val));
+#endif
+}
+inline v_int32x4 v_max(const v_int32x4& a, const v_int32x4& b)
+{
+#if CV_SSE4_1
+    return v_int32x4(_mm_max_epi32(a.val, b.val));
+#else
+    return v_int32x4(v_select_si128(_mm_cmpgt_epi32(a.val, b.val), a.val, b.val));
+#endif
+}
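+
+// Editor's note: the pre-SSE4.1 fallbacks above rely on three classic
+// tricks: signed 8-bit min/max flips the sign bit (xor 0x80) so the
+// unsigned epu8 intrinsics order correctly; unsigned 16-bit uses saturating
+// subtraction, min(a,b) = a - sat(a-b) and max(a,b) = sat(a-b) + b; and
+// unsigned 32-bit flips the sign bit so the signed _mm_cmpgt_epi32 compare
+// applies, then blends lanes with v_select_si128.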
+
+#define OPENCV_HAL_IMPL_SSE_INT_CMP_OP(_Tpuvec, _Tpsvec, suffix, sbit) \
+inline _Tpuvec operator == (const _Tpuvec& a, const _Tpuvec& b) \
+{ return _Tpuvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpuvec operator != (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpuvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpsvec operator == (const _Tpsvec& a, const _Tpsvec& b) \
+{ return _Tpsvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpsvec operator != (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpeq_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpuvec operator < (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask))); \
+} \
+inline _Tpuvec operator > (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    return _Tpuvec(_mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask))); \
+} \
+inline _Tpuvec operator <= (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(a.val, smask), _mm_xor_si128(b.val, smask)); \
+    return _Tpuvec(_mm_xor_si128(res, not_mask)); \
+} \
+inline _Tpuvec operator >= (const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    __m128i smask = _mm_set1_##suffix(sbit); \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    __m128i res = _mm_cmpgt_##suffix(_mm_xor_si128(b.val, smask), _mm_xor_si128(a.val, smask)); \
+    return _Tpuvec(_mm_xor_si128(res, not_mask)); \
+} \
+inline _Tpsvec operator < (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    return _Tpsvec(_mm_cmpgt_##suffix(b.val, a.val)); \
+} \
+inline _Tpsvec operator > (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    return _Tpsvec(_mm_cmpgt_##suffix(a.val, b.val)); \
+} \
+inline _Tpsvec operator <= (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(a.val, b.val), not_mask)); \
+} \
+inline _Tpsvec operator >= (const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i not_mask = _mm_set1_epi32(-1); \
+    return _Tpsvec(_mm_xor_si128(_mm_cmpgt_##suffix(b.val, a.val), not_mask)); \
+}
+
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint8x16, v_int8x16, epi8, (char)-128)
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint16x8, v_int16x8, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_INT_CMP_OP(v_uint32x4, v_int32x4, epi32, (int)0x80000000)
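+
+// Editor's note: the unsigned comparisons above reuse the signed
+// _mm_cmpgt_* intrinsics by xor-ing both operands with the sign bit (0x80,
+// 0x8000, 0x80000000), which maps unsigned order onto signed order; the !=,
+// <= and >= results are then the ==, > and < masks inverted with an
+// all-ones xor.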
+
+#define OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(_Tpvec, suffix) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpeq_##suffix(a.val, b.val)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpneq_##suffix(a.val, b.val)); } \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmplt_##suffix(a.val, b.val)); } \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpgt_##suffix(a.val, b.val)); } \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmple_##suffix(a.val, b.val)); } \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(_mm_cmpge_##suffix(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
+OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
+
+#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
+{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
+{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
+
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
+
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_add_wrap, _mm_add_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_add_wrap, _mm_add_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_add_wrap, _mm_add_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_add_wrap, _mm_add_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint8x16, v_sub_wrap, _mm_sub_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int8x16, v_sub_wrap, _mm_sub_epi8)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_uint16x8, v_sub_wrap, _mm_sub_epi16)
+OPENCV_HAL_IMPL_SSE_BIN_FUNC(v_int16x8, v_sub_wrap, _mm_sub_epi16)
+
+#define OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(_Tpuvec, _Tpsvec, bits, smask32) \
+inline _Tpuvec v_absdiff(const _Tpuvec& a, const _Tpuvec& b) \
+{ \
+    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a.val, b.val), _mm_subs_epu##bits(b.val, a.val))); \
+} \
+inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
+{ \
+    __m128i smask = _mm_set1_epi32(smask32); \
+    __m128i a1 = _mm_xor_si128(a.val, smask); \
+    __m128i b1 = _mm_xor_si128(b.val, smask); \
+    return _Tpuvec(_mm_add_epi##bits(_mm_subs_epu##bits(a1, b1), _mm_subs_epu##bits(b1, a1))); \
+}
+
+OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint8x16, v_int8x16, 8, (int)0x80808080)
+OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16(v_uint16x8, v_int16x8, 16, (int)0x80008000)
+
+inline v_uint32x4 v_absdiff(const v_uint32x4& a, const v_uint32x4& b)
+{
+    return v_max(a, b) - v_min(a, b);
+}
+
+inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
+{
+    __m128i d = _mm_sub_epi32(a.val, b.val);
+    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
+    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
+}
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return a * b + c;
+}
+
+#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
+inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg absmask = _mm_castsi128_##suffix(absmask_vec); \
+    return _Tpvec(_mm_and_##suffix(_mm_sub_##suffix(a.val, b.val), absmask)); \
+} \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
+    return _Tpvec(_mm_sqrt_##suffix(res)); \
+} \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    _Tpreg res = _mm_add_##suffix(_mm_mul_##suffix(a.val, a.val), _mm_mul_##suffix(b.val, b.val)); \
+    return _Tpvec(res); \
+} \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
+{ \
+    return _Tpvec(_mm_add_##suffix(_mm_mul_##suffix(a.val, b.val), c.val)); \
+}
+
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float32x4, float, __m128, ps, _mm_set1_epi32((int)0x7fffffff))
+OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(v_float64x2, double, __m128d, pd, _mm_srli_epi64(_mm_set1_epi32(-1), 1))
+
+#define OPENCV_HAL_IMPL_SSE_SHIFT_OP(_Tpuvec, _Tpsvec, suffix, srai) \
+inline _Tpuvec operator << (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator << (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+inline _Tpuvec operator >> (const _Tpuvec& a, int imm) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+inline _Tpsvec operator >> (const _Tpsvec& a, int imm) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shl(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shl(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(_mm_slli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpuvec v_shr(const _Tpuvec& a) \
+{ \
+    return _Tpuvec(_mm_srli_##suffix(a.val, imm)); \
+} \
+template<int imm> \
+inline _Tpsvec v_shr(const _Tpsvec& a) \
+{ \
+    return _Tpsvec(srai(a.val, imm)); \
+}
+
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint16x8, v_int16x8, epi16, _mm_srai_epi16)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint32x4, v_int32x4, epi32, _mm_srai_epi32)
+OPENCV_HAL_IMPL_SSE_SHIFT_OP(v_uint64x2, v_int64x2, epi64, v_srai_epi64)
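+
+// Editor's note: SSE provides no 64-bit arithmetic right shift, so the
+// epi64 instantiation plugs in v_srai_epi64, a helper this header supplies
+// elsewhere; the 16- and 32-bit cases map directly onto
+// _mm_srai_epi16/_mm_srai_epi32.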
+
+namespace hal_sse_internal
+{
+    template <int imm,
+        bool is_invalid = ((imm < 0) || (imm > 16)),
+        bool is_first = (imm == 0),
+        bool is_half = (imm == 8),
+        bool is_second = (imm == 16),
+        bool is_other = (((imm > 0) && (imm < 8)) || ((imm > 8) && (imm < 16)))>
+    class v_sse_palignr_u8_class;
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, true, false, false, false, false>;
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, true, false, false, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i&) const
+        {
+            return a;
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, true, false, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_unpacklo_epi64(_mm_unpackhi_epi64(a, a), b);
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, true, false>
+    {
+    public:
+        inline __m128i operator()(const __m128i&, const __m128i& b) const
+        {
+            return b;
+        }
+    };
+
+    template <int imm>
+    class v_sse_palignr_u8_class<imm, false, false, false, false, true>
+    {
+#if CV_SSSE3
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            return _mm_alignr_epi8(b, a, imm);
+        }
+#else
+    public:
+        inline __m128i operator()(const __m128i& a, const __m128i& b) const
+        {
+            enum { imm2 = (sizeof(__m128i) - imm) };
+            return _mm_or_si128(_mm_srli_si128(a, imm), _mm_slli_si128(b, imm2));
+        }
+#endif
+    };
+
+    template <int imm>
+    inline __m128i v_sse_palignr_u8(const __m128i& a, const __m128i& b)
+    {
+        CV_StaticAssert((imm >= 0) && (imm <= 16), "Invalid imm for v_sse_palignr_u8.");
+        return v_sse_palignr_u8_class<imm>()(a, b);
+    }
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_srli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        _mm_slli_si128(
+            v_sse_reinterpret_as<__m128i>(a.val), imm2)));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = (imm * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>(
+            v_sse_reinterpret_as<__m128i>(a.val),
+            v_sse_reinterpret_as<__m128i>(b.val))));
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec &a, const _Tpvec &b)
+{
+    using namespace hal_sse_internal;
+    enum { imm2 = ((_Tpvec::nlanes - imm) * sizeof(typename _Tpvec::lane_type)) };
+    return _Tpvec(v_sse_reinterpret_as<typename _Tpvec::vector_type>(
+        v_sse_palignr_u8<imm2>(
+            v_sse_reinterpret_as<__m128i>(b.val),
+            v_sse_reinterpret_as<__m128i>(a.val))));
+}
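+
+// Editor's note: the two-register rotates treat b:a as one 256-bit value
+// and extract a 128-bit window offset by imm lanes. v_sse_palignr_u8
+// dispatches at compile time: byte offsets of 0, 8 and 16 degenerate to a
+// copy or a 64-bit unpack, SSSE3 targets use _mm_alignr_epi8, and the
+// pre-SSSE3 fallback ORs together _mm_srli_si128(a) and _mm_slli_si128(b).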
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(_Tpvec, _Tp) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
+                                     _mm_loadl_epi64((const __m128i*)ptr1))); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ _mm_store_si128((__m128i*)ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, a.val); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a.val, a.val)); }
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int8x16, schar)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int16x8, short)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int32x4, int)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INT_OP(v_int64x2, int64)
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(_Tpvec, _Tp, suffix) \
+inline _Tpvec v_load(const _Tp* ptr) \
+{ return _Tpvec(_mm_loadu_##suffix(ptr)); } \
+inline _Tpvec v_load_aligned(const _Tp* ptr) \
+{ return _Tpvec(_mm_load_##suffix(ptr)); } \
+inline _Tpvec v_load_low(const _Tp* ptr) \
+{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
+{ \
+    return _Tpvec(_mm_castsi128_##suffix( \
+        _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
+                           _mm_loadl_epi64((const __m128i*)ptr1)))); \
+} \
+inline void v_store(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storeu_##suffix(ptr, a.val); } \
+inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+{ _mm_store_##suffix(ptr, a.val); } \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
+{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
+{ \
+    __m128i a1 = _mm_cast##suffix##_si128(a.val); \
+    _mm_storel_epi64((__m128i*)ptr, _mm_unpackhi_epi64(a1, a1)); \
+}
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd)
+
+#if CV_FP16
+inline v_float16x4 v_load_f16(const short* ptr)
+{ return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); }
+inline void v_store_f16(short* ptr, v_float16x4& a)
+{ _mm_storel_epi64((__m128i*)ptr, a.val); }
+#endif
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \
+inline scalartype v_reduce_##func(const v_##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
+    return (scalartype)_mm_cvtsi128_si32(val); \
+} \
+inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    __m128i smask = _mm_set1_epi16(sbit); \
+    val = _mm_xor_si128(val, smask); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,8)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,4)); \
+    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
+    return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \
+}
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix) \
+inline scalartype v_reduce_sum(const v_##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 8)); \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 4)); \
+    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 2)); \
+    return (scalartype)_mm_cvtsi128_si32(val); \
+} \
+inline unsigned scalartype v_reduce_sum(const v_u##_Tpvec& a) \
+{ \
+    __m128i val = a.val; \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 8)); \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 4)); \
+    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 2)); \
+    return (unsigned scalartype)_mm_cvtsi128_si32(val); \
+}
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(int16x8, short, 16)
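+
+// Editor's note: the 8-lane reductions fold the vector onto itself with
+// byte shifts of 8, 4 and then 2, halving the number of live lanes per
+// step. The unsigned min/max variant xors the sign bit in and back out so
+// the signed _mm_min/_mm_max_epi16 intrinsics can do the work, and the sum
+// variants use saturating adds, so they clamp rather than wrap on overflow.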
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \
+inline scalartype v_reduce_sum(const _Tpvec& a) \
+{ \
+    regtype val = a.val; \
+    val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 8))); \
+    val = _mm_add_##suffix(val, cast_to(_mm_srli_si128(cast_from(val), 4))); \
+    return (scalartype)_mm_cvt##extract(val); \
+}
+
+#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
+inline scalartype v_reduce_##func(const _Tpvec& a) \
+{ \
+    scalartype CV_DECL_ALIGNED(16) buf[4]; \
+    v_store_aligned(buf, a); \
+    scalartype s0 = scalar_func(buf[0], buf[1]); \
+    scalartype s1 = scalar_func(buf[2], buf[3]); \
+    return scalar_func(s0, s1); \
+}
+
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32)
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+#if CV_SSE3
+    __m128 ab = _mm_hadd_ps(a.val, b.val);
+    __m128 cd = _mm_hadd_ps(c.val, d.val);
+    return v_float32x4(_mm_hadd_ps(ab, cd));
+#else
+    __m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val));
+    __m128 bd = _mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val));
+    return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd)));
+#endif
+}
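+
+// Editor's note: with SSE3 the 4-way horizontal sum is three _mm_hadd_ps
+// passes; the SSE2 path reaches the same (sum(a), sum(b), sum(c), sum(d))
+// layout through unpacklo/unpackhi add pairs.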
+
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, min, std::min)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_int32x4, int, min, std::min)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, max, std::max)
+OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
+
+#define OPENCV_HAL_IMPL_SSE_POPCOUNT(_Tpvec) \
+inline v_uint32x4 v_popcount(const _Tpvec& a) \
+{ \
+    __m128i m1 = _mm_set1_epi32(0x55555555); \
+    __m128i m2 = _mm_set1_epi32(0x33333333); \
+    __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); \
+    __m128i p = a.val; \
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); \
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); \
+    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); \
+    p = _mm_adds_epi8(p, _mm_srli_si128(p, 1)); \
+    p = _mm_adds_epi8(p, _mm_srli_si128(p, 2)); \
+    return v_uint32x4(_mm_and_si128(p, _mm_set1_epi32(0x000000ff))); \
+}
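+
+// Editor's note: this is the textbook SWAR popcount. The three masked add
+// steps (masks 0x5555..., 0x3333..., 0x0f0f...) fold bit counts into 2-,
+// 4- and then 8-bit fields, so every byte ends up holding its own
+// popcount; the two _mm_srli_si128/_mm_adds_epi8 steps sum the four bytes
+// of each 32-bit lane into its lowest byte, and the final mask keeps only
+// that byte per lane.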
+
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint8x16)
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint16x8)
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint32x4)
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int8x16)
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int16x8)
+OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int32x4)
+
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
+inline int v_signmask(const _Tpvec& a) \
+{ \
+    return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
+} \
+inline bool v_check_all(const _Tpvec& a) \
+{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
+inline bool v_check_any(const _Tpvec& a) \
+{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }
+
+#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
+inline __m128i v_packq_epi32(__m128i a)
+{
+    __m128i b = _mm_packs_epi32(a, a);
+    return _mm_packs_epi16(b, b);
+}
+
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
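+
+// Editor's note: v_signmask funnels everything through a movemask
+// instruction. 16- and 32-bit lanes are first narrowed with saturating
+// packs (OPENCV_HAL_PACKS / v_packq_epi32) so each lane contributes one
+// sign bit, and the and-masks drop the duplicate bits the self-packs
+// produce; v_check_all/v_check_any instead test the raw movemask against a
+// per-lane sign-bit pattern.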
+
+#if CV_SSE4_1
+#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \
+}
+
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, OPENCV_HAL_NOP, OPENCV_HAL_NOP, epi8)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, _mm_castps_si128, _mm_castsi128_ps, ps)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, OPENCV_HAL_NOP, OPENCV_HAL_NOP, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, OPENCV_HAL_NOP, OPENCV_HAL_NOP, pd)
+
+#else // CV_SSE4_1
+
+#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ \
+    return _Tpvec(_mm_xor_##suffix(b.val, _mm_and_##suffix(_mm_xor_##suffix(b.val, a.val), mask.val))); \
+}
+
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint8x16, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int8x16, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int16x8, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_uint32x4, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128)
+// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float32x4, ps)
+OPENCV_HAL_IMPL_SSE_SELECT(v_float64x2, pd)
+#endif
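+
+// Editor's note: without SSE4.1 blendv, v_select falls back to the bitwise
+// identity b ^ ((b ^ a) & mask): where mask bits are set the xors cancel to
+// a, elsewhere b survives. This assumes each mask lane is all-ones or
+// all-zeros, which is what the comparison operators above produce.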
+
+#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
+inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
+{ \
+    __m128i z = _mm_setzero_si128(); \
+    b0.val = _mm_unpacklo_##suffix(a.val, z); \
+    b1.val = _mm_unpackhi_##suffix(a.val, z); \
+} \
+inline _Tpwuvec v_load_expand(const _Tpu* ptr) \
+{ \
+    __m128i z = _mm_setzero_si128(); \
+    return _Tpwuvec(_mm_unpacklo_##suffix(_mm_loadl_epi64((const __m128i*)ptr), z)); \
+} \
+inline void v_expand(const _Tpsvec& a, _Tpwsvec& b0, _Tpwsvec& b1) \
+{ \
+    b0.val = _mm_srai_##wsuffix(_mm_unpacklo_##suffix(a.val, a.val), shift); \
+    b1.val = _mm_srai_##wsuffix(_mm_unpackhi_##suffix(a.val, a.val), shift); \
+} \
+inline _Tpwsvec v_load_expand(const _Tps* ptr) \
+{ \
+    __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \
+    return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \
+}
+
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8)
+OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16)
+
+inline void v_expand(const v_uint32x4& a, v_uint64x2& b0, v_uint64x2& b1)
+{
+    __m128i z = _mm_setzero_si128();
+    b0.val = _mm_unpacklo_epi32(a.val, z);
+    b1.val = _mm_unpackhi_epi32(a.val, z);
+}
+inline v_uint64x2 v_load_expand(const unsigned* ptr)
+{
+    __m128i z = _mm_setzero_si128();
+    return v_uint64x2(_mm_unpacklo_epi32(_mm_loadl_epi64((const __m128i*)ptr), z));
+}
+inline void v_expand(const v_int32x4& a, v_int64x2& b0, v_int64x2& b1)
+{
+    __m128i s = _mm_srai_epi32(a.val, 31);
+    b0.val = _mm_unpacklo_epi32(a.val, s);
+    b1.val = _mm_unpackhi_epi32(a.val, s);
+}
+inline v_int64x2 v_load_expand(const int* ptr)
+{
+    __m128i a = _mm_loadl_epi64((const __m128i*)ptr);
+    __m128i s = _mm_srai_epi32(a, 31);
+    return v_int64x2(_mm_unpacklo_epi32(a, s));
+}
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    __m128i z = _mm_setzero_si128();
+    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
+    return v_uint32x4(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    __m128i a = _mm_cvtsi32_si128(*(const int*)ptr);
+    a = _mm_unpacklo_epi8(a, a);
+    a = _mm_unpacklo_epi8(a, a);
+    return v_int32x4(_mm_srai_epi32(a, 24));
+}
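+
+// Editor's note: the signed v_load_expand_q duplicates each input byte into
+// all four bytes of its 32-bit lane (two self-unpacks), then the arithmetic
+// shift right by 24 leaves the sign-extended value; the unsigned variant
+// zero-extends by unpacking against a zero register instead.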
+
+#define OPENCV_HAL_IMPL_SSE_UNPACKS(_Tpvec, suffix, cast_from, cast_to) \
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1) \
+{ \
+    b0.val = _mm_unpacklo_##suffix(a0.val, a1.val); \
+    b1.val = _mm_unpackhi_##suffix(a0.val, a1.val); \
+} \
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    return _Tpvec(cast_to(_mm_unpacklo_epi64(a1, b1))); \
+} \
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    return _Tpvec(cast_to(_mm_unpackhi_epi64(a1, b1))); \
+} \
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) \
+{ \
+    __m128i a1 = cast_from(a.val), b1 = cast_from(b.val); \
+    c.val = cast_to(_mm_unpacklo_epi64(a1, b1)); \
+    d.val = cast_to(_mm_unpackhi_epi64(a1, b1)); \
+}
+
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int16x8, epi16, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
+OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
+
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{
+    return v_rotate_right<s>(a, b);
+}
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{ return v_int32x4(_mm_cvtps_epi32(a.val)); }
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(a1), a.val));
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    __m128i a1 = _mm_cvtps_epi32(a.val);
+    __m128i mask = _mm_castps_si128(_mm_cmpgt_ps(a.val, _mm_cvtepi32_ps(a1)));
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(_mm_cvttps_epi32(a.val)); }
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{ return v_int32x4(_mm_cvtpd_epi32(a.val)); }
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(_mm_cvtepi32_pd(a1), a.val));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_add_epi32(a1, mask));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    __m128i a1 = _mm_cvtpd_epi32(a.val);
+    __m128i mask = _mm_castpd_si128(_mm_cmpgt_pd(a.val, _mm_cvtepi32_pd(a1)));
+    mask = _mm_srli_si128(_mm_slli_si128(mask, 4), 8); // m0 m0 m1 m1 => m0 m1 0 0
+    return v_int32x4(_mm_sub_epi32(a1, mask));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{ return v_int32x4(_mm_cvttpd_epi32(a.val)); }
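+
+// Editor's note: v_floor/v_ceil assume the default round-to-nearest mode of
+// _mm_cvtps_epi32/_mm_cvtpd_epi32 and then correct it: the compare mask is
+// all-ones (i.e. -1) exactly in the lanes that were rounded the wrong way,
+// so adding the mask subtracts 1 for floor and subtracting it adds 1 for
+// ceil. The double versions additionally compact the two 64-bit masks into
+// the low 32-bit lanes to match the _mm_cvtpd_epi32 output layout.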
+
+#define OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(_Tpvec, suffix, cast_from, cast_to) \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
+                           const _Tpvec& a2, const _Tpvec& a3, \
+                           _Tpvec& b0, _Tpvec& b1, \
+                           _Tpvec& b2, _Tpvec& b3) \
+{ \
+    __m128i t0 = cast_from(_mm_unpacklo_##suffix(a0.val, a1.val)); \
+    __m128i t1 = cast_from(_mm_unpacklo_##suffix(a2.val, a3.val)); \
+    __m128i t2 = cast_from(_mm_unpackhi_##suffix(a0.val, a1.val)); \
+    __m128i t3 = cast_from(_mm_unpackhi_##suffix(a2.val, a3.val)); \
+\
+    b0.val = cast_to(_mm_unpacklo_epi64(t0, t1)); \
+    b1.val = cast_to(_mm_unpackhi_epi64(t0, t1)); \
+    b2.val = cast_to(_mm_unpacklo_epi64(t2, t3)); \
+    b3.val = cast_to(_mm_unpackhi_epi64(t2, t3)); \
+}
+
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
+OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
+
+// adapted from sse_utils.hpp
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+
+    __m128i t10 = _mm_unpacklo_epi8(t00, t01);
+    __m128i t11 = _mm_unpackhi_epi8(t00, t01);
+
+    __m128i t20 = _mm_unpacklo_epi8(t10, t11);
+    __m128i t21 = _mm_unpackhi_epi8(t10, t11);
+
+    __m128i t30 = _mm_unpacklo_epi8(t20, t21);
+    __m128i t31 = _mm_unpackhi_epi8(t20, t21);
+
+    a.val = _mm_unpacklo_epi8(t30, t31);
+    b.val = _mm_unpackhi_epi8(t30, t31);
+}
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
+{
+#if CV_SSSE3
+    static const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
+    static const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
+    static const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
+
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+
+    __m128i s0 = _mm_shuffle_epi8(t0, m0);
+    __m128i s1 = _mm_shuffle_epi8(t1, m1);
+    __m128i s2 = _mm_shuffle_epi8(t2, m2);
+
+    t0 = _mm_alignr_epi8(s1, _mm_slli_si128(s0, 10), 5);
+    a.val = _mm_alignr_epi8(s2, t0, 5);
+
+    t1 = _mm_alignr_epi8(_mm_srli_si128(s1, 5), _mm_slli_si128(s0, 5), 6);
+    b.val = _mm_alignr_epi8(_mm_srli_si128(s2, 5), t1, 5);
+
+    t2 = _mm_alignr_epi8(_mm_srli_si128(s2, 10), s1, 11);
+    c.val = _mm_alignr_epi8(t2, s0, 11);
+#else
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+
+    __m128i t10 = _mm_unpacklo_epi8(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi8(t01, _mm_unpackhi_epi64(t02, t02));
+
+    __m128i t20 = _mm_unpacklo_epi8(t10, _mm_unpackhi_epi64(t11, t11));
+    __m128i t21 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t10, t10), t12);
+    __m128i t22 = _mm_unpacklo_epi8(t11, _mm_unpackhi_epi64(t12, t12));
+
+    __m128i t30 = _mm_unpacklo_epi8(t20, _mm_unpackhi_epi64(t21, t21));
+    __m128i t31 = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t20, t20), t22);
+    __m128i t32 = _mm_unpacklo_epi8(t21, _mm_unpackhi_epi64(t22, t22));
+
+    a.val = _mm_unpacklo_epi8(t30, _mm_unpackhi_epi64(t31, t31));
+    b.val = _mm_unpacklo_epi8(_mm_unpackhi_epi64(t30, t30), t32);
+    c.val = _mm_unpacklo_epi8(t31, _mm_unpackhi_epi64(t32, t32));
+#endif
+}
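+
+// Editor's note: the SSSE3 branch above gathers every third byte with three
+// precomputed _mm_shuffle_epi8 masks and stitches the pieces together with
+// _mm_alignr_epi8; the SSE2 branch reaches the same de-interleave through
+// four rounds of unpacks, each round doubling the stride between the
+// elements it groups.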
+
+inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
+{
+    __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1 ...
+    __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ...
+    __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 32)); // a8 b8 c8 d8 ...
+    __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 48)); // a12 b12 c12 d12 ...
+
+    __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 a8 b0 b8 ...
+    __m128i v1 = _mm_unpackhi_epi8(u0, u2); // a2 a10 b2 b10 ...
+    __m128i v2 = _mm_unpacklo_epi8(u1, u3); // a4 a12 b4 b12 ...
+    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a6 a14 b6 b14 ...
+
+    u0 = _mm_unpacklo_epi8(v0, v2); // a0 a4 a8 a12 ...
+    u1 = _mm_unpacklo_epi8(v1, v3); // a2 a6 a10 a14 ...
+    u2 = _mm_unpackhi_epi8(v0, v2); // a1 a5 a9 a13 ...
+    u3 = _mm_unpackhi_epi8(v1, v3); // a3 a7 a11 a15 ...
+
+    v0 = _mm_unpacklo_epi8(u0, u1); // a0 a2 a4 a6 ...
+    v1 = _mm_unpacklo_epi8(u2, u3); // a1 a3 a5 a7 ...
+    v2 = _mm_unpackhi_epi8(u0, u1); // c0 c2 c4 c6 ...
+    v3 = _mm_unpackhi_epi8(u2, u3); // c1 c3 c5 c7 ...
+
+    a.val = _mm_unpacklo_epi8(v0, v1);
+    b.val = _mm_unpackhi_epi8(v0, v1);
+    c.val = _mm_unpacklo_epi8(v2, v3);
+    d.val = _mm_unpackhi_epi8(v2, v3);
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+
+    __m128i t10 = _mm_unpacklo_epi16(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi16(t01, _mm_unpackhi_epi64(t02, t02));
+
+    __m128i t20 = _mm_unpacklo_epi16(t10, _mm_unpackhi_epi64(t11, t11));
+    __m128i t21 = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t10, t10), t12);
+    __m128i t22 = _mm_unpacklo_epi16(t11, _mm_unpackhi_epi64(t12, t12));
+
+    a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
+    b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
+    c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
+{
+    __m128i u0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0 c0 d0 a1 b1 c1 d1
+    __m128i u1 = _mm_loadu_si128((const __m128i*)(ptr + 8)); // a2 b2 c2 d2 ...
+    __m128i u2 = _mm_loadu_si128((const __m128i*)(ptr + 16)); // a4 b4 c4 d4 ...
+    __m128i u3 = _mm_loadu_si128((const __m128i*)(ptr + 24)); // a6 b6 c6 d6 ...
+
+    __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 a4 b0 b4 ...
+    __m128i v1 = _mm_unpackhi_epi16(u0, u2); // a1 a5 b1 b5 ...
+    __m128i v2 = _mm_unpacklo_epi16(u1, u3); // a2 a6 b2 b6 ...
+    __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a3 a7 b3 b7 ...
+
+    u0 = _mm_unpacklo_epi16(v0, v2); // a0 a2 a4 a6 ...
+    u1 = _mm_unpacklo_epi16(v1, v3); // a1 a3 a5 a7 ...
+    u2 = _mm_unpackhi_epi16(v0, v2); // c0 c2 c4 c6 ...
+    u3 = _mm_unpackhi_epi16(v1, v3); // c1 c3 c5 c7 ...
+
+    a.val = _mm_unpacklo_epi16(u0, u1);
+    b.val = _mm_unpackhi_epi16(u0, u1);
+    c.val = _mm_unpacklo_epi16(u2, u3);
+    d.val = _mm_unpackhi_epi16(u2, u3);
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
+{
+    __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 4));
+    __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 8));
+
+    __m128i t10 = _mm_unpacklo_epi32(t00, _mm_unpackhi_epi64(t01, t01));
+    __m128i t11 = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t00, t00), t02);
+    __m128i t12 = _mm_unpacklo_epi32(t01, _mm_unpackhi_epi64(t02, t02));
+
+    a.val = _mm_unpacklo_epi32(t10, _mm_unpackhi_epi64(t11, t11));
+    b.val = _mm_unpacklo_epi32(_mm_unpackhi_epi64(t10, t10), t12);
+    c.val = _mm_unpacklo_epi32(t11, _mm_unpackhi_epi64(t12, t12));
+}
+
+inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
+{
+    v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr));        // a0 b0 c0 d0
+    v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
+    v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
+    v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
+
+    v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
+{
+    __m128 t0 = _mm_loadu_ps(ptr + 0);
+    __m128 t1 = _mm_loadu_ps(ptr + 4);
+    __m128 t2 = _mm_loadu_ps(ptr + 8);
+
+    __m128 at12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 1, 0, 2));
+    a.val = _mm_shuffle_ps(t0, at12, _MM_SHUFFLE(2, 0, 3, 0));
+
+    __m128 bt01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 0, 0, 1));
+    __m128 bt12 = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(0, 2, 0, 3));
+    b.val = _mm_shuffle_ps(bt01, bt12, _MM_SHUFFLE(2, 0, 2, 0));
+
+    __m128 ct01 = _mm_shuffle_ps(t0, t1, _MM_SHUFFLE(0, 1, 0, 2));
+    c.val = _mm_shuffle_ps(ct01, t2, _MM_SHUFFLE(3, 0, 2, 0));
+}
+
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
+{
+    __m128 t0 = _mm_loadu_ps(ptr +  0);
+    __m128 t1 = _mm_loadu_ps(ptr +  4);
+    __m128 t2 = _mm_loadu_ps(ptr +  8);
+    __m128 t3 = _mm_loadu_ps(ptr + 12);
+    __m128 t02lo = _mm_unpacklo_ps(t0, t2);
+    __m128 t13lo = _mm_unpacklo_ps(t1, t3);
+    __m128 t02hi = _mm_unpackhi_ps(t0, t2);
+    __m128 t13hi = _mm_unpackhi_ps(t1, t3);
+    a.val = _mm_unpacklo_ps(t02lo, t13lo);
+    b.val = _mm_unpackhi_ps(t02lo, t13lo);
+    c.val = _mm_unpacklo_ps(t02hi, t13hi);
+    d.val = _mm_unpackhi_ps(t02hi, t13hi);
+}
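+
+// Editor's note: the 4-channel float case is simply a 4x4 transpose of the
+// four loaded rows, using the same unpacklo/unpackhi ladder as
+// v_transpose4x4 above.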
+
+inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
+{
+    __m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
+    __m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));
+
+    a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
+    b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
+    c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
+}
+
+inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
+{
+    v_uint64x2 t0, t1, t2;
+    v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
+    a = v_reinterpret_as_s64(t0);
+    b = v_reinterpret_as_s64(t1);
+    c = v_reinterpret_as_s64(t2);
+}
+
+inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
+{
+    v_uint64x2 t0, t1, t2;
+    v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
+    a = v_reinterpret_as_f64(t0);
+    b = v_reinterpret_as_f64(t1);
+    c = v_reinterpret_as_f64(t2);
+}
+
+// 2-channel
+inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 u0 = _mm_loadu_ps(ptr);       // a0 b0 a1 b1
+    __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
+
+    a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
+    b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 b2 b3
+}
+
+inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
+{
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
+    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
+    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
+    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
+
+    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    v_int16x8 sa, sb;
+    v_load_deinterleave((const short*)ptr, sa, sb);
+    a = v_reinterpret_as_u16(sa);
+    b = v_reinterpret_as_u16(sb);
+}
+
+inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
+{
+    __m128i t0, t1;
+    t0 = _mm_unpacklo_epi16(a.val, b.val);
+    t1 = _mm_unpackhi_epi16(a.val, b.val);
+    _mm_storeu_si128((__m128i*)(ptr), t0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), t1);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b)
+{
+    __m128i v0 = _mm_unpacklo_epi8(a.val, b.val);
+    __m128i v1 = _mm_unpackhi_epi8(a.val, b.val);
+
+    _mm_storeu_si128((__m128i*)(ptr), v0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c )
+{
+#if CV_SSSE3
+    static const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
+    static const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
+    static const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
+
+    __m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5);
+    t0 = _mm_alignr_epi8(c.val, t0, 5);
+    __m128i s0 = _mm_shuffle_epi8(t0, m0);
+
+    __m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6);
+    t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5);
+    __m128i s1 = _mm_shuffle_epi8(t1, m1);
+
+    __m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11);
+    t2 = _mm_alignr_epi8(t2, a.val, 11);
+    __m128i s2 = _mm_shuffle_epi8(t2, m2);
+
+    _mm_storeu_si128((__m128i*)ptr, s0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), s1);
+    _mm_storeu_si128((__m128i*)(ptr + 32), s2);
+#else
+    __m128i z = _mm_setzero_si128();
+    __m128i ab0 = _mm_unpacklo_epi8(a.val, b.val);
+    __m128i ab1 = _mm_unpackhi_epi8(a.val, b.val);
+    __m128i c0 = _mm_unpacklo_epi8(c.val, z);
+    __m128i c1 = _mm_unpackhi_epi8(c.val, z);
+
+    __m128i p00 = _mm_unpacklo_epi16(ab0, c0);
+    __m128i p01 = _mm_unpackhi_epi16(ab0, c0);
+    __m128i p02 = _mm_unpacklo_epi16(ab1, c1);
+    __m128i p03 = _mm_unpackhi_epi16(ab1, c1);
+
+    __m128i p10 = _mm_unpacklo_epi32(p00, p01);
+    __m128i p11 = _mm_unpackhi_epi32(p00, p01);
+    __m128i p12 = _mm_unpacklo_epi32(p02, p03);
+    __m128i p13 = _mm_unpackhi_epi32(p02, p03);
+
+    __m128i p20 = _mm_unpacklo_epi64(p10, p11);
+    __m128i p21 = _mm_unpackhi_epi64(p10, p11);
+    __m128i p22 = _mm_unpacklo_epi64(p12, p13);
+    __m128i p23 = _mm_unpackhi_epi64(p12, p13);
+
+    p20 = _mm_slli_si128(p20, 1);
+    p22 = _mm_slli_si128(p22, 1);
+
+    __m128i p30 = _mm_slli_epi64(_mm_unpacklo_epi32(p20, p21), 8);
+    __m128i p31 = _mm_srli_epi64(_mm_unpackhi_epi32(p20, p21), 8);
+    __m128i p32 = _mm_slli_epi64(_mm_unpacklo_epi32(p22, p23), 8);
+    __m128i p33 = _mm_srli_epi64(_mm_unpackhi_epi32(p22, p23), 8);
+
+    __m128i p40 = _mm_unpacklo_epi64(p30, p31);
+    __m128i p41 = _mm_unpackhi_epi64(p30, p31);
+    __m128i p42 = _mm_unpacklo_epi64(p32, p33);
+    __m128i p43 = _mm_unpackhi_epi64(p32, p33);
+
+    __m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2));
+
+    _mm_storeu_si128((__m128i*)(ptr), v0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 32), v2);
+#endif
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
+                                const v_uint8x16& c, const v_uint8x16& d)
+{
+    // a0 a1 a2 a3 ....
+    // b0 b1 b2 b3 ....
+    // c0 c1 c2 c3 ....
+    // d0 d1 d2 d3 ....
+    __m128i u0 = _mm_unpacklo_epi8(a.val, c.val); // a0 c0 a1 c1 ...
+    __m128i u1 = _mm_unpackhi_epi8(a.val, c.val); // a8 c8 a9 c9 ...
+    __m128i u2 = _mm_unpacklo_epi8(b.val, d.val); // b0 d0 b1 d1 ...
+    __m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ...
+
+    __m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ...
+    __m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
+    __m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
+    __m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ...
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+    _mm_storeu_si128((__m128i*)(ptr + 32), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 48), v3);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
+                                const v_uint16x8& b,
+                                const v_uint16x8& c )
+{
+    __m128i z = _mm_setzero_si128();
+    __m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
+    __m128i ab1 = _mm_unpackhi_epi16(a.val, b.val);
+    __m128i c0 = _mm_unpacklo_epi16(c.val, z);
+    __m128i c1 = _mm_unpackhi_epi16(c.val, z);
+
+    __m128i p10 = _mm_unpacklo_epi32(ab0, c0);
+    __m128i p11 = _mm_unpackhi_epi32(ab0, c0);
+    __m128i p12 = _mm_unpacklo_epi32(ab1, c1);
+    __m128i p13 = _mm_unpackhi_epi32(ab1, c1);
+
+    __m128i p20 = _mm_unpacklo_epi64(p10, p11);
+    __m128i p21 = _mm_unpackhi_epi64(p10, p11);
+    __m128i p22 = _mm_unpacklo_epi64(p12, p13);
+    __m128i p23 = _mm_unpackhi_epi64(p12, p13);
+
+    p20 = _mm_slli_si128(p20, 2);
+    p22 = _mm_slli_si128(p22, 2);
+
+    __m128i p30 = _mm_unpacklo_epi64(p20, p21);
+    __m128i p31 = _mm_unpackhi_epi64(p20, p21);
+    __m128i p32 = _mm_unpacklo_epi64(p22, p23);
+    __m128i p33 = _mm_unpackhi_epi64(p22, p23);
+
+    __m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2));
+
+    _mm_storeu_si128((__m128i*)(ptr), v0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v2);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
+                                const v_uint16x8& c, const v_uint16x8& d)
+{
+    // a0 a1 a2 a3 ....
+    // b0 b1 b2 b3 ....
+    // c0 c1 c2 c3 ....
+    // d0 d1 d2 d3 ....
+    __m128i u0 = _mm_unpacklo_epi16(a.val, c.val); // a0 c0 a1 c1 ...
+    __m128i u1 = _mm_unpackhi_epi16(a.val, c.val); // a4 c4 a5 c5 ...
+    __m128i u2 = _mm_unpacklo_epi16(b.val, d.val); // b0 d0 b1 d1 ...
+    __m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ...
+
+    __m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ...
+    __m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
+    __m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
+    __m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ...
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+    _mm_storeu_si128((__m128i*)(ptr + 16), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 24), v3);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                                const v_uint32x4& c )
+{
+    v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3;
+    v_transpose4x4(a, b, c, z, u0, u1, u2, u3);
+
+    __m128i v0 = _mm_or_si128(u0.val, _mm_slli_si128(u1.val, 12));
+    __m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8));
+    __m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4));
+
+    _mm_storeu_si128((__m128i*)ptr, v0);
+    _mm_storeu_si128((__m128i*)(ptr + 4), v1);
+    _mm_storeu_si128((__m128i*)(ptr + 8), v2);
+}
+
+inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
+                               const v_uint32x4& c, const v_uint32x4& d)
+{
+    v_uint32x4 t0, t1, t2, t3;
+    v_transpose4x4(a, b, c, d, t0, t1, t2, t3);
+    v_store(ptr, t0);
+    v_store(ptr + 4, t1);
+    v_store(ptr + 8, t2);
+    v_store(ptr + 12, t3);
+}
+
+// 2-channel, float only
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b)
+{
+    // a0 a1 a2 a3 ...
+    // b0 b1 b2 b3 ...
+    __m128 u0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1
+    __m128 u1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3
+
+    _mm_storeu_ps(ptr, u0);
+    _mm_storeu_ps((ptr + 4), u1);
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
+{
+    __m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0));
+    __m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0));
+    __m128 v0 = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0));
+    __m128 u2 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1));
+    __m128 u3 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2));
+    __m128 v1 = _mm_shuffle_ps(u2, u3, _MM_SHUFFLE(2, 0, 2, 0));
+    __m128 u4 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2));
+    __m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3));
+    __m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0));
+
+    _mm_storeu_ps(ptr + 0, v0);
+    _mm_storeu_ps(ptr + 4, v1);
+    _mm_storeu_ps(ptr + 8, v2);
+}
+
+inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
+                               const v_float32x4& c, const v_float32x4& d)
+{
+    __m128 u0 = _mm_unpacklo_ps(a.val, c.val);
+    __m128 u1 = _mm_unpacklo_ps(b.val, d.val);
+    __m128 u2 = _mm_unpackhi_ps(a.val, c.val);
+    __m128 u3 = _mm_unpackhi_ps(b.val, d.val);
+    __m128 v0 = _mm_unpacklo_ps(u0, u1);
+    __m128 v2 = _mm_unpacklo_ps(u2, u3);
+    __m128 v1 = _mm_unpackhi_ps(u0, u1);
+    __m128 v3 = _mm_unpackhi_ps(u2, u3);
+
+    _mm_storeu_ps(ptr +  0, v0);
+    _mm_storeu_ps(ptr +  4, v1);
+    _mm_storeu_ps(ptr +  8, v2);
+    _mm_storeu_ps(ptr + 12, v3);
+}
+
+inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
+{
+    __m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
+    __m128i t1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val));
+    __m128i t2 = _mm_unpackhi_epi64(b.val, c.val);
+
+    _mm_storeu_si128((__m128i*)ptr, t0);
+    _mm_storeu_si128((__m128i*)(ptr + 2), t1);
+    _mm_storeu_si128((__m128i*)(ptr + 4), t2);
+}
+
+inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
+{
+    v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
+}
+
+inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
+{
+    v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
+}
+
+#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
+inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
+                                 _Tpvec& b0, _Tpvec& c0 ) \
+{ \
+    _Tpuvec a1, b1, c1; \
+    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix(a1); \
+    b0 = v_reinterpret_as_##suffix(b1); \
+    c0 = v_reinterpret_as_##suffix(c1); \
+} \
+inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
+                                 _Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \
+{ \
+    _Tpuvec a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix(a1); \
+    b0 = v_reinterpret_as_##suffix(b1); \
+    c0 = v_reinterpret_as_##suffix(c1); \
+    d0 = v_reinterpret_as_##suffix(d1); \
+} \
+inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \
+                               const _Tpvec& b0, const _Tpvec& c0 ) \
+{ \
+    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
+    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
+    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
+    v_store_interleave((_Tpu*)ptr, a1, b1, c1); \
+} \
+inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
+                               const _Tpvec& c0, const _Tpvec& d0 ) \
+{ \
+    _Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
+    _Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
+    _Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
+    _Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \
+    v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \
+}
+
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32)
+//OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
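+
+// Usage sketch for the interleave helpers above (illustrative only; `buf`,
+// `x`, `y`, `z` are hypothetical):
+//     float buf[12];                       // packed as x0 y0 z0 x1 y1 z1 ...
+//     v_float32x4 x, y, z;
+//     v_load_deinterleave(buf, x, y, z);   // x = {buf[0], buf[3], buf[6], buf[9]}, etc.
+//     v_store_interleave(buf, x, y, z);    // inverse: repack the three channels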
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+    return v_float32x4(_mm_cvtepi32_ps(a.val));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    return v_float32x4(_mm_cvtpd_ps(a.val));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    return v_float64x2(_mm_cvtepi32_pd(a.val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8)));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    return v_float64x2(_mm_cvtps_pd(a.val));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8))));
+}
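+
+// Lane semantics of the double conversions: v_cvt_f64 converts the two low
+// lanes of a v_float32x4 {a0, a1, a2, a3} to v_float64x2 {(double)a0, (double)a1},
+// while v_cvt_f64_high converts the two high lanes {(double)a2, (double)a3}.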
+
+#if CV_FP16
+inline v_float32x4 v_cvt_f32(const v_float16x4& a)
+{
+    return v_float32x4(_mm_cvtph_ps(a.val));
+}
+
+inline v_float16x4 v_cvt_f16(const v_float32x4& a)
+{
+    return v_float16x4(_mm_cvtps_ph(a.val, 0));
+}
+#endif
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability for SIMD operations
+static inline bool hasSIMD128()
+{
+    return (CV_CPU_HAS_SUPPORT_SSE2) ? true : false;
+}
+
+//! @}
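+
+// Typical runtime-dispatch sketch (illustrative only; `vector_path` and
+// `scalar_path` are hypothetical):
+//     if (hasSIMD128()) vector_path(); else scalar_path();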
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif

+ 917 - 0
ThresholdTools/include/opencv2/core/hal/intrin_vsx.hpp

@@ -0,0 +1,917 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_VSX_HPP
+#define OPENCV_HAL_VSX_HPP
+
+#include <algorithm>
+#include "opencv2/core/utility.hpp"
+
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+
+/**
+ * TODO: support half precision for POWER9;
+ * conversion instructions: xvcvhpsp, xvcvsphp
+**/
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+///////// Types ////////////
+
+struct v_uint8x16
+{
+    typedef uchar lane_type;
+    enum { nlanes = 16 };
+    vec_uchar16 val;
+
+    explicit v_uint8x16(const vec_uchar16& v) : val(v)
+    {}
+    v_uint8x16() : val(vec_uchar16_z)
+    {}
+    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
+    {}
+    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
+               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
+        : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {}
+    uchar get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int8x16
+{
+    typedef schar lane_type;
+    enum { nlanes = 16 };
+    vec_char16 val;
+
+    explicit v_int8x16(const vec_char16& v) : val(v)
+    {}
+    v_int8x16() : val(vec_char16_z)
+    {}
+    v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
+    {}
+    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
+              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
+        : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
+    {}
+    schar get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint16x8
+{
+    typedef ushort lane_type;
+    enum { nlanes = 8 };
+    vec_ushort8 val;
+
+    explicit v_uint16x8(const vec_ushort8& v) : val(v)
+    {}
+    v_uint16x8() : val(vec_ushort8_z)
+    {}
+    v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
+    {}
+    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
+        : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
+    {}
+    ushort get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int16x8
+{
+    typedef short lane_type;
+    enum { nlanes = 8 };
+    vec_short8 val;
+
+    explicit v_int16x8(const vec_short8& v) : val(v)
+    {}
+    v_int16x8() : val(vec_short8_z)
+    {}
+    v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
+    {}
+    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
+        : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
+    {}
+    short get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint32x4
+{
+    typedef unsigned lane_type;
+    enum { nlanes = 4 };
+    vec_uint4 val;
+
+    explicit v_uint32x4(const vec_uint4& v) : val(v)
+    {}
+    v_uint32x4() : val(vec_uint4_z)
+    {}
+    v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
+    {}
+    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
+    {}
+    uint get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int32x4
+{
+    typedef int lane_type;
+    enum { nlanes = 4 };
+    vec_int4 val;
+
+    explicit v_int32x4(const vec_int4& v) : val(v)
+    {}
+    v_int32x4() : val(vec_int4_z)
+    {}
+    v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
+    {}
+    v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
+    {}
+    int get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_float32x4
+{
+    typedef float lane_type;
+    enum { nlanes = 4 };
+    vec_float4 val;
+
+    explicit v_float32x4(const vec_float4& v) : val(v)
+    {}
+    v_float32x4() : val(vec_float4_z)
+    {}
+    v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
+    {}
+    v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
+    {}
+    float get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_uint64x2
+{
+    typedef uint64 lane_type;
+    enum { nlanes = 2 };
+    vec_udword2 val;
+
+    explicit v_uint64x2(const vec_udword2& v) : val(v)
+    {}
+    v_uint64x2() : val(vec_udword2_z)
+    {}
+    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
+    {}
+    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
+    {}
+    uint64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_int64x2
+{
+    typedef int64 lane_type;
+    enum { nlanes = 2 };
+    vec_dword2 val;
+
+    explicit v_int64x2(const vec_dword2& v) : val(v)
+    {}
+    v_int64x2() : val(vec_dword2_z)
+    {}
+    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
+    {}
+    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
+    {}
+    int64 get0() const
+    { return vec_extract(val, 0); }
+};
+
+struct v_float64x2
+{
+    typedef double lane_type;
+    enum { nlanes = 2 };
+    vec_double2 val;
+
+    explicit v_float64x2(const vec_double2& v) : val(v)
+    {}
+    v_float64x2() : val(vec_double2_z)
+    {}
+    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
+    {}
+    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
+    {}
+    double get0() const
+    { return vec_extract(val, 0); }
+};
+
+//////////////// Load and store operations ///////////////
+
+/*
+ * clang-5 aborts while parsing "vec_xxx_c" only when it appears inside a
+ * function template defined via a preprocessor macro.
+ *
+ * If vec_xxx_c is defined as a C++ cast instead, clang-5 accepts it.
+*/
+#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast)                        \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(); }                               \
+inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));}          \
+template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a)  \
+{ return _Tpvec((cast)a.val); }
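+
+// For instance, OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
+// expands to:
+//     inline v_uint8x16 v_setzero_u8() { return v_uint8x16(); }
+//     inline v_uint8x16 v_setall_u8(uchar v) { return v_uint8x16(vec_splats((uchar)v)); }
+//     template<typename _Tpvec0> inline v_uint8x16 v_reinterpret_as_u8(const _Tpvec0 &a)
+//     { return v_uint8x16((vec_uchar16)a.val); }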
+
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
+OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
+
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a)    \
+inline _Tpvec v_load(const _Tp* ptr)                                        \
+{ return _Tpvec(ld(0, ptr)); }                                              \
+inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr))                    \
+{ return _Tpvec(ld_a(0, ptr)); }                                            \
+inline _Tpvec v_load_low(const _Tp* ptr)                                    \
+{ return _Tpvec(vec_ld_l8(ptr)); }                                          \
+inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1)               \
+{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); }          \
+inline void v_store(_Tp* ptr, const _Tpvec& a)                              \
+{ st(a.val, 0, ptr); }                                                      \
+inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)          \
+{ st_a(a.val, 0, ptr); }                                                    \
+inline void v_store_low(_Tp* ptr, const _Tpvec& a)                          \
+{ vec_st_l8(a.val, ptr); }                                                  \
+inline void v_store_high(_Tp* ptr, const _Tpvec& a)                         \
+{ vec_st_h8(a.val, ptr); }
+
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16,  uchar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16,   schar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8,  ushort)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8,   short)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4,  uint)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4,   int)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld,  vsx_ld,  vsx_st,  vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2,  uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2,    int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+
+//////////////// Value reordering ///////////////
+
+/* interleave and deinterleave */
+#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec)                          \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val);}                                   \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a,                   \
+                                _Tpvec& b, _Tpvec& c)                        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); }                           \
+inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b,        \
+                                                _Tpvec& c, _Tpvec& d)        \
+{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); }                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b)   \
+{ vec_st_interleave(a.val, b.val, ptr); }                                    \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a,                    \
+                               const _Tpvec& b, const _Tpvec& c)             \
+{ vec_st_interleave(a.val, b.val, c.val, ptr); }                             \
+inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b,   \
+                                         const _Tpvec& c, const _Tpvec& d)   \
+{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
+
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
+OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
+
+/* Expand */
+#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh)  \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)   \
+{                                                                 \
+    b0.val = fh(a.val);                                           \
+    b1.val = fl(a.val);                                           \
+}                                                                 \
+inline _Tpwvec v_load_expand(const _Tp* ptr)                      \
+{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
+
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
+OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
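+
+// For example, given uchar data {1, 2, 3, 250}, v_load_expand_q widens the
+// four bytes into a single v_uint32x4 {1, 2, 3, 250}.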
+
+/* pack */
+#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack)    \
+inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b)                                          \
+{                                                                                                   \
+    return _Tpvec(pkfnc(a.val, b.val));                                                             \
+}                                                                                                   \
+inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                            \
+{                                                                                                   \
+    vec_st_l8(pkfnc(a.val, a.val), ptr);                                                            \
+}                                                                                                   \
+template<int n>                                                                                     \
+inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b)                                     \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn)));           \
+}                                                                                                   \
+template<int n>                                                                                     \
+inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a)                                       \
+{                                                                                                   \
+    const __vector _Tpvn vn = vec_splats((_Tpvn)n);                                                 \
+    const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1)));                         \
+    vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr);                                   \
+}
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
+                         vec_sr, vec_packs, vec_adds, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packs, vec_adds, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
+                         vec_sr, vec_packs, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packs, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
+                         vec_sr, vec_pack, vec_add, pack)
+OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
+                         vec_sra, vec_pack, vec_add, pack)
+
+OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
+                         vec_sra, vec_packsu, vec_adds, pack_u)
+OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
+                         vec_sra, vec_packsu, vec_add, pack_u)
+// Following variant is not implemented on other platforms:
+//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
+//                         vec_sra, vec_packsu, vec_add, pack_u)
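+
+// Lane semantics of the generated functions, e.g. for v_uint16x8 -> v_uint8x16:
+//     v_pack(a, b)         : lane i = saturate_u8(a[i]), lane i+8 = saturate_u8(b[i])
+//     v_rshr_pack<n>(a, b) : same, but each lane is first rounded and shifted,
+//                            ((x + (1 << (n-1))) >> n), before saturating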
+
+/* Recombine */
+template <typename _Tpvec>
+inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
+{
+    b0.val = vec_mergeh(a0.val, a1.val);
+    b1.val = vec_mergel(a0.val, a1.val);
+}
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesql(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
+{ return _Tpvec(vec_mergesqh(a.val, b.val)); }
+
+template <typename _Tpvec>
+inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
+{
+    c.val = vec_mergesqh(a.val, b.val);
+    d.val = vec_mergesql(a.val, b.val);
+}
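+
+// Lane layout, e.g. for 4-lane vectors a = {a0, a1, a2, a3}, b = {b0, b1, b2, b3}:
+//     v_zip(a, b, lo, hi)   : lo = {a0, b0, a1, b1}, hi = {a2, b2, a3, b3}
+//     v_recombine(a, b, c, d): c = {a0, a1, b0, b1},  d = {a2, a3, b2, b3}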
+
+////////// Arithmetic, bitwise and comparison operations /////////
+
+/* Element-wise binary and unary operations */
+/** Arithmetic **/
+#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin)       \
+inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(intrin(a.val, b.val)); }                         \
+inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b)   \
+{ a.val = intrin(a.val, b.val); return a; }
+
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16,  vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
+OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
+OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
+
+inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
+{
+    c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
+    d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
+}
+inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
+{
+    c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
+    d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
+}
+inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
+{
+    c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
+    d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
+}
+
+/** Non-saturating arithmetic **/
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin)    \
+template<typename _Tpvec>                             \
+inline _Tpvec func(const _Tpvec& a, const _Tpvec& b)  \
+{ return _Tpvec(intrin(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
+
+/** Bitwise shifts **/
+#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc)   \
+inline _Tpvec operator << (const _Tpvec& a, int imm)         \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }              \
+inline _Tpvec operator >> (const _Tpvec& a, int imm)         \
+{ return _Tpvec(shr(a.val, splfunc(imm))); }                 \
+template<int imm> inline _Tpvec v_shl(const _Tpvec& a)       \
+{ return _Tpvec(vec_sl(a.val, splfunc(imm))); }              \
+template<int imm> inline _Tpvec v_shr(const _Tpvec& a)       \
+{ return _Tpvec(shr(a.val, splfunc(imm))); }
+
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
+// algebraic right shift
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
+OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
+
+/** Bitwise logic **/
+#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec)    \
+OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and)  \
+OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or)   \
+OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor)  \
+inline _Tpvec operator ~ (const _Tpvec& a)      \
+{ return _Tpvec(vec_not(a.val)); }
+
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
+
+/** Bitwise select **/
+#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast)                             \
+inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
+{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
+
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
+OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
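+
+// v_select picks lanes by mask: for each lane i the result is a[i] where
+// mask[i] has all bits set, and b[i] where mask[i] is zero, e.g.
+//     v_select(a > b, a, b)   // per-lane maximum, equivalent to v_max(a, b)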
+
+/** Comparison **/
+#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec)                 \
+inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpeq(a.val, b.val)); }                    \
+inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpne(a.val, b.val)); }                    \
+inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmplt(a.val, b.val)); }                    \
+inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b)    \
+{ return _Tpvec(vec_cmpgt(a.val, b.val)); }                    \
+inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmple(a.val, b.val)); }                    \
+inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b)   \
+{ return _Tpvec(vec_cmpge(a.val, b.val)); }
+
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
+OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
+
+/** min/max **/
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
+
+/** Rotate **/
+#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast)                       \
+template<int imm>                                                               \
+inline _Tpvec v_rotate_##suffix(const _Tpvec& a)                                \
+{                                                                               \
+    const int wd = imm * sizeof(typename _Tpvec::lane_type);                    \
+    if (wd > 15)                                                                \
+        return _Tpvec();                                                        \
+    return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3)));    \
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast)     \
+OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \
+OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast)
+
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16,  vec_char16)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8,  vec_short8)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4,  vec_int4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_float32x4, vec_float4)
+OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
+OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2,  vec_dword2)
+OPENCV_IMPL_VSX_ROTATE_LR(v_float64x2, vec_double2)
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
+{
+    enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) };
+    if (CV_SHIFT == 16)
+        return a;
+#ifdef __IBMCPP__
+    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15));
+#else
+    return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT));
+#endif
+}
+
+template<int imm, typename _Tpvec>
+inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
+{
+    enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) };
+    if (CV_SHIFT == 16)
+        return b;
+    return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2)   \
+template<int imm>                                                 \
+inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
+{                                                                 \
+    if (imm == 1)                                                 \
+        return _Tpvec(vec_permi(rg1.val, rg2.val, 2));            \
+    return imm ? b : a;                                           \
+}
+
+#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec)    \
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left,  b, a)  \
+OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)
+
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
+OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
+
+/* Extract */
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{ return v_rotate_right<s>(a, b); }
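+
+// v_extract<s>(a, b) forms the lane concatenation {a, b} and returns n lanes
+// starting at index s, e.g. for 4-lane vectors v_extract<1>(a, b) = {a1, a2, a3, b0}.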
+
+////////// Reduce and mask /////////
+
+/** Reduce **/
+inline short v_reduce_sum(const v_int16x8& a)
+{
+    const vec_int4 zero = vec_int4_z;
+    return saturate_cast<short>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
+}
+inline ushort v_reduce_sum(const v_uint16x8& a)
+{
+    const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
+    return saturate_cast<ushort>(vec_extract(vec_sums(v4, vec_int4_z), 3));
+}
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                      \
+    return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
+
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
+inline scalartype v_reduce_##suffix(const _Tpvec& a)                               \
+{                                                                                  \
+    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8));                            \
+    rs = func(rs, vec_sld(rs, rs, 4));                                             \
+    return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0);                           \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
+
+inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
+                                 const v_float32x4& c, const v_float32x4& d)
+{
+    vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val));
+    ac = vec_add(ac, vec_sld(ac, ac, 8));
+
+    vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val));
+    bd = vec_add(bd, vec_sld(bd, bd, 8));
+    return v_float32x4(vec_mergeh(ac, bd));
+}
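+
+// v_reduce_sum4 returns the four horizontal sums packed into one vector:
+//     result = {sum(a), sum(b), sum(c), sum(d)}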
+
+/** Popcount **/
+template<typename _Tpvec>
+inline v_uint32x4 v_popcount(const _Tpvec& a)
+{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
+
+/** Mask **/
+inline int v_signmask(const v_uint8x16& a)
+{
+    vec_uchar16 sv  = vec_sr(a.val, vec_uchar16_sp(7));
+    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
+    sv = vec_sl(sv, slm);
+    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
+    static const vec_uint4 slm4 = {0, 0, 8, 8};
+    sv4 = vec_sl(sv4, slm4);
+    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
+}
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+
+inline int v_signmask(const v_int16x8& a)
+{
+    static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
+    vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
+    sv = vec_sl(sv, slm);
+    vec_int4 svi = vec_int4_z;
+    svi = vec_sums(vec_sum4s(sv, svi), svi);
+    return vec_extract(svi, 3);
+}
+inline int v_signmask(const v_uint16x8& a)
+{ return v_signmask(v_reinterpret_as_s16(a)); }
+
+inline int v_signmask(const v_int32x4& a)
+{
+    static const vec_uint4 slm = {0, 1, 2, 3};
+    vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
+    sv = vec_sl(sv, slm);
+    sv = vec_sums(sv, vec_int4_z);
+    return vec_extract(sv, 3);
+}
+inline int v_signmask(const v_uint32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_s32(a)); }
+
+inline int v_signmask(const v_int64x2& a)
+{
+    VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63));
+    return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;
+}
+inline int v_signmask(const v_uint64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_s64(a)); }
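+
+// v_signmask packs the sign bit of every lane into an integer, lane 0 in bit 0;
+// e.g. v_signmask(v_int32x4(-1, 2, -3, 4)) == 0b0101 == 5.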
+
+template<typename _Tpvec>
+inline bool v_check_all(const _Tpvec& a)
+{ return vec_all_lt(a.val, _Tpvec().val); }
+inline bool v_check_all(const v_uint8x16& a)
+{ return v_check_all(v_reinterpret_as_s8(a)); }
+inline bool v_check_all(const v_uint16x8& a)
+{ return v_check_all(v_reinterpret_as_s16(a)); }
+inline bool v_check_all(const v_uint32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float32x4& a)
+{ return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_float64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
+
+template<typename _Tpvec>
+inline bool v_check_any(const _Tpvec& a)
+{ return vec_any_lt(a.val, _Tpvec().val); }
+inline bool v_check_any(const v_uint8x16& a)
+{ return v_check_any(v_reinterpret_as_s8(a)); }
+inline bool v_check_any(const v_uint16x8& a)
+{ return v_check_any(v_reinterpret_as_s16(a)); }
+inline bool v_check_any(const v_uint32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float32x4& a)
+{ return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_float64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
+
+////////// Other math /////////
+
+/** Some frequent operations **/
+inline v_float32x4 v_sqrt(const v_float32x4& x)
+{ return v_float32x4(vec_sqrt(x.val)); }
+inline v_float64x2 v_sqrt(const v_float64x2& x)
+{ return v_float64x2(vec_sqrt(x.val)); }
+
+inline v_float32x4 v_invsqrt(const v_float32x4& x)
+{ return v_float32x4(vec_rsqrt(x.val)); }
+inline v_float64x2 v_invsqrt(const v_float64x2& x)
+{ return v_float64x2(vec_rsqrt(x.val)); }
+
+#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec)                                  \
+inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b)                 \
+{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
+inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b)             \
+{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); }           \
+inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)   \
+{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
+
+OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
+OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
+
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{ return a * b + c; }
+
+// TODO: exp, log, sin, cos
+
+/** Absolute values **/
+inline v_uint8x16 v_abs(const v_int8x16& x)
+{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
+
+inline v_uint16x8 v_abs(const v_int16x8& x)
+{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
+
+inline v_uint32x4 v_abs(const v_int32x4& x)
+{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
+
+inline v_float32x4 v_abs(const v_float32x4& x)
+{ return v_float32x4(vec_abs(x.val)); }
+
+inline v_float64x2 v_abs(const v_float64x2& x)
+{ return v_float64x2(vec_abs(x.val)); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
+
+#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin)  \
+inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b)                       \
+{ return _Tpvec2(cast(intrin(a.val, b.val))); }
+
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
+OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
+
+////////// Conversions /////////
+
+/** Rounding **/
+inline v_int32x4 v_round(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_round(a.val))); }
+
+inline v_int32x4 v_round(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_round(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_floor(a.val))); }
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{ return v_int32x4(vec_cts(vec_ceil(a.val))); }
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{ return v_int32x4(vec_cts(a.val)); }
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }
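+
+// v_round rounds to nearest, v_floor/v_ceil round down/up, and v_trunc rounds
+// toward zero. For v_float64x2 inputs the two results land in lanes 0 and 1 of
+// the v_int32x4, with lanes 2 and 3 set to zero.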
+
+/** To float **/
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{ return v_float32x4(vec_ctf(a.val)); }
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); }
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
+
+/** Reinterpret **/
+/** implemented above, together with the load and store operations **/
+
+////////// Matrix operations /////////
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
+
+inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
+                            const v_float32x4& m1, const v_float32x4& m2,
+                            const v_float32x4& m3)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0);
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3);
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val)))));
+}
+
+inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
+                               const v_float32x4& m1, const v_float32x4& m2,
+                               const v_float32x4& a)
+{
+    const vec_float4 v0 = vec_splat(v.val, 0);
+    const vec_float4 v1 = vec_splat(v.val, 1);
+    const vec_float4 v2 = vec_splat(v.val, 2);
+    return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val))));
+}
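+
+// Both treat m0..m3 as the rows of a 4x4 matrix, indexed by the lanes of v:
+//     v_matmul(v, m0, m1, m2, m3)   = v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3
+//     v_matmuladd(v, m0, m1, m2, a) = v[0]*m0 + v[1]*m1 + v[2]*m2 + a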
+
+#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2)                        \
+inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1,                   \
+                           const _Tpvec& a2, const _Tpvec& a3,                   \
+                           _Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3)       \
+{                                                                                \
+    _Tpvec2 a02 = vec_mergeh(a0.val, a2.val);                                    \
+    _Tpvec2 a13 = vec_mergeh(a1.val, a3.val);                                    \
+    b0.val = vec_mergeh(a02, a13);                                               \
+    b1.val = vec_mergel(a02, a13);                                               \
+    a02 = vec_mergel(a0.val, a2.val);                                            \
+    a13 = vec_mergel(a1.val, a3.val);                                            \
+    b2.val  = vec_mergeh(a02, a13);                                              \
+    b3.val  = vec_mergel(a02, a13);                                              \
+}
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
+OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
+
+//! @name Check SIMD support
+//! @{
+//! @brief Check CPU capability for SIMD operations
+static inline bool hasSIMD128()
+{
+    return (CV_CPU_HAS_SUPPORT_VSX) ? true : false;
+}
+
+//! @}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+}
+
+#endif // OPENCV_HAL_VSX_HPP

+ 195 - 0
ThresholdTools/include/opencv2/core/ippasync.hpp

@@ -0,0 +1,195 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_IPPASYNC_HPP
+#define OPENCV_CORE_IPPASYNC_HPP
+
+#ifdef HAVE_IPP_A
+
+#include "opencv2/core.hpp"
+#include <ipp_async_op.h>
+#include <ipp_async_accel.h>
+
+namespace cv
+{
+
+namespace hpp
+{
+
+/** @addtogroup core_ipp
+This section describes the conversion between OpenCV and the [Intel&reg; IPP Asynchronous
+C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. The [Getting Started
+Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) helps you
+install the library and configure the header and library build paths.
+ */
+//! @{
+
+    //! convert OpenCV data type to hppDataType
+    inline int toHppType(const int cvType)
+    {
+        int depth = CV_MAT_DEPTH(cvType);
+        int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
+                     depth == CV_16U ? HPP_DATA_TYPE_16U :
+                     depth == CV_16S ? HPP_DATA_TYPE_16S :
+                     depth == CV_32S ? HPP_DATA_TYPE_32S :
+                     depth == CV_32F ? HPP_DATA_TYPE_32F :
+                     depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
+        CV_Assert( hppType >= 0 );
+        return hppType;
+    }
+
+    //! convert hppDataType to OpenCV data type
+    inline int toCvType(const int hppType)
+    {
+        int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
+                    hppType == HPP_DATA_TYPE_16U ? CV_16U :
+                    hppType == HPP_DATA_TYPE_16S ? CV_16S :
+                    hppType == HPP_DATA_TYPE_32S ? CV_32S :
+                    hppType == HPP_DATA_TYPE_32F ? CV_32F :
+                    hppType == HPP_DATA_TYPE_64F ? CV_64F : -1;
+        CV_Assert( cvType >= 0 );
+        return cvType;
+    }
+
+    /** @brief Convert hppiMatrix to Mat.
+
+    This function allocates and initializes a new matrix (if needed) with the same size and type as
+    the input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+    @param src input hppiMatrix.
+    @param dst output matrix.
+    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
+    @param cn number of channels.
+     */
+    inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
+    {
+        hppDataType type;
+        hpp32u width, height;
+        hppStatus sts;
+
+        if (src == NULL)
+            return dst.release();
+
+        sts = hppiInquireMatrix(src, &type, &width, &height);
+
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+
+        int matType = CV_MAKETYPE(toCvType(type), cn);
+
+        CV_Assert(width%cn == 0);
+
+        width /= cn;
+
+        dst.create((int)height, (int)width, (int)matType);
+
+        size_t newSize = (size_t)(height*(hpp32u)(dst.step));
+
+        sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
+
+        CV_Assert( sts == HPP_STATUS_NO_ERROR);
+    }
+
+    /** @brief Create Mat from hppiMatrix.
+
+    This function allocates and initializes a Mat with the same size and type as the input matrix.
+    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+    @param src input hppiMatrix.
+    @param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
+    @param cn number of channels.
+    @sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
+     */
+    inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
+    {
+        Mat dst;
+        copyHppToMat(src, dst, accel, cn);
+        return dst;
+    }
+
+    /** @brief Create hppiMatrix from Mat.
+
+    This function allocates and initializes an hppiMatrix with the same size and type as the input
+    matrix and returns the hppiMatrix*.
+
+    If you want to use zero-copy for GPU, the matrix data must be 4KB aligned. See details in
+    [hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
+
+    Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
+
+    @note The hppiMatrix points to the image buffer of src.data in system memory. Control
+    the lifetime of the matrix and do not modify its data unless there is a special need.
+    @param src input matrix.
+    @param accel accelerator instance. Supports type:
+    -   **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
+    -   **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
+        accelerators.
+    -   **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
+    @sa howToUseIPPAconversion, hpp::getMat
+     */
+    inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
+    {
+        int htype = toHppType(src.type());
+        int cn = src.channels();
+
+        CV_Assert(src.data);
+        hppAccelType accelType = hppQueryAccelType(accel);
+
+        if (accelType!=HPP_ACCEL_TYPE_CPU)
+        {
+            hpp32u pitch, size;
+            hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
+            if (pitch!=0 && size!=0)
+                if (((size_t)src.data % 4096) == 0 && pitch == (hpp32u)(src.step))
+                {
+                    return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
+                }
+        }
+
+        return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
+    }
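+
+    // Usage sketch (illustrative only; assumes an hppAccel instance `accel`
+    // created elsewhere and a CV_8UC3 matrix `src`):
+    //     hppiMatrix* h = getHpp(src, accel);          // wraps src.data
+    //     Mat dst = getMat(h, accel, src.channels());  // copies back into a Mat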
+
+//! @}
+}}
+
+#endif
+
+#endif

+ 3669 - 0
ThresholdTools/include/opencv2/core/mat.hpp

@@ -0,0 +1,3669 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MAT_HPP
+#define OPENCV_CORE_MAT_HPP
+
+#ifndef __cplusplus
+#  error mat.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/matx.hpp"
+#include "opencv2/core/types.hpp"
+
+#include "opencv2/core/bufferpool.hpp"
+
+#ifdef CV_CXX11
+#include <type_traits>
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+    ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
+
+CV__DEBUG_NS_BEGIN
+
+class CV_EXPORTS _OutputArray;
+
+//////////////////////// Input/Output Array Arguments /////////////////////////////////
+
+/** @brief This is the proxy class for passing read-only input arrays into OpenCV functions.
+
+It is defined as:
+@code
+    typedef const _InputArray& InputArray;
+@endcode
+where _InputArray is a class that can be constructed from `Mat`, `Mat_<T>`, `Matx<T, m, n>`,
+`std::vector<T>`, `std::vector<std::vector<T> >`, `std::vector<Mat>`, `std::vector<Mat_<T> >`,
+`UMat`, `std::vector<UMat>` or `double`. It can also be constructed from a matrix expression.
+
+Since this is a mostly implementation-level class whose interface may change in future versions, we
+do not describe it in detail. There are a few key things, though, that should be kept in mind:
+
+-   When you see in the reference manual or in OpenCV source code a function that takes
+    InputArray, it means that you can actually pass `Mat`, `Matx`, `vector<T>` etc. (see above the
+    complete list).
+-   Optional input arguments: If some of the input arrays may be empty, pass cv::noArray() (or
+    simply cv::Mat() as you probably did before).
+-   The class is designed solely for passing parameters. That is, normally you *should not*
+    declare class members, local and global variables of this type.
+-   If you want to design your own function or a class method that can operate on arrays of
+    multiple types, you can use InputArray (or OutputArray) for the respective parameters. Inside
+    a function you should use _InputArray::getMat() method to construct a matrix header for the
+    array (without copying data). _InputArray::kind() can be used to distinguish Mat from
+    `vector<>` etc., but normally it is not needed.
+
+Here is how you can use a function that takes InputArray :
+@code
+    std::vector<Point2f> vec;
+    // points on a circle
+    for( int i = 0; i < 30; i++ )
+        vec.push_back(Point2f((float)(100 + 30*cos(i*CV_PI*2/5)),
+                              (float)(100 - 30*sin(i*CV_PI*2/5))));
+    cv::transform(vec, vec, cv::Matx23f(0.707, -0.707, 10, 0.707, 0.707, 20));
+@endcode
+That is, we form an STL vector containing points and apply an in-place affine transformation to the
+vector using the 2x3 matrix created inline as a `Matx<float, 2, 3>` instance.
+
+Here is how such a function can be implemented (for simplicity, we implement a very specific case of
+it, according to the assertion statement inside):
+@code
+    void myAffineTransform(InputArray _src, OutputArray _dst, InputArray _m)
+    {
+        // get Mat headers for input arrays. This is O(1) operation,
+        // unless _src and/or _m are matrix expressions.
+        Mat src = _src.getMat(), m = _m.getMat();
+        CV_Assert( src.type() == CV_32FC2 && m.type() == CV_32F && m.size() == Size(3, 2) );
+
+        // [re]create the output array so that it has the proper size and type.
+        // In case of Mat it calls Mat::create, in case of STL vector it calls vector::resize.
+        _dst.create(src.size(), src.type());
+        Mat dst = _dst.getMat();
+
+        for( int i = 0; i < src.rows; i++ )
+            for( int j = 0; j < src.cols; j++ )
+            {
+                Point2f pt = src.at<Point2f>(i, j);
+                dst.at<Point2f>(i, j) = Point2f(m.at<float>(0, 0)*pt.x +
+                                                m.at<float>(0, 1)*pt.y +
+                                                m.at<float>(0, 2),
+                                                m.at<float>(1, 0)*pt.x +
+                                                m.at<float>(1, 1)*pt.y +
+                                                m.at<float>(1, 2));
+            }
+    }
+@endcode
+There is another related type, InputArrayOfArrays, which is currently defined as a synonym for
+InputArray:
+@code
+    typedef InputArray InputArrayOfArrays;
+@endcode
+It denotes function arguments that are either vectors of vectors or vectors of matrices. A separate
+synonym is needed to generate Python/Java etc. wrappers properly. At the function implementation
+level their use is similar, but _InputArray::getMat(idx) should be used to get a header for the
+idx-th component of the outer vector, and _InputArray::size().area() should be used to find the
+number of components (vectors/matrices) of the outer vector.
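+
+For illustration, here is a minimal sketch of such a function (the function name and its summing
+behavior are hypothetical, used only to show the getMat(idx) / size().area() pattern):
+@code
+    void sumOfArrays(InputArrayOfArrays _arrays, OutputArray _sum)
+    {
+        // number of components (matrices) in the outer vector
+        int n = (int)_arrays.size().area();
+        CV_Assert( n > 0 );
+        // getMat(i) constructs a header for the i-th component; no data is copied
+        Mat first = _arrays.getMat(0);
+        _sum.create(first.size(), first.type());
+        Mat sum = _sum.getMat();
+        first.copyTo(sum);
+        for( int i = 1; i < n; i++ )
+            sum += _arrays.getMat(i); // the components are assumed to share size and type
+    }
+@endcode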
+ */
+class CV_EXPORTS _InputArray
+{
+public:
+    enum {
+        KIND_SHIFT = 16,
+        FIXED_TYPE = 0x8000 << KIND_SHIFT,
+        FIXED_SIZE = 0x4000 << KIND_SHIFT,
+        KIND_MASK = 31 << KIND_SHIFT,
+
+        NONE              = 0 << KIND_SHIFT,
+        MAT               = 1 << KIND_SHIFT,
+        MATX              = 2 << KIND_SHIFT,
+        STD_VECTOR        = 3 << KIND_SHIFT,
+        STD_VECTOR_VECTOR = 4 << KIND_SHIFT,
+        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
+        EXPR              = 6 << KIND_SHIFT,
+        OPENGL_BUFFER     = 7 << KIND_SHIFT,
+        CUDA_HOST_MEM     = 8 << KIND_SHIFT,
+        CUDA_GPU_MAT      = 9 << KIND_SHIFT,
+        UMAT              =10 << KIND_SHIFT,
+        STD_VECTOR_UMAT   =11 << KIND_SHIFT,
+        STD_BOOL_VECTOR   =12 << KIND_SHIFT,
+        STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT,
+        STD_ARRAY         =14 << KIND_SHIFT,
+        STD_ARRAY_MAT     =15 << KIND_SHIFT
+    };
+
+    _InputArray();
+    _InputArray(int _flags, void* _obj);
+    _InputArray(const Mat& m);
+    _InputArray(const MatExpr& expr);
+    _InputArray(const std::vector<Mat>& vec);
+    template<typename _Tp> _InputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputArray(const std::vector<_Tp>& vec);
+    _InputArray(const std::vector<bool>& vec);
+    template<typename _Tp> _InputArray(const std::vector<std::vector<_Tp> >& vec);
+    _InputArray(const std::vector<std::vector<bool> >&);
+    template<typename _Tp> _InputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
+    _InputArray(const double& val);
+    _InputArray(const cuda::GpuMat& d_mat);
+    _InputArray(const std::vector<cuda::GpuMat>& d_mat_array);
+    _InputArray(const ogl::Buffer& buf);
+    _InputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
+    _InputArray(const UMat& um);
+    _InputArray(const std::vector<UMat>& umv);
+
+#ifdef CV_CXX_STD_ARRAY
+    template<typename _Tp, std::size_t _Nm> _InputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _InputArray(const std::array<Mat, _Nm>& arr);
+#endif
+
+    Mat getMat(int idx=-1) const;
+    Mat getMat_(int idx=-1) const;
+    UMat getUMat(int idx=-1) const;
+    void getMatVector(std::vector<Mat>& mv) const;
+    void getUMatVector(std::vector<UMat>& umv) const;
+    void getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const;
+    cuda::GpuMat getGpuMat() const;
+    ogl::Buffer getOGlBuffer() const;
+
+    int getFlags() const;
+    void* getObj() const;
+    Size getSz() const;
+
+    int kind() const;
+    int dims(int i=-1) const;
+    int cols(int i=-1) const;
+    int rows(int i=-1) const;
+    Size size(int i=-1) const;
+    int sizend(int* sz, int i=-1) const;
+    bool sameSize(const _InputArray& arr) const;
+    size_t total(int i=-1) const;
+    int type(int i=-1) const;
+    int depth(int i=-1) const;
+    int channels(int i=-1) const;
+    bool isContinuous(int i=-1) const;
+    bool isSubmatrix(int i=-1) const;
+    bool empty() const;
+    void copyTo(const _OutputArray& arr) const;
+    void copyTo(const _OutputArray& arr, const _InputArray & mask) const;
+    size_t offset(int i=-1) const;
+    size_t step(int i=-1) const;
+    bool isMat() const;
+    bool isUMat() const;
+    bool isMatVector() const;
+    bool isUMatVector() const;
+    bool isMatx() const;
+    bool isVector() const;
+    bool isGpuMat() const;
+    bool isGpuMatVector() const;
+    ~_InputArray();
+
+protected:
+    int flags;
+    void* obj;
+    Size sz;
+
+    void init(int _flags, const void* _obj);
+    void init(int _flags, const void* _obj, Size _sz);
+};
+
+
+/** @brief This type is very similar to InputArray except that it is used for input/output and output function
+parameters.
+
+Just like with InputArray, OpenCV users should not care about OutputArray; they just pass `Mat`,
+`vector<T>` etc. to the functions. The same limitation as for `InputArray` applies here too: *do not
+explicitly create OutputArray instances*.
+
+If you want to make your function polymorphic (i.e. accept different arrays as output parameters),
+it is also not very difficult. Take the sample above as a reference. Note that
+_OutputArray::create() needs to be called before _OutputArray::getMat(). This way you guarantee
+that the output array is properly allocated.
+
+Optional output parameters. If you do not need a certain output array to be computed and returned
+to you, pass cv::noArray(), just like you would in the case of an optional input array. At the
+implementation level, use _OutputArray::needed() to check whether a certain output array needs to
+be computed or not.
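+
+For example, a minimal sketch (the function and parameter names are hypothetical, chosen only to
+illustrate the needed() pattern):
+@code
+    void scalePoints(InputArray _src, OutputArray _dst, OutputArray _norms)
+    {
+        Mat src = _src.getMat();
+        CV_Assert( src.type() == CV_32FC2 && src.cols == 1 );
+        _dst.create(src.size(), src.type());
+        Mat dst = _dst.getMat();
+        src.convertTo(dst, src.type(), 2.0); // scale all points by 2
+        if( _norms.needed() )
+        {
+            // the caller passed a real array rather than cv::noArray()
+            _norms.create(src.rows, 1, CV_32F);
+            Mat norms = _norms.getMat();
+            for( int i = 0; i < src.rows; i++ )
+            {
+                Point2f p = src.at<Point2f>(i);
+                norms.at<float>(i) = std::sqrt(p.x*p.x + p.y*p.y);
+            }
+        }
+    }
+@endcode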
+
+There are several synonyms for OutputArray that are used to assist automatic Python/Java/... wrapper
+generators:
+@code
+    typedef OutputArray OutputArrayOfArrays;
+    typedef OutputArray InputOutputArray;
+    typedef OutputArray InputOutputArrayOfArrays;
+@endcode
+ */
+class CV_EXPORTS _OutputArray : public _InputArray
+{
+public:
+    enum
+    {
+        DEPTH_MASK_8U = 1 << CV_8U,
+        DEPTH_MASK_8S = 1 << CV_8S,
+        DEPTH_MASK_16U = 1 << CV_16U,
+        DEPTH_MASK_16S = 1 << CV_16S,
+        DEPTH_MASK_32S = 1 << CV_32S,
+        DEPTH_MASK_32F = 1 << CV_32F,
+        DEPTH_MASK_64F = 1 << CV_64F,
+        DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
+        DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
+        DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
+    };
+
+    _OutputArray();
+    _OutputArray(int _flags, void* _obj);
+    _OutputArray(Mat& m);
+    _OutputArray(std::vector<Mat>& vec);
+    _OutputArray(cuda::GpuMat& d_mat);
+    _OutputArray(std::vector<cuda::GpuMat>& d_mat);
+    _OutputArray(ogl::Buffer& buf);
+    _OutputArray(cuda::HostMem& cuda_mem);
+    template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
+    _OutputArray(std::vector<bool>& vec);
+    template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
+    _OutputArray(std::vector<std::vector<bool> >&);
+    template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(_Tp* vec, int n);
+    template<typename _Tp, int m, int n> _OutputArray(Matx<_Tp, m, n>& matx);
+    _OutputArray(UMat& m);
+    _OutputArray(std::vector<UMat>& vec);
+
+    _OutputArray(const Mat& m);
+    _OutputArray(const std::vector<Mat>& vec);
+    _OutputArray(const cuda::GpuMat& d_mat);
+    _OutputArray(const std::vector<cuda::GpuMat>& d_mat);
+    _OutputArray(const ogl::Buffer& buf);
+    _OutputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _OutputArray(const cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
+    template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _OutputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _OutputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _OutputArray(const Matx<_Tp, m, n>& matx);
+    _OutputArray(const UMat& m);
+    _OutputArray(const std::vector<UMat>& vec);
+
+#ifdef CV_CXX_STD_ARRAY
+    template<typename _Tp, std::size_t _Nm> _OutputArray(std::array<_Tp, _Nm>& arr);
+    template<typename _Tp, std::size_t _Nm> _OutputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _OutputArray(std::array<Mat, _Nm>& arr);
+    template<std::size_t _Nm> _OutputArray(const std::array<Mat, _Nm>& arr);
+#endif
+
+    bool fixedSize() const;
+    bool fixedType() const;
+    bool needed() const;
+    Mat& getMatRef(int i=-1) const;
+    UMat& getUMatRef(int i=-1) const;
+    cuda::GpuMat& getGpuMatRef() const;
+    std::vector<cuda::GpuMat>& getGpuMatVecRef() const;
+    ogl::Buffer& getOGlBufferRef() const;
+    cuda::HostMem& getHostMemRef() const;
+    void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
+    void createSameSize(const _InputArray& arr, int mtype) const;
+    void release() const;
+    void clear() const;
+    void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;
+
+    void assign(const UMat& u) const;
+    void assign(const Mat& m) const;
+
+    void assign(const std::vector<UMat>& v) const;
+    void assign(const std::vector<Mat>& v) const;
+};
+
+
+class CV_EXPORTS _InputOutputArray : public _OutputArray
+{
+public:
+    _InputOutputArray();
+    _InputOutputArray(int _flags, void* _obj);
+    _InputOutputArray(Mat& m);
+    _InputOutputArray(std::vector<Mat>& vec);
+    _InputOutputArray(cuda::GpuMat& d_mat);
+    _InputOutputArray(ogl::Buffer& buf);
+    _InputOutputArray(cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
+    _InputOutputArray(std::vector<bool>& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(_Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(Matx<_Tp, m, n>& matx);
+    _InputOutputArray(UMat& m);
+    _InputOutputArray(std::vector<UMat>& vec);
+
+    _InputOutputArray(const Mat& m);
+    _InputOutputArray(const std::vector<Mat>& vec);
+    _InputOutputArray(const cuda::GpuMat& d_mat);
+    _InputOutputArray(const std::vector<cuda::GpuMat>& d_mat);
+    _InputOutputArray(const ogl::Buffer& buf);
+    _InputOutputArray(const cuda::HostMem& cuda_mem);
+    template<typename _Tp> _InputOutputArray(const cudev::GpuMat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const std::vector<_Tp>& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<std::vector<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const std::vector<Mat_<_Tp> >& vec);
+    template<typename _Tp> _InputOutputArray(const Mat_<_Tp>& m);
+    template<typename _Tp> _InputOutputArray(const _Tp* vec, int n);
+    template<typename _Tp, int m, int n> _InputOutputArray(const Matx<_Tp, m, n>& matx);
+    _InputOutputArray(const UMat& m);
+    _InputOutputArray(const std::vector<UMat>& vec);
+
+#ifdef CV_CXX_STD_ARRAY
+    template<typename _Tp, std::size_t _Nm> _InputOutputArray(std::array<_Tp, _Nm>& arr);
+    template<typename _Tp, std::size_t _Nm> _InputOutputArray(const std::array<_Tp, _Nm>& arr);
+    template<std::size_t _Nm> _InputOutputArray(std::array<Mat, _Nm>& arr);
+    template<std::size_t _Nm> _InputOutputArray(const std::array<Mat, _Nm>& arr);
+#endif
+
+};
+
+CV__DEBUG_NS_END
+
+typedef const _InputArray& InputArray;
+typedef InputArray InputArrayOfArrays;
+typedef const _OutputArray& OutputArray;
+typedef OutputArray OutputArrayOfArrays;
+typedef const _InputOutputArray& InputOutputArray;
+typedef InputOutputArray InputOutputArrayOfArrays;
+
+CV_EXPORTS InputOutputArray noArray();
+
+/////////////////////////////////// MatAllocator //////////////////////////////////////
+
+//! Usage flags for allocator
+enum UMatUsageFlags
+{
+    USAGE_DEFAULT = 0,
+
+    // buffer allocation policy is platform and usage specific
+    USAGE_ALLOCATE_HOST_MEMORY = 1 << 0,
+    USAGE_ALLOCATE_DEVICE_MEMORY = 1 << 1,
+    USAGE_ALLOCATE_SHARED_MEMORY = 1 << 2, // It is not equal to: USAGE_ALLOCATE_HOST_MEMORY | USAGE_ALLOCATE_DEVICE_MEMORY
+
+    __UMAT_USAGE_FLAGS_32BIT = 0x7fffffff // Binary compatibility hint
+};
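+
+// For example (a minimal sketch; the size and flag choice are illustrative), a user can request
+// shared host/device memory when constructing a UMat:
+//     UMat u(1080, 1920, CV_8UC1, USAGE_ALLOCATE_SHARED_MEMORY);
+// On platforms that support it, this lets the buffer be mapped into the host address space
+// without an extra copy.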
+
+struct CV_EXPORTS UMatData;
+
+/** @brief  Custom array allocator
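+
+Subclasses implement the allocate/deallocate methods below. A custom allocator is typically
+installed globally (a sketch; MyAllocator stands for a hypothetical MatAllocator subclass):
+@code
+    MyAllocator myAllocator;
+    Mat::setDefaultAllocator(&myAllocator);
+@endcode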
+*/
+class CV_EXPORTS MatAllocator
+{
+public:
+    MatAllocator() {}
+    virtual ~MatAllocator() {}
+
+    // let's comment it out for now to detect and fix all the uses of the allocator
+    //virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
+    //                      uchar*& datastart, uchar*& data, size_t* step) = 0;
+    //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
+    virtual UMatData* allocate(int dims, const int* sizes, int type,
+                               void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const = 0;
+    virtual bool allocate(UMatData* data, int accessflags, UMatUsageFlags usageFlags) const = 0;
+    virtual void deallocate(UMatData* data) const = 0;
+    virtual void map(UMatData* data, int accessflags) const;
+    virtual void unmap(UMatData* data) const;
+    virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
+                          const size_t srcofs[], const size_t srcstep[],
+                          const size_t dststep[]) const;
+    virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
+                        const size_t dstofs[], const size_t dststep[],
+                        const size_t srcstep[]) const;
+    virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
+                      const size_t srcofs[], const size_t srcstep[],
+                      const size_t dstofs[], const size_t dststep[], bool sync) const;
+
+    // default implementation returns DummyBufferPoolController
+    virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
+};
+
+
+//////////////////////////////// MatCommaInitializer //////////////////////////////////
+
+/** @brief  Comma-separated Matrix Initializer
+
+ The class instances are usually not created explicitly.
+ Instead, they are created by the "matrix << firstValue" operator.
+
+ The sample below initializes a 2x2 rotation matrix:
+
+ \code
+ double angle = 30, a = cos(angle*CV_PI/180), b = sin(angle*CV_PI/180);
+ Mat R = (Mat_<double>(2,2) << a, -b, b, a);
+ \endcode
+*/
+template<typename _Tp> class MatCommaInitializer_
+{
+public:
+    //! the constructor, created by the "matrix << firstValue" operator, where matrix is cv::Mat
+    MatCommaInitializer_(Mat_<_Tp>* _m);
+    //! the operator that takes the next value and puts it into the matrix
+    template<typename T2> MatCommaInitializer_<_Tp>& operator , (T2 v);
+    //! another form of the conversion operator
+    operator Mat_<_Tp>() const;
+protected:
+    MatIterator_<_Tp> it;
+};
+
+
+/////////////////////////////////////// Mat ///////////////////////////////////////////
+
+// note that UMatData might be allocated together
+// with the matrix data, not as a separate object.
+struct CV_EXPORTS UMatData
+{
+    enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+        DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
+        USER_ALLOCATED=32, DEVICE_MEM_MAPPED=64,
+        ASYNC_CLEANUP=128
+    };
+    UMatData(const MatAllocator* allocator);
+    ~UMatData();
+
+    // provide atomic access to the structure
+    void lock();
+    void unlock();
+
+    bool hostCopyObsolete() const;
+    bool deviceCopyObsolete() const;
+    bool deviceMemMapped() const;
+    bool copyOnMap() const;
+    bool tempUMat() const;
+    bool tempCopiedUMat() const;
+    void markHostCopyObsolete(bool flag);
+    void markDeviceCopyObsolete(bool flag);
+    void markDeviceMemMapped(bool flag);
+
+    const MatAllocator* prevAllocator;
+    const MatAllocator* currAllocator;
+    int urefcount;
+    int refcount;
+    uchar* data;
+    uchar* origdata;
+    size_t size;
+
+    int flags;
+    void* handle;
+    void* userdata;
+    int allocatorFlags_;
+    int mapcount;
+    UMatData* originalUMatData;
+};
+
+
+struct CV_EXPORTS MatSize
+{
+    explicit MatSize(int* _p);
+    int dims() const;
+    Size operator()() const;
+    const int& operator[](int i) const;
+    int& operator[](int i);
+    operator const int*() const;  // TODO OpenCV 4.0: drop this
+    bool operator == (const MatSize& sz) const;
+    bool operator != (const MatSize& sz) const;
+
+    int* p;
+};
+
+struct CV_EXPORTS MatStep
+{
+    MatStep();
+    explicit MatStep(size_t s);
+    const size_t& operator[](int i) const;
+    size_t& operator[](int i);
+    operator size_t() const;
+    MatStep& operator = (size_t s);
+
+    size_t* p;
+    size_t buf[2];
+protected:
+    MatStep& operator = (const MatStep&);
+};
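+
+// For example (a sketch): given int sz[] = {4, 5, 6} and Mat m(3, sz, CV_8U),
+// m.size.dims() == 3, m.size[2] == 6, m.step[0] == 30, m.step[1] == 6, and
+// m.step[2] == m.elemSize() == 1.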
+
+/** @example cout_mat.cpp
+An example demonstrating how cv::Mat can be written to an output stream
+*/
+
+ /** @brief n-dimensional dense array class \anchor CVMat_Details
+
+The class Mat represents an n-dimensional dense numerical single-channel or multi-channel array. It
+can be used to store real or complex-valued vectors and matrices, grayscale or color images, voxel
+volumes, vector fields, point clouds, tensors, histograms (though, very high-dimensional histograms
+may be better stored in a SparseMat ). The data layout of the array `M` is defined by the array
+`M.step[]`, so that the address of element \f$(i_0,...,i_{M.dims-1})\f$, where \f$0\leq i_k<M.size[k]\f$, is
+computed as:
+\f[addr(M_{i_0,...,i_{M.dims-1}}) = M.data + M.step[0]*i_0 + M.step[1]*i_1 + ... + M.step[M.dims-1]*i_{M.dims-1}\f]
+In case of a 2-dimensional array, the above formula is reduced to:
+\f[addr(M_{i,j}) = M.data + M.step[0]*i + M.step[1]*j\f]
+Note that `M.step[i] >= M.step[i+1]` (in fact, `M.step[i] >= M.step[i+1]*M.size[i+1]` ). This means
+that 2-dimensional matrices are stored row-by-row, 3-dimensional matrices are stored plane-by-plane,
+and so on. M.step[M.dims-1] is minimal and always equal to the element size M.elemSize() .
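+
+For instance, the 2D formula above can be written out directly (a sketch; in practice you would use
+Mat::at or Mat::ptr instead of raw pointer arithmetic):
+@code
+    // address of M(i,j) for a 2D matrix M, assuming M.type() == CV_32F
+    uchar* addr = M.data + M.step[0]*i + M.step[1]*j;
+    float value = *(const float*)addr;
+@endcode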
+
+So, the data layout in Mat is fully compatible with CvMat, IplImage, and CvMatND types from OpenCV
+1.x. It is also compatible with the majority of dense array types from the standard toolkits and
+SDKs, such as Numpy (ndarray), Win32 (independent device bitmaps), and others, that is, with any
+array that uses *steps* (or *strides*) to compute the position of a pixel. Due to this
+compatibility, it is possible to make a Mat header for user-allocated data and process it in-place
+using OpenCV functions.
+
+There are many different ways to create a Mat object. The most popular options are listed below:
+
+- Use the create(nrows, ncols, type) method or the similar Mat(nrows, ncols, type[, fillValue])
+constructor. A new array of the specified size and type is allocated. type has the same meaning as
+in the cvCreateMat method. For example, CV_8UC1 means an 8-bit single-channel array, CV_32FC2
+means a 2-channel (complex) floating-point array, and so on.
+@code
+    // make a 7x7 complex matrix filled with 1+3j.
+    Mat M(7,7,CV_32FC2,Scalar(1,3));
+    // and now turn M to a 100x60 15-channel 8-bit matrix.
+    // The old content will be deallocated
+    M.create(100,60,CV_8UC(15));
+@endcode
+As noted in the introduction to this chapter, create() allocates a new array only when the shape
+or type of the current array differs from the specified ones.
+
+- Create a multi-dimensional array:
+@code
+    // create a 100x100x100 8-bit array
+    int sz[] = {100, 100, 100};
+    Mat bigCube(3, sz, CV_8U, Scalar::all(0));
+@endcode
+If you pass the number of dimensions = 1 to the Mat constructor, the created array will still be
+2-dimensional, with the number of columns set to 1. So, Mat::dims is always \>= 2 (it can also be 0
+when the array is empty).
+
+- Use a copy constructor or assignment operator where there can be an array or expression on the
+right side (see below). As noted in the introduction, the array assignment is an O(1) operation
+because it only copies the header and increases the reference counter. The Mat::clone() method can
+be used to get a full (deep) copy of the array when you need it.
+
+- Construct a header for a part of another array. It can be a single row, single column, several
+rows, several columns, rectangular region in the array (called a *minor* in algebra) or a
+diagonal. Such operations are also O(1) because the new header references the same data. You can
+actually modify a part of the array using this feature, for example:
+@code
+    // add the 5th row, multiplied by 3, to the 3rd row
+    M.row(3) = M.row(3) + M.row(5)*3;
+    // now copy the 7th column to the 1st column
+    // M.col(1) = M.col(7); // this will not work
+    Mat M1 = M.col(1);
+    M.col(7).copyTo(M1);
+    // create a new 320x240 image
+    Mat img(Size(320,240),CV_8UC3);
+    // select a ROI
+    Mat roi(img, Rect(10,10,100,100));
+    // fill the ROI with (0,255,0) (which is green in RGB space);
+    // the original 320x240 image will be modified
+    roi = Scalar(0,255,0);
+@endcode
+Due to the additional datastart and dataend members, it is possible to compute a relative
+sub-array position in the main *container* array using locateROI():
+@code
+    Mat A = Mat::eye(10, 10, CV_32S);
+    // extracts A columns, 1 (inclusive) to 3 (exclusive).
+    Mat B = A(Range::all(), Range(1, 3));
+    // extracts B rows, 5 (inclusive) to 9 (exclusive).
+    // that is, C ~ A(Range(5, 9), Range(1, 3))
+    Mat C = B(Range(5, 9), Range::all());
+    Size size; Point ofs;
+    C.locateROI(size, ofs);
+    // size will be (width=10,height=10) and the ofs will be (x=1, y=5)
+@endcode
+As in case of whole matrices, if you need a deep copy, use the `clone()` method of the extracted
+sub-matrices.
+
+- Make a header for user-allocated data. It can be useful to do the following:
+    -# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or
+    a processing module for gstreamer, and so on). For example:
+    @code
+        void process_video_frame(const unsigned char* pixels,
+                                 int width, int height, int step)
+        {
+            Mat img(height, width, CV_8UC3, pixels, step);
+            GaussianBlur(img, img, Size(7,7), 1.5, 1.5);
+        }
+    @endcode
+    -# Quickly initialize small matrices and/or get a super-fast element access.
+    @code
+        double m[3][3] = {{a, b, c}, {d, e, f}, {g, h, i}};
+        Mat M = Mat(3, 3, CV_64F, m).inv();
+    @endcode
+    .
+    Special yet very common cases of this *user-allocated data* scenario are conversions from CvMat
+    and IplImage to Mat. For this purpose, there is the function cv::cvarrToMat, which takes pointers
+    to CvMat or IplImage and an optional flag indicating whether to copy the data or not.
+    @snippet samples/cpp/image.cpp iplimage
+
+- Use MATLAB-style array initializers, zeros(), ones(), eye(), for example:
+@code
+    // create a double-precision identity matrix and add it to M.
+    M += Mat::eye(M.rows, M.cols, CV_64F);
+@endcode
+
+- Use a comma-separated initializer:
+@code
+    // create a 3x3 double-precision identity matrix
+    Mat M = (Mat_<double>(3,3) << 1, 0, 0, 0, 1, 0, 0, 0, 1);
+@endcode
+With this approach, you first call a constructor of the Mat class with the proper parameters, and
+then you just put `<< operator` followed by comma-separated values that can be constants,
+variables, expressions, and so on. Also, note the extra parentheses required to avoid compilation
+errors.
+
+Once the array is created, it is automatically managed via a reference-counting mechanism. If the
+array header is built on top of user-allocated data, you should handle the data by yourself. The
+array data is deallocated when no one points to it. If you want to release the data pointed to by an
+array header before the array destructor is called, use Mat::release().
+
+The next important thing to learn about the array class is element access. This manual already
+described how to compute an address of each array element. Normally, you are not required to use the
+formula directly in the code. If you know the array element type (which can be retrieved using the
+method Mat::type() ), you can access the element \f$M_{ij}\f$ of a 2-dimensional array as:
+@code
+    M.at<double>(i,j) += 1.;
+@endcode
+assuming that `M` is a double-precision floating-point array. There are several variants of the
+method `at` for different numbers of dimensions.
+
+If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to
+the row first, and then just use the plain C operator [] :
+@code
+    // compute sum of positive matrix elements
+    // (assuming that M is a double-precision matrix)
+    double sum=0;
+    for(int i = 0; i < M.rows; i++)
+    {
+        const double* Mi = M.ptr<double>(i);
+        for(int j = 0; j < M.cols; j++)
+            sum += std::max(Mi[j], 0.);
+    }
+@endcode
+Some operations, like the one above, do not actually depend on the array shape. They just process
+elements of an array one by one (or elements from multiple arrays that have the same coordinates,
+for example, array addition). Such operations are called *element-wise*. It makes sense to check
+whether all the input/output arrays are continuous, that is, have no gaps at the end of each row. If
+so, process them as a single long row:
+@code
+    // compute the sum of positive matrix elements, optimized variant
+    double sum=0;
+    int cols = M.cols, rows = M.rows;
+    if(M.isContinuous())
+    {
+        cols *= rows;
+        rows = 1;
+    }
+    for(int i = 0; i < rows; i++)
+    {
+        const double* Mi = M.ptr<double>(i);
+        for(int j = 0; j < cols; j++)
+            sum += std::max(Mi[j], 0.);
+    }
+@endcode
+In the case of a continuous matrix, the outer loop body is executed just once. So, the overhead is
+smaller, which is especially noticeable in the case of small matrices.
+
+Finally, there are STL-style iterators that are smart enough to skip gaps between successive rows:
+@code
+    // compute sum of positive matrix elements, iterator-based variant
+    double sum=0;
+    MatConstIterator_<double> it = M.begin<double>(), it_end = M.end<double>();
+    for(; it != it_end; ++it)
+        sum += std::max(*it, 0.);
+@endcode
+The matrix iterators are random-access iterators, so they can be passed to any STL algorithm,
+including std::sort().
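+
+For example (a sketch, assuming M is a single-channel CV_64F matrix):
+@code
+    // sort all the elements of M in ascending order, in place
+    std::sort(M.begin<double>(), M.end<double>());
+@endcode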
+
+@note For matrix expressions and arithmetic, see MatExpr
+*/
+class CV_EXPORTS Mat
+{
+public:
+    /**
+    These are various constructors that form a matrix. As noted in the AutomaticAllocation section,
+    often the default constructor is enough, and the proper matrix will be allocated by an OpenCV
+    function. The constructed matrix can further be assigned to another matrix or matrix expression,
+    or it can be allocated with Mat::create . In the former case, the old content is de-referenced.
+     */
+    Mat();
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(int rows, int cols, int type);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+      */
+    Mat(Size size, int type);
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(int rows, int cols, int type, const Scalar& s);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+      */
+    Mat(Size size, int type, const Scalar& s);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(int ndims, const int* sizes, int type);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    */
+    Mat(const std::vector<int>& sizes, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(int ndims, const int* sizes, int type, const Scalar& s);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param s An optional value to initialize each matrix element with. To set all the matrix elements to
+    the particular value after the construction, use the assignment operator
+    Mat::operator=(const Scalar& value) .
+    */
+    Mat(const std::vector<int>& sizes, int type, const Scalar& s);
+
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    */
+    Mat(const Mat& m);
+
+    /** @overload
+    @param rows Number of rows in a 2D array.
+    @param cols Number of columns in a 2D array.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Number of bytes each matrix row occupies. The value should include the padding bytes at
+    the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed
+    and the actual step is calculated as cols*elemSize(). See Mat::elemSize.
+    */
+    Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP);
+
+    /** @overload
+    @param size 2D array size: Size(cols, rows) . In the Size() constructor, the number of rows and the
+    number of columns go in the reverse order.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param step Number of bytes each matrix row occupies. The value should include the padding bytes at
+    the end of each row, if any. If the parameter is missing (set to AUTO_STEP ), no padding is assumed
+    and the actual step is calculated as cols*elemSize(). See Mat::elemSize.
+    */
+    Mat(Size size, int type, void* data, size_t step=AUTO_STEP);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    Mat(int ndims, const int* sizes, int type, void* data, const size_t* steps=0);
+
+    /** @overload
+    @param sizes Array of integers specifying an n-dimensional array shape.
+    @param type Array type. Use CV_8UC1, ..., CV_64FC4 to create 1-4 channel matrices, or
+    CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+    @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+    allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+    data, which means that no data is copied. This operation is very efficient and can be used to
+    process external data using OpenCV functions. The external data is not automatically deallocated, so
+    you should take care of it.
+    @param steps Array of ndims-1 steps in case of a multi-dimensional array (the last step is always
+    set to the element size). If not specified, the matrix is assumed to be continuous.
+    */
+    Mat(const std::vector<int>& sizes, int type, void* data, const size_t* steps=0);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param rowRange Range of the m rows to take. As usual, the range start is inclusive and the range
+    end is exclusive. Use Range::all() to take all the rows.
+    @param colRange Range of the m columns to take. Use Range::all() to take all the columns.
+    */
+    Mat(const Mat& m, const Range& rowRange, const Range& colRange=Range::all());
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param roi Region of interest.
+    */
+    Mat(const Mat& m, const Rect& roi);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param ranges Array of selected ranges of m along each dimensionality.
+    */
+    Mat(const Mat& m, const Range* ranges);
+
+    /** @overload
+    @param m Array that (as a whole or partly) is assigned to the constructed matrix. No data is copied
+    by these constructors. Instead, the header pointing to m data or its sub-array is constructed and
+    associated with it. The reference counter, if any, is incremented. So, when you modify the matrix
+    formed using such a constructor, you also modify the corresponding elements of m . If you want to
+    have an independent copy of the sub-array, use Mat::clone() .
+    @param ranges Array of selected ranges of m along each dimensionality.
+    */
+    Mat(const Mat& m, const std::vector<Range>& ranges);
+
+    /** @overload
+    @param vec STL vector whose elements form the matrix. The matrix has a single column and the number
+    of rows equal to the number of vector elements. Type of the matrix matches the type of vector
+    elements. The constructor can handle arbitrary types, for which there is a properly declared
+    DataType . This means that the vector elements must be primitive numbers or uni-type numerical
+    tuples of numbers. Mixed-type structures are not supported. The corresponding constructor is
+    explicit. Since STL vectors are not automatically converted to Mat instances, you should write
+    Mat(vec) explicitly. Unless you copy the data into the matrix ( copyData=true ), no new elements
+    should be added to the vector, because doing so can potentially trigger vector data reallocation,
+    thus invalidating the matrix data pointer.
+    @param copyData Flag to specify whether the underlying data of the STL vector should be copied
+    to (true) or shared with (false) the newly constructed matrix. When the data is copied, the
+    allocated buffer is managed using the Mat reference counting mechanism. While the data is shared,
+    the reference counter is NULL, and you should not deallocate the data until the matrix is
+    destructed.
+    */
+    template<typename _Tp> explicit Mat(const std::vector<_Tp>& vec, bool copyData=false);
+
+#ifdef CV_CXX11
+    /** @overload
+    */
+    template<typename _Tp, typename = typename std::enable_if<std::is_arithmetic<_Tp>::value>::type>
+    explicit Mat(const std::initializer_list<_Tp> list);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list);
+#endif
+
+#ifdef CV_CXX_STD_ARRAY
+    /** @overload
+    */
+    template<typename _Tp, size_t _Nm> explicit Mat(const std::array<_Tp, _Nm>& arr, bool copyData=false);
+#endif
+
+    /** @overload
+    */
+    template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const Point_<_Tp>& pt, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
+
+    /** @overload
+    */
+    template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! download data from GpuMat
+    explicit Mat(const cuda::GpuMat& m);
+
+    //! destructor - calls release()
+    ~Mat();
+
+    /** @brief assignment operators
+
+    These are the available assignment operators. Since they are all very different, make sure to read
+    the operator parameters description.
+    @param m Assigned, right-hand-side matrix. Matrix assignment is an O(1) operation. This means that
+    no data is copied but the data is shared and the reference counter, if any, is incremented. Before
+    assigning new data, the old data is de-referenced via Mat::release .
+     */
+    Mat& operator = (const Mat& m);
+
+    /** @overload
+    @param expr Assigned matrix expression object. As opposed to the first form of the assignment
+    operation, the second form can reuse an already allocated matrix if it has the right size and type
+    to fit the matrix expression result. It is automatically handled by the real function that the
+    matrix expression is expanded to. For example, C=A+B is expanded to add(A, B, C), and add takes
+    care of automatic C reallocation.
+    */
+    Mat& operator = (const MatExpr& expr);
+
+    //! retrieve UMat from Mat
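+    //! For example (a sketch): UMat u = m.getUMat(ACCESS_READ);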
+    UMat getUMat(int accessFlags, UMatUsageFlags usageFlags = USAGE_DEFAULT) const;
+
+    /** @brief Creates a matrix header for the specified matrix row.
+
+    The method makes a new header for the specified matrix row and returns it. This is an O(1)
+    operation, regardless of the matrix size. The underlying data of the new matrix is shared with the
+    original matrix. Here is the example of one of the classical basic matrix processing operations,
+    axpy, used by LU and many other algorithms:
+    @code
+        inline void matrix_axpy(Mat& A, int i, int j, double alpha)
+        {
+            A.row(i) += A.row(j)*alpha;
+        }
+    @endcode
+    @note In the current implementation, the following code does not work as expected:
+    @code
+        Mat A;
+        ...
+        A.row(i) = A.row(j); // will not work
+    @endcode
+    This happens because A.row(i) forms a temporary header that is further assigned to another header.
+    Remember that each of these operations is O(1), that is, no data is copied. Thus, the above
+    assignment does not do what you might expect: the j-th row is not copied to the i-th row. To
+    achieve that, you should either turn this simple assignment into an expression or use the
+    Mat::copyTo method:
+    @code
+        Mat A;
+        ...
+        // works, but looks a bit obscure.
+        A.row(i) = A.row(j) + 0;
+        // this is a bit longer, but the recommended method.
+        A.row(j).copyTo(A.row(i));
+    @endcode
+    @param y A 0-based row index.
+     */
+    Mat row(int y) const;
+
+    /** @brief Creates a matrix header for the specified matrix column.
+
+    The method makes a new header for the specified matrix column and returns it. This is an O(1)
+    operation, regardless of the matrix size. The underlying data of the new matrix is shared with the
+    original matrix. See also the Mat::row description.
+    @param x A 0-based column index.
+     */
+    Mat col(int x) const;
+
+    /** @brief Creates a matrix header for the specified row span.
+
+    The method makes a new header for the specified row span of the matrix. Similarly to Mat::row and
+    Mat::col , this is an O(1) operation.
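+    For example (a sketch):
+    @code
+        Mat A = Mat::zeros(10, 10, CV_32F);
+        Mat top = A.rowRange(0, 3); // header for rows 0..2; shares data with A
+        top = Scalar(1);            // the first three rows of A become 1
+    @endcode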
+    @param startrow An inclusive 0-based start index of the row span.
+    @param endrow An exclusive 0-based ending index of the row span.
+     */
+    Mat rowRange(int startrow, int endrow) const;
+
+    /** @overload
+    @param r Range structure containing both the start and the end indices.
+    */
+    Mat rowRange(const Range& r) const;
+
+    /** @brief Creates a matrix header for the specified column span.
+
+    The method makes a new header for the specified column span of the matrix. Similarly to Mat::row and
+    Mat::col , this is an O(1) operation.
+    @param startcol An inclusive 0-based start index of the column span.
+    @param endcol An exclusive 0-based ending index of the column span.
+     */
+    Mat colRange(int startcol, int endcol) const;
+
+    /** @overload
+    @param r Range structure containing both the start and the end indices.
+    */
+    Mat colRange(const Range& r) const;
+
+    /** @brief Extracts a diagonal from a matrix
+
+    The method makes a new header for the specified matrix diagonal. The new matrix is represented as a
+    single-column matrix. Similarly to Mat::row and Mat::col, this is an O(1) operation.
+    @param d index of the diagonal, with the following values:
+    - `d=0` is the main diagonal.
+    - `d<0` is a diagonal from the lower half. For example, d=-1 means the diagonal is set
+      immediately below the main one.
+    - `d>0` is a diagonal from the upper half. For example, d=1 means the diagonal is set
+      immediately above the main one.
+    For example:
+    @code
+        Mat m = (Mat_<int>(3,3) <<
+                    1,2,3,
+                    4,5,6,
+                    7,8,9);
+        Mat d0 = m.diag(0);
+        Mat d1 = m.diag(1);
+        Mat d_1 = m.diag(-1);
+    @endcode
+    The resulting matrices are
+    @code
+     d0 =
+       [1;
+        5;
+        9]
+     d1 =
+       [2;
+        6]
+     d_1 =
+       [4;
+        8]
+    @endcode
+     */
+    Mat diag(int d=0) const;
+
+    /** @brief creates a diagonal matrix
+
+    The method creates a square diagonal matrix from the specified main diagonal.
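+    For example (a sketch):
+    @code
+        Mat d = (Mat_<int>(3, 1) << 1, 2, 3);
+        Mat D = Mat::diag(d); // 3x3 matrix with 1, 2, 3 on the main diagonal
+    @endcode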
+    @param d One-dimensional matrix that represents the main diagonal.
+     */
+    static Mat diag(const Mat& d);
+
+    /** @brief Creates a full copy of the array and the underlying data.
+
+    The method creates a full copy of the array. The original step[] is not taken into account. So, the
+    array copy is a continuous array occupying total()*elemSize() bytes.
+     */
+    Mat clone() const;
+
+    /** @brief Copies the matrix to another one.
+
+    The method copies the matrix data to another matrix. Before copying the data, the method invokes :
+    @code
+        m.create(this->size(), this->type());
+    @endcode
+    so that the destination matrix is reallocated if needed. While m.copyTo(m); works flawlessly, the
+    function does not handle the case of a partial overlap between the source and the destination
+    matrices.
+
+    When the operation mask is specified, if the Mat::create call shown above reallocates the matrix,
+    the newly allocated matrix is initialized with all zeros before copying the data.
+    @param m Destination matrix. If it does not have a proper size or type before the operation, it is
+    reallocated.
+     */
+    void copyTo( OutputArray m ) const;
+
+    /** @overload
+    @param m Destination matrix. If it does not have a proper size or type before the operation, it is
+    reallocated.
+    @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix
+    elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels.
+    */
+    void copyTo( OutputArray m, InputArray mask ) const;
+
+    /** @brief Converts an array to another data type with optional scaling.
+
+    The method converts source pixel values to the target data type. saturate_cast\<\> is applied at
+    the end to avoid possible overflows:
+
+    \f[m(x,y) = saturate\_cast<rType>( \alpha (*this)(x,y) + \beta )\f]
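+    For example (a sketch), to map a CV_16U image known to hold 12-bit data onto the full CV_8U
+    range:
+    @code
+        Mat img16(480, 640, CV_16U, Scalar(1024)); // 12-bit data in a 16-bit container
+        Mat img8;
+        img16.convertTo(img8, CV_8U, 255.0/4095.0);
+    @endcode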
+    @param m output matrix; if it does not have a proper size or type before the operation, it is
+    reallocated.
+    @param rtype desired output matrix type or, rather, the depth, since the number of channels is the
+    same as in the input; if rtype is negative, the output matrix will have the same type as the input.
+    @param alpha optional scale factor.
+    @param beta optional delta added to the scaled values.
+     */
+    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+    /** @brief Provides a functional form of convertTo.
+
+    This is an internally used method called by the @ref MatrixExpressions engine.
+    @param m Destination array.
+    @param type Desired destination array depth (or -1 if it should be the same as the source type).
+     */
+    void assignTo( Mat& m, int type=-1 ) const;
+
+    /** @brief Sets all or some of the array elements to the specified value.
+    @param s Assigned scalar converted to the actual array type.
+    */
+    Mat& operator = (const Scalar& s);
+
+    /** @brief Sets all or some of the array elements to the specified value.
+
+    This is an advanced variant of the Mat::operator=(const Scalar& s) operator.
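+    For example (a sketch, assuming img is CV_8UC3 and mask is CV_8U of the same size):
+    @code
+        img.setTo(Scalar(0, 0, 255), mask); // paint the masked pixels red (BGR order)
+    @endcode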
+    @param value Assigned scalar converted to the actual array type.
+    @param mask Operation mask of the same size as \*this. Its non-zero elements indicate which matrix
+    elements need to be copied. The mask has to be of type CV_8U and can have 1 or multiple channels
+     */
+    Mat& setTo(InputArray value, InputArray mask=noArray());
+
+    /** @brief Changes the shape and/or the number of channels of a 2D matrix without copying the data.
+
+    The method makes a new matrix header for \*this elements. The new matrix may have a different size
+    and/or different number of channels. Any combination is possible if:
+    -   No extra elements are included into the new matrix and no elements are excluded. Consequently,
+        the product rows\*cols\*channels() must stay the same after the transformation.
+    -   No data is copied. That is, this is an O(1) operation. Consequently, if you change the number of
+        rows, or the operation changes the row indices of elements in some other way, the matrix must be
+        continuous. See Mat::isContinuous .
+
+    For example, if there is a set of 3D points stored as an STL vector, and you want to represent the
+    points as a 3xN matrix, do the following:
+    @code
+        std::vector<Point3f> vec;
+        ...
+        Mat pointMat = Mat(vec). // convert vector to Mat, O(1) operation
+                          reshape(1). // make Nx3 1-channel matrix out of Nx1 3-channel.
+                                      // Also, an O(1) operation
+                             t(); // finally, transpose the Nx3 matrix.
+                                  // This involves copying all the elements
+    @endcode
+    @param cn New number of channels. If the parameter is 0, the number of channels remains the same.
+    @param rows New number of rows. If the parameter is 0, the number of rows remains the same.
+     */
+    Mat reshape(int cn, int rows=0) const;
+
+    /** @overload */
+    Mat reshape(int cn, int newndims, const int* newsz) const;
+
+    /** @overload */
+    Mat reshape(int cn, const std::vector<int>& newshape) const;
+
+    /** @brief Transposes a matrix.
+
+    The method performs matrix transposition by means of matrix expressions. It does not perform the
+    actual transposition but returns a temporary matrix transposition object that can be further used as
+    a part of more complex matrix expressions or can be assigned to a matrix:
+    @code
+        Mat A1 = A + Mat::eye(A.size(), A.type())*lambda;
+        Mat C = A1.t()*A1; // compute (A + lambda*I)^t * (A + lambda*I)
+    @endcode
+     */
+    MatExpr t() const;
+
+    /** @brief Inverts a matrix.
+
+    The method performs a matrix inversion by means of matrix expressions. This means that a temporary
+    matrix inversion object is returned by the method and can be used further as a part of more complex
+    matrix expressions or can be assigned to a matrix.
+    @param method Matrix inversion method. One of cv::DecompTypes
+     */
+    MatExpr inv(int method=DECOMP_LU) const;
+
+    /** @brief Performs an element-wise multiplication or division of the two matrices.
+
+    The method returns a temporary object encoding per-element array multiplication, with optional
+    scale. Note that this is not matrix multiplication, which corresponds to the simpler "\*" operator.
+
+    Example:
+    @code
+        Mat C = A.mul(5/B); // equivalent to divide(A, B, C, 5)
+    @endcode
+    @param m Another array of the same type and the same size as \*this, or a matrix expression.
+    @param scale Optional scale factor.
+     */
+    MatExpr mul(InputArray m, double scale=1) const;
+
+    /** @brief Computes a cross-product of two 3-element vectors.
+
+    The method computes a cross-product of two 3-element vectors. The vectors must be 3-element
+    floating-point vectors of the same shape and size. The result is another 3-element vector of the
+    same shape and type as operands.
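+    For example (a sketch):
+    @code
+        Mat a = (Mat_<float>(1, 3) << 1, 0, 0);
+        Mat b = (Mat_<float>(1, 3) << 0, 1, 0);
+        Mat c = a.cross(b); // (0, 0, 1)
+    @endcode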
+    @param m Another cross-product operand.
+     */
+    Mat cross(InputArray m) const;
+
+    /** @brief Computes a dot-product of two vectors.
+
+    The method computes a dot-product of two matrices. If the matrices are not single-column or
+    single-row vectors, the top-to-bottom left-to-right scan ordering is used to treat them as 1D
+    vectors. The vectors must have the same size and type. If the matrices have more than one channel,
+    the dot products from all the channels are summed together.
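+    For example (a sketch):
+    @code
+        Mat a = (Mat_<float>(1, 3) << 1, 2, 3);
+        Mat b = (Mat_<float>(1, 3) << 4, 5, 6);
+        double d = a.dot(b); // 1*4 + 2*5 + 3*6 = 32
+    @endcode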
+    @param m another dot-product operand.
+     */
+    double dot(InputArray m) const;
+
+    /** @brief Returns a zero array of the specified size and type.
+
+    The method returns a Matlab-style zero array initializer. It can be used to quickly form a constant
+    array as a function parameter, part of a matrix expression, or as a matrix initializer:
+    @code
+        Mat A;
+        A = Mat::zeros(3, 3, CV_32F);
+    @endcode
+    In the example above, a new matrix is allocated only if A is not a 3x3 floating-point matrix.
+    Otherwise, the existing matrix A is filled with zeros.
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr zeros(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative to the matrix size specification Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr zeros(Size size, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sz Array of integers specifying the array shape.
+    @param type Created matrix type.
+    */
+    static MatExpr zeros(int ndims, const int* sz, int type);
+
+    /** @brief Returns an array of all 1's of the specified size and type.
+
+    The method returns a Matlab-style 1's array initializer, similarly to Mat::zeros. Note that using
+    this method you can initialize an array with an arbitrary value, using the following Matlab idiom:
+    @code
+        Mat A = Mat::ones(100, 100, CV_8U)*3; // make 100x100 matrix filled with 3.
+    @endcode
+    The above operation does not form a 100x100 matrix of 1's and then multiply it by 3. Instead, it
+    just remembers the scale factor (3 in this case) and uses it when actually invoking the matrix
+    initializer.
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr ones(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative to the matrix size specification Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr ones(Size size, int type);
+
+    /** @overload
+    @param ndims Array dimensionality.
+    @param sz Array of integers specifying the array shape.
+    @param type Created matrix type.
+    */
+    static MatExpr ones(int ndims, const int* sz, int type);
+
+    /** @brief Returns an identity matrix of the specified size and type.
+
+    The method returns a Matlab-style identity matrix initializer, similar to Mat::zeros. As with
+    Mat::ones, you can use a scale operation to create a scaled identity matrix efficiently:
+    @code
+        // make a 4x4 diagonal matrix with 0.1's on the diagonal.
+        Mat A = Mat::eye(4, 4, CV_32F)*0.1;
+    @endcode
+    @param rows Number of rows.
+    @param cols Number of columns.
+    @param type Created matrix type.
+     */
+    static MatExpr eye(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative matrix size specification as Size(cols, rows) .
+    @param type Created matrix type.
+    */
+    static MatExpr eye(Size size, int type);
+
+    /** @brief Allocates new array data if needed.
+
+    This is one of the key Mat methods. Most new-style OpenCV functions and methods that produce arrays
+    call this method for each output array. The method uses the following algorithm:
+
+    -# If the current array shape and the type match the new ones, return immediately. Otherwise,
+       de-reference the previous data by calling Mat::release.
+    -# Initialize the new header.
+    -# Allocate the new data of total()\*elemSize() bytes.
+    -# Allocate a new reference counter associated with the data and set it to 1.
+
+    Such a scheme makes the memory management robust and efficient at the same time, and it saves you
+    extra typing. This means that usually there is no need to explicitly allocate output arrays.
+    That is, instead of writing:
+    @code
+        Mat color;
+        ...
+        Mat gray(color.rows, color.cols, color.depth());
+        cvtColor(color, gray, COLOR_BGR2GRAY);
+    @endcode
+    you can simply write:
+    @code
+        Mat color;
+        ...
+        Mat gray;
+        cvtColor(color, gray, COLOR_BGR2GRAY);
+    @endcode
+    because cvtColor, like most OpenCV functions, calls Mat::create() for the output array
+    internally.
+    @param rows New number of rows.
+    @param cols New number of columns.
+    @param type New matrix type.
+     */
+    void create(int rows, int cols, int type);
+
+    /** @overload
+    @param size Alternative new matrix size specification: Size(cols, rows)
+    @param type New matrix type.
+    */
+    void create(Size size, int type);
+
+    /** @overload
+    @param ndims New array dimensionality.
+    @param sizes Array of integers specifying a new array shape.
+    @param type New matrix type.
+    */
+    void create(int ndims, const int* sizes, int type);
+
+    /** @overload
+    @param sizes Array of integers specifying a new array shape.
+    @param type New matrix type.
+    */
+    void create(const std::vector<int>& sizes, int type);
+
+    /** @brief Increments the reference counter.
+
+    The method increments the reference counter associated with the matrix data. If the matrix header
+    points to an external data set (see Mat::Mat ), the reference counter is NULL, and the method has no
+    effect in this case. Normally, to avoid memory leaks, the method should not be called explicitly. It
+    is called implicitly by the matrix assignment operator. The reference counter increment is an atomic
+    operation on the platforms that support it. Thus, it is safe to operate on the same matrices
+    asynchronously in different threads.
+     */
+    void addref();
+
+    /** @brief Decrements the reference counter and deallocates the matrix if needed.
+
+    The method decrements the reference counter associated with the matrix data. When the reference
+    counter reaches 0, the matrix data is deallocated and the data and the reference counter pointers
+    are set to NULL's. If the matrix header points to an external data set (see Mat::Mat ), the
+    reference counter is NULL, and the method has no effect in this case.
+
+    This method can be called manually to force the matrix data deallocation. But since this method is
+    automatically called in the destructor, or by any other method that changes the data pointer, it is
+    usually not needed. The reference counter decrement and check for 0 is an atomic operation on the
+    platforms that support it. Thus, it is safe to operate on the same matrices asynchronously in
+    different threads.
+     */
+    void release();
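+    /* For illustration, a small sketch of the reference-counting behavior:
+    @code
+        Mat a(3, 3, CV_8U);   // allocates data; the reference counter is 1
+        Mat b = a;            // header copy only; the counter becomes 2
+        b.release();          // b is emptied; the data survives through a
+        a.release();          // the counter reaches 0; the data is deallocated
+    @endcode
+    */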
+
+    //! internal use function, consider to use 'release' method instead; deallocates the matrix data
+    void deallocate();
+    //! internal use function; properly re-allocates _size, _step arrays
+    void copySize(const Mat& m);
+
+    /** @brief Reserves space for the certain number of rows.
+
+    The method reserves space for sz rows. If the matrix already has enough space to store sz rows,
+    nothing happens. If the matrix is reallocated, the first Mat::rows rows are preserved. The method
+    emulates the corresponding method of the STL vector class.
+    @param sz Number of rows.
+     */
+    void reserve(size_t sz);
+
+    /** @brief Reserves space for the certain number of bytes.
+
+    The method reserves space for sz bytes. If the matrix already has enough space to store sz bytes,
+    nothing happens. If the matrix has to be reallocated, its previous content could be lost.
+    @param sz Number of bytes.
+    */
+    void reserveBuffer(size_t sz);
+
+    /** @brief Changes the number of matrix rows.
+
+    The methods change the number of matrix rows. If the matrix is reallocated, the first
+    min(Mat::rows, sz) rows are preserved. The methods emulate the corresponding methods of the STL
+    vector class.
+    @param sz New number of rows.
+     */
+    void resize(size_t sz);
+
+    /** @overload
+    @param sz New number of rows.
+    @param s Value assigned to the newly added elements.
+     */
+    void resize(size_t sz, const Scalar& s);
+
+    //! internal function
+    void push_back_(const void* elem);
+
+    /** @brief Adds elements to the bottom of the matrix.
+
+    The methods add one or more elements to the bottom of the matrix. They emulate the corresponding
+    method of the STL vector class. When elem is Mat , its type and the number of columns must be the
+    same as in the container matrix.
+    @param elem Added element(s).
+     */
+    template<typename _Tp> void push_back(const _Tp& elem);
+
+    /** @overload
+    @param elem Added element(s).
+    */
+    template<typename _Tp> void push_back(const Mat_<_Tp>& elem);
+
+    /** @overload
+    @param elem Added element(s).
+    */
+    template<typename _Tp> void push_back(const std::vector<_Tp>& elem);
+
+    /** @overload
+    @param m Added row(s).
+    */
+    void push_back(const Mat& m);
+
+    /** @brief Removes elements from the bottom of the matrix.
+
+    The method removes one or more rows from the bottom of the matrix.
+    @param nelems Number of removed rows. If it is greater than the total number of rows, an exception
+    is thrown.
+     */
+    void pop_back(size_t nelems=1);
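+    /* For illustration, a small sketch of growing and shrinking a matrix row-wise:
+    @code
+        Mat samples(0, 3, CV_32F);                  // empty matrix with 3 columns
+        Mat row = (Mat_<float>(1, 3) << 1, 2, 3);
+        samples.push_back(row);                     // samples is now 1x3
+        samples.pop_back();                         // and back to 0x3
+    @endcode
+    */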
+
+    /** @brief Locates the matrix header within a parent matrix.
+
+    After you extract a submatrix from a matrix using Mat::row, Mat::col, Mat::rowRange,
+    Mat::colRange, and others, the resultant submatrix points just to a part of the original big
+    matrix. However, each submatrix contains information (represented by the datastart and dataend
+    fields) that helps reconstruct the original matrix size and the position of the extracted
+    submatrix within the original matrix. The method locateROI does exactly that.
+    @param wholeSize Output parameter that contains the size of the whole matrix containing *this*
+    as a part.
+    @param ofs Output parameter that contains an offset of *this* inside the whole matrix.
+     */
+    void locateROI( Size& wholeSize, Point& ofs ) const;
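+    /* For illustration, a small sketch (submatrix position chosen arbitrarily):
+    @code
+        Mat A(10, 10, CV_8U);
+        Mat roi = A(Rect(2, 3, 4, 4));
+        Size wholeSize;
+        Point ofs;
+        roi.locateROI(wholeSize, ofs);  // wholeSize == Size(10, 10), ofs == Point(2, 3)
+    @endcode
+    */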
+
+    /** @brief Adjusts a submatrix size and position within the parent matrix.
+
+    The method is complementary to Mat::locateROI . The typical use of these functions is to determine
+    the submatrix position within the parent matrix and then shift the position somehow. Typically, it
+    can be required for filtering operations when pixels outside of the ROI should be taken into
+    account. When all the method parameters are positive, the ROI needs to grow in all directions by the
+    specified amount, for example:
+    @code
+        A.adjustROI(2, 2, 2, 2);
+    @endcode
+    In this example, the matrix size is increased by 4 elements in each direction. The matrix is shifted
+    by 2 elements to the left and 2 elements up, which brings in all the necessary pixels for the
+    filtering with the 5x5 kernel.
+
+    adjustROI forces the adjusted ROI to stay inside the parent matrix, that is, the boundaries of the
+    adjusted ROI are constrained by the boundaries of the parent matrix. For example, if the submatrix A
+    is located in the first row of a parent matrix and you call A.adjustROI(2, 2, 2, 2), then A will not
+    be increased in the upward direction.
+
+    The function is used internally by the OpenCV filtering functions, like filter2D , morphological
+    operations, and so on.
+    @param dtop Shift of the top submatrix boundary upwards.
+    @param dbottom Shift of the bottom submatrix boundary downwards.
+    @param dleft Shift of the left submatrix boundary to the left.
+    @param dright Shift of the right submatrix boundary to the right.
+    @sa copyMakeBorder
+     */
+    Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+
+    /** @brief Extracts a rectangular submatrix.
+
+    The operators make a new header for the specified sub-array of \*this . They are the most
+    generalized forms of Mat::row, Mat::col, Mat::rowRange, and Mat::colRange . For example,
+    `A(Range(0, 10), Range::all())` is equivalent to `A.rowRange(0, 10)`. Similarly to all of the above,
+    the operators are O(1) operations, that is, no matrix data is copied.
+    @param rowRange Start and end row of the extracted submatrix. The upper boundary is not included. To
+    select all the rows, use Range::all().
+    @param colRange Start and end column of the extracted submatrix. The upper boundary is not included.
+    To select all the columns, use Range::all().
+     */
+    Mat operator()( Range rowRange, Range colRange ) const;
+
+    /** @overload
+    @param roi Extracted submatrix specified as a rectangle.
+    */
+    Mat operator()( const Rect& roi ) const;
+
+    /** @overload
+    @param ranges Array of selected ranges along each array dimension.
+    */
+    Mat operator()( const Range* ranges ) const;
+
+    /** @overload
+    @param ranges Array of selected ranges along each array dimension.
+    */
+    Mat operator()(const std::vector<Range>& ranges) const;
+
+    // //! converts header to CvMat; no data is copied
+    // operator CvMat() const;
+    // //! converts header to CvMatND; no data is copied
+    // operator CvMatND() const;
+    // //! converts header to IplImage; no data is copied
+    // operator IplImage() const;
+
+    template<typename _Tp> operator std::vector<_Tp>() const;
+    template<typename _Tp, int n> operator Vec<_Tp, n>() const;
+    template<typename _Tp, int m, int n> operator Matx<_Tp, m, n>() const;
+
+#ifdef CV_CXX_STD_ARRAY
+    template<typename _Tp, std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
+#endif
+
+    /** @brief Reports whether the matrix is continuous or not.
+
+    The method returns true if the matrix elements are stored continuously without gaps at the end of
+    each row. Otherwise, it returns false. Obviously, 1x1 or 1xN matrices are always continuous.
+    Matrices created with Mat::create are always continuous. But if you extract a part of the matrix
+    using Mat::col, Mat::diag, and so on, or construct a matrix header for externally allocated data,
+    such matrices may no longer have this property.
+
+    The continuity flag is stored as a bit in the Mat::flags field and is computed automatically when
+    you construct a matrix header. Thus, the continuity check is a very fast operation, though
+    theoretically it could be done as follows:
+    @code
+        // alternative implementation of Mat::isContinuous()
+        bool myCheckMatContinuity(const Mat& m)
+        {
+            //return (m.flags & Mat::CONTINUOUS_FLAG) != 0;
+            return m.rows == 1 || m.step == m.cols*m.elemSize();
+        }
+    @endcode
+    The method is used in quite a few OpenCV functions. The point is that element-wise operations
+    (such as arithmetic and logical operations, math functions, alpha blending, color space
+    transformations, and others) do not depend on the image geometry. Thus, if all the input and output
+    arrays are continuous, the functions can process them as very long single-row vectors. The example
+    below illustrates how an alpha-blending function can be implemented:
+    @code
+        template<typename T>
+        void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+        {
+            const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                        inv_scale = 1.f/alpha_scale;
+
+            CV_Assert( src1.type() == src2.type() &&
+                       src1.type() == CV_MAKETYPE(traits::Depth<T>::value, 4) &&
+                       src1.size() == src2.size());
+            Size size = src1.size();
+            dst.create(size, src1.type());
+
+            // here is the idiom: check the arrays for continuity and,
+            // if this is the case,
+            // treat the arrays as 1D vectors
+            if( src1.isContinuous() && src2.isContinuous() && dst.isContinuous() )
+            {
+                size.width *= size.height;
+                size.height = 1;
+            }
+            size.width *= 4;
+
+            for( int i = 0; i < size.height; i++ )
+            {
+                // when the arrays are continuous,
+                // the outer loop is executed only once
+                const T* ptr1 = src1.ptr<T>(i);
+                const T* ptr2 = src2.ptr<T>(i);
+                T* dptr = dst.ptr<T>(i);
+
+                for( int j = 0; j < size.width; j += 4 )
+                {
+                    float alpha = ptr1[j+3]*inv_scale, beta = ptr2[j+3]*inv_scale;
+                    dptr[j] = saturate_cast<T>(ptr1[j]*alpha + ptr2[j]*beta);
+                    dptr[j+1] = saturate_cast<T>(ptr1[j+1]*alpha + ptr2[j+1]*beta);
+                    dptr[j+2] = saturate_cast<T>(ptr1[j+2]*alpha + ptr2[j+2]*beta);
+                    dptr[j+3] = saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale);
+                }
+            }
+        }
+    @endcode
+    This approach, while being very simple, can boost the performance of a simple element-operation by
+    10-20 percent, especially if the image is rather small and the operation is quite simple.
+
+    Another OpenCV idiom in this function is the call of Mat::create for the destination array, which
+    allocates the destination array unless it already has the proper size and type. And while the newly
+    allocated arrays are always continuous, you still need to check the destination array because
+    Mat::create does not always allocate a new matrix.
+     */
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    /** @brief Returns the matrix element size in bytes.
+
+    The method returns the matrix element size in bytes. For example, if the matrix type is CV_16SC3 ,
+    the method returns 3\*sizeof(short) or 6.
+     */
+    size_t elemSize() const;
+
+    /** @brief Returns the size of each matrix element channel in bytes.
+
+    The method returns the matrix element channel size in bytes, that is, it ignores the number of
+    channels. For example, if the matrix type is CV_16SC3 , the method returns sizeof(short) or 2.
+     */
+    size_t elemSize1() const;
+
+    /** @brief Returns the type of a matrix element.
+
+    The method returns a matrix element type. This is an identifier compatible with the CvMat type
+    system, like CV_16SC3 or 16-bit signed 3-channel array, and so on.
+     */
+    int type() const;
+
+    /** @brief Returns the depth of a matrix element.
+
+    The method returns the identifier of the matrix element depth (the type of each individual channel).
+    For example, for a 16-bit signed element array, the method returns CV_16S . A complete list of
+    matrix types contains the following values:
+    -   CV_8U - 8-bit unsigned integers ( 0..255 )
+    -   CV_8S - 8-bit signed integers ( -128..127 )
+    -   CV_16U - 16-bit unsigned integers ( 0..65535 )
+    -   CV_16S - 16-bit signed integers ( -32768..32767 )
+    -   CV_32S - 32-bit signed integers ( -2147483648..2147483647 )
+    -   CV_32F - 32-bit floating-point numbers ( -FLT_MAX..FLT_MAX, INF, NAN )
+    -   CV_64F - 64-bit floating-point numbers ( -DBL_MAX..DBL_MAX, INF, NAN )
+     */
+    int depth() const;
+
+    /** @brief Returns the number of matrix channels.
+
+    The method returns the number of matrix channels.
+     */
+    int channels() const;
+
+    /** @brief Returns a normalized step.
+
+    The method returns a matrix step divided by Mat::elemSize1() . It can be useful to quickly access an
+    arbitrary matrix element.
+     */
+    size_t step1(int i=0) const;
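+    /* For illustration, the values these accessors yield for one concrete type:
+    @code
+        Mat m(4, 4, CV_16SC3);
+        // m.elemSize()  == 6   (3 channels * sizeof(short))
+        // m.elemSize1() == 2   (sizeof(short))
+        // m.type() == CV_16SC3, m.depth() == CV_16S, m.channels() == 3
+        // m.step1() == m.step/m.elemSize1() == 12 for this continuous matrix
+    @endcode
+    */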
+
+    /** @brief Returns true if the array has no elements.
+
+    The method returns true if Mat::total() is 0 or if Mat::data is NULL. Because of the pop_back() and
+    resize() methods, `M.total() == 0` does not imply that `M.data == NULL`.
+     */
+    bool empty() const;
+
+    /** @brief Returns the total number of array elements.
+
+    The method returns the number of array elements (a number of pixels if the array represents an
+    image).
+     */
+    size_t total() const;
+
+    /** @brief Returns the total number of array elements.
+
+     The method returns the number of elements within a certain sub-array slice with startDim <= dim < endDim.
+     */
+    size_t total(int startDim, int endDim=INT_MAX) const;
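+    /* For illustration, a small sketch with a 3-dimensional array:
+    @code
+        int sz[] = { 2, 3, 4 };
+        Mat m(3, sz, CV_8U);
+        // m.total()     == 24  (2*3*4)
+        // m.total(1)    == 12  (3*4, dimensions 1 and above)
+        // m.total(0, 2) == 6   (2*3, dimensions 0 and 1)
+    @endcode
+    */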
+
+    /**
+     * @param elemChannels Number of channels or number of columns the matrix should have.
+     *                     For a 2-D matrix, when the matrix has only 1 column, it should have
+     *                     elemChannels channels; when the matrix has only 1 channel,
+     *                     it should have elemChannels columns.
+     *                     For a 3-D matrix, it should have only one channel. Furthermore,
+     *                     if the number of planes is not one, then the number of rows
+     *                     within every plane has to be 1; if the number of rows within
+     *                     every plane is not 1, then the number of planes has to be 1.
+     * @param depth The depth the matrix should have. Set it to -1 when any depth is fine.
+     * @param requireContinuous Set it to true to require the matrix to be continuous
+     * @return -1 if the requirement is not satisfied.
+     *         Otherwise, it returns the number of elements in the matrix. Note
+     *         that an element may have multiple channels.
+     *
+     * The following code demonstrates its usage for a 2-d matrix:
+     * @snippet snippets/core_mat_checkVector.cpp example-2d
+     *
+     * The following code demonstrates its usage for a 3-d matrix:
+     * @snippet snippets/core_mat_checkVector.cpp example-3d
+     */
+    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
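+    /* In addition to the snippets above, a small inline sketch:
+    @code
+        std::vector<Point2f> pts(10);
+        Mat m(pts);                    // 10x1 matrix of type CV_32FC2
+        int n = m.checkVector(2);      // n == 10: ten 2-channel elements
+        int bad = m.checkVector(3);    // bad == -1: not a set of 3-element points
+    @endcode
+    */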
+
+    /** @brief Returns a pointer to the specified matrix row.
+
+    The methods return `uchar*` or typed pointer to the specified matrix row. See the sample in
+    Mat::isContinuous to know how to use these methods.
+    @param i0 A 0-based row index.
+     */
+    uchar* ptr(int i0=0);
+    /** @overload */
+    const uchar* ptr(int i0=0) const;
+
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    uchar* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    const uchar* ptr(int row, int col) const;
+
+    /** @overload */
+    uchar* ptr(int i0, int i1, int i2);
+    /** @overload */
+    const uchar* ptr(int i0, int i1, int i2) const;
+
+    /** @overload */
+    uchar* ptr(const int* idx);
+    /** @overload */
+    const uchar* ptr(const int* idx) const;
+    /** @overload */
+    template<int n> uchar* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<int n> const uchar* ptr(const Vec<int, n>& idx) const;
+
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0=0);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp* ptr(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp* ptr(int row, int col) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(int i0, int i1, int i2);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(int i0, int i1, int i2) const;
+    /** @overload */
+    template<typename _Tp> _Tp* ptr(const int* idx);
+    /** @overload */
+    template<typename _Tp> const _Tp* ptr(const int* idx) const;
+    /** @overload */
+    template<typename _Tp, int n> _Tp* ptr(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp* ptr(const Vec<int, n>& idx) const;
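+    /* For illustration, the classic row-pointer loop (a small sketch):
+    @code
+        Mat img(4, 4, CV_8UC1);
+        for( int y = 0; y < img.rows; y++ )
+        {
+            uchar* row = img.ptr<uchar>(y);   // pointer to the start of row y
+            for( int x = 0; x < img.cols; x++ )
+                row[x] = (uchar)(x + y);
+        }
+    @endcode
+    */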
+
+    /** @brief Returns a reference to the specified array element.
+
+    The template methods return a reference to the specified array element. For the sake of higher
+    performance, the index range checks are only performed in the Debug configuration.
+
+    Note that the variants with a single index (i) can be used to access elements of single-row or
+    single-column 2-dimensional arrays. That is, if, for example, A is a 1 x N floating-point matrix and
+    B is an M x 1 integer matrix, you can simply write `A.at<float>(k+4)` and `B.at<int>(2*i+1)`
+    instead of `A.at<float>(0,k+4)` and `B.at<int>(2*i+1,0)`, respectively.
+
+    The example below initializes a Hilbert matrix:
+    @code
+        Mat H(100, 100, CV_64F);
+        for(int i = 0; i < H.rows; i++)
+            for(int j = 0; j < H.cols; j++)
+                H.at<double>(i,j)=1./(i+j+1);
+    @endcode
+
+    Keep in mind that the size identifier used in the at operator cannot be chosen at random. It depends
+    on the image from which you are trying to retrieve the data. The list below gives a better insight into this:
+     - If matrix is of type `CV_8U` then use `Mat.at<uchar>(y,x)`.
+     - If matrix is of type `CV_8S` then use `Mat.at<schar>(y,x)`.
+     - If matrix is of type `CV_16U` then use `Mat.at<ushort>(y,x)`.
+     - If matrix is of type `CV_16S` then use `Mat.at<short>(y,x)`.
+     - If matrix is of type `CV_32S`  then use `Mat.at<int>(y,x)`.
+     - If matrix is of type `CV_32F`  then use `Mat.at<float>(y,x)`.
+     - If matrix is of type `CV_64F` then use `Mat.at<double>(y,x)`.
+
+    @param i0 Index along the dimension 0
+     */
+    template<typename _Tp> _Tp& at(int i0=0);
+    /** @overload
+    @param i0 Index along the dimension 0
+    */
+    template<typename _Tp> const _Tp& at(int i0=0) const;
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> _Tp& at(int row, int col);
+    /** @overload
+    @param row Index along the dimension 0
+    @param col Index along the dimension 1
+    */
+    template<typename _Tp> const _Tp& at(int row, int col) const;
+
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> _Tp& at(int i0, int i1, int i2);
+    /** @overload
+    @param i0 Index along the dimension 0
+    @param i1 Index along the dimension 1
+    @param i2 Index along the dimension 2
+    */
+    template<typename _Tp> const _Tp& at(int i0, int i1, int i2) const;
+
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> _Tp& at(const int* idx);
+    /** @overload
+    @param idx Array of Mat::dims indices.
+    */
+    template<typename _Tp> const _Tp& at(const int* idx) const;
+
+    /** @overload */
+    template<typename _Tp, int n> _Tp& at(const Vec<int, n>& idx);
+    /** @overload */
+    template<typename _Tp, int n> const _Tp& at(const Vec<int, n>& idx) const;
+
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> _Tp& at(Point pt);
+    /** @overload
+    special versions for 2D arrays (especially convenient for referencing image pixels)
+    @param pt Element position specified as Point(j,i) .
+    */
+    template<typename _Tp> const _Tp& at(Point pt) const;
+
+    /** @brief Returns the matrix iterator and sets it to the first matrix element.
+
+    The methods return the matrix read-only or read-write iterators. The use of matrix iterators is very
+    similar to the use of bi-directional STL iterators. In the example below, the alpha blending
+    function is rewritten using the matrix iterators:
+    @code
+        template<typename T>
+        void alphaBlendRGBA(const Mat& src1, const Mat& src2, Mat& dst)
+        {
+            typedef Vec<T, 4> VT;
+
+            const float alpha_scale = (float)std::numeric_limits<T>::max(),
+                        inv_scale = 1.f/alpha_scale;
+
+            CV_Assert( src1.type() == src2.type() &&
+                       src1.type() == traits::Type<VT>::value &&
+                       src1.size() == src2.size());
+            Size size = src1.size();
+            dst.create(size, src1.type());
+
+            MatConstIterator_<VT> it1 = src1.begin<VT>(), it1_end = src1.end<VT>();
+            MatConstIterator_<VT> it2 = src2.begin<VT>();
+            MatIterator_<VT> dst_it = dst.begin<VT>();
+
+            for( ; it1 != it1_end; ++it1, ++it2, ++dst_it )
+            {
+                VT pix1 = *it1, pix2 = *it2;
+                float alpha = pix1[3]*inv_scale, beta = pix2[3]*inv_scale;
+                *dst_it = VT(saturate_cast<T>(pix1[0]*alpha + pix2[0]*beta),
+                             saturate_cast<T>(pix1[1]*alpha + pix2[1]*beta),
+                             saturate_cast<T>(pix1[2]*alpha + pix2[2]*beta),
+                             saturate_cast<T>((1 - (1-alpha)*(1-beta))*alpha_scale));
+            }
+        }
+    @endcode
+     */
+    template<typename _Tp> MatIterator_<_Tp> begin();
+    template<typename _Tp> MatConstIterator_<_Tp> begin() const;
+
+    /** @brief Returns the matrix iterator and sets it to the after-last matrix element.
+
+    The methods return the matrix read-only or read-write iterators, set to the point following the last
+    matrix element.
+     */
+    template<typename _Tp> MatIterator_<_Tp> end();
+    template<typename _Tp> MatConstIterator_<_Tp> end() const;
+
+    /** @brief Runs the given functor over all matrix elements in parallel.
+
+    The operation passed as an argument has to be a function pointer, a function object, or a lambda (C++11).
+
+    Example 1. All of the operations below put 0xFF into the first channel of all matrix elements:
+    @code
+        Mat image(1920, 1080, CV_8UC3);
+        typedef cv::Point3_<uint8_t> Pixel;
+
+        // First: raw pointer access.
+        for (int r = 0; r < image.rows; ++r) {
+            Pixel* ptr = image.ptr<Pixel>(r, 0);
+            const Pixel* ptr_end = ptr + image.cols;
+            for (; ptr != ptr_end; ++ptr) {
+                ptr->x = 255;
+            }
+        }
+
+        // Using MatIterator (simple, but there is iterator overhead).
+        for (Pixel &p : cv::Mat_<Pixel>(image)) {
+            p.x = 255;
+        }
+
+        // Parallel execution with function object.
+        struct Operator {
+            void operator ()(Pixel &pixel, const int * position) {
+                pixel.x = 255;
+            }
+        };
+        image.forEach<Pixel>(Operator());
+
+        // Parallel execution using C++11 lambda.
+        image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
+            p.x = 255;
+        });
+    @endcode
+    Example 2. Using the pixel's position:
+    @code
+        // Create a 3D matrix (255 x 255 x 255) of type uint8_t
+        // and initialize every element with a value equal to its position,
+        // i.e. the pixel at (x,y,z) = (1,2,3) gets (b,g,r) = (1,2,3).
+
+        int sizes[] = { 255, 255, 255 };
+        typedef cv::Point3_<uint8_t> Pixel;
+
+        Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
+
+        image.forEach<Pixel>([&](Pixel& pixel, const int position[]) -> void {
+            pixel.x = position[0];
+            pixel.y = position[1];
+            pixel.z = position[2];
+        });
+    @endcode
+     */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation);
+    /** @overload */
+    template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Mat(Mat&& m);
+    Mat& operator = (Mat&& m);
+#endif
+
+    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+    //! pointer to the data
+    uchar* data;
+
+    //! helper fields used in locateROI and adjustROI
+    const uchar* datastart;
+    const uchar* dataend;
+    const uchar* datalimit;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+    static MatAllocator* getDefaultAllocator();
+    static void setDefaultAllocator(MatAllocator* allocator);
+
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
+    //! interaction with UMat
+    UMatData* u;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+    template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
+};
+
+
+///////////////////////////////// Mat_<_Tp> ////////////////////////////////////
+
+/** @brief Template matrix class derived from Mat
+
+@code{.cpp}
+    template<typename _Tp> class Mat_ : public Mat
+    {
+    public:
+        // ... some specific methods
+        //         and
+        // no new extra fields
+    };
+@endcode
+The class `Mat_<_Tp>` is a *thin* template wrapper on top of the Mat class. It does not have any
+extra data fields. Neither this class nor Mat has any virtual methods. Thus, references or pointers to
+these two classes can be freely but carefully converted from one to the other. For example:
+@code{.cpp}
+    // create a 100x100 8-bit matrix
+    Mat M(100,100,CV_8U);
+    // this compiles fine; no data conversion is done.
+    Mat_<float>& M1 = (Mat_<float>&)M;
+    // the program is likely to crash at the statement below
+    M1(99,99) = 1.f;
+@endcode
+While Mat is sufficient in most cases, Mat_ can be more convenient if you use a lot of element
+access operations and if you know the matrix type at compile time. Note that
+`Mat::at(int y,int x)` and `Mat_::operator()(int y,int x)` do absolutely the same thing
+and run at the same speed, but the latter is certainly shorter:
+@code{.cpp}
+    Mat_<double> M(20,20);
+    for(int i = 0; i < M.rows; i++)
+        for(int j = 0; j < M.cols; j++)
+            M(i,j) = 1./(i+j+1);
+    Mat E, V;
+    eigen(M,E,V);
+    cout << E.at<double>(0,0)/E.at<double>(M.rows-1,0);
+@endcode
+To use Mat_ for multi-channel images/matrices, pass Vec as a Mat_ parameter:
+@code{.cpp}
+    // allocate a 320x240 color image and fill it with green (in RGB space)
+    Mat_<Vec3b> img(240, 320, Vec3b(0,255,0));
+    // now draw a diagonal white line
+    for(int i = 0; i < 100; i++)
+        img(i,i)=Vec3b(255,255,255);
+    // and now scramble the 2nd (red) channel of each pixel
+    for(int i = 0; i < img.rows; i++)
+        for(int j = 0; j < img.cols; j++)
+            img(i,j)[2] ^= (uchar)(i ^ j);
+@endcode
+Mat_ is fully compatible with the C++11 range-based for loop. For example, such a loop
+can be used to safely apply a look-up table:
+@code{.cpp}
+void applyTable(Mat_<uchar>& I, const uchar* const table)
+{
+    for(auto& pixel : I)
+    {
+        pixel = table[pixel];
+    }
+}
+@endcode
+ */
+template<typename _Tp> class Mat_ : public Mat
+{
+public:
+    typedef _Tp value_type;
+    typedef typename DataType<_Tp>::channel_type channel_type;
+    typedef MatIterator_<_Tp> iterator;
+    typedef MatConstIterator_<_Tp> const_iterator;
+
+    //! default constructor
+    Mat_();
+    //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type)
+    Mat_(int _rows, int _cols);
+    //! constructor that sets each matrix element to specified value
+    Mat_(int _rows, int _cols, const _Tp& value);
+    //! equivalent to Mat(_size, DataType<_Tp>::type)
+    explicit Mat_(Size _size);
+    //! constructor that sets each matrix element to specified value
+    Mat_(Size _size, const _Tp& value);
+    //! n-dim array constructor
+    Mat_(int _ndims, const int* _sizes);
+    //! n-dim array constructor that sets each matrix element to specified value
+    Mat_(int _ndims, const int* _sizes, const _Tp& value);
+    //! copy/conversion constructor. If m is of different type, it's converted
+    Mat_(const Mat& m);
+    //! copy constructor
+    Mat_(const Mat_& m);
+    //! constructs a matrix on top of user-allocated data. step is in bytes(!!!), regardless of the type
+    Mat_(int _rows, int _cols, _Tp* _data, size_t _step=AUTO_STEP);
+    //! constructs n-dim matrix on top of user-allocated data. steps are in bytes(!!!), regardless of the type
+    Mat_(int _ndims, const int* _sizes, _Tp* _data, const size_t* _steps=0);
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Range& rowRange, const Range& colRange=Range::all());
+    //! selects a submatrix
+    Mat_(const Mat_& m, const Rect& roi);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const Range* ranges);
+    //! selects a submatrix, n-dim version
+    Mat_(const Mat_& m, const std::vector<Range>& ranges);
+    //! from a matrix expression
+    explicit Mat_(const MatExpr& e);
+    //! makes a matrix out of Vec, std::vector, Point_ or Point3_. The matrix will have a single column
+    explicit Mat_(const std::vector<_Tp>& vec, bool copyData=false);
+    template<int n> explicit Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData=true);
+    template<int m, int n> explicit Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& mtx, bool copyData=true);
+    explicit Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData=true);
+    explicit Mat_(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+#ifdef CV_CXX11
+    Mat_(std::initializer_list<_Tp> values);
+    explicit Mat_(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> values);
+#endif
+
+#ifdef CV_CXX_STD_ARRAY
+    template <std::size_t _Nm> explicit Mat_(const std::array<_Tp, _Nm>& arr, bool copyData=false);
+#endif
+
+    Mat_& operator = (const Mat& m);
+    Mat_& operator = (const Mat_& m);
+    //! set all the elements to s.
+    Mat_& operator = (const _Tp& s);
+    //! assign a matrix expression
+    Mat_& operator = (const MatExpr& e);
+
+    //! iterators; they are smart enough to skip gaps in the end of rows
+    iterator begin();
+    iterator end();
+    const_iterator begin() const;
+    const_iterator end() const;
+
+    //! template methods for operations over all matrix elements.
+    // the operations take care of skipping gaps at the end of rows (if any)
+    template<typename Functor> void forEach(const Functor& operation);
+    template<typename Functor> void forEach(const Functor& operation) const;
+
+    //! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
+    void create(int _rows, int _cols);
+    //! equivalent to Mat::create(_size, DataType<_Tp>::type)
+    void create(Size _size);
+    //! equivalent to Mat::create(_ndims, _sizes, DataType<_Tp>::type)
+    void create(int _ndims, const int* _sizes);
+    //! equivalent to Mat::release()
+    void release();
+    //! cross-product
+    Mat_ cross(const Mat_& m) const;
+    //! data type conversion
+    template<typename T2> operator Mat_<T2>() const;
+    //! overridden forms of Mat::row() etc.
+    Mat_ row(int y) const;
+    Mat_ col(int x) const;
+    Mat_ diag(int d=0) const;
+    Mat_ clone() const;
+
+    //! overridden forms of Mat::elemSize() etc.
+    size_t elemSize() const;
+    size_t elemSize1() const;
+    int type() const;
+    int depth() const;
+    int channels() const;
+    size_t step1(int i=0) const;
+    //! returns step()/sizeof(_Tp)
+    size_t stepT(int i=0) const;
+
+    //! overridden forms of Mat::zeros() etc. Data type is omitted, of course
+    static MatExpr zeros(int rows, int cols);
+    static MatExpr zeros(Size size);
+    static MatExpr zeros(int _ndims, const int* _sizes);
+    static MatExpr ones(int rows, int cols);
+    static MatExpr ones(Size size);
+    static MatExpr ones(int _ndims, const int* _sizes);
+    static MatExpr eye(int rows, int cols);
+    static MatExpr eye(Size size);
+
+    //! some more overridden methods
+    Mat_& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    Mat_ operator()( const Range& rowRange, const Range& colRange ) const;
+    Mat_ operator()( const Rect& roi ) const;
+    Mat_ operator()( const Range* ranges ) const;
+    Mat_ operator()(const std::vector<Range>& ranges) const;
+
+    //! more convenient forms of row and element access operators
+    _Tp* operator [](int y);
+    const _Tp* operator [](int y) const;
+
+    //! returns reference to the specified element
+    _Tp& operator ()(const int* idx);
+    //! returns read-only reference to the specified element
+    const _Tp& operator ()(const int* idx) const;
+
+    //! returns reference to the specified element
+    template<int n> _Tp& operator ()(const Vec<int, n>& idx);
+    //! returns read-only reference to the specified element
+    template<int n> const _Tp& operator ()(const Vec<int, n>& idx) const;
+
+    //! returns reference to the specified element (1D case)
+    _Tp& operator ()(int idx0);
+    //! returns read-only reference to the specified element (1D case)
+    const _Tp& operator ()(int idx0) const;
+    //! returns reference to the specified element (2D case)
+    _Tp& operator ()(int row, int col);
+    //! returns read-only reference to the specified element (2D case)
+    const _Tp& operator ()(int row, int col) const;
+    //! returns reference to the specified element (3D case)
+    _Tp& operator ()(int idx0, int idx1, int idx2);
+    //! returns read-only reference to the specified element (3D case)
+    const _Tp& operator ()(int idx0, int idx1, int idx2) const;
+
+    _Tp& operator ()(Point pt);
+    const _Tp& operator ()(Point pt) const;
+
+    //! conversion to vector.
+    operator std::vector<_Tp>() const;
+
+#ifdef CV_CXX_STD_ARRAY
+    //! conversion to array.
+    template<std::size_t _Nm> operator std::array<_Tp, _Nm>() const;
+#endif
+
+    //! conversion to Vec
+    template<int n> operator Vec<typename DataType<_Tp>::channel_type, n>() const;
+    //! conversion to Matx
+    template<int m, int n> operator Matx<typename DataType<_Tp>::channel_type, m, n>() const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    Mat_(Mat_&& m);
+    Mat_& operator = (Mat_&& m);
+
+    Mat_(Mat&& m);
+    Mat_& operator = (Mat&& m);
+
+    Mat_(MatExpr&& e);
+#endif
+};
+
+typedef Mat_<uchar> Mat1b;
+typedef Mat_<Vec2b> Mat2b;
+typedef Mat_<Vec3b> Mat3b;
+typedef Mat_<Vec4b> Mat4b;
+
+typedef Mat_<short> Mat1s;
+typedef Mat_<Vec2s> Mat2s;
+typedef Mat_<Vec3s> Mat3s;
+typedef Mat_<Vec4s> Mat4s;
+
+typedef Mat_<ushort> Mat1w;
+typedef Mat_<Vec2w> Mat2w;
+typedef Mat_<Vec3w> Mat3w;
+typedef Mat_<Vec4w> Mat4w;
+
+typedef Mat_<int>   Mat1i;
+typedef Mat_<Vec2i> Mat2i;
+typedef Mat_<Vec3i> Mat3i;
+typedef Mat_<Vec4i> Mat4i;
+
+typedef Mat_<float> Mat1f;
+typedef Mat_<Vec2f> Mat2f;
+typedef Mat_<Vec3f> Mat3f;
+typedef Mat_<Vec4f> Mat4f;
+
+typedef Mat_<double> Mat1d;
+typedef Mat_<Vec2d> Mat2d;
+typedef Mat_<Vec3d> Mat3d;
+typedef Mat_<Vec4d> Mat4d;
+
+/** @todo document */
+class CV_EXPORTS UMat
+{
+public:
+    //! default constructor
+    UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    //! constructs 2D matrix of the specified size and type
+    // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+    UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    //! constructs 2D matrix and fills it with the specified value _s.
+    UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! constructs n-dimensional matrix
+    UMat(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    UMat(int ndims, const int* sizes, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! copy constructor
+    UMat(const UMat& m);
+
+    //! creates a matrix header for a part of the bigger matrix
+    UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
+    UMat(const UMat& m, const Rect& roi);
+    UMat(const UMat& m, const Range* ranges);
+    UMat(const UMat& m, const std::vector<Range>& ranges);
+    //! builds matrix from std::vector with or without copying the data
+    template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+
+    //! builds matrix from cv::Vec; the data is copied by default
+    template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
+    //! builds matrix from cv::Matx; the data is copied by default
+    template<typename _Tp, int m, int n> explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+    //! builds matrix from a 2D point
+    template<typename _Tp> explicit UMat(const Point_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from a 3D point
+    template<typename _Tp> explicit UMat(const Point3_<_Tp>& pt, bool copyData=true);
+    //! builds matrix from comma initializer
+    template<typename _Tp> explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! destructor - calls release()
+    ~UMat();
+    //! assignment operators
+    UMat& operator = (const UMat& m);
+
+    Mat getMat(int flags) const;
+
+    //! returns a new matrix header for the specified row
+    UMat row(int y) const;
+    //! returns a new matrix header for the specified column
+    UMat col(int x) const;
+    //! ... for the specified row span
+    UMat rowRange(int startrow, int endrow) const;
+    UMat rowRange(const Range& r) const;
+    //! ... for the specified column span
+    UMat colRange(int startcol, int endcol) const;
+    UMat colRange(const Range& r) const;
+    //! ... for the specified diagonal
+    //! (d=0 - the main diagonal,
+    //!  >0 - a diagonal from the upper half,
+    //!  <0 - a diagonal from the lower half)
+    UMat diag(int d=0) const;
+    //! constructs a square diagonal matrix whose main diagonal is the vector "d"
+    static UMat diag(const UMat& d);
+
+    //! returns deep copy of the matrix, i.e. the data is copied
+    UMat clone() const;
+    //! copies the matrix content to "m".
+    // It calls m.create(this->size(), this->type()).
+    void copyTo( OutputArray m ) const;
+    //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+    void copyTo( OutputArray m, InputArray mask ) const;
+    //! converts matrix to another datatype with optional scaling. See cvConvertScale.
+    void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+    void assignTo( UMat& m, int type=-1 ) const;
+
+    //! sets every matrix element to s
+    UMat& operator = (const Scalar& s);
+    //! sets some of the matrix elements to s, according to the mask
+    UMat& setTo(InputArray value, InputArray mask=noArray());
+    //! creates alternative matrix header for the same data, with different
+    // number of channels and/or different number of rows. see cvReshape.
+    UMat reshape(int cn, int rows=0) const;
+    UMat reshape(int cn, int newndims, const int* newsz) const;
+
+    //! matrix transposition by means of matrix expressions
+    UMat t() const;
+    //! matrix inversion by means of matrix expressions
+    UMat inv(int method=DECOMP_LU) const;
+    //! per-element matrix multiplication by means of matrix expressions
+    UMat mul(InputArray m, double scale=1) const;
+
+    //! computes dot-product
+    double dot(InputArray m) const;
+
+    //! Matlab-style matrix initialization
+    static UMat zeros(int rows, int cols, int type);
+    static UMat zeros(Size size, int type);
+    static UMat zeros(int ndims, const int* sz, int type);
+    static UMat ones(int rows, int cols, int type);
+    static UMat ones(Size size, int type);
+    static UMat ones(int ndims, const int* sz, int type);
+    static UMat eye(int rows, int cols, int type);
+    static UMat eye(Size size, int type);
+
+    //! allocates new matrix data unless the matrix already has specified size and type.
+    // previous data is unreferenced if needed.
+    void create(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(int ndims, const int* sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+    void create(const std::vector<int>& sizes, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
+
+    //! increases the reference counter; use with care to avoid memleaks
+    void addref();
+    //! decreases reference counter;
+    // deallocates the data when reference counter reaches 0.
+    void release();
+
+    //! deallocates the matrix data
+    void deallocate();
+    //! internal use function; properly re-allocates _size, _step arrays
+    void copySize(const UMat& m);
+
+    //! locates matrix header within a parent matrix. See below
+    void locateROI( Size& wholeSize, Point& ofs ) const;
+    //! moves/resizes the current matrix ROI inside the parent matrix.
+    UMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+    //! extracts a rectangular sub-matrix
+    // (this is a generalized form of row, rowRange etc.)
+    UMat operator()( Range rowRange, Range colRange ) const;
+    UMat operator()( const Rect& roi ) const;
+    UMat operator()( const Range* ranges ) const;
+    UMat operator()(const std::vector<Range>& ranges) const;
+
+    //! returns true iff the matrix data is continuous
+    // (i.e. when there are no gaps between successive rows).
+    // similar to CV_IS_MAT_CONT(cvmat->type)
+    bool isContinuous() const;
+
+    //! returns true if the matrix is a submatrix of another matrix
+    bool isSubmatrix() const;
+
+    //! returns element size in bytes,
+    // similar to CV_ELEM_SIZE(cvmat->type)
+    size_t elemSize() const;
+    //! returns the size of element channel in bytes.
+    size_t elemSize1() const;
+    //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+    int type() const;
+    //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)
+    int depth() const;
+    //! returns element type, similar to CV_MAT_CN(cvmat->type)
+    int channels() const;
+    //! returns step/elemSize1()
+    size_t step1(int i=0) const;
+    //! returns true if matrix data is NULL
+    bool empty() const;
+    //! returns the total number of matrix elements
+    size_t total() const;
+
+    //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+    int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+    UMat(UMat&& m);
+    UMat& operator = (UMat&& m);
+#endif
+
+    /*! Returns the OpenCL buffer handle on which this UMat operates.
+        The UMat instance should be kept alive while the handle is in use to prevent the buffer from
+        being returned to the OpenCV buffer pool.
+     */
+    void* handle(int accessFlags) const;
+    void ndoffset(size_t* ofs) const;
+
+    enum { MAGIC_VAL  = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+    enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+    /*! includes several bit-fields:
+         - the magic signature
+         - continuity flag
+         - depth
+         - number of channels
+     */
+    int flags;
+    //! the matrix dimensionality, >= 2
+    int dims;
+    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+    int rows, cols;
+
+    //! custom allocator
+    MatAllocator* allocator;
+    UMatUsageFlags usageFlags; // usage flags for allocator
+    //! and the standard allocator
+    static MatAllocator* getStdAllocator();
+
+    //! internal use method: updates the continuity flag
+    void updateContinuityFlag();
+
+    // black-box container of UMat data
+    UMatData* u;
+
+    // offset of the submatrix (or 0)
+    size_t offset;
+
+    MatSize size;
+    MatStep step;
+
+protected:
+};
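+/* Pending proper documentation, a minimal usage sketch of UMat. GaussianBlur below
+   comes from the imgproc module, which is assumed to be available alongside this header:
+@code
+    Mat src(480, 640, CV_8UC1, Scalar(0));
+    UMat usrc, udst;
+    src.copyTo(usrc);                           // upload into a UMat
+    GaussianBlur(usrc, udst, Size(5, 5), 1.5);  // may run on an OpenCL device
+    Mat dst = udst.getMat(ACCESS_READ);         // map back for CPU access
+@endcode
+*/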
+
+
+/////////////////////////// multi-dimensional sparse matrix //////////////////////////
+
+/** @brief The class SparseMat represents multi-dimensional sparse numerical arrays.
+
+Such a sparse array can store elements of any type that Mat can store. *Sparse* means that only
+non-zero elements are stored (though, as a result of operations on a sparse matrix, some of its
+stored elements can actually become 0. It is up to you to detect such elements and delete them
+using SparseMat::erase ). The non-zero elements are stored in a hash table that grows when it is
+    filled, so that the search time is O(1) on average (regardless of whether the element is there or not).
+Elements can be accessed using the following methods:
+-   Query operations (SparseMat::ptr and the higher-level SparseMat::ref, SparseMat::value and
+    SparseMat::find), for example:
+    @code
+        const int dims = 5;
+        int size[5] = {10, 10, 10, 10, 10};
+        SparseMat sparse_mat(dims, size, CV_32F);
+        for(int i = 0; i < 1000; i++)
+        {
+            int idx[dims];
+            for(int k = 0; k < dims; k++)
+                idx[k] = rand() % size[k];
+            sparse_mat.ref<float>(idx) += 1.f;
+        }
+        cout << "nnz = " << sparse_mat.nzcount() << endl;
+    @endcode
+-   Sparse matrix iterators. They are similar to MatIterator but different from NAryMatIterator.
+    That is, the iteration loop is familiar to STL users:
+    @code
+        // prints elements of a sparse floating-point matrix
+        // and the sum of elements.
+        SparseMatConstIterator_<float>
+            it = sparse_mat.begin<float>(),
+            it_end = sparse_mat.end<float>();
+        double s = 0;
+        int dims = sparse_mat.dims();
+        for(; it != it_end; ++it)
+        {
+            // print element indices and the element value
+            const SparseMat::Node* n = it.node();
+            printf("(");
+            for(int i = 0; i < dims; i++)
+                printf("%d%s", n->idx[i], i < dims-1 ? ", " : ")");
+            printf(": %g\n", it.value<float>());
+            s += *it;
+        }
+        printf("Element sum is %g\n", s);
+    @endcode
+    If you run this loop, you will notice that elements are not enumerated in a logical order
+    (lexicographical, and so on). They come in the same order as they are stored in the hash table
+    (semi-randomly). You may collect pointers to the nodes and sort them to get the proper ordering.
+    Note, however, that pointers to the nodes may become invalid when you add more elements to the
+    matrix. This may happen due to possible buffer reallocation.
+-   Combination of the above 2 methods when you need to process 2 or more sparse matrices
+    simultaneously. For example, this is how you can compute unnormalized cross-correlation of the 2
+    floating-point sparse matrices:
+    @code
+        double cross_corr(const SparseMat& a, const SparseMat& b)
+        {
+            const SparseMat *_a = &a, *_b = &b;
+            // if b contains less elements than a,
+            // it is faster to iterate through b
+            if(_a->nzcount() > _b->nzcount())
+                std::swap(_a, _b);
+            SparseMatConstIterator_<float> it = _a->begin<float>(),
+                                           it_end = _a->end<float>();
+            double ccorr = 0;
+            for(; it != it_end; ++it)
+            {
+                // take the next element from the first matrix
+                float avalue = *it;
+                const Node* anode = it.node();
+                // and try to find an element with the same index in the second matrix.
+                // since the hash value depends only on the element index,
+                // reuse the hash value stored in the node
+                float bvalue = _b->value<float>(anode->idx,&anode->hashval);
+                ccorr += avalue*bvalue;
+            }
+            return ccorr;
+        }
+    @endcode
+ */
+class CV_EXPORTS SparseMat
+{
+public:
+    typedef SparseMatIterator iterator;
+    typedef SparseMatConstIterator const_iterator;
+
+    enum { MAGIC_VAL=0x42FD0000, MAX_DIM=32, HASH_SCALE=0x5bd1e995, HASH_BIT=0x80000000 };
+
+    //! the sparse matrix header
+    struct CV_EXPORTS Hdr
+    {
+        Hdr(int _dims, const int* _sizes, int _type);
+        void clear();
+        int refcount;
+        int dims;
+        int valueOffset;
+        size_t nodeSize;
+        size_t nodeCount;
+        size_t freeList;
+        std::vector<uchar> pool;
+        std::vector<size_t> hashtab;
+        int size[MAX_DIM];
+    };
+
+    //! sparse matrix node - element of a hash table
+    struct CV_EXPORTS Node
+    {
+        //! hash value
+        size_t hashval;
+        //! index of the next node in the same hash table entry
+        size_t next;
+        //! index of the matrix element
+        int idx[MAX_DIM];
+    };
+
+    /** @brief Various SparseMat constructors.
+     */
+    SparseMat();
+
+    /** @overload
+    @param dims Array dimensionality.
+    @param _sizes Sparse matrix size on all dimensions.
+    @param _type Sparse matrix data type.
+    */
+    SparseMat(int dims, const int* _sizes, int _type);
+
+    /** @overload
+    @param m Source sparse matrix for the copy constructor.
+    */
+    SparseMat(const SparseMat& m);
+
+    /** @overload
+    @param m Source matrix for the conversion constructor. Since m is a dense matrix (cv::Mat), it will
+    be converted to a sparse representation.
+    */
+    explicit SparseMat(const Mat& m);
+
+    //! the destructor
+    ~SparseMat();
+
+    //! assignment operator. This is an O(1) operation, i.e. no data is copied
+    SparseMat& operator = (const SparseMat& m);
+    //! equivalent to the corresponding constructor
+    SparseMat& operator = (const Mat& m);
+
+    //! creates full copy of the matrix
+    SparseMat clone() const;
+
+    //! copies all the data to the destination matrix. All the previous content of m is erased
+    void copyTo( SparseMat& m ) const;
+    //! converts sparse matrix to dense matrix.
+    void copyTo( Mat& m ) const;
+    //! multiplies all the matrix elements by the specified scale factor alpha and converts the results to the specified data type
+    void convertTo( SparseMat& m, int rtype, double alpha=1 ) const;
+    //! converts sparse matrix to dense n-dim matrix with optional type conversion and scaling.
+    /*!
+        @param [out] m - output matrix; if it does not have a proper size or type before the operation,
+            it is reallocated
+        @param [in] rtype - desired output matrix type or, rather, the depth, since the number of
+            channels is the same as in the input; if rtype is negative, the output matrix will have
+            the same type as the input.
+        @param [in] alpha - optional scale factor
+        @param [in] beta - optional delta added to the scaled values
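+
+        A minimal usage sketch (assuming a CV_32F sparse matrix `sm`; the scale and offset are
+        illustrative):
+        @code
+            Mat dense;
+            sm.convertTo(dense, CV_8U, 255.0, 0.0);  // densify, scaling values into the 8-bit range
+        @endcode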
+    */
+    void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+    // not used now
+    void assignTo( SparseMat& m, int type=-1 ) const;
+
+    //! reallocates sparse matrix.
+    /*!
+        If the matrix already had the proper size and type,
+        it is simply cleared with clear(), otherwise,
+        the old matrix is released (using release()) and the new one is allocated.
+    */
+    void create(int dims, const int* _sizes, int _type);
+    //! sets all the sparse matrix elements to 0, which means clearing the hash table.
+    void clear();
+    //! manually increments the reference counter to the header.
+    void addref();
+    //! decrements the header reference counter. When the counter reaches 0, the header and all the underlying data are deallocated.
+    void release();
+
+    //! converts sparse matrix to the old-style representation; all the elements are copied.
+    //operator CvSparseMat*() const;
+    //! returns the size of each element in bytes (not including the overhead - the space occupied by SparseMat::Node elements)
+    size_t elemSize() const;
+    //! returns elemSize()/channels()
+    size_t elemSize1() const;
+
+    //! returns type of sparse matrix elements
+    int type() const;
+    //! returns the depth of sparse matrix elements
+    int depth() const;
+    //! returns the number of channels
+    int channels() const;
+
+    //! returns the array of sizes, or NULL if the matrix is not allocated
+    const int* size() const;
+    //! returns the size of i-th matrix dimension (or 0)
+    int size(int i) const;
+    //! returns the matrix dimensionality
+    int dims() const;
+    //! returns the number of non-zero elements (=the number of hash table nodes)
+    size_t nzcount() const;
+
+    //! computes the element hash value (1D case)
+    size_t hash(int i0) const;
+    //! computes the element hash value (2D case)
+    size_t hash(int i0, int i1) const;
+    //! computes the element hash value (3D case)
+    size_t hash(int i0, int i1, int i2) const;
+    //! computes the element hash value (nD case)
+    size_t hash(const int* idx) const;
+
+    //!@{
+    /*!
+     Specialized variants for the 1D, 2D, 3D cases and the generic one for the n-D case.
+     Return a pointer to the matrix element:
+      - if the element is there (it's non-zero), the pointer to it is returned
+      - if it's not there and createMissing=false, NULL pointer is returned
+      - if it's not there and createMissing=true, then the new element
+        is created and initialized with 0. Pointer to it is returned
+      - if the optional hashval pointer is not NULL, the element hash value is
+        not computed, but *hashval is taken instead.
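+
+     A minimal sketch (assuming a 2D CV_32F sparse matrix `sm`; the indices are illustrative):
+     @code
+         size_t hv = sm.hash(3, 4);              // compute the element hash once
+         uchar* p = sm.ptr(3, 4, true, &hv);     // create the element if it is missing
+         *(float*)p = 1.f;
+         uchar* q = sm.ptr(3, 4, false, &hv);    // look it up again, reusing the hash; NULL would mean "not found"
+     @endcode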
+    */
+    //! returns pointer to the specified element (1D case)
+    uchar* ptr(int i0, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (2D case)
+    uchar* ptr(int i0, int i1, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (3D case)
+    uchar* ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0);
+    //! returns pointer to the specified element (nD case)
+    uchar* ptr(const int* idx, bool createMissing, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return read-write reference to the specified sparse matrix element.
+
+     `ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`.
+     The methods always return a valid reference.
+     If the element did not exist, it is created and initialized with 0.
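+
+     A minimal sketch (assuming a 2D CV_32F sparse matrix `sm`):
+     @code
+         sm.ref<float>(0, 1) += 2.f;   // the element is created on first access
+     @endcode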
+    */
+    //! returns reference to the specified element (1D case)
+    template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
+    //! returns reference to the specified element (2D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, size_t* hashval=0);
+    //! returns reference to the specified element (3D case)
+    template<typename _Tp> _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+    //! returns reference to the specified element (nD case)
+    template<typename _Tp> _Tp& ref(const int* idx, size_t* hashval=0);
+    //!@}
+
+    //!@{
+    /*!
+     return value of the specified sparse matrix element.
+
+     `value<_Tp>(i0,...[,hashval])` is equivalent to
+     @code
+     { const _Tp* p = find<_Tp>(i0,...[,hashval]); return p ? *p : _Tp(); }
+     @endcode
+
+     That is, if the element did not exist, the methods return 0.
+     */
+    //! returns value of the specified element (1D case)
+    template<typename _Tp> _Tp value(int i0, size_t* hashval=0) const;
+    //! returns value of the specified element (2D case)
+    template<typename _Tp> _Tp value(int i0, int i1, size_t* hashval=0) const;
+    //! returns value of the specified element (3D case)
+    template<typename _Tp> _Tp value(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns value of the specified element (nD case)
+    template<typename _Tp> _Tp value(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //!@{
+    /*!
+     Return pointer to the specified sparse matrix element if it exists
+
+     `find<_Tp>(i0,...[,hashval])` is equivalent to `(const _Tp*)ptr(i0,...,false[,hashval])`.
+
+     If the specified element does not exist, the methods return NULL.
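+
+     A minimal sketch (assuming a 2D CV_32F sparse matrix `sm`):
+     @code
+         if( const float* p = sm.find<float>(0, 1) )
+             printf("element (0,1) = %g\n", *p);   // present only if the element is stored
+     @endcode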
+    */
+    //! returns pointer to the specified element (1D case)
+    template<typename _Tp> const _Tp* find(int i0, size_t* hashval=0) const;
+    //! returns pointer to the specified element (2D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, size_t* hashval=0) const;
+    //! returns pointer to the specified element (3D case)
+    template<typename _Tp> const _Tp* find(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! returns pointer to the specified element (nD case)
+    template<typename _Tp> const _Tp* find(const int* idx, size_t* hashval=0) const;
+    //!@}
+
+    //! erases the specified element (2D case)
+    void erase(int i0, int i1, size_t* hashval=0);
+    //! erases the specified element (3D case)
+    void erase(int i0, int i1, int i2, size_t* hashval=0);
+    //! erases the specified element (nD case)
+    void erase(const int* idx, size_t* hashval=0);
+
+    //!@{
+    /*!
+       return the sparse matrix iterator pointing to the first sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix beginning
+    SparseMatIterator begin();
+    //! returns the sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatIterator_<_Tp> begin();
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    SparseMatConstIterator begin() const;
+    //! returns the read-only sparse matrix iterator at the matrix beginning
+    template<typename _Tp> SparseMatConstIterator_<_Tp> begin() const;
+    //!@}
+    /*!
+       return the sparse matrix iterator pointing to the element following the last sparse matrix element
+    */
+    //! returns the sparse matrix iterator at the matrix end
+    SparseMatIterator end();
+    //! returns the read-only sparse matrix iterator at the matrix end
+    SparseMatConstIterator end() const;
+    //! returns the typed sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatIterator_<_Tp> end();
+    //! returns the typed read-only sparse matrix iterator at the matrix end
+    template<typename _Tp> SparseMatConstIterator_<_Tp> end() const;
+
+    //! returns the value stored in the sparse matrix node
+    template<typename _Tp> _Tp& value(Node* n);
+    //! returns the value stored in the sparse matrix node
+    template<typename _Tp> const _Tp& value(const Node* n) const;
+
+    ////////////// some internal-use methods ///////////////
+    Node* node(size_t nidx);
+    const Node* node(size_t nidx) const;
+
+    uchar* newNode(const int* idx, size_t hashval);
+    void removeNode(size_t hidx, size_t nidx, size_t previdx);
+    void resizeHashTab(size_t newsize);
+
+    int flags;
+    Hdr* hdr;
+};
+
+
+
+///////////////////////////////// SparseMat_<_Tp> ////////////////////////////////////
+
+/** @brief Template sparse n-dimensional array class derived from SparseMat
+
+SparseMat_ is a thin wrapper on top of SparseMat created in the same way as Mat_ . It simplifies
+notation of some operations:
+@code
+    int sz[] = {10, 20, 30};
+    SparseMat_<double> M(3, sz);
+    ...
+    M.ref(1, 2, 3) = M(4, 5, 6) + M(7, 8, 9);
+@endcode
+ */
+template<typename _Tp> class SparseMat_ : public SparseMat
+{
+public:
+    typedef SparseMatIterator_<_Tp> iterator;
+    typedef SparseMatConstIterator_<_Tp> const_iterator;
+
+    //! the default constructor
+    SparseMat_();
+    //! the full constructor equivalent to SparseMat(dims, _sizes, DataType<_Tp>::type)
+    SparseMat_(int dims, const int* _sizes);
+    //! the copy constructor. If DataType<_Tp>::type != m.type(), the m elements are converted
+    SparseMat_(const SparseMat& m);
+    //! the copy constructor. This is O(1) operation - no data is copied
+    SparseMat_(const SparseMat_& m);
+    //! converts dense matrix to the sparse form
+    SparseMat_(const Mat& m);
+    //! converts the old-style sparse matrix to the C++ class. All the elements are copied
+    //SparseMat_(const CvSparseMat* m);
+    //! the assignment operator. If DataType<_Tp>::type != m.type(), the m elements are converted
+    SparseMat_& operator = (const SparseMat& m);
+    //! the assignment operator. This is O(1) operation - no data is copied
+    SparseMat_& operator = (const SparseMat_& m);
+    //! converts dense matrix to the sparse form
+    SparseMat_& operator = (const Mat& m);
+
+    //! makes full copy of the matrix. All the elements are duplicated
+    SparseMat_ clone() const;
+    //! equivalent to cv::SparseMat::create(dims, _sizes, DataType<_Tp>::type)
+    void create(int dims, const int* _sizes);
+    //! converts sparse matrix to the old-style CvSparseMat. All the elements are copied
+    //operator CvSparseMat*() const;
+
+    //! returns type of the matrix elements
+    int type() const;
+    //! returns depth of the matrix elements
+    int depth() const;
+    //! returns the number of channels in each matrix element
+    int channels() const;
+
+    //! equivalent to SparseMat::ref<_Tp>(i0, hashval)
+    _Tp& ref(int i0, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(i0, i1, hashval)
+    _Tp& ref(int i0, int i1, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(i0, i1, i2, hashval)
+    _Tp& ref(int i0, int i1, int i2, size_t* hashval=0);
+    //! equivalent to SparseMat::ref<_Tp>(idx, hashval)
+    _Tp& ref(const int* idx, size_t* hashval=0);
+
+    //! equivalent to SparseMat::value<_Tp>(i0, hashval)
+    _Tp operator()(int i0, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(i0, i1, hashval)
+    _Tp operator()(int i0, int i1, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(i0, i1, i2, hashval)
+    _Tp operator()(int i0, int i1, int i2, size_t* hashval=0) const;
+    //! equivalent to SparseMat::value<_Tp>(idx, hashval)
+    _Tp operator()(const int* idx, size_t* hashval=0) const;
+
+    //! returns sparse matrix iterator pointing to the first sparse matrix element
+    SparseMatIterator_<_Tp> begin();
+    //! returns read-only sparse matrix iterator pointing to the first sparse matrix element
+    SparseMatConstIterator_<_Tp> begin() const;
+    //! returns sparse matrix iterator pointing to the element following the last sparse matrix element
+    SparseMatIterator_<_Tp> end();
+    //! returns read-only sparse matrix iterator pointing to the element following the last sparse matrix element
+    SparseMatConstIterator_<_Tp> end() const;
+};
+
+
+
+////////////////////////////////// MatConstIterator //////////////////////////////////
+
+class CV_EXPORTS MatConstIterator
+{
+public:
+    typedef uchar* value_type;
+    typedef ptrdiff_t difference_type;
+    typedef const uchar** pointer;
+    typedef uchar* reference;
+
+    typedef std::random_access_iterator_tag iterator_category;
+
+    //! default constructor
+    MatConstIterator();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatConstIterator(const Mat* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator(const Mat* _m, const int* _idx);
+    //! copy constructor
+    MatConstIterator(const MatConstIterator& it);
+
+    //! copy operator
+    MatConstIterator& operator = (const MatConstIterator& it);
+    //! returns the current matrix element
+    const uchar* operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    const uchar* operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatConstIterator& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatConstIterator& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatConstIterator& operator --();
+    //! decrements the iterator
+    MatConstIterator operator --(int);
+    //! increments the iterator
+    MatConstIterator& operator ++();
+    //! increments the iterator
+    MatConstIterator operator ++(int);
+    //! returns the current iterator position
+    Point pos() const;
+    //! returns the current iterator position
+    void pos(int* _idx) const;
+
+    ptrdiff_t lpos() const;
+    void seek(ptrdiff_t ofs, bool relative = false);
+    void seek(const int* _idx, bool relative = false);
+
+    const Mat* m;
+    size_t elemSize;
+    const uchar* ptr;
+    const uchar* sliceStart;
+    const uchar* sliceEnd;
+};
+
+
+
+////////////////////////////////// MatConstIterator_ /////////////////////////////////
+
+/** @brief Matrix read-only iterator
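+
+ A minimal sketch (assuming a CV_32F matrix `m`) of summing all elements:
+ @code
+     double s = 0;
+     for( MatConstIterator_<float> it = m.begin<float>(), it_end = m.end<float>(); it != it_end; ++it )
+         s += *it;
+ @endcode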
+ */
+template<typename _Tp>
+class MatConstIterator_ : public MatConstIterator
+{
+public:
+    typedef _Tp value_type;
+    typedef ptrdiff_t difference_type;
+    typedef const _Tp* pointer;
+    typedef const _Tp& reference;
+
+    typedef std::random_access_iterator_tag iterator_category;
+
+    //! default constructor
+    MatConstIterator_();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatConstIterator_(const Mat_<_Tp>* _m, const int* _idx);
+    //! copy constructor
+    MatConstIterator_(const MatConstIterator_& it);
+
+    //! copy operator
+    MatConstIterator_& operator = (const MatConstIterator_& it);
+    //! returns the current matrix element
+    const _Tp& operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    const _Tp& operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatConstIterator_& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatConstIterator_& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatConstIterator_& operator --();
+    //! decrements the iterator
+    MatConstIterator_ operator --(int);
+    //! increments the iterator
+    MatConstIterator_& operator ++();
+    //! increments the iterator
+    MatConstIterator_ operator ++(int);
+    //! returns the current iterator position
+    Point pos() const;
+};
+
+
+
+//////////////////////////////////// MatIterator_ ////////////////////////////////////
+
+/** @brief Matrix read-write iterator
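+
+A minimal sketch (assuming a CV_32F matrix `m`) of clamping negative values in place:
+@code
+    for( MatIterator_<float> it = m.begin<float>(), it_end = m.end<float>(); it != it_end; ++it )
+        *it = std::max(*it, 0.f);
+@endcode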
+*/
+template<typename _Tp>
+class MatIterator_ : public MatConstIterator_<_Tp>
+{
+public:
+    typedef _Tp* pointer;
+    typedef _Tp& reference;
+
+    typedef std::random_access_iterator_tag iterator_category;
+
+    //! the default constructor
+    MatIterator_();
+    //! constructor that sets the iterator to the beginning of the matrix
+    MatIterator_(Mat_<_Tp>* _m);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, Point _pt);
+    //! constructor that sets the iterator to the specified element of the matrix
+    MatIterator_(Mat_<_Tp>* _m, const int* _idx);
+    //! copy constructor
+    MatIterator_(const MatIterator_& it);
+    //! copy operator
+    MatIterator_& operator = (const MatIterator_<_Tp>& it );
+
+    //! returns the current matrix element
+    _Tp& operator *() const;
+    //! returns the i-th matrix element, relative to the current
+    _Tp& operator [](ptrdiff_t i) const;
+
+    //! shifts the iterator forward by the specified number of elements
+    MatIterator_& operator += (ptrdiff_t ofs);
+    //! shifts the iterator backward by the specified number of elements
+    MatIterator_& operator -= (ptrdiff_t ofs);
+    //! decrements the iterator
+    MatIterator_& operator --();
+    //! decrements the iterator
+    MatIterator_ operator --(int);
+    //! increments the iterator
+    MatIterator_& operator ++();
+    //! increments the iterator
+    MatIterator_ operator ++(int);
+};
+
+
+
+/////////////////////////////// SparseMatConstIterator ///////////////////////////////
+
+/**  @brief Read-Only Sparse Matrix Iterator.
+
+ Here is how to use the iterator to compute the sum of floating-point sparse matrix elements:
+
+ \code
+ SparseMatConstIterator it = m.begin(), it_end = m.end();
+ double s = 0;
+ CV_Assert( m.type() == CV_32F );
+ for( ; it != it_end; ++it )
+    s += it.value<float>();
+ \endcode
+*/
+class CV_EXPORTS SparseMatConstIterator
+{
+public:
+    //! the default constructor
+    SparseMatConstIterator();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatConstIterator(const SparseMat* _m);
+    //! the copy constructor
+    SparseMatConstIterator(const SparseMatConstIterator& it);
+
+    //! the assignment operator
+    SparseMatConstIterator& operator = (const SparseMatConstIterator& it);
+
+    //! template method returning the current matrix element
+    template<typename _Tp> const _Tp& value() const;
+    //! returns the current node of the sparse matrix. it.node->idx is the current element index
+    const SparseMat::Node* node() const;
+
+    //! moves iterator to the previous element
+    SparseMatConstIterator& operator --();
+    //! moves iterator to the previous element
+    SparseMatConstIterator operator --(int);
+    //! moves iterator to the next element
+    SparseMatConstIterator& operator ++();
+    //! moves iterator to the next element
+    SparseMatConstIterator operator ++(int);
+
+    //! moves iterator to the element after the last element
+    void seekEnd();
+
+    const SparseMat* m;
+    size_t hashidx;
+    uchar* ptr;
+};
+
+
+
+////////////////////////////////// SparseMatIterator /////////////////////////////////
+
+/** @brief  Read-write Sparse Matrix Iterator
+
+ The class is similar to cv::SparseMatConstIterator,
+ but can be used for in-place modification of the matrix elements.
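+
+ A minimal sketch (assuming a CV_32F sparse matrix `sm`) of scaling every stored element:
+ @code
+     for( SparseMatIterator it = sm.begin(), it_end = sm.end(); it != it_end; ++it )
+         it.value<float>() *= 2.f;
+ @endcode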
+*/
+class CV_EXPORTS SparseMatIterator : public SparseMatConstIterator
+{
+public:
+    //! the default constructor
+    SparseMatIterator();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatIterator(SparseMat* _m);
+    //! the full constructor setting the iterator to the specified sparse matrix element
+    SparseMatIterator(SparseMat* _m, const int* idx);
+    //! the copy constructor
+    SparseMatIterator(const SparseMatIterator& it);
+
+    //! the assignment operator
+    SparseMatIterator& operator = (const SparseMatIterator& it);
+    //! returns read-write reference to the current sparse matrix element
+    template<typename _Tp> _Tp& value() const;
+    //! returns pointer to the current sparse matrix node. it.node->idx is the index of the current element (do not modify it!)
+    SparseMat::Node* node() const;
+
+    //! moves iterator to the next element
+    SparseMatIterator& operator ++();
+    //! moves iterator to the next element
+    SparseMatIterator operator ++(int);
+};
+
+
+
+/////////////////////////////// SparseMatConstIterator_ //////////////////////////////
+
+/** @brief  Template Read-Only Sparse Matrix Iterator Class.
+
+ This class, derived from SparseMatConstIterator, introduces a more
+ convenient operator *() for accessing the current element.
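+
+ A minimal sketch (assuming a CV_32F sparse matrix `sm`):
+ @code
+     double s = 0;
+     for( SparseMatConstIterator_<float> it = sm.begin<float>(), it_end = sm.end<float>(); it != it_end; ++it )
+         s += *it;   // operator*() returns const float&
+ @endcode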
+*/
+template<typename _Tp> class SparseMatConstIterator_ : public SparseMatConstIterator
+{
+public:
+
+    typedef std::forward_iterator_tag iterator_category;
+
+    //! the default constructor
+    SparseMatConstIterator_();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatConstIterator_(const SparseMat_<_Tp>* _m);
+    SparseMatConstIterator_(const SparseMat* _m);
+    //! the copy constructor
+    SparseMatConstIterator_(const SparseMatConstIterator_& it);
+
+    //! the assignment operator
+    SparseMatConstIterator_& operator = (const SparseMatConstIterator_& it);
+    //! the element access operator
+    const _Tp& operator *() const;
+
+    //! moves iterator to the next element
+    SparseMatConstIterator_& operator ++();
+    //! moves iterator to the next element
+    SparseMatConstIterator_ operator ++(int);
+};
+
+
+
+///////////////////////////////// SparseMatIterator_ /////////////////////////////////
+
+/** @brief  Template Read-Write Sparse Matrix Iterator Class.
+
+ This class, derived from cv::SparseMatConstIterator_, introduces a more
+ convenient operator *() for accessing the current element.
+*/
+template<typename _Tp> class SparseMatIterator_ : public SparseMatConstIterator_<_Tp>
+{
+public:
+
+    typedef std::forward_iterator_tag iterator_category;
+
+    //! the default constructor
+    SparseMatIterator_();
+    //! the full constructor setting the iterator to the first sparse matrix element
+    SparseMatIterator_(SparseMat_<_Tp>* _m);
+    SparseMatIterator_(SparseMat* _m);
+    //! the copy constructor
+    SparseMatIterator_(const SparseMatIterator_& it);
+
+    //! the assignment operator
+    SparseMatIterator_& operator = (const SparseMatIterator_& it);
+    //! returns the reference to the current element
+    _Tp& operator *() const;
+
+    //! moves the iterator to the next element
+    SparseMatIterator_& operator ++();
+    //! moves the iterator to the next element
+    SparseMatIterator_ operator ++(int);
+};
+
+
+
+/////////////////////////////////// NAryMatIterator //////////////////////////////////
+
+/** @brief n-ary multi-dimensional array iterator.
+
+Use the class to implement unary, binary, and, generally, n-ary element-wise operations on
+multi-dimensional arrays. Some of the arguments of an n-ary function may be continuous arrays, some
+may not be. It is possible to use conventional MatIterator 's for each array, but incrementing all of
+the iterators after each small operation may be a big overhead. In this case consider using
+NAryMatIterator to iterate through several matrices simultaneously as long as they have the same
+geometry (dimensionality and all the dimension sizes are the same). On each iteration `it.planes[0]`,
+`it.planes[1]`,... will be the slices of the corresponding matrices.
+
+The example below illustrates how you can compute a normalized and thresholded 3D color histogram:
+@code
+    void computeNormalizedColorHist(const Mat& image, Mat& hist, int N, double minProb)
+    {
+        const int histSize[] = {N, N, N};
+
+        // make sure that the histogram has a proper size and type
+        hist.create(3, histSize, CV_32F);
+
+        // and clear it
+        hist = Scalar(0);
+
+        // the loop below assumes that the image
+        // is an 8-bit, 3-channel image; check it.
+        CV_Assert(image.type() == CV_8UC3);
+        MatConstIterator_<Vec3b> it = image.begin<Vec3b>(),
+                                 it_end = image.end<Vec3b>();
+        for( ; it != it_end; ++it )
+        {
+            const Vec3b& pix = *it;
+            hist.at<float>(pix[0]*N/256, pix[1]*N/256, pix[2]*N/256) += 1.f;
+        }
+
+        minProb *= image.rows*image.cols;
+
+        // initialize iterator (the style is different from STL).
+        // after initialization the iterator will contain
+        // the number of slices or planes the iterator will go through.
+        // it simultaneously increments iterators for several matrices
+        // supplied as a null-terminated list of pointers
+        const Mat* arrays[] = {&hist, 0};
+        Mat planes[1];
+        NAryMatIterator itNAry(arrays, planes, 1);
+        double s = 0;
+        // iterate through the matrix. on each iteration
+        // itNAry.planes[i] (of type Mat) will be set to the current plane
+        // of the i-th n-dim matrix passed to the iterator constructor.
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+        {
+            threshold(itNAry.planes[0], itNAry.planes[0], minProb, 0, THRESH_TOZERO);
+            s += sum(itNAry.planes[0])[0];
+        }
+
+        s = 1./s;
+        itNAry = NAryMatIterator(arrays, planes, 1);
+        for(int p = 0; p < itNAry.nplanes; p++, ++itNAry)
+            itNAry.planes[0] *= s;
+    }
+@endcode
+ */
+class CV_EXPORTS NAryMatIterator
+{
+public:
+    //! the default constructor
+    NAryMatIterator();
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, uchar** ptrs, int narrays=-1);
+    //! the full constructor taking arbitrary number of n-dim matrices
+    NAryMatIterator(const Mat** arrays, Mat* planes, int narrays=-1);
+    //! the separate iterator initialization method
+    void init(const Mat** arrays, Mat* planes, uchar** ptrs, int narrays=-1);
+
+    //! proceeds to the next plane of every iterated matrix
+    NAryMatIterator& operator ++();
+    //! proceeds to the next plane of every iterated matrix (postfix increment operator)
+    NAryMatIterator operator ++(int);
+
+    //! the iterated arrays
+    const Mat** arrays;
+    //! the current planes
+    Mat* planes;
+    //! data pointers
+    uchar** ptrs;
+    //! the number of arrays
+    int narrays;
+    //! the number of hyper-planes that the iterator steps through
+    size_t nplanes;
+    //! the size of each segment (in elements)
+    size_t size;
+protected:
+    int iterdepth;
+    size_t idx;
+};
+
+
+
+///////////////////////////////// Matrix Expressions /////////////////////////////////
+
+class CV_EXPORTS MatOp
+{
+public:
+    MatOp();
+    virtual ~MatOp();
+
+    virtual bool elementWise(const MatExpr& expr) const;
+    virtual void assign(const MatExpr& expr, Mat& m, int type=-1) const = 0;
+    virtual void roi(const MatExpr& expr, const Range& rowRange,
+                     const Range& colRange, MatExpr& res) const;
+    virtual void diag(const MatExpr& expr, int d, MatExpr& res) const;
+    virtual void augAssignAdd(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignSubtract(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignMultiply(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignDivide(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignAnd(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignOr(const MatExpr& expr, Mat& m) const;
+    virtual void augAssignXor(const MatExpr& expr, Mat& m) const;
+
+    virtual void add(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void add(const MatExpr& expr1, const Scalar& s, MatExpr& res) const;
+
+    virtual void subtract(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void subtract(const Scalar& s, const MatExpr& expr, MatExpr& res) const;
+
+    virtual void multiply(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const;
+    virtual void multiply(const MatExpr& expr1, double s, MatExpr& res) const;
+
+    virtual void divide(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res, double scale=1) const;
+    virtual void divide(double s, const MatExpr& expr, MatExpr& res) const;
+
+    virtual void abs(const MatExpr& expr, MatExpr& res) const;
+
+    virtual void transpose(const MatExpr& expr, MatExpr& res) const;
+    virtual void matmul(const MatExpr& expr1, const MatExpr& expr2, MatExpr& res) const;
+    virtual void invert(const MatExpr& expr, int method, MatExpr& res) const;
+
+    virtual Size size(const MatExpr& expr) const;
+    virtual int type(const MatExpr& expr) const;
+};
+
+/** @brief Matrix expression representation
+@anchor MatrixExpressions
+This is a list of implemented matrix operations that can be combined in arbitrarily complex
+expressions (here A, B stand for matrices ( Mat ), s for a scalar ( Scalar ), alpha for a
+real-valued scalar ( double )):
+-   Addition, subtraction, negation: `A+B`, `A-B`, `A+s`, `A-s`, `s+A`, `s-A`, `-A`
+-   Scaling: `A*alpha`
+-   Per-element multiplication and division: `A.mul(B)`, `A/B`, `alpha/A`
+-   Matrix multiplication: `A*B`
+-   Transposition: `A.t()` (means A<sup>T</sup>)
+-   Matrix inversion and pseudo-inversion, solving linear systems and least-squares problems:
+    `A.inv([method]) (~ A<sup>-1</sup>)`,   `A.inv([method])*B (~ X: AX=B)`
+-   Comparison: `A cmpop B`, `A cmpop alpha`, `alpha cmpop A`, where *cmpop* is one of
+  `>`, `>=`, `==`, `!=`, `<=`, `<`. The result of comparison is an 8-bit single channel mask whose
+    elements are set to 255 (if the particular element or pair of elements satisfy the condition) or
+    0.
+-   Bitwise logical operations: `A logicop B`, `A logicop s`, `s logicop A`, `~A`, where *logicop* is one of
+  `&`, `|`, `^`.
+-   Element-wise minimum and maximum: `min(A, B)`, `min(A, alpha)`, `max(A, B)`, `max(A, alpha)`
+-   Element-wise absolute value: `abs(A)`
+-   Cross-product, dot-product: `A.cross(B)`, `A.dot(B)`
+-   Any function of matrix or matrices and scalars that returns a matrix or a scalar, such as norm,
+    mean, sum, countNonZero, trace, determinant, repeat, and others.
+-   Matrix initializers ( Mat::eye(), Mat::zeros(), Mat::ones() ), matrix comma-separated
+    initializers, matrix constructors and operators that extract sub-matrices (see Mat description).
+-   Mat_<destination_type>() constructors to cast the result to the proper type.
+@note Comma-separated initializers and probably some other operations may require additional
+explicit Mat() or Mat_<T>() constructor calls to resolve a possible ambiguity.
+
+Here are examples of matrix expressions:
+@code
+    // compute pseudo-inverse of A, equivalent to A.inv(DECOMP_SVD)
+    SVD svd(A);
+    Mat pinvA = svd.vt.t()*Mat::diag(1./svd.w)*svd.u.t();
+
+    // compute the new vector of parameters in the Levenberg-Marquardt algorithm
+    x -= (A.t()*A + lambda*Mat::eye(A.cols,A.cols,A.type())).inv(DECOMP_CHOLESKY)*(A.t()*err);
+
+    // sharpen image using "unsharp mask" algorithm
+    Mat blurred; double sigma = 1, threshold = 5, amount = 1;
+    GaussianBlur(img, blurred, Size(), sigma, sigma);
+    Mat lowContrastMask = abs(img - blurred) < threshold;
+    Mat sharpened = img*(1+amount) + blurred*(-amount);
+    img.copyTo(sharpened, lowContrastMask);
+@endcode
+*/
+class CV_EXPORTS MatExpr
+{
+public:
+    MatExpr();
+    explicit MatExpr(const Mat& m);
+
+    MatExpr(const MatOp* _op, int _flags, const Mat& _a = Mat(), const Mat& _b = Mat(),
+            const Mat& _c = Mat(), double _alpha = 1, double _beta = 1, const Scalar& _s = Scalar());
+
+    operator Mat() const;
+    template<typename _Tp> operator Mat_<_Tp>() const;
+
+    Size size() const;
+    int type() const;
+
+    MatExpr row(int y) const;
+    MatExpr col(int x) const;
+    MatExpr diag(int d = 0) const;
+    MatExpr operator()( const Range& rowRange, const Range& colRange ) const;
+    MatExpr operator()( const Rect& roi ) const;
+
+    MatExpr t() const;
+    MatExpr inv(int method = DECOMP_LU) const;
+    MatExpr mul(const MatExpr& e, double scale=1) const;
+    MatExpr mul(const Mat& m, double scale=1) const;
+
+    Mat cross(const Mat& m) const;
+    double dot(const Mat& m) const;
+
+    const MatOp* op;
+    int flags;
+
+    Mat a, b, c;
+    double alpha, beta;
+    Scalar s;
+};
+
+//! @} core_basic
+
+//! @relates cv::MatExpr
+//! @{
+CV_EXPORTS MatExpr operator + (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator + (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator + (const Scalar& s, const Mat& a);
+CV_EXPORTS MatExpr operator + (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s);
+CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e);
+CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator - (const Scalar& s, const Mat& a);
+CV_EXPORTS MatExpr operator - (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s);
+CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e);
+CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator - (const Mat& m);
+CV_EXPORTS MatExpr operator - (const MatExpr& e);
+
+CV_EXPORTS MatExpr operator * (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator * (const Mat& a, double s);
+CV_EXPORTS MatExpr operator * (double s, const Mat& a);
+CV_EXPORTS MatExpr operator * (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator * (const MatExpr& e, double s);
+CV_EXPORTS MatExpr operator * (double s, const MatExpr& e);
+CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator / (const Mat& a, double s);
+CV_EXPORTS MatExpr operator / (double s, const Mat& a);
+CV_EXPORTS MatExpr operator / (const MatExpr& e, const Mat& m);
+CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e);
+CV_EXPORTS MatExpr operator / (const MatExpr& e, double s);
+CV_EXPORTS MatExpr operator / (double s, const MatExpr& e);
+CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2);
+
+CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator < (const Mat& a, double s);
+CV_EXPORTS MatExpr operator < (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator <= (const Mat& a, double s);
+CV_EXPORTS MatExpr operator <= (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator == (const Mat& a, double s);
+CV_EXPORTS MatExpr operator == (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator != (const Mat& a, double s);
+CV_EXPORTS MatExpr operator != (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator >= (const Mat& a, double s);
+CV_EXPORTS MatExpr operator >= (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator > (const Mat& a, double s);
+CV_EXPORTS MatExpr operator > (double s, const Mat& a);
+
+CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s);
+CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a);
+
+CV_EXPORTS MatExpr operator ~(const Mat& m);
+
+CV_EXPORTS MatExpr min(const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr min(const Mat& a, double s);
+CV_EXPORTS MatExpr min(double s, const Mat& a);
+
+CV_EXPORTS MatExpr max(const Mat& a, const Mat& b);
+CV_EXPORTS MatExpr max(const Mat& a, double s);
+CV_EXPORTS MatExpr max(double s, const Mat& a);
+
+/** @brief Calculates an absolute value of each matrix element.
+
+abs is a meta-function that is expanded to one of absdiff or convertScaleAbs forms:
+- C = abs(A-B) is equivalent to `absdiff(A, B, C)`
+- C = abs(A) is equivalent to `absdiff(A, Scalar::all(0), C)`
+- C = `Mat_<Vec<uchar,n> >(abs(A*alpha + beta))` is equivalent to `convertScaleAbs(A, C, alpha,
+beta)`
+
+The output matrix has the same size and the same type as the input one except for the last case,
+where C has depth CV_8U.
+@param m matrix.
+@sa @ref MatrixExpressions, absdiff, convertScaleAbs
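+
+A short usage sketch (assuming two 8-bit images `img1` and `img2` of the same size and type):
+@code
+    Mat d = abs(img1 - img2);   // same result as absdiff(img1, img2, d)
+@endcode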
+ */
+CV_EXPORTS MatExpr abs(const Mat& m);
+/** @overload
+@param e matrix expression.
+*/
+CV_EXPORTS MatExpr abs(const MatExpr& e);
+//! @} relates cv::MatExpr
+
+} // cv
+
+#include "opencv2/core/mat.inl.hpp"
+
+#endif // OPENCV_CORE_MAT_HPP

+ 3945 - 0
ThresholdTools/include/opencv2/core/mat.inl.hpp

@@ -0,0 +1,3945 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MATRIX_OPERATIONS_HPP
+#define OPENCV_CORE_MATRIX_OPERATIONS_HPP
+
+#ifndef __cplusplus
+#  error mat.inl.hpp header must be compiled as C++
+#endif
+
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
+namespace cv
+{
+CV__DEBUG_NS_BEGIN
+
+
+//! @cond IGNORED
+
+//////////////////////// Input/Output Arrays ////////////////////////
+
+inline void _InputArray::init(int _flags, const void* _obj)
+{ flags = _flags; obj = (void*)_obj; }
+
+inline void _InputArray::init(int _flags, const void* _obj, Size _sz)
+{ flags = _flags; obj = (void*)_obj; sz = _sz; }
+
+inline void* _InputArray::getObj() const { return obj; }
+inline int _InputArray::getFlags() const { return flags; }
+inline Size _InputArray::getSz() const { return sz; }
+
+inline _InputArray::_InputArray() { init(NONE, 0); }
+inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
+inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
+inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_InputArray::_InputArray(const std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_READ, arr.data(), Size(1, _Nm)); }
+#endif
+
+inline
+_InputArray::_InputArray(const std::vector<bool>& vec)
+{ init(FIXED_TYPE + STD_BOOL_VECTOR + traits::Type<bool>::value + ACCESS_READ, &vec); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
+
+inline
+_InputArray::_InputArray(const std::vector<std::vector<bool> >&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<std::vector<bool> > is not supported!\n"); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_READ, &vec); }
+
+template<typename _Tp, int m, int n> inline
+_InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputArray::_InputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_READ, &m); }
+
+inline _InputArray::_InputArray(const double& val)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }
+
+inline _InputArray::_InputArray(const MatExpr& expr)
+{ init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); }
+
+inline _InputArray::_InputArray(const cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); }
+
+inline _InputArray::_InputArray(const std::vector<cuda::GpuMat>& d_mat)
+{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_READ, &d_mat); }
+
+inline _InputArray::_InputArray(const ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_READ, &buf); }
+
+inline _InputArray::_InputArray(const cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_READ, &cuda_mem); }
+
+inline _InputArray::~_InputArray() {}
+
+inline Mat _InputArray::getMat(int i) const
+{
+    if( kind() == MAT && i < 0 )
+        return *(const Mat*)obj;
+    return getMat_(i);
+}
+
+inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; }
+inline bool _InputArray::isUMat() const  { return kind() == _InputArray::UMAT; }
+inline bool _InputArray::isMatVector() const { return kind() == _InputArray::STD_VECTOR_MAT; }
+inline bool _InputArray::isUMatVector() const  { return kind() == _InputArray::STD_VECTOR_UMAT; }
+inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
+inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
+                                                   kind() == _InputArray::STD_BOOL_VECTOR ||
+                                                   kind() == _InputArray::STD_ARRAY; }
+inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
+inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
+inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
+inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_OutputArray::_OutputArray(std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+#endif
+
+inline
+_OutputArray::_OutputArray(std::vector<bool>&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an output array\n"); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+inline
+_OutputArray::_OutputArray(std::vector<std::vector<bool> >&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<std::vector<bool> > cannot be an output array\n"); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); }
+
+template<typename _Tp, int m, int n> inline
+_OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_OutputArray::_OutputArray(const std::array<Mat, _Nm>& arr)
+{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
+#endif
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &vec); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_WRITE, &m); }
+
+template<typename _Tp, int m, int n> inline
+_OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_OutputArray::_OutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, vec, Size(n, 1)); }
+
+inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(std::vector<cuda::GpuMat>& d_mat)
+{ init(STD_VECTOR_CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
+
+inline _OutputArray::_OutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const UMat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<UMat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+
+inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(const cuda::HostMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_WRITE, &cuda_mem); }
+
+///////////////////////////////////////////////////////////////////////////////////////////
+
+inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
+inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
+inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
+inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(std::array<Mat, _Nm>& arr)
+{ init(STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
+#endif
+
+inline _InputOutputArray::_InputOutputArray(std::vector<bool>&)
+{ CV_Error(Error::StsUnsupportedFormat, "std::vector<bool> cannot be an input/output array\n"); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
+
+template<std::size_t _Nm> inline
+_InputOutputArray::_InputOutputArray(const std::array<Mat, _Nm>& arr)
+{ init(FIXED_SIZE + STD_ARRAY_MAT + ACCESS_RW, arr.data(), Size(1, _Nm)); }
+#endif
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + traits::Type<_Tp>::value + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + traits::Type<_Tp>::value + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, vec, Size(n, 1)); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat)
+{ init(CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::HostMem& cuda_mem)
+{ init(CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
+
+inline _InputOutputArray::_InputOutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const UMat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<UMat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<cuda::GpuMat>& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+
+template<> inline _InputOutputArray::_InputOutputArray(std::vector<cuda::GpuMat>& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_CUDA_GPU_MAT + ACCESS_RW, &d_mat);}
+
+inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::HostMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_HOST_MEM + ACCESS_RW, &cuda_mem); }
+
+CV__DEBUG_NS_END
+
+//////////////////////////////////////////// Mat //////////////////////////////////////////
+
+inline
+Mat::Mat()
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_rows, _cols, _type);
+}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_rows, _cols, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(Size _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create( _sz.height, _sz.width, _type );
+}
+
+inline
+Mat::Mat(Size _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_sz.height, _sz.width, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(int _dims, const int* _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_dims, _sz, _type);
+}
+
+inline
+Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_dims, _sz, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_sz, _type);
+}
+
+inline
+Mat::Mat(const std::vector<int>& _sz, int _type, const Scalar& _s)
+    : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+      datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    create(_sz, _type);
+    *this = _s;
+}
+
+inline
+Mat::Mat(const Mat& m)
+    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
+      datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
+      u(m.u), size(&rows), step(0)
+{
+    if( u )
+        CV_XADD(&u->refcount, 1);
+    if( m.dims <= 2 )
+    {
+        step[0] = m.step[0]; step[1] = m.step[1];
+    }
+    else
+    {
+        dims = 0;
+        copySize(m);
+    }
+}
+
+inline
+Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
+    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
+{
+    CV_Assert(total() == 0 || data != NULL);
+
+    size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
+    size_t minstep = cols * esz;
+    if( _step == AUTO_STEP )
+    {
+        _step = minstep;
+    }
+    else
+    {
+        CV_DbgAssert( _step >= minstep );
+        if (_step % esz1 != 0)
+        {
+            CV_Error(Error::BadStep, "Step must be a multiple of esz1");
+        }
+    }
+    step[0] = _step;
+    step[1] = esz;
+    datalimit = datastart + _step * rows;
+    dataend = datalimit - _step + minstep;
+    updateContinuityFlag();
+}
+
+inline
+Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
+    : flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
+      data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
+{
+    CV_Assert(total() == 0 || data != NULL);
+
+    size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
+    size_t minstep = cols*esz;
+    if( _step == AUTO_STEP )
+    {
+        _step = minstep;
+    }
+    else
+    {
+        CV_DbgAssert( _step >= minstep );
+
+        if (_step % esz1 != 0)
+        {
+            CV_Error(Error::BadStep, "Step must be a multiple of esz1");
+        }
+    }
+    step[0] = _step;
+    step[1] = esz;
+    datalimit = datastart + _step*rows;
+    dataend = datalimit - _step + minstep;
+    updateContinuityFlag();
+}
+
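+// The two constructors above wrap caller-owned memory without copying it and
+// without taking ownership, so the buffer must outlive the Mat. A minimal
+// sketch ('buf' is a hypothetical caller-side buffer):
+//
+//     float buf[6] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f };
+//     cv::Mat view(2, 3, CV_32F, buf);                       // AUTO_STEP: packed rows
+//     cv::Mat strided(2, 2, CV_32F, buf, 3 * sizeof(float)); // explicit row step in bytes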
+template<typename _Tp> inline
+Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+      cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if(vec.empty())
+        return;
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&vec[0];
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
+}
+
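+// The C++11 constructors below always copy the given values: a flat list
+// yields an Nx1 column vector, and a (sizes, values) pair yields an
+// n-dimensional array filled in row-major order. Illustrative sketch:
+//
+//     cv::Mat m({2, 3}, {1, 2, 3, 4, 5, 6});   // 2x3 CV_32S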
+#ifdef CV_CXX11
+template<typename _Tp, typename> inline
+Mat::Mat(const std::initializer_list<_Tp> list)
+    : Mat()
+{
+    CV_Assert(list.size() != 0);
+    Mat((int)list.size(), 1, traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
+}
+
+template<typename _Tp> inline
+Mat::Mat(const std::initializer_list<int> sizes, const std::initializer_list<_Tp> list)
+    : Mat()
+{
+    size_t size_total = 1;
+    for(auto s : sizes)
+        size_total *= s;
+    CV_Assert(list.size() != 0);
+    CV_Assert(size_total == list.size());
+    Mat((int)sizes.size(), (int*)sizes.begin(), traits::Type<_Tp>::value, (uchar*)list.begin()).copyTo(*this);
+}
+#endif
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+Mat::Mat(const std::array<_Tp, _Nm>& arr, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)arr.size()),
+      cols(1), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if(arr.empty())
+        return;
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)arr.data();
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat((int)arr.size(), 1, traits::Type<_Tp>::value, (uchar*)arr.data()).copyTo(*this);
+}
+#endif
+
+template<typename _Tp, int n> inline
+Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)vec.val;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat(n, 1, traits::Type<_Tp>::value, (void*)vec.val).copyTo(*this);
+}
+
+
+template<typename _Tp, int m, int n> inline
+Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if( !copyData )
+    {
+        step[0] = cols * sizeof(_Tp);
+        step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)M.val;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+        Mat(m, n, traits::Type<_Tp>::value, (uchar*)M.val).copyTo(*this);
+}
+
+template<typename _Tp> inline
+Mat::Mat(const Point_<_Tp>& pt, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&pt.x;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+    {
+        create(2, 1, traits::Type<_Tp>::value);
+        ((_Tp*)data)[0] = pt.x;
+        ((_Tp*)data)[1] = pt.y;
+    }
+}
+
+template<typename _Tp> inline
+Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0)
+{
+    if( !copyData )
+    {
+        step[0] = step[1] = sizeof(_Tp);
+        datastart = data = (uchar*)&pt.x;
+        datalimit = dataend = datastart + rows * step[0];
+    }
+    else
+    {
+        create(3, 1, traits::Type<_Tp>::value);
+        ((_Tp*)data)[0] = pt.x;
+        ((_Tp*)data)[1] = pt.y;
+        ((_Tp*)data)[2] = pt.z;
+    }
+}
+
+template<typename _Tp> inline
+Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
+    : flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
+      datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
+{
+    *this = commaInitializer.operator Mat_<_Tp>();
+}
+
+inline
+Mat::~Mat()
+{
+    release();
+    if( step.p != step.buf )
+        fastFree(step.p);
+}
+
+inline
+Mat& Mat::operator = (const Mat& m)
+{
+    if( this != &m )
+    {
+        if( m.u )
+            CV_XADD(&m.u->refcount, 1);
+        release();
+        flags = m.flags;
+        if( dims <= 2 && m.dims <= 2 )
+        {
+            dims = m.dims;
+            rows = m.rows;
+            cols = m.cols;
+            step[0] = m.step[0];
+            step[1] = m.step[1];
+        }
+        else
+            copySize(m);
+        data = m.data;
+        datastart = m.datastart;
+        dataend = m.dataend;
+        datalimit = m.datalimit;
+        allocator = m.allocator;
+        u = m.u;
+    }
+    return *this;
+}
+
+inline
+Mat Mat::row(int y) const
+{
+    return Mat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+Mat Mat::col(int x) const
+{
+    return Mat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+Mat Mat::rowRange(int startrow, int endrow) const
+{
+    return Mat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+Mat Mat::rowRange(const Range& r) const
+{
+    return Mat(*this, r, Range::all());
+}
+
+inline
+Mat Mat::colRange(int startcol, int endcol) const
+{
+    return Mat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+Mat Mat::colRange(const Range& r) const
+{
+    return Mat(*this, Range::all(), r);
+}
+
+inline
+Mat Mat::clone() const
+{
+    Mat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void Mat::assignTo( Mat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void Mat::create(int _rows, int _cols, int _type)
+{
+    _type &= TYPE_MASK;
+    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data )
+        return;
+    int sz[] = {_rows, _cols};
+    create(2, sz, _type);
+}
+
+inline
+void Mat::create(Size _sz, int _type)
+{
+    create(_sz.height, _sz.width, _type);
+}
+
+inline
+void Mat::addref()
+{
+    if( u )
+        CV_XADD(&u->refcount, 1);
+}
+
+inline
+void Mat::release()
+{
+    if( u && CV_XADD(&u->refcount, -1) == 1 )
+        deallocate();
+    u = NULL;
+    datastart = dataend = datalimit = data = 0;
+    for(int i = 0; i < dims; i++)
+        size.p[i] = 0;
+#ifdef _DEBUG
+    flags = MAGIC_VAL;
+    dims = rows = cols = 0;
+    if(step.p != step.buf)
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+#endif
+}
+
+inline
+Mat Mat::operator()( Range _rowRange, Range _colRange ) const
+{
+    return Mat(*this, _rowRange, _colRange);
+}
+
+inline
+Mat Mat::operator()( const Rect& roi ) const
+{
+    return Mat(*this, roi);
+}
+
+inline
+Mat Mat::operator()(const Range* ranges) const
+{
+    return Mat(*this, ranges);
+}
+
+inline
+Mat Mat::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat(*this, ranges);
+}
+
+inline
+bool Mat::isContinuous() const
+{
+    return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool Mat::isSubmatrix() const
+{
+    return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t Mat::elemSize() const
+{
+    return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t Mat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int Mat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int Mat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int Mat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t Mat::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+inline
+bool Mat::empty() const
+{
+    return data == 0 || total() == 0 || dims == 0;
+}
+
+inline
+size_t Mat::total() const
+{
+    if( dims <= 2 )
+        return (size_t)rows * cols;
+    size_t p = 1;
+    for( int i = 0; i < dims; i++ )
+        p *= size[i];
+    return p;
+}
+
+inline
+size_t Mat::total(int startDim, int endDim) const
+{
+    CV_Assert( 0 <= startDim && startDim <= endDim );
+    size_t p = 1;
+    int endDim_ = endDim <= dims ? endDim : dims;
+    for( int i = startDim; i < endDim_; i++ )
+        p *= size[i];
+    return p;
+}
+
+inline
+uchar* Mat::ptr(int y)
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return data + step.p[0] * y;
+}
+
+inline
+const uchar* Mat::ptr(int y) const
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return data + step.p[0] * y;
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int y)
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return (_Tp*)(data + step.p[0] * y);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int y) const
+{
+    CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) );
+    return (const _Tp*)(data + step.p[0] * y);
+}
+
+inline
+uchar* Mat::ptr(int i0, int i1)
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return data + i0 * step.p[0] + i1 * step.p[1];
+}
+
+inline
+const uchar* Mat::ptr(int i0, int i1) const
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return data + i0 * step.p[0] + i1 * step.p[1];
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int i0, int i1)
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int i0, int i1) const
+{
+    CV_DbgAssert(dims >= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1]);
+}
+
+inline
+uchar* Mat::ptr(int i0, int i1, int i2)
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2];
+}
+
+inline
+const uchar* Mat::ptr(int i0, int i1, int i2) const
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2];
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(int i0, int i1, int i2)
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return (_Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(int i0, int i1, int i2) const
+{
+    CV_DbgAssert(dims >= 3);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert((unsigned)i2 < (unsigned)size.p[2]);
+    return (const _Tp*)(data + i0 * step.p[0] + i1 * step.p[1] + i2 * step.p[2]);
+}
+
+inline
+uchar* Mat::ptr(const int* idx)
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return p;
+}
+
+inline
+const uchar* Mat::ptr(const int* idx) const
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return p;
+}
+
+template<typename _Tp> inline
+_Tp* Mat::ptr(const int* idx)
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return (_Tp*)p;
+}
+
+template<typename _Tp> inline
+const _Tp* Mat::ptr(const int* idx) const
+{
+    int i, d = dims;
+    uchar* p = data;
+    CV_DbgAssert( d >= 1 && p );
+    for( i = 0; i < d; i++ )
+    {
+        CV_DbgAssert( (unsigned)idx[i] < (unsigned)size.p[i] );
+        p += idx[i] * step.p[i];
+    }
+    return (const _Tp*)p;
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0, int i1)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((_Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0, int i1) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(i1 * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((const _Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(Point pt)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(Point pt) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)(pt.x * DataType<_Tp>::channels) < (unsigned)(size.p[1] * channels()));
+    CV_DbgAssert(CV_ELEM_SIZE1(traits::Depth<_Tp>::value) == elemSize1());
+    return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
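+// Single-index at() treats the matrix as a flat element sequence: continuous
+// (or single-row) data is indexed directly, a single-column matrix is walked
+// row by row via step, and the general case decomposes i0 into (row, col).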
+template<typename _Tp> inline
+_Tp& Mat::at(int i0)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((_Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(_Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((_Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)(size.p[0] * size.p[1]));
+    CV_DbgAssert(elemSize() == sizeof(_Tp));
+    if( isContinuous() || size.p[0] == 1 )
+        return ((const _Tp*)data)[i0];
+    if( size.p[1] == 1 )
+        return *(const _Tp*)(data + step.p[0] * i0);
+    int i = i0 / cols, j = i0 - i * cols;
+    return ((const _Tp*)(data + step.p[0] * i))[j];
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(int i0, int i1, int i2)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(int i0, int i1, int i2) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+_Tp& Mat::at(const int* idx)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(idx);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat::at(const int* idx) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(idx);
+}
+
+template<typename _Tp, int n> inline
+_Tp& Mat::at(const Vec<int, n>& idx)
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(_Tp*)ptr(idx.val);
+}
+
+template<typename _Tp, int n> inline
+const _Tp& Mat::at(const Vec<int, n>& idx) const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return *(const _Tp*)ptr(idx.val);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::begin() const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat::end() const
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
+    it += total();
+    return it;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat::begin()
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    return MatIterator_<_Tp>((Mat_<_Tp>*)this);
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat::end()
+{
+    CV_DbgAssert( elemSize() == sizeof(_Tp) );
+    MatIterator_<_Tp> it((Mat_<_Tp>*)this);
+    it += total();
+    return it;
+}
+
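+// forEach applies 'operation' to every element and may execute it in
+// parallel, so the functor must be thread-safe. It is called as
+// operation(value, position), where 'position' points at the element's
+// indices. Illustrative sketch (inverting an 8-bit single-channel image):
+//
+//     img.forEach<uchar>([](uchar& px, const int* /*pos*/) { px = (uchar)(255 - px); });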
+template<typename _Tp, typename Functor> inline
+void Mat::forEach(const Functor& operation) {
+    this->forEach_impl<_Tp>(operation);
+}
+
+template<typename _Tp, typename Functor> inline
+void Mat::forEach(const Functor& operation) const {
+    // call as not const
+    (const_cast<Mat*>(this))->forEach<_Tp>(operation);
+}
+
+template<typename _Tp> inline
+Mat::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> v;
+    copyTo(v);
+    return v;
+}
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp, std::size_t _Nm> inline
+Mat::operator std::array<_Tp, _Nm>() const
+{
+    std::array<_Tp, _Nm> v;
+    copyTo(v);
+    return v;
+}
+#endif
+
+template<typename _Tp, int n> inline
+Mat::operator Vec<_Tp, n>() const
+{
+    CV_Assert( data && dims <= 2 && (rows == 1 || cols == 1) &&
+               rows + cols - 1 == n && channels() == 1 );
+
+    if( isContinuous() && type() == traits::Type<_Tp>::value )
+        return Vec<_Tp, n>((_Tp*)data);
+    Vec<_Tp, n> v;
+    Mat tmp(rows, cols, traits::Type<_Tp>::value, v.val);
+    convertTo(tmp, tmp.type());
+    return v;
+}
+
+template<typename _Tp, int m, int n> inline
+Mat::operator Matx<_Tp, m, n>() const
+{
+    CV_Assert( data && dims <= 2 && rows == m && cols == n && channels() == 1 );
+
+    if( isContinuous() && type() == traits::Type<_Tp>::value )
+        return Matx<_Tp, m, n>((_Tp*)data);
+    Matx<_Tp, m, n> mtx;
+    Mat tmp(rows, cols, traits::Type<_Tp>::value, mtx.val);
+    convertTo(tmp, tmp.type());
+    return mtx;
+}
+
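+// push_back appends one element to the bottom of a single-column matrix. If
+// the matrix is continuous, not a submatrix, and the buffer still has
+// headroom (dataend + one row <= datalimit), the value is written in place
+// and the row count is bumped; otherwise push_back_() reallocates. Sketch:
+//
+//     cv::Mat m;            // empty
+//     m.push_back(1.f);     // 1x1 CV_32F
+//     m.push_back(2.f);     // 2x1 CV_32F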
+template<typename _Tp> inline
+void Mat::push_back(const _Tp& elem)
+{
+    if( !data )
+    {
+        *this = Mat(1, 1, traits::Type<_Tp>::value, (void*)&elem).clone();
+        return;
+    }
+    CV_Assert(traits::Type<_Tp>::value == type() && cols == 1
+              /* && dims == 2 (cols == 1 implies dims == 2) */);
+    const uchar* tmp = dataend + step[0];
+    if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
+    {
+        *(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem;
+        dataend = tmp;
+    }
+    else
+        push_back_(&elem);
+}
+
+template<typename _Tp> inline
+void Mat::push_back(const Mat_<_Tp>& m)
+{
+    push_back((const Mat&)m);
+}
+
+template<> inline
+void Mat::push_back(const MatExpr& expr)
+{
+    push_back(static_cast<Mat>(expr));
+}
+
+
+template<typename _Tp> inline
+void Mat::push_back(const std::vector<_Tp>& v)
+{
+    push_back(Mat(v));
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
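+// The move operations below steal the data pointers and, for dims > 2, the
+// heap-allocated step/size arrays; the moved-from Mat is reset to an empty
+// state, with its step pointing back at the inline buffer and its size
+// pointing back at its own 'rows' field.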
+inline
+Mat::Mat(Mat&& m)
+    : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
+      datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
+      u(m.u), size(&rows)
+{
+    if (m.dims <= 2)  // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL;
+    m.allocator = NULL;
+    m.u = NULL;
+}
+
+inline
+Mat& Mat::operator = (Mat&& m)
+{
+    if (this == &m)
+      return *this;
+
+    release();
+    flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols; data = m.data;
+    datastart = m.datastart; dataend = m.dataend; datalimit = m.datalimit; allocator = m.allocator;
+    u = m.u;
+    if (step.p != step.buf) // release self step/size
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+    if (m.dims <= 2) // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.data = NULL; m.datastart = NULL; m.dataend = NULL; m.datalimit = NULL;
+    m.allocator = NULL;
+    m.u = NULL;
+    return *this;
+}
+
+#endif
+
+
+///////////////////////////// MatSize ////////////////////////////
+
+inline
+MatSize::MatSize(int* _p)
+    : p(_p) {}
+
+inline
+int MatSize::dims() const
+{
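+    // the dimension count is stored in the int immediately preceding the
+    // size array: for dims <= 2, p == &Mat::rows and p[-1] aliases Mat::dims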
+    return (p - 1)[0];
+}
+
+inline
+Size MatSize::operator()() const
+{
+    CV_DbgAssert(dims() <= 2);
+    return Size(p[1], p[0]);
+}
+
+inline
+const int& MatSize::operator[](int i) const
+{
+    CV_DbgAssert(i < dims());
+#ifdef __OPENCV_BUILD
+    CV_DbgAssert(i >= 0);
+#endif
+    return p[i];
+}
+
+inline
+int& MatSize::operator[](int i)
+{
+    CV_DbgAssert(i < dims());
+#ifdef __OPENCV_BUILD
+    CV_DbgAssert(i >= 0);
+#endif
+    return p[i];
+}
+
+inline
+MatSize::operator const int*() const
+{
+    return p;
+}
+
+inline
+bool MatSize::operator == (const MatSize& sz) const
+{
+    int d = dims();
+    int dsz = sz.dims();
+    if( d != dsz )
+        return false;
+    if( d == 2 )
+        return p[0] == sz.p[0] && p[1] == sz.p[1];
+
+    for( int i = 0; i < d; i++ )
+        if( p[i] != sz.p[i] )
+            return false;
+    return true;
+}
+
+inline
+bool MatSize::operator != (const MatSize& sz) const
+{
+    return !(*this == sz);
+}
+
+
+
+///////////////////////////// MatStep ////////////////////////////
+
+inline
+MatStep::MatStep()
+{
+    p = buf; p[0] = p[1] = 0;
+}
+
+inline
+MatStep::MatStep(size_t s)
+{
+    p = buf; p[0] = s; p[1] = 0;
+}
+
+inline
+const size_t& MatStep::operator[](int i) const
+{
+    return p[i];
+}
+
+inline
+size_t& MatStep::operator[](int i)
+{
+    return p[i];
+}
+
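+// The size_t conversion and assignment below are only meaningful while the
+// inline two-element buffer is in use (dims <= 2); for higher-dimensional
+// matrices step.p points at a heap array and the debug assertion fires.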
+inline MatStep::operator size_t() const
+{
+    CV_DbgAssert( p == buf );
+    return buf[0];
+}
+
+inline MatStep& MatStep::operator = (size_t s)
+{
+    CV_DbgAssert( p == buf );
+    buf[0] = s;
+    return *this;
+}
+
+
+
+////////////////////////////// Mat_<_Tp> ////////////////////////////
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_()
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
+{
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, const _Tp& value)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Size _sz, const _Tp& value)
+    : Mat(_sz.height, _sz.width, traits::Type<_Tp>::value)
+{
+    *this = value;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, const _Tp& _s)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, Scalar(_s))
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _dims, const int* _sz, _Tp* _data, const size_t* _steps)
+    : Mat(_dims, _sz, traits::Type<_Tp>::value, _data, _steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const Range* ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_<_Tp>& m, const std::vector<Range>& ranges)
+    : Mat(m, ranges)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat& m)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
+    *this = m;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m)
+    : Mat(m)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(int _rows, int _cols, _Tp* _data, size_t steps)
+    : Mat(_rows, _cols, traits::Type<_Tp>::value, _data, steps)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Range& _rowRange, const Range& _colRange)
+    : Mat(m, _rowRange, _colRange)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Mat_& m, const Rect& roi)
+    : Mat(m, roi)
+{}
+
+template<typename _Tp> template<int n> inline
+Mat_<_Tp>::Mat_(const Vec<typename DataType<_Tp>::channel_type, n>& vec, bool copyData)
+    : Mat(n / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&vec)
+{
+    CV_Assert(n%DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> template<int m, int n> inline
+Mat_<_Tp>::Mat_(const Matx<typename DataType<_Tp>::channel_type, m, n>& M, bool copyData)
+    : Mat(m, n / DataType<_Tp>::channels, traits::Type<_Tp>::value, (void*)&M)
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(2 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
+{
+    CV_Assert(2 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const Point3_<typename DataType<_Tp>::channel_type>& pt, bool copyData)
+    : Mat(3 / DataType<_Tp>::channels, 1, traits::Type<_Tp>::value, (void*)&pt)
+{
+    CV_Assert(3 % DataType<_Tp>::channels == 0);
+    if( copyData )
+        *this = clone();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const MatCommaInitializer_<_Tp>& commaInitializer)
+    : Mat(commaInitializer)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const std::vector<_Tp>& vec, bool copyData)
+    : Mat(vec, copyData)
+{}
+
+#ifdef CV_CXX11
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(std::initializer_list<_Tp> list)
+    : Mat(list)
+{}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const std::initializer_list<int> sizes, std::initializer_list<_Tp> list)
+    : Mat(sizes, list)
+{}
+#endif
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp> template<std::size_t _Nm> inline
+Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData)
+    : Mat(arr, copyData)
+{}
+#endif
+
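+// Assigning a plain Mat to a Mat_<_Tp> picks the cheapest safe conversion:
+// an identical type shares the data, an identical depth reinterprets the
+// channel layout through reshape(), and anything else falls back to a full
+// convertTo().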
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
+{
+    if( traits::Type<_Tp>::value == m.type() )
+    {
+        Mat::operator = (m);
+        return *this;
+    }
+    if( traits::Depth<_Tp>::value == m.depth() )
+    {
+        return (*this = m.reshape(DataType<_Tp>::channels, m.dims, 0));
+    }
+    CV_Assert(DataType<_Tp>::channels == m.channels() || m.empty());
+    m.convertTo(*this, type());
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat_& m)
+{
+    Mat::operator=(m);
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const _Tp& s)
+{
+    typedef typename DataType<_Tp>::vec_type VT;
+    Mat::operator=(Scalar((const VT&)s));
+    return *this;
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(int _rows, int _cols)
+{
+    Mat::create(_rows, _cols, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(Size _sz)
+{
+    Mat::create(_sz, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::create(int _dims, const int* _sz)
+{
+    Mat::create(_dims, _sz, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+void Mat_<_Tp>::release()
+{
+    Mat::release();
+#ifdef _DEBUG
+    flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
+#endif
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const
+{
+    return Mat_<_Tp>(Mat::cross(m));
+}
+
+template<typename _Tp> template<typename T2> inline
+Mat_<_Tp>::operator Mat_<T2>() const
+{
+    return Mat_<T2>(*this);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::row(int y) const
+{
+    return Mat_(*this, Range(y, y+1), Range::all());
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::col(int x) const
+{
+    return Mat_(*this, Range::all(), Range(x, x+1));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::diag(int d) const
+{
+    return Mat_(Mat::diag(d));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::clone() const
+{
+    return Mat_(Mat::clone());
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::elemSize() const
+{
+    CV_DbgAssert( Mat::elemSize() == sizeof(_Tp) );
+    return sizeof(_Tp);
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::elemSize1() const
+{
+    CV_DbgAssert( Mat::elemSize1() == sizeof(_Tp) / DataType<_Tp>::channels );
+    return sizeof(_Tp) / DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::type() const
+{
+    CV_DbgAssert( Mat::type() == traits::Type<_Tp>::value );
+    return traits::Type<_Tp>::value;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::depth() const
+{
+    CV_DbgAssert( Mat::depth() == traits::Depth<_Tp>::value );
+    return traits::Depth<_Tp>::value;
+}
+
+template<typename _Tp> inline
+int Mat_<_Tp>::channels() const
+{
+    CV_DbgAssert( Mat::channels() == DataType<_Tp>::channels );
+    return DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::stepT(int i) const
+{
+    return step.p[i] / elemSize();
+}
+
+template<typename _Tp> inline
+size_t Mat_<_Tp>::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::adjustROI( int dtop, int dbottom, int dleft, int dright )
+{
+    return (Mat_<_Tp>&)(Mat::adjustROI(dtop, dbottom, dleft, dright));
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Range& _rowRange, const Range& _colRange ) const
+{
+    return Mat_<_Tp>(*this, _rowRange, _colRange);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Rect& roi ) const
+{
+    return Mat_<_Tp>(*this, roi);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()( const Range* ranges ) const
+{
+    return Mat_<_Tp>(*this, ranges);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp> Mat_<_Tp>::operator()(const std::vector<Range>& ranges) const
+{
+    return Mat_<_Tp>(*this, ranges);
+}
+
+template<typename _Tp> inline
+_Tp* Mat_<_Tp>::operator [](int y)
+{
+    CV_DbgAssert( 0 <= y && y < size.p[0] );
+    return (_Tp*)(data + y*step.p[0]);
+}
+
+template<typename _Tp> inline
+const _Tp* Mat_<_Tp>::operator [](int y) const
+{
+    CV_DbgAssert( 0 <= y && y < size.p[0] );
+    return (const _Tp*)(data + y*step.p[0]);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0, int i1)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
+    return ((_Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0, int i1) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)i0 < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)i1 < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
+    return ((const _Tp*)(data + step.p[0] * i0))[i1];
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(Point pt)
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
+    return ((_Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(Point pt) const
+{
+    CV_DbgAssert(dims <= 2);
+    CV_DbgAssert(data);
+    CV_DbgAssert((unsigned)pt.y < (unsigned)size.p[0]);
+    CV_DbgAssert((unsigned)pt.x < (unsigned)size.p[1]);
+    CV_DbgAssert(type() == traits::Type<_Tp>::value);
+    return ((const _Tp*)(data + step.p[0] * pt.y))[pt.x];
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(const int* idx)
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(const int* idx) const
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> template<int n> inline
+_Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx)
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> template<int n> inline
+const _Tp& Mat_<_Tp>::operator ()(const Vec<int, n>& idx) const
+{
+    return Mat::at<_Tp>(idx);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0)
+{
+    return this->at<_Tp>(i0);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0) const
+{
+    return this->at<_Tp>(i0);
+}
+
+template<typename _Tp> inline
+_Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2)
+{
+    return this->at<_Tp>(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+const _Tp& Mat_<_Tp>::operator ()(int i0, int i1, int i2) const
+{
+    return this->at<_Tp>(i0, i1, i2);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::operator std::vector<_Tp>() const
+{
+    std::vector<_Tp> v;
+    copyTo(v);
+    return v;
+}
+
+#ifdef CV_CXX_STD_ARRAY
+template<typename _Tp> template<std::size_t _Nm> inline
+Mat_<_Tp>::operator std::array<_Tp, _Nm>() const
+{
+    std::array<_Tp, _Nm> a;
+    copyTo(a);
+    return a;
+}
+#endif
+
+template<typename _Tp> template<int n> inline
+Mat_<_Tp>::operator Vec<typename DataType<_Tp>::channel_type, n>() const
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+
+#if defined _MSC_VER
+    const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround)
+    return pMat->operator Vec<typename DataType<_Tp>::channel_type, n>();
+#else
+    return this->Mat::operator Vec<typename DataType<_Tp>::channel_type, n>();
+#endif
+}
+
+template<typename _Tp> template<int m, int n> inline
+Mat_<_Tp>::operator Matx<typename DataType<_Tp>::channel_type, m, n>() const
+{
+    CV_Assert(n % DataType<_Tp>::channels == 0);
+
+#if defined _MSC_VER
+    const Mat* pMat = (const Mat*)this; // workaround for MSVS <= 2012 compiler bugs (but GCC 4.6 dislikes this workaround)
+    Matx<typename DataType<_Tp>::channel_type, m, n> res = pMat->operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+    return res;
+#else
+    Matx<typename DataType<_Tp>::channel_type, m, n> res = this->Mat::operator Matx<typename DataType<_Tp>::channel_type, m, n>();
+    return res;
+#endif
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat_<_Tp>::begin() const
+{
+    return Mat::begin<_Tp>();
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> Mat_<_Tp>::end() const
+{
+    return Mat::end<_Tp>();
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat_<_Tp>::begin()
+{
+    return Mat::begin<_Tp>();
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> Mat_<_Tp>::end()
+{
+    return Mat::end<_Tp>();
+}
+
+template<typename _Tp> template<typename Functor> inline
+void Mat_<_Tp>::forEach(const Functor& operation) {
+    Mat::forEach<_Tp, Functor>(operation);
+}
+
+template<typename _Tp> template<typename Functor> inline
+void Mat_<_Tp>::forEach(const Functor& operation) const {
+    Mat::forEach<_Tp, Functor>(operation);
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Mat_&& m)
+    : Mat(std::move(m))
+{
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (Mat_&& m)
+{
+    Mat::operator = (std::move(m));
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(Mat&& m)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
+    *this = m;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m)
+{
+    if( traits::Type<_Tp>::value == m.type() )
+    {
+        Mat::operator = ((Mat&&)m);
+        return *this;
+    }
+    if( traits::Depth<_Tp>::value == m.depth() )
+    {
+        Mat::operator = ((Mat&&)m.reshape(DataType<_Tp>::channels, m.dims, 0));
+        return *this;
+    }
+    CV_DbgAssert(DataType<_Tp>::channels == m.channels());
+    m.convertTo(*this, type());
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(MatExpr&& e)
+    : Mat()
+{
+    flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
+    *this = Mat(e);
+}
+
+#endif
+
+///////////////////////////// SparseMat /////////////////////////////
+
+inline
+SparseMat::SparseMat()
+    : flags(MAGIC_VAL), hdr(0)
+{}
+
+inline
+SparseMat::SparseMat(int _dims, const int* _sizes, int _type)
+    : flags(MAGIC_VAL), hdr(0)
+{
+    create(_dims, _sizes, _type);
+}
+
+inline
+SparseMat::SparseMat(const SparseMat& m)
+    : flags(m.flags), hdr(m.hdr)
+{
+    addref();
+}
+
+inline
+SparseMat::~SparseMat()
+{
+    release();
+}
+
+inline
+SparseMat& SparseMat::operator = (const SparseMat& m)
+{
+    if( this != &m )
+    {
+        if( m.hdr )
+            CV_XADD(&m.hdr->refcount, 1);
+        release();
+        flags = m.flags;
+        hdr = m.hdr;
+    }
+    return *this;
+}
+
+inline
+SparseMat& SparseMat::operator = (const Mat& m)
+{
+    return (*this = SparseMat(m));
+}
+
+inline
+SparseMat SparseMat::clone() const
+{
+    SparseMat temp;
+    this->copyTo(temp);
+    return temp;
+}
+
+inline
+void SparseMat::assignTo( SparseMat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void SparseMat::addref()
+{
+    if( hdr )
+        CV_XADD(&hdr->refcount, 1);
+}
+
+inline
+void SparseMat::release()
+{
+    if( hdr && CV_XADD(&hdr->refcount, -1) == 1 )
+        delete hdr;
+    hdr = 0;
+}
+
+inline
+size_t SparseMat::elemSize() const
+{
+    return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t SparseMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int SparseMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int SparseMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int SparseMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+const int* SparseMat::size() const
+{
+    return hdr ? hdr->size : 0;
+}
+
+inline
+int SparseMat::size(int i) const
+{
+    if( hdr )
+    {
+        CV_DbgAssert((unsigned)i < (unsigned)hdr->dims);
+        return hdr->size[i];
+    }
+    return 0;
+}
+
+inline
+int SparseMat::dims() const
+{
+    return hdr ? hdr->dims : 0;
+}
+
+inline
+size_t SparseMat::nzcount() const
+{
+    return hdr ? hdr->nodeCount : 0;
+}
+
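+// Indices are folded with a simple multiplicative hash, h = h*HASH_SCALE + i.
+// The value can be computed once and passed through the optional 'hashval'
+// argument of ref()/value()/find() to avoid rehashing on repeated access.
+// Sketch ('sm' being a hypothetical CV_32F SparseMat):
+//
+//     size_t hv = sm.hash(i0, i1);
+//     float v = sm.value<float>(i0, i1, &hv);   // reuses hv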
+inline
+size_t SparseMat::hash(int i0) const
+{
+    return (size_t)i0;
+}
+
+inline
+size_t SparseMat::hash(int i0, int i1) const
+{
+    return (size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1;
+}
+
+inline
+size_t SparseMat::hash(int i0, int i1, int i2) const
+{
+    return ((size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1) * HASH_SCALE + (unsigned)i2;
+}
+
+inline
+size_t SparseMat::hash(const int* idx) const
+{
+    size_t h = (unsigned)idx[0];
+    if( !hdr )
+        return 0;
+    int d = hdr->dims;
+    for(int i = 1; i < d; i++ )
+        h = h * HASH_SCALE + (unsigned)idx[i];
+    return h;
+}
+
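+// The accessors below differ in their behaviour on a missing element: ref()
+// inserts a zero-initialized element and returns a reference to it (hence it
+// is non-const), value() returns a copy or a default-constructed _Tp, and
+// find() returns a pointer to the element or NULL.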
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(int i0, int i1, int i2, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(i0, i1, i2, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::ref(const int* idx, size_t* hashval)
+{
+    return *(_Tp*)((SparseMat*)this)->ptr(idx, true, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(int i0, int i1, int i2, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+_Tp SparseMat::value(const int* idx, size_t* hashval) const
+{
+    const _Tp* p = (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+    return p ? *p : _Tp();
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(int i0, int i1, int i2, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(i0, i1, i2, false, hashval);
+}
+
+template<typename _Tp> inline
+const _Tp* SparseMat::find(const int* idx, size_t* hashval) const
+{
+    return (const _Tp*)((SparseMat*)this)->ptr(idx, false, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat::value(Node* n)
+{
+    return *(_Tp*)((uchar*)n + hdr->valueOffset);
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMat::value(const Node* n) const
+{
+    return *(const _Tp*)((const uchar*)n + hdr->valueOffset);
+}
+
+inline
+SparseMat::Node* SparseMat::node(size_t nidx)
+{
+    return (Node*)(void*)&hdr->pool[nidx];
+}
+
+inline
+const SparseMat::Node* SparseMat::node(size_t nidx) const
+{
+    return (const Node*)(const void*)&hdr->pool[nidx];
+}
+
+inline
+SparseMatIterator SparseMat::begin()
+{
+    return SparseMatIterator(this);
+}
+
+inline
+SparseMatConstIterator SparseMat::begin() const
+{
+    return SparseMatConstIterator(this);
+}
+
+inline
+SparseMatIterator SparseMat::end()
+{
+    SparseMatIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+inline
+SparseMatConstIterator SparseMat::end() const
+{
+    SparseMatConstIterator it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::begin()
+{
+    return SparseMatIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::begin() const
+{
+    return SparseMatConstIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat::end()
+{
+    SparseMatIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat::end() const
+{
+    SparseMatConstIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+
+
+///////////////////////////// SparseMat_ ////////////////////////////
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_()
+{
+    flags = MAGIC_VAL | traits::Type<_Tp>::value;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(int _dims, const int* _sizes)
+    : SparseMat(_dims, _sizes, traits::Type<_Tp>::value)
+{}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const SparseMat& m)
+{
+    if( m.type() == traits::Type<_Tp>::value )
+        *this = (const SparseMat_<_Tp>&)m;
+    else
+        m.convertTo(*this, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const SparseMat_<_Tp>& m)
+{
+    this->flags = m.flags;
+    this->hdr = m.hdr;
+    if( this->hdr )
+        CV_XADD(&this->hdr->refcount, 1);
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>::SparseMat_(const Mat& m)
+{
+    SparseMat sm(m);
+    *this = sm;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat_<_Tp>& m)
+{
+    if( this != &m )
+    {
+        if( m.hdr ) CV_XADD(&m.hdr->refcount, 1);
+        release();
+        flags = m.flags;
+        hdr = m.hdr;
+    }
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const SparseMat& m)
+{
+    if( m.type() == traits::Type<_Tp>::value )
+        return (*this = (const SparseMat_<_Tp>&)m);
+    m.convertTo(*this, traits::Type<_Tp>::value);
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp>& SparseMat_<_Tp>::operator = (const Mat& m)
+{
+    return (*this = SparseMat(m));
+}
+
+template<typename _Tp> inline
+SparseMat_<_Tp> SparseMat_<_Tp>::clone() const
+{
+    SparseMat_<_Tp> m;
+    this->copyTo(m);
+    return m;
+}
+
+template<typename _Tp> inline
+void SparseMat_<_Tp>::create(int _dims, const int* _sizes)
+{
+    SparseMat::create(_dims, _sizes, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::type() const
+{
+    return traits::Type<_Tp>::value;
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::depth() const
+{
+    return traits::Depth<_Tp>::value;
+}
+
+template<typename _Tp> inline
+int SparseMat_<_Tp>::channels() const
+{
+    return DataType<_Tp>::channels;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, int i1, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, i1, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, int i1, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, i1, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(int i0, int i1, int i2, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(i0, i1, i2, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(int i0, int i1, int i2, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(i0, i1, i2, hashval);
+}
+
+template<typename _Tp> inline
+_Tp& SparseMat_<_Tp>::ref(const int* idx, size_t* hashval)
+{
+    return SparseMat::ref<_Tp>(idx, hashval);
+}
+
+template<typename _Tp> inline
+_Tp SparseMat_<_Tp>::operator()(const int* idx, size_t* hashval) const
+{
+    return SparseMat::value<_Tp>(idx, hashval);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat_<_Tp>::begin()
+{
+    return SparseMatIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::begin() const
+{
+    return SparseMatConstIterator_<_Tp>(this);
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMat_<_Tp>::end()
+{
+    SparseMatIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMat_<_Tp>::end() const
+{
+    SparseMatConstIterator_<_Tp> it(this);
+    it.seekEnd();
+    return it;
+}
+
+
+
+////////////////////////// MatConstIterator /////////////////////////
+
+inline
+MatConstIterator::MatConstIterator()
+    : m(0), elemSize(0), ptr(0), sliceStart(0), sliceEnd(0)
+{}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    if( m && m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    seek((const int*)0);
+}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    CV_Assert(m && m->dims <= 2);
+    if( m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    int idx[] = {_row, _col};
+    seek(idx);
+}
+
+inline
+MatConstIterator::MatConstIterator(const Mat* _m, Point _pt)
+    : m(_m), elemSize(_m->elemSize()), ptr(0), sliceStart(0), sliceEnd(0)
+{
+    CV_Assert(m && m->dims <= 2);
+    if( m->isContinuous() )
+    {
+        sliceStart = m->ptr();
+        sliceEnd = sliceStart + m->total()*elemSize;
+    }
+    int idx[] = {_pt.y, _pt.x};
+    seek(idx);
+}
+
+inline
+MatConstIterator::MatConstIterator(const MatConstIterator& it)
+    : m(it.m), elemSize(it.elemSize), ptr(it.ptr), sliceStart(it.sliceStart), sliceEnd(it.sliceEnd)
+{}
+
+inline
+MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it )
+{
+    m = it.m; elemSize = it.elemSize; ptr = it.ptr;
+    sliceStart = it.sliceStart; sliceEnd = it.sliceEnd;
+    return *this;
+}
+
+inline
+const uchar* MatConstIterator::operator *() const
+{
+    return ptr;
+}
+
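+// For a continuous matrix the whole data block forms a single
+// [sliceStart, sliceEnd) range, so advancing is plain pointer arithmetic.
+// When the target lands outside the current slice (one row of a
+// non-continuous matrix), seek() recomputes the position from the logical
+// element index.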
+inline MatConstIterator& MatConstIterator::operator += (ptrdiff_t ofs)
+{
+    if( !m || ofs == 0 )
+        return *this;
+    ptrdiff_t ofsb = ofs*elemSize;
+    ptr += ofsb;
+    if( ptr < sliceStart || sliceEnd <= ptr )
+    {
+        ptr -= ofsb;
+        seek(ofs, true);
+    }
+    return *this;
+}
+
+inline
+MatConstIterator& MatConstIterator::operator -= (ptrdiff_t ofs)
+{
+    return (*this += -ofs);
+}
+
+inline
+MatConstIterator& MatConstIterator::operator --()
+{
+    if( m && (ptr -= elemSize) < sliceStart )
+    {
+        ptr += elemSize;
+        seek(-1, true);
+    }
+    return *this;
+}
+
+inline
+MatConstIterator MatConstIterator::operator --(int)
+{
+    MatConstIterator b = *this;
+    *this += -1;
+    return b;
+}
+
+inline
+MatConstIterator& MatConstIterator::operator ++()
+{
+    if( m && (ptr += elemSize) >= sliceEnd )
+    {
+        ptr -= elemSize;
+        seek(1, true);
+    }
+    return *this;
+}
+
+inline MatConstIterator MatConstIterator::operator ++(int)
+{
+    MatConstIterator b = *this;
+    *this += 1;
+    return b;
+}
+
+
+static inline
+bool operator == (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+static inline
+bool operator != (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return !(a == b);
+}
+
+static inline
+bool operator < (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr < b.ptr;
+}
+
+static inline
+bool operator > (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr > b.ptr;
+}
+
+static inline
+bool operator <= (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr <= b.ptr;
+}
+
+static inline
+bool operator >= (const MatConstIterator& a, const MatConstIterator& b)
+{
+    return a.ptr >= b.ptr;
+}
+
+static inline
+ptrdiff_t operator - (const MatConstIterator& b, const MatConstIterator& a)
+{
+    if( a.m != b.m )
+        return ((size_t)(-1) >> 1);
+    if( a.sliceEnd == b.sliceEnd )
+        return (b.ptr - a.ptr)/static_cast<ptrdiff_t>(b.elemSize);
+
+    return b.lpos() - a.lpos();
+}
+
+static inline
+MatConstIterator operator + (const MatConstIterator& a, ptrdiff_t ofs)
+{
+    MatConstIterator b = a;
+    return b += ofs;
+}
+
+static inline
+MatConstIterator operator + (ptrdiff_t ofs, const MatConstIterator& a)
+{
+    MatConstIterator b = a;
+    return b += ofs;
+}
+
+static inline
+MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs)
+{
+    MatConstIterator b = a;
+    return b += -ofs;
+}
+
+
+inline
+const uchar* MatConstIterator::operator [](ptrdiff_t i) const
+{
+    return *(*this + i);
+}
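+
+// Usage sketch (editor's illustration; assumes Mat::begin()/end() from mat.hpp):
+// @code{.cpp}
+//     cv::Mat m(3, 3, CV_32F, cv::Scalar(1.f));
+//     float sum = 0.f;
+//     for (cv::MatConstIterator_<float> it = m.begin<float>(); it != m.end<float>(); ++it)
+//         sum += *it;  // dereference yields const float&
+// @endcode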
+
+
+
+///////////////////////// MatConstIterator_ /////////////////////////
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_()
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m)
+    : MatConstIterator(_m)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, int _row, int _col)
+    : MatConstIterator(_m, _row, _col)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const Mat_<_Tp>* _m, Point _pt)
+    : MatConstIterator(_m, _pt)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>::MatConstIterator_(const MatConstIterator_& it)
+    : MatConstIterator(it)
+{}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator = (const MatConstIterator_& it )
+{
+    MatConstIterator::operator = (it);
+    return *this;
+}
+
+template<typename _Tp> inline
+const _Tp& MatConstIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)(this->ptr);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator += (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator -= (ptrdiff_t ofs)
+{
+    return (*this += -ofs);
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator --()
+{
+    MatConstIterator::operator --();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator --(int)
+{
+    MatConstIterator_ b = *this;
+    MatConstIterator::operator --();
+    return b;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp>& MatConstIterator_<_Tp>::operator ++()
+{
+    MatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatConstIterator_<_Tp> MatConstIterator_<_Tp>::operator ++(int)
+{
+    MatConstIterator_ b = *this;
+    MatConstIterator::operator ++();
+    return b;
+}
+
+
+template<typename _Tp> inline
+Point MatConstIterator_<_Tp>::pos() const
+{
+    if( !m )
+        return Point();
+    CV_DbgAssert( m->dims <= 2 );
+    if( m->isContinuous() )
+    {
+        ptrdiff_t ofs = (const _Tp*)ptr - (const _Tp*)m->data;
+        int y = (int)(ofs / m->cols);
+        int x = (int)(ofs - (ptrdiff_t)y * m->cols);
+        return Point(x, y);
+    }
+    else
+    {
+        ptrdiff_t ofs = (uchar*)ptr - m->data;
+        int y = (int)(ofs / m->step);
+        int x = (int)((ofs - y * m->step)/sizeof(_Tp));
+        return Point(x, y);
+    }
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+template<typename _Tp> static inline
+bool operator != (const MatConstIterator_<_Tp>& a, const MatConstIterator_<_Tp>& b)
+{
+    return a.m != b.m || a.ptr != b.ptr;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator + (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator + (ptrdiff_t ofs, const MatConstIterator_<_Tp>& a)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatConstIterator_<_Tp> operator - (const MatConstIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a - ofs;
+    return (MatConstIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> inline
+const _Tp& MatConstIterator_<_Tp>::operator [](ptrdiff_t i) const
+{
+    return *(_Tp*)MatConstIterator::operator [](i);
+}
+
+
+
+//////////////////////////// MatIterator_ ///////////////////////////
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_()
+    : MatConstIterator_<_Tp>()
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m)
+    : MatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col)
+    : MatConstIterator_<_Tp>(_m, _row, _col)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt)
+    : MatConstIterator_<_Tp>(_m, _pt)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx)
+    : MatConstIterator_<_Tp>(_m, _idx)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>::MatIterator_(const MatIterator_& it)
+    : MatConstIterator_<_Tp>(it)
+{}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator = (const MatIterator_<_Tp>& it )
+{
+    MatConstIterator::operator = (it);
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp& MatIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)(this->ptr);
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator += (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator -= (ptrdiff_t ofs)
+{
+    MatConstIterator::operator += (-ofs);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator --()
+{
+    MatConstIterator::operator --();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> MatIterator_<_Tp>::operator --(int)
+{
+    MatIterator_ b = *this;
+    MatConstIterator::operator --();
+    return b;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp>& MatIterator_<_Tp>::operator ++()
+{
+    MatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+MatIterator_<_Tp> MatIterator_<_Tp>::operator ++(int)
+{
+    MatIterator_ b = *this;
+    MatConstIterator::operator ++();
+    return b;
+}
+
+template<typename _Tp> inline
+_Tp& MatIterator_<_Tp>::operator [](ptrdiff_t i) const
+{
+    return *(*this + i);
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b)
+{
+    return a.m == b.m && a.ptr == b.ptr;
+}
+
+template<typename _Tp> static inline
+bool operator != (const MatIterator_<_Tp>& a, const MatIterator_<_Tp>& b)
+{
+    return a.m != b.m || a.ptr != b.ptr;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator + (const MatIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator + (ptrdiff_t ofs, const MatIterator_<_Tp>& a)
+{
+    MatConstIterator t = (const MatConstIterator&)a + ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+template<typename _Tp> static inline
+MatIterator_<_Tp> operator - (const MatIterator_<_Tp>& a, ptrdiff_t ofs)
+{
+    MatConstIterator t = (const MatConstIterator&)a - ofs;
+    return (MatIterator_<_Tp>&)t;
+}
+
+
+
+/////////////////////// SparseMatConstIterator //////////////////////
+
+inline
+SparseMatConstIterator::SparseMatConstIterator()
+    : m(0), hashidx(0), ptr(0)
+{}
+
+inline
+SparseMatConstIterator::SparseMatConstIterator(const SparseMatConstIterator& it)
+    : m(it.m), hashidx(it.hashidx), ptr(it.ptr)
+{}
+
+inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseMatConstIterator& it)
+{
+    if( this != &it )
+    {
+        m = it.m;
+        hashidx = it.hashidx;
+        ptr = it.ptr;
+    }
+    return *this;
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator::value() const
+{
+    return *(const _Tp*)ptr;
+}
+
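+// Note (editor's annotation): each stored value sits hdr->valueOffset bytes
+// after the start of its hash node, so node() recovers the SparseMat::Node by
+// stepping back from the value pointer.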
+inline
+const SparseMat::Node* SparseMatConstIterator::node() const
+{
+    return (ptr && m && m->hdr) ? (const SparseMat::Node*)(const void*)(ptr - m->hdr->valueOffset) : 0;
+}
+
+inline
+SparseMatConstIterator SparseMatConstIterator::operator ++(int)
+{
+    SparseMatConstIterator it = *this;
+    ++*this;
+    return it;
+}
+
+inline
+void SparseMatConstIterator::seekEnd()
+{
+    if( m && m->hdr )
+    {
+        hashidx = m->hdr->hashtab.size();
+        ptr = 0;
+    }
+}
+
+
+static inline
+bool operator == (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return it1.m == it2.m && it1.ptr == it2.ptr;
+}
+
+static inline
+bool operator != (const SparseMatConstIterator& it1, const SparseMatConstIterator& it2)
+{
+    return !(it1 == it2);
+}
+
+
+
+///////////////////////// SparseMatIterator /////////////////////////
+
+inline
+SparseMatIterator::SparseMatIterator()
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+inline
+SparseMatIterator::SparseMatIterator(const SparseMatIterator& it)
+    : SparseMatConstIterator(it)
+{}
+
+inline
+SparseMatIterator& SparseMatIterator::operator = (const SparseMatIterator& it)
+{
+    (SparseMatConstIterator&)*this = it;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp& SparseMatIterator::value() const
+{
+    return *(_Tp*)ptr;
+}
+
+inline
+SparseMat::Node* SparseMatIterator::node() const
+{
+    return (SparseMat::Node*)SparseMatConstIterator::node();
+}
+
+inline
+SparseMatIterator& SparseMatIterator::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+inline
+SparseMatIterator SparseMatIterator::operator ++(int)
+{
+    SparseMatIterator it = *this;
+    ++*this;
+    return it;
+}
+
+
+
+////////////////////// SparseMatConstIterator_ //////////////////////
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m)
+    : SparseMatConstIterator(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m)
+    : SparseMatConstIterator(_m)
+{
+    CV_Assert( _m->type() == traits::Type<_Tp>::value );
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it)
+    : SparseMatConstIterator(it)
+{}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator = (const SparseMatConstIterator_<_Tp>& it)
+{
+    return reinterpret_cast<SparseMatConstIterator_<_Tp>&>
+         (*reinterpret_cast<SparseMatConstIterator*>(this) =
+           reinterpret_cast<const SparseMatConstIterator&>(it));
+}
+
+template<typename _Tp> inline
+const _Tp& SparseMatConstIterator_<_Tp>::operator *() const
+{
+    return *(const _Tp*)this->ptr;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp>& SparseMatConstIterator_<_Tp>::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMatConstIterator_<_Tp> SparseMatConstIterator_<_Tp>::operator ++(int)
+{
+    SparseMatConstIterator_<_Tp> it = *this;
+    SparseMatConstIterator::operator ++();
+    return it;
+}
+
+
+
+///////////////////////// SparseMatIterator_ ////////////////////////
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_()
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m)
+    : SparseMatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m)
+    : SparseMatConstIterator_<_Tp>(_m)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it)
+    : SparseMatConstIterator_<_Tp>(it)
+{}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator = (const SparseMatIterator_<_Tp>& it)
+{
+    return reinterpret_cast<SparseMatIterator_<_Tp>&>
+         (*reinterpret_cast<SparseMatConstIterator*>(this) =
+           reinterpret_cast<const SparseMatConstIterator&>(it));
+}
+
+template<typename _Tp> inline
+_Tp& SparseMatIterator_<_Tp>::operator *() const
+{
+    return *(_Tp*)this->ptr;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp>& SparseMatIterator_<_Tp>::operator ++()
+{
+    SparseMatConstIterator::operator ++();
+    return *this;
+}
+
+template<typename _Tp> inline
+SparseMatIterator_<_Tp> SparseMatIterator_<_Tp>::operator ++(int)
+{
+    SparseMatIterator_<_Tp> it = *this;
+    SparseMatConstIterator::operator ++();
+    return it;
+}
+
+
+
+//////////////////////// MatCommaInitializer_ ///////////////////////
+
+template<typename _Tp> inline
+MatCommaInitializer_<_Tp>::MatCommaInitializer_(Mat_<_Tp>* _m)
+    : it(_m)
+{}
+
+template<typename _Tp> template<typename T2> inline
+MatCommaInitializer_<_Tp>& MatCommaInitializer_<_Tp>::operator , (T2 v)
+{
+    CV_DbgAssert( this->it < ((const Mat_<_Tp>*)this->it.m)->end() );
+    *this->it = _Tp(v);
+    ++this->it;
+    return *this;
+}
+
+template<typename _Tp> inline
+MatCommaInitializer_<_Tp>::operator Mat_<_Tp>() const
+{
+    CV_DbgAssert( this->it == ((const Mat_<_Tp>*)this->it.m)->end() );
+    return Mat_<_Tp>(*this->it.m);
+}
+
+
+template<typename _Tp, typename T2> static inline
+MatCommaInitializer_<_Tp> operator << (const Mat_<_Tp>& m, T2 val)
+{
+    MatCommaInitializer_<_Tp> commaInitializer((Mat_<_Tp>*)&m);
+    return (commaInitializer, val);
+}
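+
+// Usage sketch (editor's illustration) of the comma initializer defined above:
+// @code{.cpp}
+//     cv::Mat_<float> A = (cv::Mat_<float>(2, 2) << 1, 2,
+//                                                    3, 4);
+// @endcode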
+
+
+
+///////////////////////// Matrix Expressions ////////////////////////
+
+inline
+Mat& Mat::operator = (const MatExpr& e)
+{
+    e.op->assign(e, *this);
+    return *this;
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>::Mat_(const MatExpr& e)
+{
+    e.op->assign(e, *this, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+Mat_<_Tp>& Mat_<_Tp>::operator = (const MatExpr& e)
+{
+    e.op->assign(e, *this, traits::Type<_Tp>::value);
+    return *this;
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::zeros(int rows, int cols)
+{
+    return Mat::zeros(rows, cols, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::zeros(Size sz)
+{
+    return Mat::zeros(sz, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::ones(int rows, int cols)
+{
+    return Mat::ones(rows, cols, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::ones(Size sz)
+{
+    return Mat::ones(sz, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::eye(int rows, int cols)
+{
+    return Mat::eye(rows, cols, traits::Type<_Tp>::value);
+}
+
+template<typename _Tp> inline
+MatExpr Mat_<_Tp>::eye(Size sz)
+{
+    return Mat::eye(sz, traits::Type<_Tp>::value);
+}
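+
+// Usage sketch (editor's illustration): the typed wrappers above forward to
+// the Mat factories with the element type deduced from _Tp.
+// @code{.cpp}
+//     cv::Mat_<float> Z = cv::Mat_<float>::zeros(3, 3);
+//     cv::Mat_<float> I = cv::Mat_<float>::eye(3, 3);
+// @endcode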
+
+inline
+MatExpr::MatExpr()
+    : op(0), flags(0), a(Mat()), b(Mat()), c(Mat()), alpha(0), beta(0), s()
+{}
+
+inline
+MatExpr::MatExpr(const MatOp* _op, int _flags, const Mat& _a, const Mat& _b,
+                 const Mat& _c, double _alpha, double _beta, const Scalar& _s)
+    : op(_op), flags(_flags), a(_a), b(_b), c(_c), alpha(_alpha), beta(_beta), s(_s)
+{}
+
+inline
+MatExpr::operator Mat() const
+{
+    Mat m;
+    op->assign(*this, m);
+    return m;
+}
+
+template<typename _Tp> inline
+MatExpr::operator Mat_<_Tp>() const
+{
+    Mat_<_Tp> m;
+    op->assign(*this, m, traits::Type<_Tp>::value);
+    return m;
+}
+
+
+template<typename _Tp> static inline
+MatExpr min(const Mat_<_Tp>& a, const Mat_<_Tp>& b)
+{
+    return cv::min((const Mat&)a, (const Mat&)b);
+}
+
+template<typename _Tp> static inline
+MatExpr min(const Mat_<_Tp>& a, double s)
+{
+    return cv::min((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr min(double s, const Mat_<_Tp>& a)
+{
+    return cv::min((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr max(const Mat_<_Tp>& a, const Mat_<_Tp>& b)
+{
+    return cv::max((const Mat&)a, (const Mat&)b);
+}
+
+template<typename _Tp> static inline
+MatExpr max(const Mat_<_Tp>& a, double s)
+{
+    return cv::max((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr max(double s, const Mat_<_Tp>& a)
+{
+    return cv::max((const Mat&)a, s);
+}
+
+template<typename _Tp> static inline
+MatExpr abs(const Mat_<_Tp>& m)
+{
+    return cv::abs((const Mat&)m);
+}
+
+
+static inline
+Mat& operator += (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator += (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator += (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator += (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignAdd(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator -= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator -= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator -= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator -= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignSubtract(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator *= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator *= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator *= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator *= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignMultiply(b, (Mat&)a);
+    return a;
+}
+
+static inline
+Mat& operator /= (Mat& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, a);
+    return a;
+}
+
+static inline
+const Mat& operator /= (const Mat& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, (Mat&)a);
+    return a;
+}
+
+template<typename _Tp> static inline
+Mat_<_Tp>& operator /= (Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, a);
+    return a;
+}
+
+template<typename _Tp> static inline
+const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b)
+{
+    b.op->augAssignDivide(b, (Mat&)a);
+    return a;
+}
+
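+// Note (editor's annotation): these overloads route compound assignment with a
+// matrix expression through the lazy MatExpr machinery (augAssignAdd,
+// augAssignSubtract, ...) instead of materializing a temporary. A minimal sketch:
+// @code{.cpp}
+//     cv::Mat A = cv::Mat::ones(3, 3, CV_32F);
+//     cv::Mat B = cv::Mat::eye(3, 3, CV_32F);
+//     A += B * 2.0;  // dispatched to operator += (Mat&, const MatExpr&)
+// @endcode
+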
+
+//////////////////////////////// UMat ////////////////////////////////
+
+inline
+UMat::UMat(UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_rows, _cols, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(Size _sz, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create( _sz.height, _sz.width, _type );
+}
+
+inline
+UMat::UMat(Size _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_sz.height, _sz.width, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
+{
+    create(_dims, _sz, _type);
+    *this = _s;
+}
+
+inline
+UMat::UMat(const UMat& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+  usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows)
+{
+    addref();
+    if( m.dims <= 2 )
+    {
+        step[0] = m.step[0]; step[1] = m.step[1];
+    }
+    else
+    {
+        dims = 0;
+        copySize(m);
+    }
+}
+
+
+template<typename _Tp> inline
+UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
+: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
+{
+    if(vec.empty())
+        return;
+    if( !copyData )
+    {
+        // !!!TODO!!!
+        CV_Error(Error::StsNotImplemented, "");
+    }
+    else
+        Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
+}
+
+inline
+UMat& UMat::operator = (const UMat& m)
+{
+    if( this != &m )
+    {
+        const_cast<UMat&>(m).addref();
+        release();
+        flags = m.flags;
+        if( dims <= 2 && m.dims <= 2 )
+        {
+            dims = m.dims;
+            rows = m.rows;
+            cols = m.cols;
+            step[0] = m.step[0];
+            step[1] = m.step[1];
+        }
+        else
+            copySize(m);
+        allocator = m.allocator;
+        if (usageFlags == USAGE_DEFAULT)
+            usageFlags = m.usageFlags;
+        u = m.u;
+        offset = m.offset;
+    }
+    return *this;
+}
+
+inline
+UMat UMat::row(int y) const
+{
+    return UMat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+UMat UMat::col(int x) const
+{
+    return UMat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+UMat UMat::rowRange(int startrow, int endrow) const
+{
+    return UMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+UMat UMat::rowRange(const Range& r) const
+{
+    return UMat(*this, r, Range::all());
+}
+
+inline
+UMat UMat::colRange(int startcol, int endcol) const
+{
+    return UMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+UMat UMat::colRange(const Range& r) const
+{
+    return UMat(*this, Range::all(), r);
+}
+
+inline
+UMat UMat::clone() const
+{
+    UMat m;
+    copyTo(m);
+    return m;
+}
+
+inline
+void UMat::assignTo( UMat& m, int _type ) const
+{
+    if( _type < 0 )
+        m = *this;
+    else
+        convertTo(m, _type);
+}
+
+inline
+void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
+{
+    _type &= TYPE_MASK;
+    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
+        return;
+    int sz[] = {_rows, _cols};
+    create(2, sz, _type, _usageFlags);
+}
+
+inline
+void UMat::create(Size _sz, int _type, UMatUsageFlags _usageFlags)
+{
+    create(_sz.height, _sz.width, _type, _usageFlags);
+}
+
+inline
+void UMat::addref()
+{
+    if( u )
+        CV_XADD(&(u->urefcount), 1);
+}
+
+inline void UMat::release()
+{
+    if( u && CV_XADD(&(u->urefcount), -1) == 1 )
+        deallocate();
+    for(int i = 0; i < dims; i++)
+        size.p[i] = 0;
+    u = 0;
+}
+
+inline
+UMat UMat::operator()( Range _rowRange, Range _colRange ) const
+{
+    return UMat(*this, _rowRange, _colRange);
+}
+
+inline
+UMat UMat::operator()( const Rect& roi ) const
+{
+    return UMat(*this, roi);
+}
+
+inline
+UMat UMat::operator()(const Range* ranges) const
+{
+    return UMat(*this, ranges);
+}
+
+inline
+UMat UMat::operator()(const std::vector<Range>& ranges) const
+{
+    return UMat(*this, ranges);
+}
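+
+// Usage sketch (editor's illustration): the Range/Rect operators return views
+// that share data with the source UMat.
+// @code{.cpp}
+//     cv::UMat u(480, 640, CV_8UC1, cv::Scalar(0));
+//     cv::UMat roi = u(cv::Rect(10, 10, 100, 100));  // no copy; submatrix view
+// @endcode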
+
+inline
+bool UMat::isContinuous() const
+{
+    return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool UMat::isSubmatrix() const
+{
+    return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t UMat::elemSize() const
+{
+    return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t UMat::elemSize1() const
+{
+    return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int UMat::type() const
+{
+    return CV_MAT_TYPE(flags);
+}
+
+inline
+int UMat::depth() const
+{
+    return CV_MAT_DEPTH(flags);
+}
+
+inline
+int UMat::channels() const
+{
+    return CV_MAT_CN(flags);
+}
+
+inline
+size_t UMat::step1(int i) const
+{
+    return step.p[i] / elemSize1();
+}
+
+inline
+bool UMat::empty() const
+{
+    return u == 0 || total() == 0 || dims == 0;
+}
+
+inline
+size_t UMat::total() const
+{
+    if( dims <= 2 )
+        return (size_t)rows * cols;
+    size_t p = 1;
+    for( int i = 0; i < dims; i++ )
+        p *= size[i];
+    return p;
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+inline
+UMat::UMat(UMat&& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+  usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows)
+{
+    if (m.dims <= 2)  // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.allocator = NULL;
+    m.u = NULL;
+    m.offset = 0;
+}
+
+inline
+UMat& UMat::operator = (UMat&& m)
+{
+    if (this == &m)
+      return *this;
+    release();
+    flags = m.flags; dims = m.dims; rows = m.rows; cols = m.cols;
+    allocator = m.allocator; usageFlags = m.usageFlags;
+    u = m.u;
+    offset = m.offset;
+    if (step.p != step.buf) // release self step/size
+    {
+        fastFree(step.p);
+        step.p = step.buf;
+        size.p = &rows;
+    }
+    if (m.dims <= 2) // move new step/size info
+    {
+        step[0] = m.step[0];
+        step[1] = m.step[1];
+    }
+    else
+    {
+        CV_DbgAssert(m.step.p != m.step.buf);
+        step.p = m.step.p;
+        size.p = m.size.p;
+        m.step.p = m.step.buf;
+        m.size.p = &m.rows;
+    }
+    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.allocator = NULL;
+    m.u = NULL;
+    m.offset = 0;
+    return *this;
+}
+
+#endif
+
+
+inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceMemMapped() const { return (flags & DEVICE_MEM_MAPPED) != 0; }
+inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; }
+inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; }
+inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; }
+
+inline void UMatData::markDeviceMemMapped(bool flag)
+{
+  if(flag)
+    flags |= DEVICE_MEM_MAPPED;
+  else
+    flags &= ~DEVICE_MEM_MAPPED;
+}
+
+inline void UMatData::markHostCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= HOST_COPY_OBSOLETE;
+    else
+        flags &= ~HOST_COPY_OBSOLETE;
+}
+inline void UMatData::markDeviceCopyObsolete(bool flag)
+{
+    if(flag)
+        flags |= DEVICE_COPY_OBSOLETE;
+    else
+        flags &= ~DEVICE_COPY_OBSOLETE;
+}
+
+//! @endcond
+
+} //cv
+
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+#endif

+ 1476 - 0
ThresholdTools/include/opencv2/core/matx.hpp

@@ -0,0 +1,1476 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_MATX_HPP
+#define OPENCV_CORE_MATX_HPP
+
+#ifndef __cplusplus
+#  error matx.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+#include "opencv2/core/traits.hpp"
+#include "opencv2/core/saturate.hpp"
+
+#ifdef CV_CXX11
+#include <initializer_list>
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+////////////////////////////// Small Matrix ///////////////////////////
+
+//! @cond IGNORED
+struct CV_EXPORTS Matx_AddOp {};
+struct CV_EXPORTS Matx_SubOp {};
+struct CV_EXPORTS Matx_ScaleOp {};
+struct CV_EXPORTS Matx_MulOp {};
+struct CV_EXPORTS Matx_DivOp {};
+struct CV_EXPORTS Matx_MatMulOp {};
+struct CV_EXPORTS Matx_TOp {};
+//! @endcond
+
+/** @brief Template class for small matrices whose type and size are known at compile time
+
+If you need a more flexible type, use Mat. The elements of the matrix M are accessible using the
+M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions) are
+available. To apply an operation on Matx that is not implemented, you can easily convert the matrix
+to Mat and back:
+@code{.cpp}
+    Matx33f m(1, 2, 3,
+              4, 5, 6,
+              7, 8, 9);
+    cout << sum(Mat(m*m.t())) << endl;
+@endcode
+In addition to the plain constructor, which takes a list of elements, Matx can be initialized from a C array:
+@code{.cpp}
+    float values[] = { 1, 2, 3};
+    Matx31f m(values);
+@endcode
+If C++11 features are available, std::initializer_list can also be used to initialize Matx:
+@code{.cpp}
+    Matx31f m = { 1, 2, 3};
+@endcode
+ */
+template<typename _Tp, int m, int n> class Matx
+{
+public:
+    enum {
+           rows     = m,
+           cols     = n,
+           channels = rows*cols,
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth    = traits::Type<_Tp>::value,
+           type     = CV_MAKETYPE(depth, channels),
+#endif
+           shortdim = (m < n ? m : n)
+         };
+
+    typedef _Tp                           value_type;
+    typedef Matx<_Tp, m, n>               mat_type;
+    typedef Matx<_Tp, shortdim, 1> diag_type;
+
+    //! default constructor
+    Matx();
+
+    Matx(_Tp v0); //!< 1x1 matrix
+    Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 1x5 or 5x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 1x7 or 7x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 1x9, 3x3 or 9x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11,
+         _Tp v12, _Tp v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix
+    Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
+         _Tp v4, _Tp v5, _Tp v6, _Tp v7,
+         _Tp v8, _Tp v9, _Tp v10, _Tp v11,
+         _Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix
+    explicit Matx(const _Tp* vals); //!< initialize from a plain array
+
+#ifdef CV_CXX11
+    Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list
+#endif
+
+    static Matx all(_Tp alpha);
+    static Matx zeros();
+    static Matx ones();
+    static Matx eye();
+    static Matx diag(const diag_type& d);
+    static Matx randu(_Tp a, _Tp b);
+    static Matx randn(_Tp a, _Tp b);
+
+    //! dot product computed with the default precision
+    _Tp dot(const Matx<_Tp, m, n>& v) const;
+
+    //! dot product computed in double-precision arithmetic
+    double ddot(const Matx<_Tp, m, n>& v) const;
+
+    //! conversion to another data type
+    template<typename T2> operator Matx<T2, m, n>() const;
+
+    //! change the matrix shape
+    template<int m1, int n1> Matx<_Tp, m1, n1> reshape() const;
+
+    //! extract part of the matrix
+    template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int i, int j) const;
+
+    //! extract the matrix row
+    Matx<_Tp, 1, n> row(int i) const;
+
+    //! extract the matrix column
+    Matx<_Tp, m, 1> col(int i) const;
+
+    //! extract the matrix diagonal
+    diag_type diag() const;
+
+    //! transpose the matrix
+    Matx<_Tp, n, m> t() const;
+
+    //! invert the matrix
+    Matx<_Tp, n, m> inv(int method=DECOMP_LU, bool *p_is_ok = NULL) const;
+
+    //! solve linear system
+    template<int l> Matx<_Tp, n, l> solve(const Matx<_Tp, m, l>& rhs, int flags=DECOMP_LU) const;
+    Vec<_Tp, n> solve(const Vec<_Tp, m>& rhs, int method) const;
+
+    //! multiply two matrices element-wise
+    Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const;
+
+    //! divide two matrices element-wise
+    Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
+
+    //! element access
+    const _Tp& operator ()(int i, int j) const;
+    _Tp& operator ()(int i, int j);
+
+    //! 1D element access
+    const _Tp& operator ()(int i) const;
+    _Tp& operator ()(int i);
+
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp);
+    template<typename _T2> Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp);
+    Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp);
+    template<int l> Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp);
+    Matx(const Matx<_Tp, n, m>& a, Matx_TOp);
+
+    _Tp val[m*n]; //!< matrix elements
+};
+
+typedef Matx<float, 1, 2> Matx12f;
+typedef Matx<double, 1, 2> Matx12d;
+typedef Matx<float, 1, 3> Matx13f;
+typedef Matx<double, 1, 3> Matx13d;
+typedef Matx<float, 1, 4> Matx14f;
+typedef Matx<double, 1, 4> Matx14d;
+typedef Matx<float, 1, 6> Matx16f;
+typedef Matx<double, 1, 6> Matx16d;
+
+typedef Matx<float, 2, 1> Matx21f;
+typedef Matx<double, 2, 1> Matx21d;
+typedef Matx<float, 3, 1> Matx31f;
+typedef Matx<double, 3, 1> Matx31d;
+typedef Matx<float, 4, 1> Matx41f;
+typedef Matx<double, 4, 1> Matx41d;
+typedef Matx<float, 6, 1> Matx61f;
+typedef Matx<double, 6, 1> Matx61d;
+
+typedef Matx<float, 2, 2> Matx22f;
+typedef Matx<double, 2, 2> Matx22d;
+typedef Matx<float, 2, 3> Matx23f;
+typedef Matx<double, 2, 3> Matx23d;
+typedef Matx<float, 3, 2> Matx32f;
+typedef Matx<double, 3, 2> Matx32d;
+
+typedef Matx<float, 3, 3> Matx33f;
+typedef Matx<double, 3, 3> Matx33d;
+
+typedef Matx<float, 3, 4> Matx34f;
+typedef Matx<double, 3, 4> Matx34d;
+typedef Matx<float, 4, 3> Matx43f;
+typedef Matx<double, 4, 3> Matx43d;
+
+typedef Matx<float, 4, 4> Matx44f;
+typedef Matx<double, 4, 4> Matx44d;
+typedef Matx<float, 6, 6> Matx66f;
+typedef Matx<double, 6, 6> Matx66d;
+
+/*!
+  traits
+*/
+template<typename _Tp, int m, int n> class DataType< Matx<_Tp, m, n> >
+{
+public:
+    typedef Matx<_Tp, m, n>                               value_type;
+    typedef Matx<typename DataType<_Tp>::work_type, m, n> work_type;
+    typedef _Tp                                           channel_type;
+    typedef value_type                                    vec_type;
+
+    enum { generic_type = 0,
+           channels     = m * n,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+};
+
+namespace traits {
+template<typename _Tp, int m, int n>
+struct Depth< Matx<_Tp, m, n> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp, int m, int n>
+struct Type< Matx<_Tp, m, n> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, n*m) }; };
+} // namespace
+
+
+/** @brief  Comma-separated Matrix Initializer
+*/
+template<typename _Tp, int m, int n> class MatxCommaInitializer
+{
+public:
+    MatxCommaInitializer(Matx<_Tp, m, n>* _mtx);
+    template<typename T2> MatxCommaInitializer<_Tp, m, n>& operator , (T2 val);
+    Matx<_Tp, m, n> operator *() const;
+
+    Matx<_Tp, m, n>* dst;
+    int idx;
+};
+
+/*
+ Utility methods
+*/
+template<typename _Tp, int m> static double determinant(const Matx<_Tp, m, m>& a);
+template<typename _Tp, int m, int n> static double trace(const Matx<_Tp, m, n>& a);
+template<typename _Tp, int m, int n> static double norm(const Matx<_Tp, m, n>& M);
+template<typename _Tp, int m, int n> static double norm(const Matx<_Tp, m, n>& M, int normType);
+
+
+
+///////////////////// Vec (used as element of multi-channel images) //////////////////////
+
+/** @brief Template class for short numerical vectors, a partial case of Matx
+
+This template class represents short numerical vectors (of 1, 2, 3, 4, ... elements) on which you
+can perform basic arithmetic operations and access individual elements using the [] operator. The
+vectors are allocated on the stack, as opposed to std::valarray, std::vector, cv::Mat, etc., whose
+elements are dynamically allocated on the heap.
+
+The template takes 2 parameters:
+@tparam _Tp element type
+@tparam cn the number of elements
+
+In addition to the universal notation like Vec<float, 3>, you can use shorter aliases
+for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec<float, 3>.
+
+It is possible to convert Vec\<T,2\> to/from Point_, Vec\<T,3\> to/from Point3_, and Vec\<T,4\>
+to CvScalar or Scalar_. Use operator[] to access the elements of Vec.
+
+All the expected vector operations are also implemented:
+-   v1 = v2 + v3
+-   v1 = v2 - v3
+-   v1 = v2 \* scale
+-   v1 = scale \* v2
+-   v1 = -v2
+-   v1 += v2 and other augmenting operations
+-   v1 == v2, v1 != v2
+-   norm(v1) (Euclidean norm)
+The Vec class is commonly used to describe pixel types of multi-channel arrays. See Mat for details.
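+
+For example, a minimal sketch of the operations listed above:
+@code{.cpp}
+    Vec3f v1(1.f, 2.f, 3.f), v2(4.f, 5.f, 6.f);
+    Vec3f v3 = v1 + v2;     // per-element sum
+    double n = norm(v1);    // Euclidean norm
+@endcode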
+*/
+template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
+{
+public:
+    typedef _Tp value_type;
+    enum {
+           channels = cn,
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth    = Matx<_Tp, cn, 1>::depth,
+           type     = CV_MAKETYPE(depth, channels),
+#endif
+           _dummy_enum_finalizer = 0
+         };
+
+    //! default constructor
+    Vec();
+
+    Vec(_Tp v0); //!< 1-element vector constructor
+    Vec(_Tp v0, _Tp v1); //!< 2-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2); //!< 3-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 4-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 5-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 6-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 7-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 8-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 9-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 10-element vector constructor
+    Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor
+    explicit Vec(const _Tp* values);
+
+#ifdef CV_CXX11
+    Vec(std::initializer_list<_Tp>);
+#endif
+
+    Vec(const Vec<_Tp, cn>& v);
+
+    static Vec all(_Tp alpha);
+
+    //! per-element multiplication
+    Vec mul(const Vec<_Tp, cn>& v) const;
+
+    //! conjugation (makes sense for complex numbers and quaternions)
+    Vec conj() const;
+
+    /*!
+      Cross product of two 3D vectors.
+
+      For any other dimensionality an exception is raised.
+    */
+    Vec cross(const Vec& v) const;
+    //! conversion to another data type
+    template<typename T2> operator Vec<T2, cn>() const;
+
+    /*! element access */
+    const _Tp& operator [](int i) const;
+    _Tp& operator[](int i);
+    const _Tp& operator ()(int i) const;
+    _Tp& operator ()(int i);
+
+    Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
+    Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
+    template<typename _T2> Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp);
+};
+
+/** @name Shorter aliases for the most popular specializations of Vec<T,n>
+  @{
+*/
+typedef Vec<uchar, 2> Vec2b;
+typedef Vec<uchar, 3> Vec3b;
+typedef Vec<uchar, 4> Vec4b;
+
+typedef Vec<short, 2> Vec2s;
+typedef Vec<short, 3> Vec3s;
+typedef Vec<short, 4> Vec4s;
+
+typedef Vec<ushort, 2> Vec2w;
+typedef Vec<ushort, 3> Vec3w;
+typedef Vec<ushort, 4> Vec4w;
+
+typedef Vec<int, 2> Vec2i;
+typedef Vec<int, 3> Vec3i;
+typedef Vec<int, 4> Vec4i;
+typedef Vec<int, 6> Vec6i;
+typedef Vec<int, 8> Vec8i;
+
+typedef Vec<float, 2> Vec2f;
+typedef Vec<float, 3> Vec3f;
+typedef Vec<float, 4> Vec4f;
+typedef Vec<float, 6> Vec6f;
+
+typedef Vec<double, 2> Vec2d;
+typedef Vec<double, 3> Vec3d;
+typedef Vec<double, 4> Vec4d;
+typedef Vec<double, 6> Vec6d;
+/** @} */
+
+/*!
+  traits
+*/
+template<typename _Tp, int cn> class DataType< Vec<_Tp, cn> >
+{
+public:
+    typedef Vec<_Tp, cn>                               value_type;
+    typedef Vec<typename DataType<_Tp>::work_type, cn> work_type;
+    typedef _Tp                                        channel_type;
+    typedef value_type                                 vec_type;
+
+    enum { generic_type = 0,
+           channels     = cn,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           depth        = DataType<channel_type>::depth,
+           type         = CV_MAKETYPE(depth, channels),
+#endif
+           _dummy_enum_finalizer = 0
+         };
+};
+
+namespace traits {
+template<typename _Tp, int cn>
+struct Depth< Vec<_Tp, cn> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp, int cn>
+struct Type< Vec<_Tp, cn> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, cn) }; };
+} // namespace
+
+
+/** @brief  Comma-separated Vec Initializer
+*/
+template<typename _Tp, int m> class VecCommaInitializer : public MatxCommaInitializer<_Tp, m, 1>
+{
+public:
+    VecCommaInitializer(Vec<_Tp, m>* _vec);
+    template<typename T2> VecCommaInitializer<_Tp, m>& operator , (T2 val);
+    Vec<_Tp, m> operator *() const;
+};
+
+template<typename _Tp, int cn> static Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v);
+
+//! @} core_basic
+
+//! @cond IGNORED
+
+///////////////////////////////////// helper classes /////////////////////////////////////
+namespace internal
+{
+
+template<typename _Tp, int m> struct Matx_DetOp
+{
+    double operator ()(const Matx<_Tp, m, m>& a) const
+    {
+        Matx<_Tp, m, m> temp = a;
+        double p = LU(temp.val, m*sizeof(_Tp), m, 0, 0, 0);
+        if( p == 0 )
+            return p;
+        for( int i = 0; i < m; i++ )
+            p *= temp(i, i);
+        return p;
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 1>
+{
+    double operator ()(const Matx<_Tp, 1, 1>& a) const
+    {
+        return a(0,0);
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 2>
+{
+    double operator ()(const Matx<_Tp, 2, 2>& a) const
+    {
+        return a(0,0)*a(1,1) - a(0,1)*a(1,0);
+    }
+};
+
+template<typename _Tp> struct Matx_DetOp<_Tp, 3>
+{
+    double operator ()(const Matx<_Tp, 3, 3>& a) const
+    {
+        return a(0,0)*(a(1,1)*a(2,2) - a(2,1)*a(1,2)) -
+            a(0,1)*(a(1,0)*a(2,2) - a(2,0)*a(1,2)) +
+            a(0,2)*(a(1,0)*a(2,1) - a(2,0)*a(1,1));
+    }
+};
+
+template<typename _Tp> Vec<_Tp, 2> inline conjugate(const Vec<_Tp, 2>& v)
+{
+    return Vec<_Tp, 2>(v[0], -v[1]);
+}
+
+template<typename _Tp> Vec<_Tp, 4> inline conjugate(const Vec<_Tp, 4>& v)
+{
+    return Vec<_Tp, 4>(v[0], -v[1], -v[2], -v[3]);
+}
+
+} // internal
+
+
+
+////////////////////////////////// Matx Implementation ///////////////////////////////////
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx()
+{
+    for(int i = 0; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0)
+{
+    val[0] = v0;
+    for(int i = 1; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1)
+{
+    CV_StaticAssert(channels >= 2, "Matx should have at least 2 elements.");
+    val[0] = v0; val[1] = v1;
+    for(int i = 2; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2)
+{
+    CV_StaticAssert(channels >= 3, "Matx should have at least 3 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2;
+    for(int i = 3; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+{
+    CV_StaticAssert(channels >= 4, "Matx should have at least 4 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    for(int i = 4; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
+{
+    CV_StaticAssert(channels >= 5, "Matx should have at least 5 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4;
+    for(int i = 5; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
+{
+    CV_StaticAssert(channels >= 6, "Matx should have at least 6 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5;
+    for(int i = 6; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
+{
+    CV_StaticAssert(channels >= 7, "Matx should have at least 7 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6;
+    for(int i = 7; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7)
+{
+    CV_StaticAssert(channels >= 8, "Matx should have at least 8 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    for(int i = 8; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8)
+{
+    CV_StaticAssert(channels >= 9, "Matx should have at least 9 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8;
+    for(int i = 9; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9)
+{
+    CV_StaticAssert(channels >= 10, "Matx should have at least 10 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9;
+    for(int i = 10; i < channels; i++) val[i] = _Tp(0);
+}
+
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11)
+{
+    CV_StaticAssert(channels >= 12, "Matx should have at least 12 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    for(int i = 12; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13)
+{
+    CV_StaticAssert(channels >= 14, "Matx should have at least 14 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    val[12] = v12; val[13] = v13;
+    for (int i = 14; i < channels; i++) val[i] = _Tp(0);
+}
+
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13, _Tp v14, _Tp v15)
+{
+    CV_StaticAssert(channels >= 16, "Matx should have at least 16 elements.");
+    val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+    val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+    val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+    val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15;
+    for(int i = 16; i < channels; i++) val[i] = _Tp(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(const _Tp* values)
+{
+    for( int i = 0; i < channels; i++ ) val[i] = values[i];
+}
+
+#ifdef CV_CXX11
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n>::Matx(std::initializer_list<_Tp> list)
+{
+    CV_DbgAssert(list.size() == channels);
+    int i = 0;
+    for(const auto& elem : list)
+    {
+        val[i++] = elem;
+    }
+}
+#endif
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::all(_Tp alpha)
+{
+    Matx<_Tp, m, n> M;
+    for( int i = 0; i < m*n; i++ ) M.val[i] = alpha;
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::zeros()
+{
+    return all(0);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::ones()
+{
+    return all(1);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::eye()
+{
+    Matx<_Tp,m,n> M;
+    for(int i = 0; i < shortdim; i++)
+        M(i,i) = 1;
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp Matx<_Tp, m, n>::dot(const Matx<_Tp, m, n>& M) const
+{
+    _Tp s = 0;
+    for( int i = 0; i < channels; i++ ) s += val[i]*M.val[i];
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+double Matx<_Tp, m, n>::ddot(const Matx<_Tp, m, n>& M) const
+{
+    double s = 0;
+    for( int i = 0; i < channels; i++ ) s += (double)val[i]*M.val[i];
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::diag(const typename Matx<_Tp,m,n>::diag_type& d)
+{
+    Matx<_Tp,m,n> M;
+    for(int i = 0; i < shortdim; i++)
+        M(i,i) = d(i, 0);
+    return M;
+}
+
+template<typename _Tp, int m, int n> template<typename T2>
+inline Matx<_Tp, m, n>::operator Matx<T2, m, n>() const
+{
+    Matx<T2, m, n> M;
+    for( int i = 0; i < m*n; i++ ) M.val[i] = saturate_cast<T2>(val[i]);
+    return M;
+}
+
+template<typename _Tp, int m, int n> template<int m1, int n1> inline
+Matx<_Tp, m1, n1> Matx<_Tp, m, n>::reshape() const
+{
+    CV_StaticAssert(m1*n1 == m*n, "Input and destination matrices must have the same number of elements");
+    return (const Matx<_Tp, m1, n1>&)*this;
+}
+
+template<typename _Tp, int m, int n>
+template<int m1, int n1> inline
+Matx<_Tp, m1, n1> Matx<_Tp, m, n>::get_minor(int i, int j) const
+{
+    CV_DbgAssert(0 <= i && i+m1 <= m && 0 <= j && j+n1 <= n);
+    Matx<_Tp, m1, n1> s;
+    for( int di = 0; di < m1; di++ )
+        for( int dj = 0; dj < n1; dj++ )
+            s(di, dj) = (*this)(i+di, j+dj);
+    return s;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, 1, n> Matx<_Tp, m, n>::row(int i) const
+{
+    CV_DbgAssert((unsigned)i < (unsigned)m);
+    return Matx<_Tp, 1, n>(&val[i*n]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, 1> Matx<_Tp, m, n>::col(int j) const
+{
+    CV_DbgAssert((unsigned)j < (unsigned)n);
+    Matx<_Tp, m, 1> v;
+    for( int i = 0; i < m; i++ )
+        v.val[i] = val[i*n + j];
+    return v;
+}
+
+template<typename _Tp, int m, int n> inline
+typename Matx<_Tp, m, n>::diag_type Matx<_Tp, m, n>::diag() const
+{
+    diag_type d;
+    for( int i = 0; i < shortdim; i++ )
+        d.val[i] = val[i*n + i];
+    return d;
+}
+
+template<typename _Tp, int m, int n> inline
+const _Tp& Matx<_Tp, m, n>::operator()(int i, int j) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
+    return this->val[i*n + j];
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp& Matx<_Tp, m, n>::operator ()(int i, int j)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)m && (unsigned)j < (unsigned)n );
+    return val[i*n + j];
+}
+
+template<typename _Tp, int m, int n> inline
+const _Tp& Matx<_Tp, m, n>::operator ()(int i) const
+{
+    CV_StaticAssert(m == 1 || n == 1, "Single-index access requires the matrix to be a row or a column");
+    CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) );
+    return val[i];
+}
+
+template<typename _Tp, int m, int n> inline
+_Tp& Matx<_Tp, m, n>::operator ()(int i)
+{
+    CV_StaticAssert(m == 1 || n == 1, "Single-index access requires the matrix to be a row or a column");
+    CV_DbgAssert( (unsigned)i < (unsigned)(m+n-1) );
+    return val[i];
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] + b.val[i]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] - b.val[i]);
+}
+
+template<typename _Tp, int m, int n> template<typename _T2> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp)
+{
+    for( int i = 0; i < channels; i++ )
+        val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]);
+}
+
+template<typename _Tp, int m, int n> template<int l> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp)
+{
+    for( int i = 0; i < m; i++ )
+        for( int j = 0; j < n; j++ )
+        {
+            _Tp s = 0;
+            for( int k = 0; k < l; k++ )
+                s += a(i, k) * b(k, j);
+            val[i*n + j] = s;
+        }
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n>::Matx(const Matx<_Tp, n, m>& a, Matx_TOp)
+{
+    for( int i = 0; i < m; i++ )
+        for( int j = 0; j < n; j++ )
+            val[i*n + j] = a(j, i);
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::mul(const Matx<_Tp, m, n>& a) const
+{
+    return Matx<_Tp, m, n>(*this, a, Matx_MulOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> Matx<_Tp, m, n>::div(const Matx<_Tp, m, n>& a) const
+{
+    return Matx<_Tp, m, n>(*this, a, Matx_DivOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, n, m> Matx<_Tp, m, n>::t() const
+{
+    return Matx<_Tp, n, m>(*this, Matx_TOp());
+}
+
+template<typename _Tp, int m, int n> inline
+Vec<_Tp, n> Matx<_Tp, m, n>::solve(const Vec<_Tp, m>& rhs, int method) const
+{
+    Matx<_Tp, n, 1> x = solve((const Matx<_Tp, m, 1>&)(rhs), method);
+    return (Vec<_Tp, n>&)(x);
+}
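+/* Editorial note: a small usage sketch for solve() (illustrative, not part of the
+   upstream header); DECOMP_LU is the usual decomposition method:
+   @code{.cpp}
+   cv::Matx22f A(2, 1,
+                 1, 3);
+   cv::Vec2f b(3, 5);
+   cv::Vec2f x = A.solve(b, cv::DECOMP_LU); // x satisfies A*x == b
+   @endcode
+*/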
+
+template<typename _Tp, int m> static inline
+double determinant(const Matx<_Tp, m, m>& a)
+{
+    return cv::internal::Matx_DetOp<_Tp, m>()(a);
+}
+
+template<typename _Tp, int m, int n> static inline
+double trace(const Matx<_Tp, m, n>& a)
+{
+    _Tp s = 0;
+    for( int i = 0; i < std::min(m, n); i++ )
+        s += a(i,i);
+    return s;
+}
+
+template<typename _Tp, int m, int n> static inline
+double norm(const Matx<_Tp, m, n>& M)
+{
+    return std::sqrt(normL2Sqr<_Tp, double>(M.val, m*n));
+}
+
+template<typename _Tp, int m, int n> static inline
+double norm(const Matx<_Tp, m, n>& M, int normType)
+{
+    switch(normType) {
+    case NORM_INF:
+        return (double)normInf<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    case NORM_L1:
+        return (double)normL1<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    case NORM_L2SQR:
+        return (double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n);
+    default:
+    case NORM_L2:
+        return std::sqrt((double)normL2Sqr<_Tp, typename DataType<_Tp>::work_type>(M.val, m*n));
+    }
+}
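+/* Editorial note: the overloads above compute matrix norms over all m*n elements;
+   a quick illustrative sketch (not part of the upstream header):
+   @code{.cpp}
+   cv::Matx22f M(3, 0,
+                 0, 4);
+   double l2   = cv::norm(M);                // Frobenius/L2: sqrt(9 + 16) == 5
+   double l1   = cv::norm(M, cv::NORM_L1);   // sum of |m_ij| == 7
+   double linf = cv::norm(M, cv::NORM_INF);  // max of |m_ij| == 4
+   @endcode
+*/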
+
+
+
+//////////////////////////////// matx comma initializer //////////////////////////////////
+
+template<typename _Tp, typename _T2, int m, int n> static inline
+MatxCommaInitializer<_Tp, m, n> operator << (const Matx<_Tp, m, n>& mtx, _T2 val)
+{
+    MatxCommaInitializer<_Tp, m, n> commaInitializer((Matx<_Tp, m, n>*)&mtx);
+    return (commaInitializer, val);
+}
+
+template<typename _Tp, int m, int n> inline
+MatxCommaInitializer<_Tp, m, n>::MatxCommaInitializer(Matx<_Tp, m, n>* _mtx)
+    : dst(_mtx), idx(0)
+{}
+
+template<typename _Tp, int m, int n> template<typename _T2> inline
+MatxCommaInitializer<_Tp, m, n>& MatxCommaInitializer<_Tp, m, n>::operator , (_T2 value)
+{
+    CV_DbgAssert( idx < m*n );
+    dst->val[idx++] = saturate_cast<_Tp>(value);
+    return *this;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, m, n> MatxCommaInitializer<_Tp, m, n>::operator *() const
+{
+    CV_DbgAssert( idx == n*m );
+    return *dst;
+}
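+/* Editorial note: the comma initializer fills a Matx in row-major order, one value
+   per `,`; an illustrative sketch (not part of the upstream header):
+   @code{.cpp}
+   cv::Matx23f M;
+   M << 1, 2, 3,
+        4, 5, 6;   // each value passes through saturate_cast<float>
+   @endcode
+*/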
+
+
+
+/////////////////////////////////// Vec Implementation ///////////////////////////////////
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec() {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0)
+    : Matx<_Tp, cn, 1>(v0) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1)
+    : Matx<_Tp, cn, 1>(v0, v1) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2)
+    : Matx<_Tp, cn, 1>(v0, v1, v2) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13)
+    : Matx<_Tp, cn, 1>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const _Tp* values)
+    : Matx<_Tp, cn, 1>(values) {}
+
+#ifdef CV_CXX11
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(std::initializer_list<_Tp> list)
+    : Matx<_Tp, cn, 1>(list) {}
+#endif
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Vec<_Tp, cn>& m)
+    : Matx<_Tp, cn, 1>(m.val) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp op)
+    : Matx<_Tp, cn, 1>(a, b, op) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp op)
+    : Matx<_Tp, cn, 1>(a, b, op) {}
+
+template<typename _Tp, int cn> template<typename _T2> inline
+Vec<_Tp, cn>::Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp op)
+    : Matx<_Tp, cn, 1>(a, alpha, op) {}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha)
+{
+    Vec v;
+    for( int i = 0; i < cn; i++ ) v.val[i] = alpha;
+    return v;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const
+{
+    Vec<_Tp, cn> w;
+    for( int i = 0; i < cn; i++ ) w.val[i] = saturate_cast<_Tp>(this->val[i]*v.val[i]);
+    return w;
+}
+
+template<> inline
+Vec<float, 2> Vec<float, 2>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<double, 2> Vec<double, 2>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<float, 4> Vec<float, 4>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<> inline
+Vec<double, 4> Vec<double, 4>::conj() const
+{
+    return cv::internal::conjugate(*this);
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::cross(const Vec<_Tp, cn>&) const
+{
+    CV_StaticAssert(cn == 3, "the cross product is defined only for 3-element vectors");
+    return Vec<_Tp, cn>();
+}
+
+template<> inline
+Vec<float, 3> Vec<float, 3>::cross(const Vec<float, 3>& v) const
+{
+    return Vec<float,3>(this->val[1]*v.val[2] - this->val[2]*v.val[1],
+                     this->val[2]*v.val[0] - this->val[0]*v.val[2],
+                     this->val[0]*v.val[1] - this->val[1]*v.val[0]);
+}
+
+template<> inline
+Vec<double, 3> Vec<double, 3>::cross(const Vec<double, 3>& v) const
+{
+    return Vec<double,3>(this->val[1]*v.val[2] - this->val[2]*v.val[1],
+                     this->val[2]*v.val[0] - this->val[0]*v.val[2],
+                     this->val[0]*v.val[1] - this->val[1]*v.val[0]);
+}
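+/* Editorial note: cross() is specialized for 3-element float/double vectors only;
+   an illustrative sketch (not part of the upstream header):
+   @code{.cpp}
+   cv::Vec3f x(1, 0, 0), y(0, 1, 0);
+   cv::Vec3f z = x.cross(y);  // (0, 0, 1): the right-handed cross product
+   @endcode
+*/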
+
+template<typename _Tp, int cn> template<typename T2> inline
+Vec<_Tp, cn>::operator Vec<T2, cn>() const
+{
+    Vec<T2, cn> v;
+    for( int i = 0; i < cn; i++ ) v.val[i] = saturate_cast<T2>(this->val[i]);
+    return v;
+}
+
+template<typename _Tp, int cn> inline
+const _Tp& Vec<_Tp, cn>::operator [](int i) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+_Tp& Vec<_Tp, cn>::operator [](int i)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+const _Tp& Vec<_Tp, cn>::operator ()(int i) const
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+_Tp& Vec<_Tp, cn>::operator ()(int i)
+{
+    CV_DbgAssert( (unsigned)i < (unsigned)cn );
+    return this->val[i];
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v)
+{
+    double nv = norm(v);
+    return v * (nv ? 1./nv : 0.);
+}
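+/* Editorial note: normalize() rescales a vector to unit L2 norm and maps the zero
+   vector to itself; an illustrative sketch (not part of the upstream header):
+   @code{.cpp}
+   cv::Vec2f v(3.f, 4.f);
+   cv::Vec2f u = cv::normalize(v);  // (0.6f, 0.8f), since norm(v) == 5
+   @endcode
+*/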
+
+
+
+//////////////////////////////// vec comma initializer //////////////////////////////////
+
+
+template<typename _Tp, typename _T2, int cn> static inline
+VecCommaInitializer<_Tp, cn> operator << (const Vec<_Tp, cn>& vec, _T2 val)
+{
+    VecCommaInitializer<_Tp, cn> commaInitializer((Vec<_Tp, cn>*)&vec);
+    return (commaInitializer, val);
+}
+
+template<typename _Tp, int cn> inline
+VecCommaInitializer<_Tp, cn>::VecCommaInitializer(Vec<_Tp, cn>* _vec)
+    : MatxCommaInitializer<_Tp, cn, 1>(_vec)
+{}
+
+template<typename _Tp, int cn> template<typename _T2> inline
+VecCommaInitializer<_Tp, cn>& VecCommaInitializer<_Tp, cn>::operator , (_T2 value)
+{
+    CV_DbgAssert( this->idx < cn );
+    this->dst->val[this->idx++] = saturate_cast<_Tp>(value);
+    return *this;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> VecCommaInitializer<_Tp, cn>::operator *() const
+{
+    CV_DbgAssert( this->idx == cn );
+    return *this->dst;
+}
+
+//! @endcond
+
+///////////////////////////// Matx out-of-class operators ////////////////////////////////
+
+//! @relates cv::Matx
+//! @{
+
+template<typename _Tp1, typename _Tp2, int m, int n> static inline
+Matx<_Tp1, m, n>& operator += (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]);
+    return a;
+}
+
+template<typename _Tp1, typename _Tp2, int m, int n> static inline
+Matx<_Tp1, m, n>& operator -= (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator + (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_AddOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_SubOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, int alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, float alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, double alpha)
+{
+    for( int i = 0; i < m*n; i++ )
+        a.val[i] = saturate_cast<_Tp>(a.val[i] * alpha);
+    return a;
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, int alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, float alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, double alpha)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (int alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (float alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a)
+{
+    return Matx<_Tp, m, n>(a, -1, Matx_ScaleOp());
+}
+
+template<typename _Tp, int m, int n, int l> static inline
+Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b)
+{
+    return Matx<_Tp, m, n>(a, b, Matx_MatMulOp());
+}
+
+template<typename _Tp, int m, int n> static inline
+Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b)
+{
+    Matx<_Tp, m, 1> c(a, b, Matx_MatMulOp());
+    return (const Vec<_Tp, m>&)(c);
+}
+
+template<typename _Tp, int m, int n> static inline
+bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    for( int i = 0; i < m*n; i++ )
+        if( a.val[i] != b.val[i] ) return false;
+    return true;
+}
+
+template<typename _Tp, int m, int n> static inline
+bool operator != (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b)
+{
+    return !(a == b);
+}
+
+//! @}
+
+////////////////////////////// Vec out-of-class operators ////////////////////////////////
+
+//! @relates cv::Vec
+//! @{
+
+template<typename _Tp1, typename _Tp2, int cn> static inline
+Vec<_Tp1, cn>& operator += (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b)
+{
+    for( int i = 0; i < cn; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] + b.val[i]);
+    return a;
+}
+
+template<typename _Tp1, typename _Tp2, int cn> static inline
+Vec<_Tp1, cn>& operator -= (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b)
+{
+    for( int i = 0; i < cn; i++ )
+        a.val[i] = saturate_cast<_Tp1>(a.val[i] - b.val[i]);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator + (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b)
+{
+    return Vec<_Tp, cn>(a, b, Matx_AddOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b)
+{
+    return Vec<_Tp, cn>(a, b, Matx_SubOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, int alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, float alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, double alpha)
+{
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*alpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, int alpha)
+{
+    double ialpha = 1./alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, float alpha)
+{
+    float ialpha = 1.f/alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, double alpha)
+{
+    double ialpha = 1./alpha;
+    for( int i = 0; i < cn; i++ )
+        a[i] = saturate_cast<_Tp>(a[i]*ialpha);
+    return a;
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, int alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (int alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, float alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (float alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, double alpha)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator * (double alpha, const Vec<_Tp, cn>& a)
+{
+    return Vec<_Tp, cn>(a, alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, int alpha)
+{
+    return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, float alpha)
+{
+    return Vec<_Tp, cn>(a, 1.f/alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, double alpha)
+{
+    return Vec<_Tp, cn>(a, 1./alpha, Matx_ScaleOp());
+}
+
+template<typename _Tp, int cn> static inline
+Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a)
+{
+    Vec<_Tp,cn> t;
+    for( int i = 0; i < cn; i++ ) t.val[i] = saturate_cast<_Tp>(-a.val[i]);
+    return t;
+}
+
+template<typename _Tp> inline Vec<_Tp, 4> operator * (const Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2)
+{
+    return Vec<_Tp, 4>(saturate_cast<_Tp>(v1[0]*v2[0] - v1[1]*v2[1] - v1[2]*v2[2] - v1[3]*v2[3]),
+                       saturate_cast<_Tp>(v1[0]*v2[1] + v1[1]*v2[0] + v1[2]*v2[3] - v1[3]*v2[2]),
+                       saturate_cast<_Tp>(v1[0]*v2[2] - v1[1]*v2[3] + v1[2]*v2[0] + v1[3]*v2[1]),
+                       saturate_cast<_Tp>(v1[0]*v2[3] + v1[1]*v2[2] - v1[2]*v2[1] + v1[3]*v2[0]));
+}
+
+template<typename _Tp> inline Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2)
+{
+    v1 = v1 * v2;
+    return v1;
+}
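+/* Editorial note: the 4-element operator* above is the Hamilton (quaternion)
+   product with v[0] as the scalar part, not an element-wise multiply; an
+   illustrative sketch (not part of the upstream header):
+   @code{.cpp}
+   // with components ordered (w, x, y, z): i * j == k
+   cv::Vec4d i(0, 1, 0, 0), j(0, 0, 1, 0);
+   cv::Vec4d k = i * j;  // (0, 0, 0, 1)
+   @endcode
+*/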
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_MATX_HPP

+ 128 - 0
ThresholdTools/include/opencv2/core/neon_utils.hpp

@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_NEON_UTILS_HPP
+#define OPENCV_HAL_NEON_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_neon
+//! @{
+
+#if CV_NEON
+
+inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
+{
+    static int32x2_t v_sign = vdup_n_s32(1 << 31),
+        v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
+
+    int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
+    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
+}
+
+inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
+{
+    static int32x4_t v_sign = vdupq_n_s32(1 << 31),
+        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
+
+    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
+    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
+}
+
+inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
+{
+    static float32x2_t v_05 = vdup_n_f32(0.5f);
+    return vcvt_u32_f32(vadd_f32(v, v_05));
+}
+
+inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
+{
+    static float32x4_t v_05 = vdupq_n_f32(0.5f);
+    return vcvtq_u32_f32(vaddq_f32(v, v_05));
+}
+
+inline float32x4_t cv_vrecpq_f32(float32x4_t val)
+{
+    float32x4_t reciprocal = vrecpeq_f32(val);
+    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
+    return reciprocal;
+}
+
+inline float32x2_t cv_vrecp_f32(float32x2_t val)
+{
+    float32x2_t reciprocal = vrecpe_f32(val);
+    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
+    reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
+    return reciprocal;
+}
+
+inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
+{
+    float32x4_t e = vrsqrteq_f32(val);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
+    return e;
+}
+
+inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
+{
+    float32x2_t e = vrsqrte_f32(val);
+    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
+    e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
+    return e;
+}
+
+inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
+{
+    return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
+}
+
+inline float32x2_t cv_vsqrt_f32(float32x2_t val)
+{
+    return cv_vrecp_f32(cv_vrsqrt_f32(val));
+}
+
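+/* Editorial note: an illustrative sketch of the rounding helpers above (not part
+   of the upstream header); adding a 0.5 that carries the sign of the input before
+   the truncating convert rounds half away from zero:
+   @code{.cpp}
+   float32x4_t v  = vdupq_n_f32(2.5f);
+   int32x4_t   iv = cv_vrndq_s32_f32(v); // all four lanes == 3
+   @endcode
+*/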
+#endif
+
+//! @}
+
+#endif // OPENCV_HAL_NEON_UTILS_HPP

+ 842 - 0
ThresholdTools/include/opencv2/core/ocl.hpp

@@ -0,0 +1,842 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_HPP
+#define OPENCV_OPENCL_HPP
+
+#include "opencv2/core.hpp"
+
+namespace cv { namespace ocl {
+
+//! @addtogroup core_opencl
+//! @{
+
+CV_EXPORTS_W bool haveOpenCL();
+CV_EXPORTS_W bool useOpenCL();
+CV_EXPORTS_W bool haveAmdBlas();
+CV_EXPORTS_W bool haveAmdFft();
+CV_EXPORTS_W void setUseOpenCL(bool flag);
+CV_EXPORTS_W void finish();
+
+CV_EXPORTS bool haveSVM();
+
+class CV_EXPORTS Context;
+class CV_EXPORTS Device;
+class CV_EXPORTS Kernel;
+class CV_EXPORTS Program;
+class CV_EXPORTS ProgramSource;
+class CV_EXPORTS Queue;
+class CV_EXPORTS PlatformInfo;
+class CV_EXPORTS Image2D;
+
+class CV_EXPORTS Device
+{
+public:
+    Device();
+    explicit Device(void* d);
+    Device(const Device& d);
+    Device& operator = (const Device& d);
+    ~Device();
+
+    void set(void* d);
+
+    enum
+    {
+        TYPE_DEFAULT     = (1 << 0),
+        TYPE_CPU         = (1 << 1),
+        TYPE_GPU         = (1 << 2),
+        TYPE_ACCELERATOR = (1 << 3),
+        TYPE_DGPU        = TYPE_GPU + (1 << 16),
+        TYPE_IGPU        = TYPE_GPU + (1 << 17),
+        TYPE_ALL         = 0xFFFFFFFF
+    };
+
+    String name() const;
+    String extensions() const;
+    bool isExtensionSupported(const String& extensionName) const;
+    String version() const;
+    String vendorName() const;
+    String OpenCL_C_Version() const;
+    String OpenCLVersion() const;
+    int deviceVersionMajor() const;
+    int deviceVersionMinor() const;
+    String driverVersion() const;
+    void* ptr() const;
+
+    int type() const;
+
+    int addressBits() const;
+    bool available() const;
+    bool compilerAvailable() const;
+    bool linkerAvailable() const;
+
+    enum
+    {
+        FP_DENORM=(1 << 0),
+        FP_INF_NAN=(1 << 1),
+        FP_ROUND_TO_NEAREST=(1 << 2),
+        FP_ROUND_TO_ZERO=(1 << 3),
+        FP_ROUND_TO_INF=(1 << 4),
+        FP_FMA=(1 << 5),
+        FP_SOFT_FLOAT=(1 << 6),
+        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
+    };
+    int doubleFPConfig() const;
+    int singleFPConfig() const;
+    int halfFPConfig() const;
+
+    bool endianLittle() const;
+    bool errorCorrectionSupport() const;
+
+    enum
+    {
+        EXEC_KERNEL=(1 << 0),
+        EXEC_NATIVE_KERNEL=(1 << 1)
+    };
+    int executionCapabilities() const;
+
+    size_t globalMemCacheSize() const;
+
+    enum
+    {
+        NO_CACHE=0,
+        READ_ONLY_CACHE=1,
+        READ_WRITE_CACHE=2
+    };
+    int globalMemCacheType() const;
+    int globalMemCacheLineSize() const;
+    size_t globalMemSize() const;
+
+    size_t localMemSize() const;
+    enum
+    {
+        NO_LOCAL_MEM=0,
+        LOCAL_IS_LOCAL=1,
+        LOCAL_IS_GLOBAL=2
+    };
+    int localMemType() const;
+    bool hostUnifiedMemory() const;
+
+    bool imageSupport() const;
+
+    bool imageFromBufferSupport() const;
+    uint imagePitchAlignment() const;
+    uint imageBaseAddressAlignment() const;
+
+    /// deprecated, use the isExtensionSupported() method instead (typically with the "cl_khr_subgroups" value)
+    bool intelSubgroupsSupport() const;
+
+    size_t image2DMaxWidth() const;
+    size_t image2DMaxHeight() const;
+
+    size_t image3DMaxWidth() const;
+    size_t image3DMaxHeight() const;
+    size_t image3DMaxDepth() const;
+
+    size_t imageMaxBufferSize() const;
+    size_t imageMaxArraySize() const;
+
+    enum
+    {
+        UNKNOWN_VENDOR=0,
+        VENDOR_AMD=1,
+        VENDOR_INTEL=2,
+        VENDOR_NVIDIA=3
+    };
+    int vendorID() const;
+    // FIXIT
+    // dev.isAMD() doesn't work for OpenCL CPU devices from the AMD OpenCL platform.
+    // This method should use the platform name instead of the vendor name.
+    // After the fix, restore the code in arithm.cpp: ocl_compare()
+    inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+
+    int maxClockFrequency() const;
+    int maxComputeUnits() const;
+    int maxConstantArgs() const;
+    size_t maxConstantBufferSize() const;
+
+    size_t maxMemAllocSize() const;
+    size_t maxParameterSize() const;
+
+    int maxReadImageArgs() const;
+    int maxWriteImageArgs() const;
+    int maxSamplers() const;
+
+    size_t maxWorkGroupSize() const;
+    int maxWorkItemDims() const;
+    void maxWorkItemSizes(size_t*) const;
+
+    int memBaseAddrAlign() const;
+
+    int nativeVectorWidthChar() const;
+    int nativeVectorWidthShort() const;
+    int nativeVectorWidthInt() const;
+    int nativeVectorWidthLong() const;
+    int nativeVectorWidthFloat() const;
+    int nativeVectorWidthDouble() const;
+    int nativeVectorWidthHalf() const;
+
+    int preferredVectorWidthChar() const;
+    int preferredVectorWidthShort() const;
+    int preferredVectorWidthInt() const;
+    int preferredVectorWidthLong() const;
+    int preferredVectorWidthFloat() const;
+    int preferredVectorWidthDouble() const;
+    int preferredVectorWidthHalf() const;
+
+    size_t printfBufferSize() const;
+    size_t profilingTimerResolution() const;
+
+    static const Device& getDefault();
+
+protected:
+    struct Impl;
+    Impl* p;
+};
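+/* Editorial note: an illustrative sketch of querying the default device (not part
+   of the upstream header):
+   @code{.cpp}
+   if (cv::ocl::haveOpenCL())
+   {
+       const cv::ocl::Device& dev = cv::ocl::Device::getDefault();
+       std::cout << dev.vendorName() << " " << dev.name()
+                 << ", OpenCL " << dev.OpenCLVersion()
+                 << ", compute units: " << dev.maxComputeUnits() << std::endl;
+   }
+   @endcode
+*/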
+
+
+class CV_EXPORTS Context
+{
+public:
+    Context();
+    explicit Context(int dtype);
+    ~Context();
+    Context(const Context& c);
+    Context& operator = (const Context& c);
+
+    bool create();
+    bool create(int dtype);
+    size_t ndevices() const;
+    const Device& device(size_t idx) const;
+    Program getProg(const ProgramSource& prog,
+                    const String& buildopt, String& errmsg);
+    void unloadProg(Program& prog);
+
+    static Context& getDefault(bool initialize = true);
+    void* ptr() const;
+
+    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+    bool useSVM() const;
+    void setUseSVM(bool enabled);
+
+    struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+//protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Platform
+{
+public:
+    Platform();
+    ~Platform();
+    Platform(const Platform& p);
+    Platform& operator = (const Platform& p);
+
+    void* ptr() const;
+    static Platform& getDefault();
+
+    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+/** @brief Attaches OpenCL context to OpenCV
+@note
+  OpenCV will check whether an available OpenCL platform matches platformName, then assign the
+  context to OpenCV and call the `clRetainContext` function. The deviceID device will be used as
+  the target device, and a new command queue will be created.
+@param platformName name of the OpenCL platform to attach; this string is used to check whether the platform is available to OpenCV at runtime
+@param platformID ID of the platform that the attached context was created for
+@param context OpenCL context to be attached to OpenCV
+@param deviceID ID of the device; it must be created from the attached context
+*/
+CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
+
+/** @brief Convert OpenCL buffer to UMat
+@note
+  The OpenCL buffer (cl_mem_buffer) should contain 2D image data compatible with OpenCV. Memory
+  content is not copied from `clBuffer` to the UMat. Instead, the buffer handle is assigned to the
+  UMat and `clRetainMemObject` is called.
+@param cl_mem_buffer source clBuffer handle
+@param step number of bytes in a single row
+@param rows number of rows
+@param cols number of cols
+@param type OpenCV type of image
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
+
+/** @brief Convert OpenCL image2d_t to UMat
+@note
+  The OpenCL `image2d_t` (cl_mem_image) should be compatible with OpenCV UMat formats. Memory
+  content is copied from the image to the UMat with the `clEnqueueCopyImageToBuffer` function.
+@param cl_mem_image source image2d_t handle
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
+
+// TODO Move to internal header
+void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+class CV_EXPORTS Queue
+{
+public:
+    Queue();
+    explicit Queue(const Context& c, const Device& d=Device());
+    ~Queue();
+    Queue(const Queue& q);
+    Queue& operator = (const Queue& q);
+
+    bool create(const Context& c=Context(), const Device& d=Device());
+    void finish();
+    void* ptr() const;
+    static Queue& getDefault();
+
+    /// @brief Returns an OpenCL command queue with profiling mode enabled
+    const Queue& getProfilingQueue() const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return p; }
+protected:
+    Impl* p;
+};
+
+
+class CV_EXPORTS KernelArg
+{
+public:
+    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
+    KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
+    KernelArg();
+
+    static KernelArg Local() { return KernelArg(LOCAL, 0); }
+    static KernelArg PtrWriteOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadOnly(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
+    static KernelArg PtrReadWrite(const UMat& m)
+    { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
+    static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
+    static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
+    static KernelArg Constant(const Mat& m);
+    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+    { return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
+
+    int flags;
+    UMat* m;
+    const void* obj;
+    size_t sz;
+    int wscale, iwscale;
+};
+
+
+class CV_EXPORTS Kernel
+{
+public:
+    Kernel();
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts = String(), String* errmsg=0);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+
+    bool empty() const;
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String* errmsg=0);
+
+    int set(int i, const void* value, size_t sz);
+    int set(int i, const Image2D& image2D);
+    int set(int i, const UMat& m);
+    int set(int i, const KernelArg& arg);
+    template<typename _Tp> int set(int i, const _Tp& value)
+    { return set(i, &value, sizeof(value)); }
+
+    template<typename _Tp0>
+    Kernel& args(const _Tp0& a0)
+    {
+        set(0, a0); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1)
+    {
+        int i = set(0, a0); set(i, a1); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
+    {
+        int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
+        i = set(i, a3); set(i, a4); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2,
+             typename _Tp3, typename _Tp4, typename _Tp5>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+                 const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2);
+        i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
+        i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
+        i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
+        i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+             typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        set(i, a12); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); set(i, a13); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13, typename _Tp14>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
+    }
+
+    template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+             typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+             typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
+             typename _Tp13, typename _Tp14, typename _Tp15>
+    Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+                 const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+                 const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
+                 const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
+    {
+        int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
+        i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
+        i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
+    }
+    /** @brief Run the OpenCL kernel.
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension. It is not the final globalsize passed to
+      OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
+      value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
+      adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for the OpenCL computation to finish before returning.
+    @param q command queue
+    */
+    bool run(int dims, size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+    bool runTask(bool sync, const Queue& q=Queue());
+
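+    /* Editorial note: a typical dispatch sketch (illustrative; `my_kernel`,
+       `kernelCode`, `srcU` and `dstU` are hypothetical names, not from the
+       upstream sources):
+       @code{.cpp}
+       cv::ocl::ProgramSource src(kernelCode);           // OpenCL C source string
+       cv::String errmsg;
+       cv::ocl::Kernel k("my_kernel", src, "", &errmsg);
+       k.args(cv::ocl::KernelArg::ReadOnly(srcU),
+              cv::ocl::KernelArg::WriteOnly(dstU));
+       size_t globalsize[2] = { (size_t)srcU.cols, (size_t)srcU.rows };
+       bool ok = k.run(2, globalsize, NULL, true);       // synchronous, default queue
+       @endcode
+    */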
+    /** @brief Similar to a synchronized run() call, but returns the kernel execution time.
+     * A separate OpenCL command queue may be used (created with CL_QUEUE_PROFILING_ENABLE).
+     * @return Execution time in nanoseconds, or a negative number on error
+     */
+    int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    size_t preferedWorkGroupSizeMultiple() const;
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS Program
+{
+public:
+    Program();
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    Program(const Program& prog);
+
+    Program& operator = (const Program& prog);
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg);
+
+    void* ptr() const;
+
+    /**
+     * @brief Query device-specific program binary.
+     *
+     * Returns RAW OpenCL executable binary without additional attachments.
+     *
+     * @sa ProgramSource::fromBinary
+     *
+     * @param[out] binary output buffer
+     */
+    void getBinary(std::vector<char>& binary) const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+protected:
+    Impl* p;
+public:
+#ifndef OPENCV_REMOVE_DEPRECATED_API
+    // TODO Remove this
+    CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
+    CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
+    CV_DEPRECATED const ProgramSource& source() const; // implementation removed
+    CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
+    CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
+#endif
+};
+
+
+class CV_EXPORTS ProgramSource
+{
+public:
+    typedef uint64 hash_t; // deprecated
+
+    ProgramSource();
+    explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
+    explicit ProgramSource(const String& prog); // deprecated
+    explicit ProgramSource(const char* prog); // deprecated
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+
+    const String& source() const; // deprecated
+    hash_t hash() const; // deprecated
+
+
+    /** @brief Describe OpenCL program binary.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     * @return created ProgramSource object
+     */
+    static ProgramSource fromBinary(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    /** @brief Describe OpenCL program in SPIR format.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension:
+     * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
+     * (but they are not portable between different platforms: 32-bit / 64-bit)
+     *
+     * Note: these programs can't use vendor-specific extensions, such as 'cl_intel_subgroups'.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     *        (these options are added automatically: '-x spir' and '-spir-std=1.2')
+     * @return created ProgramSource object.
+     */
+    static ProgramSource fromSPIR(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    //OpenCL 2.1+ only
+    //static Program fromSPIRV(const String& module, const String& name,
+    //        const unsigned char* binary, const size_t size,
+    //        const cv::String& buildOptions = cv::String());
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+protected:
+    Impl* p;
+};
+
+class CV_EXPORTS PlatformInfo
+{
+public:
+    PlatformInfo();
+    explicit PlatformInfo(void* id);
+    ~PlatformInfo();
+
+    PlatformInfo(const PlatformInfo& i);
+    PlatformInfo& operator =(const PlatformInfo& i);
+
+    String name() const;
+    String vendor() const;
+    String version() const;
+    int deviceNumber() const;
+    void getDevice(Device& device, int d) const;
+
+protected:
+    struct Impl;
+    Impl* p;
+};
+
+CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
+CV_EXPORTS const char* typeToStr(int t);
+CV_EXPORTS const char* memopTypeToStr(int t);
+CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
+CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
+CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
+
+
+enum OclVectorStrategy
+{
+    // each matrix keeps its own vector width
+    OCL_VECTOR_OWN = 0,
+    // all matrices use the maximal vector width among them
+    // (useful when the matrices have different data types)
+    OCL_VECTOR_MAX = 1,
+
+    // default strategy
+    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
+};
+
+CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                         InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                         InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                         OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+
+CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
+                                       InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                       InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                       InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                       OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+
+// with OCL_VECTOR_MAX strategy
+CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                            InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                            InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
+
+CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
+
+class CV_EXPORTS Image2D
+{
+public:
+    Image2D();
+
+    /**
+    @param src UMat object from which to get image properties and data
+    @param norm flag to enable the use of normalized channel data types
+    @param alias flag indicating that the image should alias the src UMat. If true, changes to the
+        image or src will be reflected in both objects.
+    */
+    explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
+    Image2D(const Image2D & i);
+    ~Image2D();
+
+    Image2D & operator = (const Image2D & i);
+
+    /** Indicates if creating an aliased image should succeed.
+    Depends on the underlying platform and the dimensions of the UMat.
+    */
+    static bool canCreateAlias(const UMat &u);
+
+    /** Indicates if the image format is supported.
+    */
+    static bool isFormatSupported(int depth, int cn, bool norm);
+
+    void* ptr() const;
+protected:
+    struct Impl;
+    Impl* p;
+};
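+/* Editorial note: an illustrative sketch (not part of the upstream header); the
+   format check guards against unsupported channel layouts:
+   @code{.cpp}
+   cv::UMat u(480, 640, CV_8UC4);
+   if (cv::ocl::Image2D::isFormatSupported(CV_8U, 4, false))
+   {
+       cv::ocl::Image2D img(u);   // copies u into an OpenCL image2d_t
+       // pass img to a kernel with cv::ocl::Kernel::set(i, img)
+   }
+   @endcode
+*/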
+
+class CV_EXPORTS Timer
+{
+public:
+    Timer(const Queue& q);
+    ~Timer();
+    void start();
+    void stop();
+
+    uint64 durationNS() const; //!< duration in nanoseconds
+
+protected:
+    struct Impl;
+    Impl* const p;
+
+private:
+    Timer(const Timer&); // disabled
+    Timer& operator=(const Timer&); // disabled
+};
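+/* Editorial note: an illustrative timing sketch (not part of the upstream header);
+   assumes the queue supports profiling:
+   @code{.cpp}
+   cv::ocl::Queue& q = cv::ocl::Queue::getDefault();
+   cv::ocl::Timer t(q);
+   t.start();
+   // ... enqueue OpenCL work on q here ...
+   t.stop();
+   uint64 ns = t.durationNS();  // elapsed time in nanoseconds
+   @endcode
+*/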
+
+CV_EXPORTS MatAllocator* getOpenCLAllocator();
+
+
+#ifdef __OPENCV_BUILD
+namespace internal {
+
+CV_EXPORTS bool isOpenCLForced();
+#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
+
+CV_EXPORTS bool isPerformanceCheckBypassed();
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif
+
+//! @}
+
+}}
+
+#endif

+ 69 - 0
ThresholdTools/include/opencv2/core/ocl_genbase.hpp

@@ -0,0 +1,69 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_GENBASE_HPP
+#define OPENCV_OPENCL_GENBASE_HPP
+
+//! @cond IGNORED
+
+namespace cv {
+namespace ocl {
+
+class ProgramSource;
+
+namespace internal {
+
+struct CV_EXPORTS ProgramEntry
+{
+    const char* module;
+    const char* name;
+    const char* programCode;
+    const char* programHash;
+    ProgramSource* pProgramSource;
+
+    operator ProgramSource& () const;
+};
+
+} } } // namespace
+
+//! @endcond
+
+#endif

+ 729 - 0
ThresholdTools/include/opencv2/core/opengl.hpp

@@ -0,0 +1,729 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPENGL_HPP
+#define OPENCV_CORE_OPENGL_HPP
+
+#ifndef __cplusplus
+#  error opengl.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "ocl.hpp"
+
+namespace cv { namespace ogl {
+
+/** @addtogroup core_opengl
+This section describes OpenGL interoperability.
+
+To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON. Currently OpenGL is
+supported only with the WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
+supported). The GTK backend requires the gtkglext-1.0 library.
+
+To use OpenGL functionality you should first create an OpenGL context (window or frame buffer). You
+can do this with the namedWindow function or with another OpenGL toolkit (GLUT, for example).
+*/
+//! @{
+
+/////////////////// OpenGL Objects ///////////////////
+
+/** @brief Smart pointer for OpenGL buffer object with reference counting.
+
+Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
+context. These can be used to store vertex data, pixel data retrieved from images or the
+framebuffer, and a variety of other things.
+
+ogl::Buffer has an interface similar to Mat and represents 2D array memory.
+
+ogl::Buffer supports memory transfers between host and device and can also be mapped to CUDA memory.
+ */
+class CV_EXPORTS Buffer
+{
+public:
+    /** @brief The target defines how you intend to use the buffer object.
+    */
+    enum Target
+    {
+        ARRAY_BUFFER         = 0x8892, //!< The buffer will be used as a source for vertex data
+        ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
+        PIXEL_PACK_BUFFER    = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
+        PIXEL_UNPACK_BUFFER  = 0x88EC  //!< The buffer will be used for writing to OpenGL textures
+    };
+
+    enum Access
+    {
+        READ_ONLY  = 0x88B8,
+        WRITE_ONLY = 0x88B9,
+        READ_WRITE = 0x88BA
+    };
+
+    /** @brief The constructors.
+
+    Creates an empty ogl::Buffer object, creates an ogl::Buffer object from an existing buffer (the
+    abufId parameter), allocates memory for the ogl::Buffer object, or copies from host/device memory.
+     */
+    Buffer();
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param abufId Buffer object name.
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Allocates memory for ogl::Buffer object.
+
+    @param arows Number of rows in a 2D array.
+    @param acols Number of columns in a 2D array.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Decrements the reference counter and destroys the buffer object if needed.
+
+    The function will call setAutoRelease(true) .
+     */
+    void release();
+
+    /** @brief Sets auto release mode.
+
+    The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context
+    was bound to a window it could be released at any time (the user can close the window). If the
+    object's destructor is called after destruction of the context it will cause an error. Thus
+    ogl::Buffer does not destroy the OpenGL object in its destructor by default (all OpenGL resources
+    are released together with the OpenGL context). This function can force the ogl::Buffer destructor
+    to destroy the OpenGL object.
+    @param flag Auto release mode (if true, release will be called in object's destructor).
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Copies from host/device memory to OpenGL buffer.
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
+    @param target Buffer usage. See cv::ogl::Buffer::Target .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @overload */
+    void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
+
+    /** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
+
+    @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or
+    ogl::Buffer ).
+     */
+    void copyTo(OutputArray arr) const;
+
+    /** @overload */
+    void copyTo(OutputArray arr, cuda::Stream& stream) const;
+
+    /** @brief Creates a full copy of the buffer object and the underlying data.
+
+    @param target Buffer usage for destination buffer.
+    @param autoRelease Auto release mode for destination buffer.
+     */
+    Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
+
+    /** @brief Binds OpenGL buffer to the specified buffer binding point.
+
+    @param target Binding point. See cv::ogl::Buffer::Target .
+     */
+    void bind(Target target) const;
+
+    /** @brief Unbind any buffers from the specified binding point.
+
+    @param target Binding point. See cv::ogl::Buffer::Target .
+     */
+    static void unbind(Target target);
+
+    /** @brief Maps OpenGL buffer to host memory.
+
+    mapHost maps the entire data store of the buffer object into the client's address space. The data
+    can then be directly read and/or written relative to the returned pointer, depending on the
+    specified access policy.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
+
+    This operation can lead to memory transfers between host and device.
+
+    Only one buffer object can be mapped at a time.
+    @param access Access policy, indicating whether it will be possible to read from, write to, or both
+    read from and write to the buffer object's mapped data store. The symbolic constant must be
+    ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
+     */
+    Mat mapHost(Access access);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapHost();
+
+    //! map to device memory (blocking)
+    cuda::GpuMat mapDevice();
+    void unmapDevice();
+
+    /** @brief Maps OpenGL buffer to CUDA device memory.
+
+    This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
+
+    A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
+     */
+    cuda::GpuMat mapDevice(cuda::Stream& stream);
+
+    /** @brief Unmaps OpenGL buffer.
+    */
+    void unmapDevice(cuda::Stream& stream);
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    int type() const;
+    int depth() const;
+    int channels() const;
+    int elemSize() const;
+    int elemSize1() const;
+
+    //! get OpenGL object id
+    unsigned int bufId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    int type_;
+};
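+
+// A minimal host-mapping sketch, assuming a current OpenGL context and a
+// hypothetical CV_8UC3 cv::Mat `mat`:
+//
+//     cv::ogl::Buffer buf(mat, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER);
+//     cv::Mat mapped = buf.mapHost(cv::ogl::Buffer::READ_WRITE);
+//     mapped.setTo(cv::Scalar::all(0));  // read/write through the mapped store
+//     buf.unmapHost();                   // must unmap before the buffer is used again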
+
+/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
+ */
+class CV_EXPORTS Texture2D
+{
+public:
+    /** @brief An Image Format describes the way that the images in Textures store their data.
+    */
+    enum Format
+    {
+        NONE            = 0,
+        DEPTH_COMPONENT = 0x1902, //!< Depth
+        RGB             = 0x1907, //!< Red, Green, Blue
+        RGBA            = 0x1908  //!< Red, Green, Blue, Alpha
+    };
+
+    /** @brief The constructors.
+
+    Creates empty ogl::Texture2D object, allocates memory for ogl::Texture2D object or copies from
+    host/device memory.
+     */
+    Texture2D();
+
+    /** @overload */
+    Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload */
+    Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
+
+    /** @overload
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    Texture2D(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @overload
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    explicit Texture2D(InputArray arr, bool autoRelease = false);
+
+    /** @brief Allocates memory for ogl::Texture2D object.
+
+    @param arows Number of rows.
+    @param acols Number of columns.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void create(int arows, int acols, Format aformat, bool autoRelease = false);
+    /** @overload
+    @param asize 2D array size.
+    @param aformat Image format. See cv::ogl::Texture2D::Format .
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+    */
+    void create(Size asize, Format aformat, bool autoRelease = false);
+
+    /** @brief Decrements the reference counter and destroys the texture object if needed.
+
+    The function will call setAutoRelease(true) .
+     */
+    void release();
+
+    /** @brief Sets auto release mode.
+
+    @param flag Auto release mode (if true, release will be called in object's destructor).
+
+    The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context
+    was bound to a window it could be released at any time (the user can close the window). If the
+    object's destructor is called after destruction of the context it will cause an error. Thus
+    ogl::Texture2D does not destroy the OpenGL object in its destructor by default (all OpenGL
+    resources are released together with the OpenGL context). This function can force the
+    ogl::Texture2D destructor to destroy the OpenGL object.
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Copies from host/device memory to OpenGL texture.
+
+    @param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
+    @param autoRelease Auto release mode (if true, release will be called in object's destructor).
+     */
+    void copyFrom(InputArray arr, bool autoRelease = false);
+
+    /** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
+
+    @param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
+    ogl::Texture2D ).
+    @param ddepth Destination depth.
+    @param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
+     */
+    void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
+
+    /** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
+    */
+    void bind() const;
+
+    int rows() const;
+    int cols() const;
+    Size size() const;
+    bool empty() const;
+
+    Format format() const;
+
+    //! get OpenGL object id
+    unsigned int texId() const;
+
+    class Impl;
+
+private:
+    Ptr<Impl> impl_;
+    int rows_;
+    int cols_;
+    Format format_;
+};
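+
+// A minimal usage sketch, assuming a current OpenGL context and a hypothetical
+// CV_8UC3 cv::Mat `frame`:
+//
+//     cv::ogl::Texture2D tex(frame);  // upload host data to an RGB texture
+//     tex.bind();                     // bind to the current active texture unit
+//     cv::Mat readBack;
+//     tex.copyTo(readBack, CV_8U);    // download the texture back to host memory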
+
+/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
+
+ogl::Arrays stores vertex data in ogl::Buffer objects.
+ */
+class CV_EXPORTS Arrays
+{
+public:
+    /** @brief Default constructor
+     */
+    Arrays();
+
+    /** @brief Sets an array of vertex coordinates.
+    @param vertex array with vertex coordinates, can be both host and device memory.
+    */
+    void setVertexArray(InputArray vertex);
+
+    /** @brief Resets vertex coordinates.
+    */
+    void resetVertexArray();
+
+    /** @brief Sets an array of vertex colors.
+    @param color array with vertex colors, can be both host and device memory.
+     */
+    void setColorArray(InputArray color);
+
+    /** @brief Resets vertex colors.
+    */
+    void resetColorArray();
+
+    /** @brief Sets an array of vertex normals.
+    @param normal array with vertex normals, can be both host and device memory.
+     */
+    void setNormalArray(InputArray normal);
+
+    /** @brief Resets vertex normals.
+    */
+    void resetNormalArray();
+
+    /** @brief Sets an array of vertex texture coordinates.
+    @param texCoord array with vertex texture coordinates, can be both host and device memory.
+     */
+    void setTexCoordArray(InputArray texCoord);
+
+    /** @brief Resets vertex texture coordinates.
+    */
+    void resetTexCoordArray();
+
+    /** @brief Releases all inner buffers.
+    */
+    void release();
+
+    /** @brief Sets auto release mode for all inner buffers.
+    @param flag Auto release mode.
+     */
+    void setAutoRelease(bool flag);
+
+    /** @brief Binds all vertex arrays.
+    */
+    void bind() const;
+
+    /** @brief Returns the vertex count.
+    */
+    int size() const;
+    bool empty() const;
+
+private:
+    int size_;
+    Buffer vertex_;
+    Buffer color_;
+    Buffer normal_;
+    Buffer texCoord_;
+};
+
+/////////////////// Render Functions ///////////////////
+
+//! render mode
+enum RenderModes {
+    POINTS         = 0x0000,
+    LINES          = 0x0001,
+    LINE_LOOP      = 0x0002,
+    LINE_STRIP     = 0x0003,
+    TRIANGLES      = 0x0004,
+    TRIANGLE_STRIP = 0x0005,
+    TRIANGLE_FAN   = 0x0006,
+    QUADS          = 0x0007,
+    QUAD_STRIP     = 0x0008,
+    POLYGON        = 0x0009
+};
+
+/** @brief Render OpenGL texture or primitives.
+@param tex Texture to draw.
+@param wndRect Region of the window where the texture is drawn (normalized coordinates).
+@param texRect Region of the texture to draw (normalized coordinates).
+ */
+CV_EXPORTS void render(const Texture2D& tex,
+    Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
+    Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
+
+/** @overload
+@param arr Array of primitive vertices.
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
+
+/** @overload
+@param arr Array of primitive vertices.
+@param indices Array of vertex indices (host or device memory).
+@param mode Render mode. One of cv::ogl::RenderModes
+@param color Color for all vertices. Will be used if arr doesn't contain color array.
+*/
+CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
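+
+// A minimal rendering sketch, assuming an OpenGL-enabled window created with
+// cv::namedWindow and a hypothetical CV_32FC3 cv::Mat `verts` of vertex coordinates:
+//
+//     cv::ogl::Arrays arr;
+//     arr.setVertexArray(verts);
+//     cv::ogl::render(arr, cv::ogl::LINE_STRIP, cv::Scalar(0, 255, 0));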
+
+/////////////////// CL-GL Interoperability Functions ///////////////////
+
+namespace ocl {
+using namespace cv::ocl;
+
+// TODO static functions in the Context class
+/** @brief Creates OpenCL context from GL.
+@return Returns reference to OpenCL Context
+ */
+CV_EXPORTS Context& initializeContextFromGL();
+
+} // namespace cv::ogl::ocl
+
+/** @brief Converts InputArray to Texture2D object.
+@param src     - source InputArray.
+@param texture - destination Texture2D object.
+ */
+CV_EXPORTS void convertToGLTexture2D(InputArray src, Texture2D& texture);
+
+/** @brief Converts Texture2D object to OutputArray.
+@param texture - source Texture2D object.
+@param dst     - destination OutputArray.
+ */
+CV_EXPORTS void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst);
+
+/** @brief Maps Buffer object to process on CL side (convert to UMat).
+
+The function creates a CL buffer from the GL one, and then constructs a UMat that can be used
+to process the buffer data with OpenCV functions. Note that in the current implementation a
+UMat constructed this way does not own the corresponding GL buffer object, so it is the
+user's responsibility to close down the CL/GL buffer relationship by explicitly calling the
+unmapGLBuffer() function.
+@param buffer      - source Buffer object.
+@param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE).
+@return Returns UMat object
+ */
+CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, int accessFlags = ACCESS_READ|ACCESS_WRITE);
+
+/** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer).
+
+The function must be called explicitly by the user for each UMat previously constructed by a
+call to the mapGLBuffer() function.
+@param u           - source UMat, created by mapGLBuffer().
+ */
+CV_EXPORTS void unmapGLBuffer(UMat& u);
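+
+// A minimal CL-GL interop sketch, assuming a shared context created via
+// cv::ogl::ocl::initializeContextFromGL() and a hypothetical ogl::Buffer `buf`:
+//
+//     cv::UMat u = cv::ogl::mapGLBuffer(buf, cv::ACCESS_READ | cv::ACCESS_WRITE);
+//     cv::bitwise_not(u, u);      // process the shared data on the OpenCL side
+//     cv::ogl::unmapGLBuffer(u);  // release the CL/GL relationship explicitly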
+
+}} // namespace cv::ogl
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cuda
+//! @{
+
+/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
+
+This function should be explicitly called after OpenGL context creation and before any CUDA calls.
+@param device System index of a CUDA device starting with 0.
+@ingroup core_opengl
+ */
+CV_EXPORTS void setGlDevice(int device = 0);
+
+//! @}
+
+}}
+
+//! @cond IGNORED
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+inline
+cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(arows, acols, atype, target, autoRelease);
+}
+
+inline
+cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(asize, atype, target, autoRelease);
+}
+
+inline
+void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
+{
+    create(asize.height, asize.width, atype, target, autoRelease);
+}
+
+inline
+int cv::ogl::Buffer::rows() const
+{
+    return rows_;
+}
+
+inline
+int cv::ogl::Buffer::cols() const
+{
+    return cols_;
+}
+
+inline
+cv::Size cv::ogl::Buffer::size() const
+{
+    return Size(cols_, rows_);
+}
+
+inline
+bool cv::ogl::Buffer::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+
+inline
+int cv::ogl::Buffer::type() const
+{
+    return type_;
+}
+
+inline
+int cv::ogl::Buffer::depth() const
+{
+    return CV_MAT_DEPTH(type_);
+}
+
+inline
+int cv::ogl::Buffer::channels() const
+{
+    return CV_MAT_CN(type_);
+}
+
+inline
+int cv::ogl::Buffer::elemSize() const
+{
+    return CV_ELEM_SIZE(type_);
+}
+
+inline
+int cv::ogl::Buffer::elemSize1() const
+{
+    return CV_ELEM_SIZE1(type_);
+}
+
+///////
+
+inline
+cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(arows, acols, aformat, autoRelease);
+}
+
+inline
+cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(asize, aformat, autoRelease);
+}
+
+inline
+void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
+{
+    create(asize.height, asize.width, aformat, autoRelease);
+}
+
+inline
+int cv::ogl::Texture2D::rows() const
+{
+    return rows_;
+}
+
+inline
+int cv::ogl::Texture2D::cols() const
+{
+    return cols_;
+}
+
+inline
+cv::Size cv::ogl::Texture2D::size() const
+{
+    return Size(cols_, rows_);
+}
+
+inline
+bool cv::ogl::Texture2D::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+
+inline
+cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
+{
+    return format_;
+}
+
+///////
+
+inline
+cv::ogl::Arrays::Arrays() : size_(0)
+{
+}
+
+inline
+int cv::ogl::Arrays::size() const
+{
+    return size_;
+}
+
+inline
+bool cv::ogl::Arrays::empty() const
+{
+    return size_ == 0;
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_OPENGL_HPP */

+ 538 - 0
ThresholdTools/include/opencv2/core/operations.hpp

@@ -0,0 +1,538 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_OPERATIONS_HPP
+#define OPENCV_CORE_OPERATIONS_HPP
+
+#ifndef __cplusplus
+#  error operations.hpp header must be compiled as C++
+#endif
+
+#include <cstdio>
+
+//! @cond IGNORED
+
+namespace cv
+{
+
+////////////////////////////// Matx methods depending on core API /////////////////////////////
+
+namespace internal
+{
+
+template<typename _Tp, int m> struct Matx_FastInvOp
+{
+    bool operator()(const Matx<_Tp, m, m>& a, Matx<_Tp, m, m>& b, int method) const
+    {
+        Matx<_Tp, m, m> temp = a;
+
+        // assume that b is all 0's on input => make it an identity matrix
+        for( int i = 0; i < m; i++ )
+            b(i, i) = (_Tp)1;
+
+        if( method == DECOMP_CHOLESKY )
+            return Cholesky(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m);
+
+        return LU(temp.val, m*sizeof(_Tp), m, b.val, m*sizeof(_Tp), m) != 0;
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 2>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, Matx<_Tp, 2, 2>& b, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        b(1,1) = a(0,0)*d;
+        b(0,0) = a(1,1)*d;
+        b(0,1) = -a(0,1)*d;
+        b(1,0) = -a(1,0)*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastInvOp<_Tp, 3>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, Matx<_Tp, 3, 3>& b, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        b(0,0) = (a(1,1) * a(2,2) - a(1,2) * a(2,1)) * d;
+        b(0,1) = (a(0,2) * a(2,1) - a(0,1) * a(2,2)) * d;
+        b(0,2) = (a(0,1) * a(1,2) - a(0,2) * a(1,1)) * d;
+
+        b(1,0) = (a(1,2) * a(2,0) - a(1,0) * a(2,2)) * d;
+        b(1,1) = (a(0,0) * a(2,2) - a(0,2) * a(2,0)) * d;
+        b(1,2) = (a(0,2) * a(1,0) - a(0,0) * a(1,2)) * d;
+
+        b(2,0) = (a(1,0) * a(2,1) - a(1,1) * a(2,0)) * d;
+        b(2,1) = (a(0,1) * a(2,0) - a(0,0) * a(2,1)) * d;
+        b(2,2) = (a(0,0) * a(1,1) - a(0,1) * a(1,0)) * d;
+        return true;
+    }
+};
+
+
+template<typename _Tp, int m, int n> struct Matx_FastSolveOp
+{
+    bool operator()(const Matx<_Tp, m, m>& a, const Matx<_Tp, m, n>& b,
+                    Matx<_Tp, m, n>& x, int method) const
+    {
+        Matx<_Tp, m, m> temp = a;
+        x = b;
+        if( method == DECOMP_CHOLESKY )
+            return Cholesky(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n);
+
+        return LU(temp.val, m*sizeof(_Tp), m, x.val, n*sizeof(_Tp), n) != 0;
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 2, 1>
+{
+    bool operator()(const Matx<_Tp, 2, 2>& a, const Matx<_Tp, 2, 1>& b,
+                    Matx<_Tp, 2, 1>& x, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        x(0) = (b(0)*a(1,1) - b(1)*a(0,1))*d;
+        x(1) = (b(1)*a(0,0) - b(0)*a(1,0))*d;
+        return true;
+    }
+};
+
+template<typename _Tp> struct Matx_FastSolveOp<_Tp, 3, 1>
+{
+    bool operator()(const Matx<_Tp, 3, 3>& a, const Matx<_Tp, 3, 1>& b,
+                    Matx<_Tp, 3, 1>& x, int) const
+    {
+        _Tp d = (_Tp)determinant(a);
+        if( d == 0 )
+            return false;
+        d = 1/d;
+        x(0) = d*(b(0)*(a(1,1)*a(2,2) - a(1,2)*a(2,1)) -
+                a(0,1)*(b(1)*a(2,2) - a(1,2)*b(2)) +
+                a(0,2)*(b(1)*a(2,1) - a(1,1)*b(2)));
+
+        x(1) = d*(a(0,0)*(b(1)*a(2,2) - a(1,2)*b(2)) -
+                b(0)*(a(1,0)*a(2,2) - a(1,2)*a(2,0)) +
+                a(0,2)*(a(1,0)*b(2) - b(1)*a(2,0)));
+
+        x(2) = d*(a(0,0)*(a(1,1)*b(2) - b(1)*a(2,1)) -
+                a(0,1)*(a(1,0)*b(2) - b(1)*a(2,0)) +
+                b(0)*(a(1,0)*a(2,1) - a(1,1)*a(2,0)));
+        return true;
+    }
+};
+
+} // internal
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randu(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randu(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
+{
+    Matx<_Tp,m,n> M;
+    cv::randn(M, Scalar(a), Scalar(b));
+    return M;
+}
+
+template<typename _Tp, int m, int n> inline
+Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
+{
+    Matx<_Tp, n, m> b;
+    bool ok;
+    if( m == n && (method == DECOMP_LU || method == DECOMP_CHOLESKY) )
+        ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*reinterpret_cast<const Matx<_Tp, m, m>*>(this), reinterpret_cast<Matx<_Tp, m, m>&>(b), method);
+    else
+    {
+        Mat A(*this, false), B(b, false);
+        ok = (invert(A, B, method) != 0);
+    }
+    if( NULL != p_is_ok ) { *p_is_ok = ok; }
+    return ok ? b : Matx<_Tp, n, m>::zeros();
+}
+
+template<typename _Tp, int m, int n> template<int l> inline
+Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) const
+{
+    Matx<_Tp, n, l> x;
+    bool ok;
+    if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
+        ok = cv::internal::Matx_FastSolveOp<_Tp, m, l>()(*this, rhs, x, method);
+    else
+    {
+        Mat A(*this, false), B(rhs, false), X(x, false);
+        ok = cv::solve(A, B, X, method);
+    }
+
+    return ok ? x : Matx<_Tp, n, l>::zeros();
+}
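+
+// A minimal sketch of the fast path above: for a 3x3 system with one right-hand
+// side, the Matx_FastSolveOp<_Tp, 3, 1> specialization (Cramer's rule) is used:
+//
+//     cv::Matx33f A(4, 1, 0,
+//                   1, 3, 1,
+//                   0, 1, 2);
+//     cv::Vec3f b(1, 2, 3);
+//     cv::Vec3f x = A.solve(b, cv::DECOMP_LU);  // returns zeros if A is singular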
+
+
+
+////////////////////////// Augmenting algebraic & logical operations //////////////////////////
+
+#define CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    static inline A& operator op (A& a, const B& b) { cvop; return a; }
+
+#define CV_MAT_AUG_OPERATOR(op, cvop, A, B)   \
+    CV_MAT_AUG_OPERATOR1(op, cvop, A, B)      \
+    CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+#define CV_MAT_AUG_OPERATOR_T(op, cvop, A, B)                   \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
+    template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
+
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (+=, cv::add(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (-=, cv::subtract(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (*=, a.convertTo(a, -1, b), Mat, double)
+CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
+
+CV_MAT_AUG_OPERATOR  (/=, cv::divide(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+CV_MAT_AUG_OPERATOR  (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
+CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
+
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
+CV_MAT_AUG_OPERATOR  (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
+CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
+
+#undef CV_MAT_AUG_OPERATOR_T
+#undef CV_MAT_AUG_OPERATOR
+#undef CV_MAT_AUG_OPERATOR1
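+
+// A minimal sketch of what the macros above expand to, using hypothetical
+// matrices `a` and `b` of matching size and type:
+//
+//     cv::Mat a = cv::Mat::eye(3, 3, CV_8U);
+//     cv::Mat b = cv::Mat::ones(3, 3, CV_8U);
+//     a += b;    // expands to cv::add(a, b, a)
+//     a *= 2.0;  // expands to a.convertTo(a, -1, 2.0)
+//     a ^= b;    // expands to cv::bitwise_xor(a, b, a)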
+
+
+
+///////////////////////////////////////////// SVD /////////////////////////////////////////////
+
+inline SVD::SVD() {}
+inline SVD::SVD( InputArray m, int flags ) { operator ()(m, flags); }
+inline void SVD::solveZ( InputArray m, OutputArray _dst )
+{
+    Mat mtx = m.getMat();
+    SVD svd(mtx, (mtx.rows >= mtx.cols ? 0 : SVD::FULL_UV));
+    _dst.create(svd.vt.cols, 1, svd.vt.type());
+    Mat dst = _dst.getMat();
+    svd.vt.row(svd.vt.rows-1).reshape(1,svd.vt.cols).copyTo(dst);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+    SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w, Matx<_Tp, m, nm>& u, Matx<_Tp, n, nm>& vt )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _u(u, false), _w(w, false), _vt(vt, false);
+    SVD::compute(_a, _w, _u, _vt);
+    CV_Assert(_w.data == (uchar*)&w.val[0] && _u.data == (uchar*)&u.val[0] && _vt.data == (uchar*)&vt.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm> inline void
+SVD::compute( const Matx<_Tp, m, n>& a, Matx<_Tp, nm, 1>& w )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _a(a, false), _w(w, false);
+    SVD::compute(_a, _w);
+    CV_Assert(_w.data == (uchar*)&w.val[0]);
+}
+
+template<typename _Tp, int m, int n, int nm, int nb> inline void
+SVD::backSubst( const Matx<_Tp, nm, 1>& w, const Matx<_Tp, m, nm>& u,
+                const Matx<_Tp, n, nm>& vt, const Matx<_Tp, m, nb>& rhs,
+                Matx<_Tp, n, nb>& dst )
+{
+    CV_StaticAssert( nm == MIN(m, n), "Invalid size of output vector.");
+    Mat _u(u, false), _w(w, false), _vt(vt, false), _rhs(rhs, false), _dst(dst, false);
+    SVD::backSubst(_w, _u, _vt, _rhs, _dst);
+    CV_Assert(_dst.data == (uchar*)&dst.val[0]);
+}
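+
+// A minimal sketch of the fixed-size SVD wrappers above:
+//
+//     cv::Matx33f A(2, 0, 0,
+//                   0, 1, 0,
+//                   0, 0, 1);
+//     cv::Matx<float, 3, 1> w;        // singular values, in descending order
+//     cv::Matx33f u, vt;
+//     cv::SVD::compute(A, w, u, vt);  // A == u * diag(w) * vt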
+
+
+
+/////////////////////////////////// Multiply-with-Carry RNG ///////////////////////////////////
+
+inline RNG::RNG()              { state = 0xffffffff; }
+inline RNG::RNG(uint64 _state) { state = _state ? _state : 0xffffffff; }
+
+inline RNG::operator uchar()    { return (uchar)next(); }
+inline RNG::operator schar()    { return (schar)next(); }
+inline RNG::operator ushort()   { return (ushort)next(); }
+inline RNG::operator short()    { return (short)next(); }
+inline RNG::operator int()      { return (int)next(); }
+inline RNG::operator unsigned() { return next(); }
+inline RNG::operator float()    { return next()*2.3283064365386962890625e-10f; }
+inline RNG::operator double()   { unsigned t = next(); return (((uint64)t << 32) | next()) * 5.4210108624275221700372640043497e-20; }
+
+inline unsigned RNG::operator ()(unsigned N) { return (unsigned)uniform(0,N); }
+inline unsigned RNG::operator ()()           { return next(); }
+
+inline int    RNG::uniform(int a, int b)       { return a == b ? a : (int)(next() % (b - a) + a); }
+inline float  RNG::uniform(float a, float b)   { return ((float)*this)*(b - a) + a; }
+inline double RNG::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }
+
+inline bool RNG::operator ==(const RNG& other) const { return state == other.state; }
+
+inline unsigned RNG::next()
+{
+    state = (uint64)(unsigned)state* /*CV_RNG_COEFF*/ 4164903690U + (unsigned)(state >> 32);
+    return (unsigned)state;
+}
+
+//! returns the next uniformly-distributed random number of the specified type
+template<typename _Tp> static inline _Tp randu()
+{
+  return (_Tp)theRNG();
+}
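+
+// A minimal sketch of the multiply-with-carry generator in use:
+//
+//     cv::RNG rng(0x12345678);             // any non-zero 64-bit seed
+//     int    die = rng.uniform(1, 7);      // integer in [1, 7)
+//     double u   = rng.uniform(0.0, 1.0);  // double in [0, 1)
+//     float  f   = cv::randu<float>();     // draws from the per-thread theRNG()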
+
+///////////////////////////////// Formatted string generation /////////////////////////////////
+
+/** @brief Returns a text string formatted using the printf-like expression.
+
+The function acts like sprintf but forms and returns an STL string. It can be used to form an error
+message in the Exception constructor.
+@param fmt printf-compatible formatting specifiers.
+ */
+CV_EXPORTS String format( const char* fmt, ... );
+
+///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
+
+static inline
+Ptr<Formatted> format(InputArray mtx, int fmt)
+{
+    return Formatter::get(fmt)->format(mtx.getMat());
+}
+
+static inline
+int print(Ptr<Formatted> fmtd, FILE* stream = stdout)
+{
+    int written = 0;
+    fmtd->reset();
+    for(const char* str = fmtd->next(); str; str = fmtd->next())
+        written += fputs(str, stream);
+
+    return written;
+}
+
+static inline
+int print(const Mat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx), stream);
+}
+
+static inline
+int print(const UMat& mtx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(mtx.getMat(ACCESS_READ)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp> static inline
+int print(const std::vector<Point3_<_Tp> >& vec, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(Mat(vec)), stream);
+}
+
+template<typename _Tp, int m, int n> static inline
+int print(const Matx<_Tp, m, n>& matx, FILE* stream = stdout)
+{
+    return print(Formatter::get()->format(cv::Mat(matx)), stream);
+}
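+
+// A minimal sketch combining format() and print(), dumping a small matrix to
+// stdout in Python syntax:
+//
+//     cv::Mat m = (cv::Mat_<float>(2, 2) << 1, 2, 3, 4);
+//     cv::print(cv::format(m, cv::Formatter::FMT_PYTHON));
+//     std::printf("\n");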
+
+//! @endcond
+
+/****************************************************************************************\
+*                                  Auxiliary algorithms                                  *
+\****************************************************************************************/
+
+/** @brief Splits an element set into equivalency classes.
+
+The generic function partition implements an \f$O(N^2)\f$ algorithm for splitting a set of \f$N\f$ elements
+into one or more equivalency classes, as described in
+<http://en.wikipedia.org/wiki/Disjoint-set_data_structure> . The function returns the number of
+equivalency classes.
+@param _vec Set of elements stored as a vector.
+@param labels Output vector of labels. It contains as many elements as vec. Each label labels[i] is
+a 0-based cluster index of `vec[i]`.
+@param predicate Equivalence predicate (pointer to a boolean function of two arguments or an
+instance of the class that has the method bool operator()(const _Tp& a, const _Tp& b) ). The
+predicate returns true when the elements are certainly in the same class, and returns false if they
+may or may not be in the same class.
+@ingroup core_cluster
+*/
+template<typename _Tp, class _EqPredicate> int
+partition( const std::vector<_Tp>& _vec, std::vector<int>& labels,
+          _EqPredicate predicate=_EqPredicate())
+{
+    int i, j, N = (int)_vec.size();
+    const _Tp* vec = &_vec[0];
+
+    const int PARENT=0;
+    const int RANK=1;
+
+    std::vector<int> _nodes(N*2);
+    int (*nodes)[2] = (int(*)[2])&_nodes[0];
+
+    // The first O(N) pass: create N single-vertex trees
+    for(i = 0; i < N; i++)
+    {
+        nodes[i][PARENT]=-1;
+        nodes[i][RANK] = 0;
+    }
+
+    // The main O(N^2) pass: merge connected components
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+
+        // find root
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+
+        for( j = 0; j < N; j++ )
+        {
+            if( i == j || !predicate(vec[i], vec[j]))
+                continue;
+            int root2 = j;
+
+            while( nodes[root2][PARENT] >= 0 )
+                root2 = nodes[root2][PARENT];
+
+            if( root2 != root )
+            {
+                // unite both trees
+                int rank = nodes[root][RANK], rank2 = nodes[root2][RANK];
+                if( rank > rank2 )
+                    nodes[root2][PARENT] = root;
+                else
+                {
+                    nodes[root][PARENT] = root2;
+                    nodes[root2][RANK] += rank == rank2;
+                    root = root2;
+                }
+                CV_Assert( nodes[root][PARENT] < 0 );
+
+                int k = j, parent;
+
+                // compress the path from node2 to root
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+
+                // compress the path from node to root
+                k = i;
+                while( (parent = nodes[k][PARENT]) >= 0 )
+                {
+                    nodes[k][PARENT] = root;
+                    k = parent;
+                }
+            }
+        }
+    }
+
+    // Final O(N) pass: enumerate classes
+    labels.resize(N);
+    int nclasses = 0;
+
+    for( i = 0; i < N; i++ )
+    {
+        int root = i;
+        while( nodes[root][PARENT] >= 0 )
+            root = nodes[root][PARENT];
+        // re-use the rank as the class label
+        if( nodes[root][RANK] >= 0 )
+            nodes[root][RANK] = ~nclasses++;
+        labels[i] = ~nodes[root][RANK];
+    }
+
+    return nclasses;
+}
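+
+// A minimal usage sketch: cluster integers whose values differ by at most 1,
+// using a C++11 lambda as the equivalence predicate (the classes are the
+// connected components under the predicate):
+//
+//     std::vector<int> pts = { 1, 2, 10, 11, 12, 40 };
+//     std::vector<int> labels;
+//     int nclasses = cv::partition(pts, labels,
+//         [](int a, int b) { return std::abs(a - b) <= 1; });
+//     // labels == { 0, 0, 1, 1, 1, 2 }, nclasses == 3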
+
+} // cv
+
+#endif

+ 302 - 0
ThresholdTools/include/opencv2/core/optim.hpp

@@ -0,0 +1,302 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPTIM_HPP
+#define OPENCV_OPTIM_HPP
+
+#include "opencv2/core.hpp"
+
+namespace cv
+{
+
+/** @addtogroup core_optim
+The algorithms in this section minimize or maximize a function value within specified constraints
+or without any constraints.
+@{
+*/
+
+/** @brief Basic interface for all solvers
+ */
+class CV_EXPORTS MinProblemSolver : public Algorithm
+{
+public:
+    /** @brief Represents function being optimized
+     */
+    class CV_EXPORTS Function
+    {
+    public:
+        virtual ~Function() {}
+        virtual int getDims() const = 0;
+        virtual double getGradientEps() const;
+        virtual double calc(const double* x) const = 0;
+        virtual void getGradient(const double* x,double* grad);
+    };
+
+    /** @brief Getter for the optimized function.
+
+    The optimized function is represented by the Function interface, which requires derived classes
+    to implement the calc(const double*) and getDims() methods to evaluate the function.
+
+    @return Smart pointer to an object that implements the Function interface - it represents the
+    function that is being optimized. It can be empty if no function has been given so far.
+     */
+    virtual Ptr<Function> getFunction() const = 0;
+
+    /** @brief Setter for the optimized function.
+
+    *It should be called at least once before the call to* minimize(), as the default value is not usable.
+
+    @param f The new function to optimize.
+     */
+    virtual void setFunction(const Ptr<Function>& f) = 0;
+
+    /** @brief Getter for the previously set terminal criteria for this algorithm.
+
+    @return Deep copy of the terminal criteria used at the moment.
+     */
+    virtual TermCriteria getTermCriteria() const = 0;
+
+    /** @brief Set terminal criteria for solver.
+
+    It *is not necessary* to call this method before the first call to minimize(), as the default
+    value is sensible.
+
+    The algorithm stops when the number of function evaluations exceeds termcrit.maxCount, when the
+    function values at the vertices of the simplex are within a termcrit.epsilon range, or when the
+    simplex becomes so small that it can be enclosed in a box with termcrit.epsilon sides, whichever
+    comes first.
+    @param termcrit Terminal criteria to be used, represented as cv::TermCriteria structure.
+     */
+    virtual void setTermCriteria(const TermCriteria& termcrit) = 0;
+
+    /** @brief actually runs the algorithm and performs the minimization.
+
+    The sole input parameter determines the centroid of the starting simplex (roughly, it tells
+    where to start); all the others (terminal criteria, initial step, function to be minimized) are
+    supposed to be set via the setters before the call to this method, or the default values (not
+    always sensible) will be used.
+
+    @param x The initial point that will become the centroid of the initial simplex. After the
+    algorithm terminates, it is set to the point where the algorithm stopped, the point of the
+    possible minimum.
+    @return The value of a function at the point found.
+     */
+    virtual double minimize(InputOutputArray x) = 0;
+};
+
+/** @brief This class is used to perform the non-linear non-constrained minimization of a function,
+
+defined on an `n`-dimensional Euclidean space, using the **Nelder-Mead method**, also known as
+**downhill simplex method**. The basic idea about the method can be obtained from
+<http://en.wikipedia.org/wiki/Nelder-Mead_method>.
+
+It should be noted that this method, although deterministic, is rather a heuristic and therefore
+may converge to a local minimum, not necessarily a global one. It is an iterative optimization
+technique that at each step uses information about the function values evaluated only at `n+1`
+points, arranged as a *simplex* in `n`-dimensional space (hence the second name of the method). At
+each step a new point is chosen to evaluate the function at; the obtained value is compared with
+the previous ones and, based on this information, the simplex changes its shape, slowly moving
+toward the local minimum. Thus this method uses *only* function values to make decisions, in
+contrast to, say, the Nonlinear Conjugate Gradient method (which is also implemented in optim).
+
+The algorithm stops when the number of function evaluations exceeds termcrit.maxCount, when the
+function values at the vertices of the simplex are within a termcrit.epsilon range, or when the
+simplex becomes so small that it can be enclosed in a box with termcrit.epsilon sides, whichever
+comes first, for a user-defined positive integer termcrit.maxCount and positive real termcrit.epsilon.
+
+@note DownhillSolver is a derivative of the abstract interface
+cv::MinProblemSolver, which in turn is derived from the Algorithm interface and is used to
+encapsulate the functionality, common to all non-linear optimization algorithms in the optim
+module.
+
+@note term criteria should meet following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+@endcode
+ */
+class CV_EXPORTS DownhillSolver : public MinProblemSolver
+{
+public:
+    /** @brief Returns the initial step that will be used in downhill simplex algorithm.
+
+    @param step Initial step that will be used in algorithm. Note, that although corresponding setter
+    accepts column-vectors as well as row-vectors, this method will return a row-vector.
+    @see DownhillSolver::setInitStep
+     */
+    virtual void getInitStep(OutputArray step) const=0;
+
+    /** @brief Sets the initial step that will be used in downhill simplex algorithm.
+
+    The step, together with the initial point (given in DownhillSolver::minimize), are two
+    `n`-dimensional vectors that are used to determine the shape of the initial simplex. Roughly
+    speaking, the initial point determines the position of the simplex (it will become the simplex's
+    centroid), while the step determines its spread (size in each dimension). To be more precise, if
+    \f$s,x_0\in\mathbb{R}^n\f$ are the initial step and initial point respectively, the vertices of
+    the simplex will be \f$v_0:=x_0-\frac{1}{2} s\f$ and \f$v_i:=x_0+s_i\f$ for \f$i=1,2,\dots,n\f$,
+    where \f$s_i\f$ denotes the projection of the initial step onto the *i*-th coordinate axis (the
+    vector given by \f$s_i:=e_i\left<e_i,s\right>\f$, where the \f$e_i\f$ form the canonical basis).
+
+    @param step Initial step that will be used in algorithm. Roughly said, it determines the spread
+    (size in each dimension) of an initial simplex.
+     */
+    virtual void setInitStep(InputArray step)=0;
+
+    /** @brief This function returns a ready-to-use DownhillSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As the default values for the terminal
+    criteria are the only sensible ones, MinProblemSolver::setFunction() and
+    DownhillSolver::setInitStep() should be called on the obtained object if the respective
+    parameters were not given to create(). Otherwise, the two ways (give the parameters to create()
+    or omit them and call MinProblemSolver::setFunction() and DownhillSolver::setInitStep()) are
+    absolutely equivalent (and will raise the same errors in the same way, should invalid input be
+    detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param initStep Initial step, that will be used to construct the initial simplex, similarly to the one
+    you submit via DownhillSolver::setInitStep.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+     */
+    static Ptr<DownhillSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<MinProblemSolver::Function>(),
+                                      InputArray initStep=Mat_<double>(1,1,0.0),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
+
+/** @brief This class is used to perform the non-linear unconstrained minimization of a function
+with known gradient,
+
+defined on an *n*-dimensional Euclidean space, using the **Nonlinear Conjugate Gradient method**.
+The implementation is based on the beautifully clear explanatory article [An Introduction to
+the Conjugate Gradient Method Without the Agonizing
+Pain](http://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf) by Jonathan Richard
+Shewchuk. The method can be seen as an adaptation of the standard Conjugate Gradient method (see, for
+example <http://en.wikipedia.org/wiki/Conjugate_gradient_method>) for numerically solving
+systems of linear equations.
+
+It should be noted that this method, although deterministic, is rather a heuristic method and
+therefore may converge to a local minimum, not necessarily a global one. What is even more
+disastrous, most of its behaviour is governed by the gradient, therefore it essentially cannot
+distinguish between local minima and maxima. Therefore, if it starts sufficiently near a local
+maximum, it may converge to it. Another obvious restriction is that it should be possible to compute
+the gradient of the function at any point, thus it is preferable to have an analytic expression for
+the gradient, and the computational burden should be borne by the user.
+
+The latter responsibility is accomplished via the getGradient method of the
+MinProblemSolver::Function interface (which represents the function being optimized). This method
+takes a point in *n*-dimensional space (the first argument represents the array of coordinates of
+that point) and computes its gradient (which should be stored in the second argument as an array).
+
+@note The ConjGradSolver class thus does not add any new methods to the basic MinProblemSolver interface.
+
+@note The termination criteria should meet the following condition:
+@code
+    termcrit.type == (TermCriteria::MAX_ITER + TermCriteria::EPS) && termcrit.epsilon > 0 && termcrit.maxCount > 0
+    // or
+    termcrit.type == TermCriteria::MAX_ITER && termcrit.maxCount > 0
+@endcode
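+
+For example, minimizing \f$f(x)=(x_0-1)^2+(x_1+2)^2\f$ with an analytic gradient could look like the
+minimal sketch below (QuadF is an illustrative functor written for this example, not part of the
+library):
+@code
+    class QuadF : public cv::MinProblemSolver::Function
+    {
+    public:
+        int getDims() const { return 2; }
+        double calc(const double* x) const
+        {
+            return (x[0] - 1.0)*(x[0] - 1.0) + (x[1] + 2.0)*(x[1] + 2.0);
+        }
+        void getGradient(const double* x, double* grad)
+        {
+            grad[0] = 2.0*(x[0] - 1.0); // df/dx0
+            grad[1] = 2.0*(x[1] + 2.0); // df/dx1
+        }
+    };
+
+    cv::Ptr<cv::ConjGradSolver> solver = cv::ConjGradSolver::create(cv::makePtr<QuadF>());
+    cv::Mat x = (cv::Mat_<double>(1, 2) << 0.0, 0.0); // starting point
+    double fmin = solver->minimize(x);                // expected: x near (1, -2), fmin near 0
+@endcode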
+ */
+class CV_EXPORTS ConjGradSolver : public MinProblemSolver
+{
+public:
+    /** @brief This function returns a reference to a ready-to-use ConjGradSolver object.
+
+    All the parameters are optional, so this procedure can be called even without parameters at
+    all. In this case, the default values will be used. As the default values for the termination
+    criteria are the only sensible ones, MinProblemSolver::setFunction() should be called upon the
+    obtained object, if the function was not given to create(). Otherwise, the two ways (submit it to
+    create() or omit it and call MinProblemSolver::setFunction()) are absolutely equivalent
+    (and will raise the same errors in the same way, should invalid input be detected).
+    @param f Pointer to the function that will be minimized, similarly to the one you submit via
+    MinProblemSolver::setFunction.
+    @param termcrit Terminal criteria to the algorithm, similarly to the one you submit via
+    MinProblemSolver::setTermCriteria.
+    */
+    static Ptr<ConjGradSolver> create(const Ptr<MinProblemSolver::Function>& f=Ptr<ConjGradSolver::Function>(),
+                                      TermCriteria termcrit=TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5000,0.000001));
+};
+
+//! return codes for cv::solveLP() function
+enum SolveLPResult
+{
+    SOLVELP_UNBOUNDED  = -2, //!< problem is unbounded (target function can achieve arbitrarily high values)
+    SOLVELP_UNFEASIBLE = -1, //!< problem is infeasible (there are no points that satisfy all the constraints imposed)
+    SOLVELP_SINGLE     = 0,  //!< there is only one maximum for the target function
+    SOLVELP_MULTI      = 1   //!< there are multiple maxima for the target function - an arbitrary one is returned
+};
+
+/** @brief Solve given (non-integer) linear programming problem using the Simplex Algorithm (Simplex Method).
+
+What we mean here by "linear programming problem" (or LP problem, for short) can be formulated as:
+
+\f[\mbox{Maximize } c\cdot x\\
+ \mbox{Subject to:}\\
+ Ax\leq b\\
+ x\geq 0\f]
+
+where \f$c\f$ is a fixed `1`-by-`n` row-vector, \f$A\f$ is a fixed `m`-by-`n` matrix, \f$b\f$ is a fixed `m`-by-`1`
+column vector and \f$x\f$ is an arbitrary `n`-by-`1` column vector that satisfies the constraints.
+
+The simplex algorithm is one of many algorithms designed to handle this sort of problem
+efficiently. Although it is not optimal in a theoretical sense (there exist algorithms that can solve
+any problem written as above in polynomial time, while the simplex method degenerates to exponential
+time on some special cases), it is well-studied, easy to implement and has been shown to work well
+for real-life purposes.
+
+The particular implementation is taken almost verbatim from **Introduction to Algorithms, third
+edition** by T. H. Cormen, C. E. Leiserson, R. L. Rivest and C. Stein. In particular,
+Bland's rule <http://en.wikipedia.org/wiki/Bland%27s_rule> is used to prevent cycling.
+
+@param Func This row-vector corresponds to \f$c\f$ in the LP problem formulation (see above). It should
+contain 32- or 64-bit floating point numbers. As a convenience, a column-vector may also be submitted;
+in the latter case it is understood to correspond to \f$c^T\f$.
+@param Constr `m`-by-`n+1` matrix, whose rightmost column corresponds to \f$b\f$ in formulation above
+and the remaining to \f$A\f$. It should contain 32- or 64-bit floating point numbers.
+@param z The solution will be returned here as a column-vector - it corresponds to \f$x\f$ in the
+formulation above. It will contain 64-bit floating point numbers.
+@return One of cv::SolveLPResult
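+
+For example, the classic problem from the book cited above (maximize \f$3x_1+x_2+2x_3\f$ subject to
+\f$x_1+x_2+3x_3\leq 30\f$, \f$2x_1+2x_2+5x_3\leq 24\f$, \f$4x_1+x_2+2x_3\leq 36\f$ and \f$x\geq 0\f$)
+could be set up as in the minimal sketch below; its expected optimum is \f$x=(8,4,0)^T\f$:
+@code
+    cv::Mat c = (cv::Mat_<double>(1, 3) << 3, 1, 2);
+    cv::Mat constr = (cv::Mat_<double>(3, 4) << 1, 1, 3, 30,
+                                                2, 2, 5, 24,
+                                                4, 1, 2, 36);
+    cv::Mat z;
+    int res = cv::solveLP(c, constr, z); // res == cv::SOLVELP_SINGLE, z near (8, 4, 0)^T
+@endcode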
+ */
+CV_EXPORTS_W int solveLP(const Mat& Func, const Mat& Constr, Mat& z);
+
+//! @}
+
+}// cv
+
+#endif

+ 28 - 0
ThresholdTools/include/opencv2/core/ovx.hpp

@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// OpenVX related definitions and declarations
+
+#pragma once
+#ifndef OPENCV_OVX_HPP
+#define OPENCV_OVX_HPP
+
+#include "cvdef.h"
+
+namespace cv
+{
+/// Check if use of OpenVX is possible
+CV_EXPORTS_W bool haveOpenVX();
+
+/// Check if use of OpenVX is enabled
+CV_EXPORTS_W bool useOpenVX();
+
+/// Enable/disable use of OpenVX
+CV_EXPORTS_W void setUseOpenVX(bool flag);
+} // namespace cv
+
+#endif // OPENCV_OVX_HPP

+ 1361 - 0
ThresholdTools/include/opencv2/core/persistence.hpp

@@ -0,0 +1,1361 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PERSISTENCE_HPP
+#define OPENCV_CORE_PERSISTENCE_HPP
+
+#ifndef CV_DOXYGEN
+/// Define to support persistence legacy formats
+#define CV__LEGACY_PERSISTENCE
+#endif
+
+#ifndef __cplusplus
+#  error persistence.hpp header must be compiled as C++
+#endif
+
+//! @addtogroup core_c
+//! @{
+
+/** @brief "black box" representation of the file storage associated with a file on disk.
+
+Several functions that are described below take CvFileStorage\* as inputs and allow the user to
+save or to load hierarchical collections that consist of scalar values, standard CXCore objects
+(such as matrices, sequences, graphs), and user-defined objects.
+
+OpenCV can read and write data in XML (<http://www.w3c.org/XML>), YAML (<http://www.yaml.org>) or
+JSON (<http://www.json.org/>) formats. Below is an example of 3x3 floating-point identity matrix A,
+stored in XML and YAML files
+using CXCore functions:
+XML:
+@code{.xml}
+    <?xml version="1.0"?>
+    <opencv_storage>
+    <A type_id="opencv-matrix">
+      <rows>3</rows>
+      <cols>3</cols>
+      <dt>f</dt>
+      <data>1. 0. 0. 0. 1. 0. 0. 0. 1.</data>
+    </A>
+    </opencv_storage>
+@endcode
+YAML:
+@code{.yaml}
+    %YAML:1.0
+    A: !!opencv-matrix
+      rows: 3
+      cols: 3
+      dt: f
+      data: [ 1., 0., 0., 0., 1., 0., 0., 0., 1.]
+@endcode
+As can be seen from the examples, XML uses nested tags to represent hierarchy, while YAML uses
+indentation for that purpose (similar to the Python programming language).
+
+The same functions can read and write data in both formats; the particular format is determined by
+the extension of the opened file, ".xml" for XML files, ".yml" or ".yaml" for YAML and ".json" for
+JSON.
+ */
+typedef struct CvFileStorage CvFileStorage;
+typedef struct CvFileNode CvFileNode;
+typedef struct CvMat CvMat;
+typedef struct CvMatND CvMatND;
+
+//! @} core_c
+
+#include "opencv2/core/types.hpp"
+#include "opencv2/core/mat.hpp"
+
+namespace cv {
+
+/** @addtogroup core_xml
+
+XML/YAML/JSON file storages.     {#xml_storage}
+=======================
+Writing to a file storage.
+--------------------------
+You can store and then restore various OpenCV data structures to/from XML (<http://www.w3c.org/XML>),
+YAML (<http://www.yaml.org>) or JSON (<http://www.json.org/>) formats. Also, it is possible to store
+and load arbitrarily complex data structures, which include OpenCV data structures, as well as
+primitive data types (integer and floating-point numbers and text strings) as their elements.
+
+Use the following procedure to write something to XML, YAML or JSON:
+-# Create new FileStorage and open it for writing. It can be done with a single call to
+FileStorage::FileStorage constructor that takes a filename, or you can use the default constructor
+and then call FileStorage::open. The format of the file (XML, YAML or JSON) is determined from the filename
+extension (".xml", ".yml"/".yaml" and ".json", respectively).
+-# Write all the data you want using the streaming operator `<<`, just like in the case of STL
+streams.
+-# Close the file using FileStorage::release. FileStorage destructor also closes the file.
+
+Here is an example:
+@code
+    #include "opencv2/opencv.hpp"
+    #include <time.h>
+
+    using namespace cv;
+
+    int main(int, char** argv)
+    {
+        FileStorage fs("test.yml", FileStorage::WRITE);
+
+        fs << "frameCount" << 5;
+        time_t rawtime; time(&rawtime);
+        fs << "calibrationDate" << asctime(localtime(&rawtime));
+        Mat cameraMatrix = (Mat_<double>(3,3) << 1000, 0, 320, 0, 1000, 240, 0, 0, 1);
+        Mat distCoeffs = (Mat_<double>(5,1) << 0.1, 0.01, -0.001, 0, 0);
+        fs << "cameraMatrix" << cameraMatrix << "distCoeffs" << distCoeffs;
+        fs << "features" << "[";
+        for( int i = 0; i < 3; i++ )
+        {
+            int x = rand() % 640;
+            int y = rand() % 480;
+            uchar lbp = rand() % 256;
+
+            fs << "{:" << "x" << x << "y" << y << "lbp" << "[:";
+            for( int j = 0; j < 8; j++ )
+                fs << ((lbp >> j) & 1);
+            fs << "]" << "}";
+        }
+        fs << "]";
+        fs.release();
+        return 0;
+    }
+@endcode
+The sample above stores to YML an integer, a text string (calibration date), 2 matrices, and a custom
+structure "feature", which includes feature coordinates and LBP (local binary pattern) value. Here
+is the output of the sample:
+@code{.yaml}
+%YAML:1.0
+frameCount: 5
+calibrationDate: "Fri Jun 17 14:09:29 2011\n"
+cameraMatrix: !!opencv-matrix
+   rows: 3
+   cols: 3
+   dt: d
+   data: [ 1000., 0., 320., 0., 1000., 240., 0., 0., 1. ]
+distCoeffs: !!opencv-matrix
+   rows: 5
+   cols: 1
+   dt: d
+   data: [ 1.0000000000000001e-01, 1.0000000000000000e-02,
+       -1.0000000000000000e-03, 0., 0. ]
+features:
+   - { x:167, y:49, lbp:[ 1, 0, 0, 1, 1, 0, 1, 1 ] }
+   - { x:298, y:130, lbp:[ 0, 0, 0, 1, 0, 0, 1, 1 ] }
+   - { x:344, y:158, lbp:[ 1, 1, 0, 0, 0, 0, 1, 0 ] }
+@endcode
+
+As an exercise, you can replace ".yml" with ".xml" or ".json" in the sample above and see how the
+corresponding XML file will look.
+
+Several things can be noted by looking at the sample code and the output:
+
+-   The produced YAML (and XML/JSON) consists of heterogeneous collections that can be nested. There are
+    2 types of collections: named collections (mappings) and unnamed collections (sequences). In mappings
+    each element has a name and is accessed by name. This is similar to structures and std::map in
+    C/C++ and dictionaries in Python. In sequences elements do not have names, they are accessed by
+    indices. This is similar to arrays and std::vector in C/C++ and lists, tuples in Python.
+    "Heterogeneous" means that elements of each single collection can have different types.
+
+    The top-level collection in YAML/XML/JSON is a mapping. Each matrix is stored as a mapping, and the
+    matrix elements are stored as a sequence. Then, there is a sequence of features, where each feature
+    is represented as a mapping, and the lbp value is stored in a nested sequence.
+
+-   When you write to a mapping (a structure), you write element name followed by its value. When you
+    write to a sequence, you simply write the elements one by one. OpenCV data structures (such as
+    cv::Mat) are written in absolutely the same way as simple C data structures - using `<<`
+    operator.
+
+-   To write a mapping, you first write the special string `{` to the storage, then write the
+    elements as pairs (`fs << <element_name> << <element_value>`) and then write the closing
+    `}`.
+
+-   To write a sequence, you first write the special string `[`, then write the elements, then
+    write the closing `]`.
+
+-   In YAML/JSON (but not XML), mappings and sequences can be written in a compact Python-like inline
+    form. In the sample above, the matrix elements, as well as each feature (including its lbp value),
+    are stored in such inline form. To store a mapping/sequence in a compact form, put `:` after the
+    opening character, e.g. use `{:` instead of `{` and `[:` instead of `[`. When the
+    data is written to XML, those extra `:` are ignored.
+
+Reading data from a file storage.
+---------------------------------
+To read the previously written XML, YAML or JSON file, do the following:
+-#  Open the file storage using FileStorage::FileStorage constructor or FileStorage::open method.
+    In the current implementation the whole file is parsed and the whole representation of file
+    storage is built in memory as a hierarchy of file nodes (see FileNode)
+
+-#  Read the data you are interested in. Use FileStorage::operator [], FileNode::operator []
+    and/or FileNodeIterator.
+
+-#  Close the storage using FileStorage::release.
+
+Here is how to read the file created by the code sample above:
+@code
+    FileStorage fs2("test.yml", FileStorage::READ);
+
+    // first method: use (type) operator on FileNode.
+    int frameCount = (int)fs2["frameCount"];
+
+    String date;
+    // second method: use FileNode::operator >>
+    fs2["calibrationDate"] >> date;
+
+    Mat cameraMatrix2, distCoeffs2;
+    fs2["cameraMatrix"] >> cameraMatrix2;
+    fs2["distCoeffs"] >> distCoeffs2;
+
+    cout << "frameCount: " << frameCount << endl
+         << "calibration date: " << date << endl
+         << "camera matrix: " << cameraMatrix2 << endl
+         << "distortion coeffs: " << distCoeffs2 << endl;
+
+    FileNode features = fs2["features"];
+    FileNodeIterator it = features.begin(), it_end = features.end();
+    int idx = 0;
+    std::vector<uchar> lbpval;
+
+    // iterate through a sequence using FileNodeIterator
+    for( ; it != it_end; ++it, idx++ )
+    {
+        cout << "feature #" << idx << ": ";
+        cout << "x=" << (int)(*it)["x"] << ", y=" << (int)(*it)["y"] << ", lbp: (";
+        // you can also easily read numerical arrays using FileNode >> std::vector operator.
+        (*it)["lbp"] >> lbpval;
+        for( int i = 0; i < (int)lbpval.size(); i++ )
+            cout << " " << (int)lbpval[i];
+        cout << ")" << endl;
+    }
+    fs2.release();
+@endcode
+
+Format specification    {#format_spec}
+--------------------
+`([count]{u|c|w|s|i|f|d})`... where the characters correspond to fundamental C++ types:
+-   `u` 8-bit unsigned number
+-   `c` 8-bit signed number
+-   `w` 16-bit unsigned number
+-   `s` 16-bit signed number
+-   `i` 32-bit signed number
+-   `f` single precision floating-point number
+-   `d` double precision floating-point number
+-   `r` pointer, the lower 32 bits of which are written as a signed integer. The type can be used to
+    store structures with links between the elements.
+
+`count` is the optional counter of values of a given type. For example, `2if` means that each array
+element is a structure of 2 integers, followed by a single-precision floating-point number. The
+equivalent notations of the above specification are `iif`, `2i1f` and so forth. Other examples: `u`
+means that the array consists of bytes, and `2d` means the array consists of pairs of doubles.
+
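+For instance, given an open FileStorage fs in write mode, an array of (int, int, float) structures
+could be written with FileStorage::writeRaw using the `2if` specification, as in the minimal sketch
+below (the struct layout is assumed to be packed exactly as the format string describes):
+@code
+    struct Elem { int a, b; float w; };
+    std::vector<Elem> elems(4);        // 4 elements, each matching "2if"
+    fs << "elems" << "[:";
+    fs.writeRaw("2if", (const uchar*)&elems[0], elems.size()*sizeof(Elem));
+    fs << "]";
+@endcode
+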
+@see @ref filestorage.cpp
+*/
+
+//! @{
+
+/** @example filestorage.cpp
+A complete example using the FileStorage interface
+*/
+
+////////////////////////// XML & YAML I/O //////////////////////////
+
+class CV_EXPORTS FileNode;
+class CV_EXPORTS FileNodeIterator;
+
+/** @brief XML/YAML/JSON file storage class that encapsulates all the information necessary for writing or
+reading data to/from a file.
+ */
+class CV_EXPORTS_W FileStorage
+{
+public:
+    //! file storage mode
+    enum Mode
+    {
+        READ        = 0, //!< value, open the file for reading
+        WRITE       = 1, //!< value, open the file for writing
+        APPEND      = 2, //!< value, open the file for appending
+        MEMORY      = 4, //!< flag, read data from source or write data to the internal buffer (which is
+                         //!< returned by FileStorage::release)
+        FORMAT_MASK = (7<<3), //!< mask for format flags
+        FORMAT_AUTO = 0,      //!< flag, auto format
+        FORMAT_XML  = (1<<3), //!< flag, XML format
+        FORMAT_YAML = (2<<3), //!< flag, YAML format
+        FORMAT_JSON = (3<<3), //!< flag, JSON format
+
+        BASE64      = 64,     //!< flag, write rawdata in Base64 by default. (consider using WRITE_BASE64)
+        WRITE_BASE64 = BASE64 | WRITE, //!< flag, enable both WRITE and BASE64
+    };
+    enum
+    {
+        UNDEFINED      = 0,
+        VALUE_EXPECTED = 1,
+        NAME_EXPECTED  = 2,
+        INSIDE_MAP     = 4
+    };
+
+    /** @brief The constructors.
+
+    The full constructor opens the file. Alternatively you can use the default constructor and then
+    call FileStorage::open.
+     */
+    CV_WRAP FileStorage();
+
+    /** @overload
+    @copydoc open()
+    */
+    CV_WRAP FileStorage(const String& filename, int flags, const String& encoding=String());
+
+    /** @overload */
+    FileStorage(CvFileStorage* fs, bool owning=true);
+
+    //! the destructor. calls release()
+    virtual ~FileStorage();
+
+    /** @brief Opens a file.
+
+    See description of parameters in FileStorage::FileStorage. The method calls FileStorage::release
+    before opening the file.
+    @param filename Name of the file to open or the text string to read the data from.
+       Extension of the file (.xml, .yml/.yaml or .json) determines its format (XML, YAML or JSON
+        respectively). Also you can append .gz to work with compressed files, for example myHugeMatrix.xml.gz. If both
+        FileStorage::WRITE and FileStorage::MEMORY flags are specified, source is used just to specify
+        the output file format (e.g. mydata.xml, .yml etc.). A file name can also contain parameters.
+        You can use this format, "*?base64" (e.g. "file.json?base64" (case sensitive)), as an alternative to
+        FileStorage::BASE64 flag.
+    @param flags Mode of operation. One of FileStorage::Mode
+    @param encoding Encoding of the file. Note that UTF-16 XML encoding is not supported currently and
+    you should use 8-bit encoding instead of it.
+     */
+    CV_WRAP virtual bool open(const String& filename, int flags, const String& encoding=String());
+
+    /** @brief Checks whether the file is opened.
+
+    @returns true if the object is associated with the current file and false otherwise. It is a
+    good practice to call this method after you tried to open a file.
+     */
+    CV_WRAP virtual bool isOpened() const;
+
+    /** @brief Closes the file and releases all the memory buffers.
+
+    Call this method after all I/O operations with the storage are finished.
+     */
+    CV_WRAP virtual void release();
+
+    /** @brief Closes the file and releases all the memory buffers.
+
+    Call this method after all I/O operations with the storage are finished. If the storage was
+    opened for in-memory writing (FileStorage::WRITE combined with FileStorage::MEMORY), the method
+    returns the string with the written data; otherwise it returns an empty string.
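+
+    A minimal sketch of in-memory writing (no file is created on disk; the extension in the
+    "filename" argument only selects the output format):
+    @code
+        cv::FileStorage fs(".yml", cv::FileStorage::WRITE | cv::FileStorage::MEMORY);
+        fs << "x" << 42;
+        cv::String text = fs.releaseAndGetString(); // YAML text with the written data
+    @endcode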
+     */
+    CV_WRAP virtual String releaseAndGetString();
+
+    /** @brief Returns the first element of the top-level mapping.
+    @returns The first element of the top-level mapping.
+     */
+    CV_WRAP FileNode getFirstTopLevelNode() const;
+
+    /** @brief Returns the top-level mapping
+    @param streamidx Zero-based index of the stream. In most cases there is only one stream in the file.
+    However, YAML supports multiple streams and so there can be several.
+    @returns The top-level mapping.
+     */
+    CV_WRAP FileNode root(int streamidx=0) const;
+
+    /** @brief Returns the specified element of the top-level mapping.
+    @param nodename Name of the file node.
+    @returns Node with the given name.
+     */
+    FileNode operator[](const String& nodename) const;
+
+    /** @overload */
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
+
+    /** @brief Returns the obsolete C FileStorage structure.
+    @returns Pointer to the underlying C FileStorage structure
+     */
+    CvFileStorage* operator *() { return fs.get(); }
+
+    /** @overload */
+    const CvFileStorage* operator *() const { return fs.get(); }
+
+    /** @brief Writes multiple numbers.
+
+    Writes one or more numbers of the specified format to the currently written structure. Usually it is
+    more convenient to use operator `<<` instead of this method.
+    @param fmt Specification of each array element, see @ref format_spec "format specification"
+    @param vec Pointer to the written array.
+    @param len Number of the uchar elements to write.
+     */
+    void writeRaw( const String& fmt, const uchar* vec, size_t len );
+
+    /** @brief Writes the registered C structure (CvMat, CvMatND, CvSeq).
+    @param name Name of the written object.
+    @param obj Pointer to the object.
+    @see ocvWrite for details.
+     */
+    void writeObj( const String& name, const void* obj );
+
+    /**
+     * @brief Simplified writing API to use with bindings.
+     * @param name Name of the written object
+     * @param val Value of the written object
+     */
+    CV_WRAP void write(const String& name, int val);
+    /// @overload
+    CV_WRAP void write(const String& name, double val);
+    /// @overload
+    CV_WRAP void write(const String& name, const String& val);
+    /// @overload
+    CV_WRAP void write(const String& name, InputArray val);
+
+    /** @brief Writes a comment.
+
+    The function writes a comment into file storage. The comments are skipped when the storage is read.
+    @param comment The written comment, single-line or multi-line
+    @param append If true, the function tries to put the comment at the end of current line.
+    Else if the comment is multi-line, or if it does not fit at the end of the current
+    line, the comment starts a new line.
+     */
+    CV_WRAP void writeComment(const String& comment, bool append = false);
+
+    /** @brief Returns the normalized object name for the specified name of a file.
+    @param filename Name of a file
+    @returns The normalized object name.
+     */
+    static String getDefaultObjectName(const String& filename);
+
+    /** @brief Returns the current format.
+     * @returns The current format, see FileStorage::Mode
+     */
+    CV_WRAP int getFormat() const;
+
+    Ptr<CvFileStorage> fs; //!< the underlying C FileStorage structure
+    String elname; //!< the currently written element
+    std::vector<char> structs; //!< the stack of written structures
+    int state; //!< the writer state
+};
+
+template<> CV_EXPORTS void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const;
+
+/** @brief File Storage Node class.
+
+The node is used to store each and every element of the file storage opened for reading. When an
+XML/YAML file is read, it is first parsed and stored in memory as a hierarchical collection of
+nodes. Each node can be a "leaf", that is, contain a single number or a string, or be a collection of
+other nodes. There can be named collections (mappings), where each element has a name and is
+accessed by name, and ordered collections (sequences), where elements do not have names and are
+accessed by index. The type of a file node can be determined using the FileNode::type method.
+
+Note that file nodes are only used for navigating file storages opened for reading. When a file
+storage is opened for writing, no data is stored in memory after it is written.
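+
+A minimal sketch of navigating nodes (assuming the "test.yml" file written in the example above):
+@code
+    cv::FileStorage fs("test.yml", cv::FileStorage::READ);
+    cv::FileNode features = fs["features"];
+    if (features.isSeq())                  // the "features" node is a sequence
+    {
+        std::cout << "features: " << (int)features.size() << " entries, first x = "
+                  << (int)features[0]["x"] << std::endl;  // each element is a mapping
+    }
+@endcode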
+ */
+class CV_EXPORTS_W_SIMPLE FileNode
+{
+public:
+    //! type of the file storage node
+    enum Type
+    {
+        NONE      = 0, //!< empty node
+        INT       = 1, //!< an integer
+        REAL      = 2, //!< floating-point number
+        FLOAT     = REAL, //!< synonym for REAL
+        STR       = 3, //!< text string in UTF-8 encoding
+        STRING    = STR, //!< synonym for STR
+        REF       = 4, //!< integer of size size_t. Typically used for storing complex dynamic structures where some elements reference the others
+        SEQ       = 5, //!< sequence
+        MAP       = 6, //!< mapping
+        TYPE_MASK = 7,
+        FLOW      = 8,  //!< compact representation of a sequence or mapping. Used only by YAML writer
+        USER      = 16, //!< a registered object (e.g. a matrix)
+        EMPTY     = 32, //!< empty structure (sequence or mapping)
+        NAMED     = 64  //!< the node has a name (i.e. it is element of a mapping)
+    };
+    /** @brief The constructors.
+
+    These constructors are used to create a default file node, construct it from obsolete structures or
+    from another file node.
+     */
+    CV_WRAP FileNode();
+
+    /** @overload
+    @param fs Pointer to the obsolete file storage structure.
+    @param node File node to be used as initialization for the created file node.
+    */
+    FileNode(const CvFileStorage* fs, const CvFileNode* node);
+
+    /** @overload
+    @param node File node to be used as initialization for the created file node.
+    */
+    FileNode(const FileNode& node);
+
+    /** @brief Returns element of a mapping node or a sequence node.
+    @param nodename Name of an element in the mapping node.
+    @returns Returns the element with the given identifier.
+     */
+    FileNode operator[](const String& nodename) const;
+
+    /** @overload
+    @param nodename Name of an element in the mapping node.
+    */
+    CV_WRAP_AS(getNode) FileNode operator[](const char* nodename) const;
+
+    /** @overload
+    @param i Index of an element in the sequence node.
+    */
+    CV_WRAP_AS(at) FileNode operator[](int i) const;
+
+    /** @brief Returns type of the node.
+    @returns Type of the node. See FileNode::Type
+     */
+    CV_WRAP int type() const;
+
+    //! returns true if the node is empty
+    CV_WRAP bool empty() const;
+    //! returns true if the node is a "none" object
+    CV_WRAP bool isNone() const;
+    //! returns true if the node is a sequence
+    CV_WRAP bool isSeq() const;
+    //! returns true if the node is a mapping
+    CV_WRAP bool isMap() const;
+    //! returns true if the node is an integer
+    CV_WRAP bool isInt() const;
+    //! returns true if the node is a floating-point number
+    CV_WRAP bool isReal() const;
+    //! returns true if the node is a text string
+    CV_WRAP bool isString() const;
+    //! returns true if the node has a name
+    CV_WRAP bool isNamed() const;
+    //! returns the node name or an empty string if the node is nameless
+    CV_WRAP String name() const;
+    //! returns the number of elements in the node, if it is a sequence or mapping, or 1 otherwise.
+    CV_WRAP size_t size() const;
+    //! returns the node content as an integer. If the node stores floating-point number, it is rounded.
+    operator int() const;
+    //! returns the node content as float
+    operator float() const;
+    //! returns the node content as double
+    operator double() const;
+    //! returns the node content as text string
+    operator String() const;
+    operator std::string() const;
+
+    //! returns pointer to the underlying file node
+    CvFileNode* operator *();
+    //! returns pointer to the underlying file node
+    const CvFileNode* operator* () const;
+
+    //! returns iterator pointing to the first node element
+    FileNodeIterator begin() const;
+    //! returns iterator pointing to the element following the last node element
+    FileNodeIterator end() const;
+
+    /** @brief Reads node elements to the buffer with the specified format.
+
+    Usually it is more convenient to use operator `>>` instead of this method.
+    @param fmt Specification of each array element. See @ref format_spec "format specification"
+    @param vec Pointer to the destination array.
+    @param len Number of elements to read. If it is greater than number of remaining elements then all
+    of them will be read.
+     */
+    void readRaw( const String& fmt, uchar* vec, size_t len ) const;
+
+    //! reads the registered object and returns pointer to it
+    void* readObj() const;
+
+    //! Simplified reading API to use with bindings.
+    CV_WRAP double real() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP String string() const;
+    //! Simplified reading API to use with bindings.
+    CV_WRAP Mat mat() const;
+
+    // do not use wrapper pointer classes for better efficiency
+    const CvFileStorage* fs;
+    const CvFileNode* node;
+};
+
+
+/** @brief Used to iterate through sequences and mappings.
+
+Standard STL notation is used, with node.begin() and node.end() denoting the beginning and the end of
+the sequence stored in the node. See the data reading sample at the beginning of the section.
+ */
+class CV_EXPORTS FileNodeIterator
+{
+public:
+    /** @brief The constructors.
+
+    These constructors are used to create a default iterator, set it to specific element in a file node
+    or construct it from another iterator.
+     */
+    FileNodeIterator();
+
+    /** @overload
+    @param fs File storage for the iterator.
+    @param node File node for the iterator.
+    @param ofs Index of the element in the node. The created iterator will point to this element.
+    */
+    FileNodeIterator(const CvFileStorage* fs, const CvFileNode* node, size_t ofs=0);
+
+    /** @overload
+    @param it Iterator to be used as initialization for the created iterator.
+    */
+    FileNodeIterator(const FileNodeIterator& it);
+
+    //! returns the currently observed element
+    FileNode operator *() const;
+    //! accesses methods of the currently observed element
+    FileNode operator ->() const;
+
+    //! moves iterator to the next node
+    FileNodeIterator& operator ++ ();
+    //! moves iterator to the next node
+    FileNodeIterator operator ++ (int);
+    //! moves iterator to the previous node
+    FileNodeIterator& operator -- ();
+    //! moves iterator to the previous node
+    FileNodeIterator operator -- (int);
+    //! moves iterator forward by the specified offset (possibly negative)
+    FileNodeIterator& operator += (int ofs);
+    //! moves iterator backward by the specified offset (possibly negative)
+    FileNodeIterator& operator -= (int ofs);
+
+    /** @brief Reads node elements to the buffer with the specified format.
+
+    Usually it is more convenient to use operator `>>` instead of this method.
+    @param fmt Specification of each array element. See @ref format_spec "format specification"
+    @param vec Pointer to the destination array.
+    @param maxCount Number of elements to read. If it is greater than number of remaining elements then
+    all of them will be read.
+     */
+    FileNodeIterator& readRaw( const String& fmt, uchar* vec,
+                               size_t maxCount=(size_t)INT_MAX );
+
+    struct SeqReader
+    {
+      int          header_size;
+      void*        seq;        /* sequence being read; CvSeq       */
+      void*        block;      /* current block;       CvSeqBlock  */
+      schar*       ptr;        /* pointer to the element to be read next */
+      schar*       block_min;  /* pointer to the beginning of the block */
+      schar*       block_max;  /* pointer to the end of the block */
+      int          delta_index;/* = seq->first->start_index   */
+      schar*       prev_elem;  /* pointer to the previous element */
+    };
+
+    const CvFileStorage* fs;
+    const CvFileNode* container;
+    SeqReader reader;
+    size_t remaining;
+};
+
+//! @} core_xml
+
+/////////////////// XML & YAML I/O implementation //////////////////
+
+//! @relates cv::FileStorage
+//! @{
+
+CV_EXPORTS void write( FileStorage& fs, const String& name, int value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, float value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, double value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const String& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const Mat& value );
+CV_EXPORTS void write( FileStorage& fs, const String& name, const SparseMat& value );
+#ifdef CV__LEGACY_PERSISTENCE
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<KeyPoint>& value);
+CV_EXPORTS void write( FileStorage& fs, const String& name, const std::vector<DMatch>& value);
+#endif
+
+CV_EXPORTS void writeScalar( FileStorage& fs, int value );
+CV_EXPORTS void writeScalar( FileStorage& fs, float value );
+CV_EXPORTS void writeScalar( FileStorage& fs, double value );
+CV_EXPORTS void writeScalar( FileStorage& fs, const String& value );
+
+//! @}
+
+//! @relates cv::FileNode
+//! @{
+
+CV_EXPORTS void read(const FileNode& node, int& value, int default_value);
+CV_EXPORTS void read(const FileNode& node, float& value, float default_value);
+CV_EXPORTS void read(const FileNode& node, double& value, double default_value);
+CV_EXPORTS void read(const FileNode& node, String& value, const String& default_value);
+CV_EXPORTS void read(const FileNode& node, std::string& value, const std::string& default_value);
+CV_EXPORTS void read(const FileNode& node, Mat& mat, const Mat& default_mat = Mat() );
+CV_EXPORTS void read(const FileNode& node, SparseMat& mat, const SparseMat& default_mat = SparseMat() );
+#ifdef CV__LEGACY_PERSISTENCE
+CV_EXPORTS void read(const FileNode& node, std::vector<KeyPoint>& keypoints);
+CV_EXPORTS void read(const FileNode& node, std::vector<DMatch>& matches);
+#endif
+CV_EXPORTS void read(const FileNode& node, KeyPoint& value, const KeyPoint& default_value);
+CV_EXPORTS void read(const FileNode& node, DMatch& value, const DMatch& default_value);
+
+template<typename _Tp> static inline void read(const FileNode& node, Point_<_Tp>& value, const Point_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Point_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Point3_<_Tp>& value, const Point3_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 3 ? default_value : Point3_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Size_<_Tp>& value, const Size_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Size_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Complex<_Tp>& value, const Complex<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 2 ? default_value : Complex<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]));
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Rect_<_Tp>& value, const Rect_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Rect_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                          saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+template<typename _Tp, int cn> static inline void read(const FileNode& node, Vec<_Tp, cn>& value, const Vec<_Tp, cn>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != cn ? default_value : Vec<_Tp, cn>(&temp[0]);
+}
+
+template<typename _Tp> static inline void read(const FileNode& node, Scalar_<_Tp>& value, const Scalar_<_Tp>& default_value)
+{
+    std::vector<_Tp> temp; FileNodeIterator it = node.begin(); it >> temp;
+    value = temp.size() != 4 ? default_value : Scalar_<_Tp>(saturate_cast<_Tp>(temp[0]), saturate_cast<_Tp>(temp[1]),
+                                                            saturate_cast<_Tp>(temp[2]), saturate_cast<_Tp>(temp[3]));
+}
+
+static inline void read(const FileNode& node, Range& value, const Range& default_value)
+{
+    Point2i temp(value.start, value.end); const Point2i default_temp = Point2i(default_value.start, default_value.end);
+    read(node, temp, default_temp);
+    value.start = temp.x; value.end = temp.y;
+}
+
+//! @}
+
+/** @brief Writes string to a file storage.
+@relates cv::FileStorage
+ */
+CV_EXPORTS FileStorage& operator << (FileStorage& fs, const String& str);
+
+//! @cond IGNORED
+
+namespace internal
+{
+    class CV_EXPORTS WriteStructContext
+    {
+    public:
+        WriteStructContext(FileStorage& _fs, const String& name, int flags, const String& typeName = String());
+        ~WriteStructContext();
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecWriterProxy
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            size_t count = vec.size();
+            for (size_t i = 0; i < count; i++)
+                write(*fs, vec[i]);
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp> class VecWriterProxy<_Tp, 1>
+    {
+    public:
+        VecWriterProxy( FileStorage* _fs ) : fs(_fs) {}
+        void operator()(const std::vector<_Tp>& vec) const
+        {
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
+            char fmt[] = { (char)((_fmt >> 8) + '1'), (char)_fmt, '\0' };
+            fs->writeRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, vec.size() * sizeof(_Tp));
+        }
+    private:
+        FileStorage* fs;
+    };
+
+    template<typename _Tp, int numflag> class VecReaderProxy
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            count = std::min(count, it->remaining);
+            vec.resize(count);
+            for (size_t i = 0; i < count; i++, ++(*it))
+                read(**it, vec[i], _Tp());
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+    template<typename _Tp> class VecReaderProxy<_Tp, 1>
+    {
+    public:
+        VecReaderProxy( FileNodeIterator* _it ) : it(_it) {}
+        void operator()(std::vector<_Tp>& vec, size_t count) const
+        {
+            size_t remaining = it->remaining;
+            size_t cn = DataType<_Tp>::channels;
+            int _fmt = traits::SafeFmt<_Tp>::fmt;
+            CV_Assert((_fmt >> 8) < 9);
+            char fmt[] = { (char)((_fmt >> 8)+'1'), (char)_fmt, '\0' };
+            CV_Assert((remaining % cn) == 0);
+            size_t remaining1 = remaining / cn;
+            count = count < remaining1 ? count : remaining1;
+            vec.resize(count);
+            it->readRaw(fmt, !vec.empty() ? (uchar*)&vec[0] : 0, count*sizeof(_Tp));
+        }
+    private:
+        FileNodeIterator* it;
+    };
+
+} // internal
+
+//! @endcond
+
+//! @relates cv::FileStorage
+//! @{
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const _Tp& value)
+{
+    write(fs, String(), value);
+}
+
+template<> inline
+void write( FileStorage& fs, const int& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const float& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const double& value )
+{
+    writeScalar(fs, value);
+}
+
+template<> inline
+void write( FileStorage& fs, const String& value )
+{
+    writeScalar(fs, value);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Point3_<_Tp>& pt )
+{
+    write(fs, pt.x);
+    write(fs, pt.y);
+    write(fs, pt.z);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Size_<_Tp>& sz )
+{
+    write(fs, sz.width);
+    write(fs, sz.height);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Complex<_Tp>& c )
+{
+    write(fs, c.re);
+    write(fs, c.im);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Rect_<_Tp>& r )
+{
+    write(fs, r.x);
+    write(fs, r.y);
+    write(fs, r.width);
+    write(fs, r.height);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const Vec<_Tp, cn>& v )
+{
+    for(int i = 0; i < cn; i++)
+        write(fs, v.val[i]);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const Scalar_<_Tp>& s )
+{
+    write(fs, s.val[0]);
+    write(fs, s.val[1]);
+    write(fs, s.val[2]);
+    write(fs, s.val[3]);
+}
+
+static inline
+void write(FileStorage& fs, const Range& r )
+{
+    write(fs, r.start);
+    write(fs, r.end);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const std::vector<_Tp>& vec )
+{
+    cv::internal::VecWriterProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> w(&fs);
+    w(vec);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Point3_<_Tp>& pt )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, pt);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Size_<_Tp>& sz )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, sz);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Complex<_Tp>& c )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, c);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Rect_<_Tp>& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+template<typename _Tp, int cn> static inline
+void write(FileStorage& fs, const String& name, const Vec<_Tp, cn>& v )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, v);
+}
+
+template<typename _Tp> static inline
+void write(FileStorage& fs, const String& name, const Scalar_<_Tp>& s )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, s);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const Range& r )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, r);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const KeyPoint& kpt)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, kpt.pt.x);
+    write(fs, kpt.pt.y);
+    write(fs, kpt.size);
+    write(fs, kpt.angle);
+    write(fs, kpt.response);
+    write(fs, kpt.octave);
+    write(fs, kpt.class_id);
+}
+
+static inline
+void write(FileStorage& fs, const String& name, const DMatch& m)
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+FileNode::FLOW);
+    write(fs, m.queryIdx);
+    write(fs, m.trainIdx);
+    write(fs, m.imgIdx);
+    write(fs, m.distance);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector<_Tp>& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+    write(fs, vec);
+}
+
+template<typename _Tp> static inline
+void write( FileStorage& fs, const String& name, const std::vector< std::vector<_Tp> >& vec )
+{
+    cv::internal::WriteStructContext ws(fs, name, FileNode::SEQ);
+    for(size_t i = 0; i < vec.size(); i++)
+    {
+        cv::internal::WriteStructContext ws_(fs, name, FileNode::SEQ+(traits::SafeFmt<_Tp>::fmt != 0 ? FileNode::FLOW : 0));
+        write(fs, vec[i]);
+    }
+}
+
+#ifdef CV__LEGACY_PERSISTENCE
+// This code is not needed anymore, but it is preserved here to keep source compatibility
+// Implementation is similar to template instantiations
+static inline void write(FileStorage& fs, const KeyPoint& kpt) { write(fs, String(), kpt); }
+static inline void write(FileStorage& fs, const DMatch& m) { write(fs, String(), m); }
+static inline void write(FileStorage& fs, const std::vector<KeyPoint>& vec)
+{
+    cv::internal::VecWriterProxy<KeyPoint, 0> w(&fs);
+    w(vec);
+}
+static inline void write(FileStorage& fs, const std::vector<DMatch>& vec)
+{
+    cv::internal::VecWriterProxy<DMatch, 0> w(&fs);
+    w(vec);
+}
+#endif
+
+//! @} FileStorage
+
+//! @relates cv::FileNode
+//! @{
+
+static inline
+void read(const FileNode& node, bool& value, bool default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = temp != 0;
+}
+
+static inline
+void read(const FileNode& node, uchar& value, uchar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<uchar>(temp);
+}
+
+static inline
+void read(const FileNode& node, schar& value, schar default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<schar>(temp);
+}
+
+static inline
+void read(const FileNode& node, ushort& value, ushort default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<ushort>(temp);
+}
+
+static inline
+void read(const FileNode& node, short& value, short default_value)
+{
+    int temp;
+    read(node, temp, (int)default_value);
+    value = saturate_cast<short>(temp);
+}
+
+template<typename _Tp> static inline
+void read( FileNodeIterator& it, std::vector<_Tp>& vec, size_t maxCount = (size_t)INT_MAX )
+{
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
+    r(vec, maxCount);
+}
+
+template<typename _Tp> static inline
+void read( const FileNode& node, std::vector<_Tp>& vec, const std::vector<_Tp>& default_value = std::vector<_Tp>() )
+{
+    if(!node.node)
+        vec = default_value;
+    else
+    {
+        FileNodeIterator it = node.begin();
+        read( it, vec );
+    }
+}
+
+static inline
+void read( const FileNode& node, std::vector<KeyPoint>& vec, const std::vector<KeyPoint>& default_value )
+{
+    if(!node.node)
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
+static inline
+void read( const FileNode& node, std::vector<DMatch>& vec, const std::vector<DMatch>& default_value )
+{
+    if(!node.node)
+        vec = default_value;
+    else
+        read(node, vec);
+}
+
+//! @} FileNode
+
+//! @relates cv::FileStorage
+//! @{
+
+/** @brief Writes data to a file storage.
+ */
+template<typename _Tp> static inline
+FileStorage& operator << (FileStorage& fs, const _Tp& value)
+{
+    if( !fs.isOpened() )
+        return fs;
+    if( fs.state == FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP )
+        CV_Error( Error::StsError, "No element name has been given" );
+    write( fs, fs.elname, value );
+    if( fs.state & FileStorage::INSIDE_MAP )
+        fs.state = FileStorage::NAME_EXPECTED + FileStorage::INSIDE_MAP;
+    return fs;
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, const char* str)
+{
+    return (fs << String(str));
+}
+
+/** @brief Writes data to a file storage.
+ */
+static inline
+FileStorage& operator << (FileStorage& fs, char* value)
+{
+    return (fs << String(value));
+}
+
+//! @} FileStorage
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, _Tp& value)
+{
+    read( *it, value, _Tp());
+    return ++it;
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+FileNodeIterator& operator >> (FileNodeIterator& it, std::vector<_Tp>& vec)
+{
+    cv::internal::VecReaderProxy<_Tp, traits::SafeFmt<_Tp>::fmt != 0> r(&it);
+    r(vec, (size_t)INT_MAX);
+    return it;
+}
+
+//! @} FileNodeIterator
+
+//! @relates cv::FileNode
+//! @{
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, _Tp& value)
+{
+    read( n, value, _Tp());
+}
+
+/** @brief Reads data from a file storage.
+ */
+template<typename _Tp> static inline
+void operator >> (const FileNode& n, std::vector<_Tp>& vec)
+{
+    FileNodeIterator it = n.begin();
+    it >> vec;
+}
+
+/** @brief Reads KeyPoint from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, KeyPoint& kpt)
+{
+    FileNodeIterator it = n.begin();
+    it >> kpt.pt.x >> kpt.pt.y >> kpt.size >> kpt.angle >> kpt.response >> kpt.octave >> kpt.class_id;
+}
+
+#ifdef CV__LEGACY_PERSISTENCE
+static inline
+void operator >> (const FileNode& n, std::vector<KeyPoint>& vec)
+{
+    read(n, vec);
+}
+static inline
+void operator >> (const FileNode& n, std::vector<DMatch>& vec)
+{
+    read(n, vec);
+}
+#endif
+
+/** @brief Reads DMatch from a file storage.
+*/
+//It needs special handling because it contains two types of fields, int & float.
+static inline
+void operator >> (const FileNode& n, DMatch& m)
+{
+    FileNodeIterator it = n.begin();
+    it >> m.queryIdx >> m.trainIdx >> m.imgIdx >> m.distance;
+}
+
+//! @} FileNode
+
+//! @relates cv::FileNodeIterator
+//! @{
+
+static inline
+bool operator == (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it1.fs == it2.fs && it1.container == it2.container &&
+        it1.reader.ptr == it2.reader.ptr && it1.remaining == it2.remaining;
+}
+
+static inline
+bool operator != (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return !(it1 == it2);
+}
+
+static inline
+ptrdiff_t operator - (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it2.remaining - it1.remaining;
+}
+
+static inline
+bool operator < (const FileNodeIterator& it1, const FileNodeIterator& it2)
+{
+    return it1.remaining > it2.remaining;
+}
+
+//! @} FileNodeIterator
+
+//! @cond IGNORED
+
+inline FileNode FileStorage::getFirstTopLevelNode() const { FileNode r = root(); FileNodeIterator it = r.begin(); return it != r.end() ? *it : FileNode(); }
+inline FileNode::FileNode() : fs(0), node(0) {}
+inline FileNode::FileNode(const CvFileStorage* _fs, const CvFileNode* _node) : fs(_fs), node(_node) {}
+inline FileNode::FileNode(const FileNode& _node) : fs(_node.fs), node(_node.node) {}
+inline bool FileNode::empty() const    { return node   == 0;    }
+inline bool FileNode::isNone() const   { return type() == NONE; }
+inline bool FileNode::isSeq() const    { return type() == SEQ;  }
+inline bool FileNode::isMap() const    { return type() == MAP;  }
+inline bool FileNode::isInt() const    { return type() == INT;  }
+inline bool FileNode::isReal() const   { return type() == REAL; }
+inline bool FileNode::isString() const { return type() == STR;  }
+inline CvFileNode* FileNode::operator *() { return (CvFileNode*)node; }
+inline const CvFileNode* FileNode::operator* () const { return node; }
+inline FileNode::operator int() const    { int value;    read(*this, value, 0);     return value; }
+inline FileNode::operator float() const  { float value;  read(*this, value, 0.f);   return value; }
+inline FileNode::operator double() const { double value; read(*this, value, 0.);    return value; }
+inline FileNode::operator String() const { String value; read(*this, value, value); return value; }
+inline double FileNode::real() const  { return double(*this); }
+inline String FileNode::string() const { return String(*this); }
+inline Mat FileNode::mat() const { Mat value; read(*this, value, value);    return value; }
+inline FileNodeIterator FileNode::begin() const { return FileNodeIterator(fs, node); }
+inline FileNodeIterator FileNode::end() const   { return FileNodeIterator(fs, node, size()); }
+inline void FileNode::readRaw( const String& fmt, uchar* vec, size_t len ) const { begin().readRaw( fmt, vec, len ); }
+inline FileNode FileNodeIterator::operator *() const  { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
+inline FileNode FileNodeIterator::operator ->() const { return FileNode(fs, (const CvFileNode*)(const void*)reader.ptr); }
+inline String::String(const FileNode& fn): cstr_(0), len_(0) { read(fn, *this, *this); }
+
+//! @endcond
+
+
+CV_EXPORTS void cvStartWriteRawData_Base64(::CvFileStorage * fs, const char* name, int len, const char* dt);
+
+CV_EXPORTS void cvWriteRawData_Base64(::CvFileStorage * fs, const void* _data, int len);
+
+CV_EXPORTS void cvEndWriteRawData_Base64(::CvFileStorage * fs);
+
+CV_EXPORTS void cvWriteMat_Base64(::CvFileStorage* fs, const char* name, const ::CvMat* mat);
+
+CV_EXPORTS void cvWriteMatND_Base64(::CvFileStorage* fs, const char* name, const ::CvMatND* mat);
+
+} // cv
+
+#endif // OPENCV_CORE_PERSISTENCE_HPP

+ 379 - 0
ThresholdTools/include/opencv2/core/ptr.inl.hpp

@@ -0,0 +1,379 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_PTR_INL_HPP
+#define OPENCV_CORE_PTR_INL_HPP
+
+#include <algorithm>
+
+//! @cond IGNORED
+
+namespace cv {
+
+template<typename Y>
+void DefaultDeleter<Y>::operator () (Y* p) const
+{
+    delete p;
+}
+
+namespace detail
+{
+
+struct PtrOwner
+{
+    PtrOwner() : refCount(1)
+    {}
+
+    void incRef()
+    {
+        CV_XADD(&refCount, 1);
+    }
+
+    void decRef()
+    {
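+        // CV_XADD returns the value held *before* the atomic decrement, so a
+        // result of 1 means this call released the last reference.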
+        if (CV_XADD(&refCount, -1) == 1) deleteSelf();
+    }
+
+protected:
+    /* This doesn't really need to be virtual, since PtrOwner is never deleted
+       directly, but it doesn't hurt and it helps avoid warnings. */
+    virtual ~PtrOwner()
+    {}
+
+    virtual void deleteSelf() = 0;
+
+private:
+    unsigned int refCount;
+
+    // noncopyable
+    PtrOwner(const PtrOwner&);
+    PtrOwner& operator = (const PtrOwner&);
+};
+
+template<typename Y, typename D>
+struct PtrOwnerImpl CV_FINAL : PtrOwner
+{
+    PtrOwnerImpl(Y* p, D d) : owned(p), deleter(d)
+    {}
+
+    void deleteSelf() CV_OVERRIDE
+    {
+        deleter(owned);
+        delete this;
+    }
+
+private:
+    Y* owned;
+    D deleter;
+};
+
+
+}
+
+template<typename T>
+Ptr<T>::Ptr() : owner(NULL), stored(NULL)
+{}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(Y* p)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, DefaultDeleter<Y> >(p, DefaultDeleter<Y>())
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+template<typename Y, typename D>
+Ptr<T>::Ptr(Y* p, D d)
+  : owner(p
+      ? new detail::PtrOwnerImpl<Y, D>(p, d)
+      : NULL),
+    stored(p)
+{}
+
+template<typename T>
+Ptr<T>::Ptr(const Ptr& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o) : owner(o.owner), stored(o.stored)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>::Ptr(const Ptr<Y>& o, T* p) : owner(o.owner), stored(p)
+{
+    if (owner) owner->incRef();
+}
+
+template<typename T>
+Ptr<T>::~Ptr()
+{
+    release();
+}
+
+template<typename T>
+Ptr<T>& Ptr<T>::operator = (const Ptr<T>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<T>& Ptr<T>::operator = (const Ptr<Y>& o)
+{
+    Ptr(o).swap(*this);
+    return *this;
+}
+
+template<typename T>
+void Ptr<T>::release()
+{
+    if (owner) owner->decRef();
+    owner = NULL;
+    stored = NULL;
+}
+
+template<typename T>
+template<typename Y>
+void Ptr<T>::reset(Y* p)
+{
+    Ptr(p).swap(*this);
+}
+
+template<typename T>
+template<typename Y, typename D>
+void Ptr<T>::reset(Y* p, D d)
+{
+    Ptr(p, d).swap(*this);
+}
+
+template<typename T>
+void Ptr<T>::swap(Ptr<T>& o)
+{
+    std::swap(owner, o.owner);
+    std::swap(stored, o.stored);
+}
+
+template<typename T>
+T* Ptr<T>::get() const
+{
+    return stored;
+}
+
+template<typename T>
+typename detail::RefOrVoid<T>::type Ptr<T>::operator * () const
+{
+    return *stored;
+}
+
+template<typename T>
+T* Ptr<T>::operator -> () const
+{
+    return stored;
+}
+
+template<typename T>
+Ptr<T>::operator T* () const
+{
+    return stored;
+}
+
+
+template<typename T>
+bool Ptr<T>::empty() const
+{
+    return !stored;
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::staticCast() const
+{
+    return Ptr<Y>(*this, static_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::constCast() const
+{
+    return Ptr<Y>(*this, const_cast<Y*>(stored));
+}
+
+template<typename T>
+template<typename Y>
+Ptr<Y> Ptr<T>::dynamicCast() const
+{
+    return Ptr<Y>(*this, dynamic_cast<Y*>(stored));
+}
+
+#ifdef CV_CXX_MOVE_SEMANTICS
+
+template<typename T>
+Ptr<T>::Ptr(Ptr&& o) : owner(o.owner), stored(o.stored)
+{
+    o.owner = NULL;
+    o.stored = NULL;
+}
+
+template<typename T>
+Ptr<T>& Ptr<T>::operator = (Ptr<T>&& o)
+{
+    if (this == &o)
+        return *this;
+
+    release();
+    owner = o.owner;
+    stored = o.stored;
+    o.owner = NULL;
+    o.stored = NULL;
+    return *this;
+}
+
+#endif
+
+
+template<typename T>
+void swap(Ptr<T>& ptr1, Ptr<T>& ptr2)
+{
+    ptr1.swap(ptr2);
+}
+
+template<typename T>
+bool operator == (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() == ptr2.get();
+}
+
+template<typename T>
+bool operator != (const Ptr<T>& ptr1, const Ptr<T>& ptr2)
+{
+    return ptr1.get() != ptr2.get();
+}
+
+template<typename T>
+Ptr<T> makePtr()
+{
+    return Ptr<T>(new T());
+}
+
+template<typename T, typename A1>
+Ptr<T> makePtr(const A1& a1)
+{
+    return Ptr<T>(new T(a1));
+}
+
+template<typename T, typename A1, typename A2>
+Ptr<T> makePtr(const A1& a1, const A2& a2)
+{
+    return Ptr<T>(new T(a1, a2));
+}
+
+template<typename T, typename A1, typename A2, typename A3>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3)
+{
+    return Ptr<T>(new T(a1, a2, a3));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10, typename A11>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11));
+}
+
+template<typename T, typename A1, typename A2, typename A3, typename A4, typename A5, typename A6, typename A7, typename A8, typename A9, typename A10, typename A11, typename A12>
+Ptr<T> makePtr(const A1& a1, const A2& a2, const A3& a3, const A4& a4, const A5& a5, const A6& a6, const A7& a7, const A8& a8, const A9& a9, const A10& a10, const A11& a11, const A12& a12)
+{
+    return Ptr<T>(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12));
+}
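+
+// Usage sketch: makePtr<T>(...) passes up to 12 (const reference) arguments to
+// T's constructor and wraps the new object, e.g.
+//     Ptr<Mat> m = makePtr<Mat>(3, 3, CV_8UC1);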
+} // namespace cv
+
+//! @endcond
+
+#endif // OPENCV_CORE_PTR_INL_HPP

+ 165 - 0
ThresholdTools/include/opencv2/core/saturate.hpp

@@ -0,0 +1,165 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_SATURATE_HPP
+#define OPENCV_CORE_SATURATE_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/fast_math.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+/////////////// saturate_cast (used in image & signal processing) ///////////////////
+
+/** @brief Template function for accurate conversion from one primitive type to another.
+
+ The function saturate_cast resembles the standard C++ cast operations, such as static_cast\<T\>()
+ and others. It performs an efficient and accurate conversion from one primitive type to another
+ (see the introduction chapter). saturate in the name means that when the input value v is out of the
+ range of the target type, the result is not formed just by taking low bits of the input, but instead
+ the value is clipped. For example:
+ @code
+ uchar a = saturate_cast<uchar>(-100); // a = 0 (UCHAR_MIN)
+ short b = saturate_cast<short>(33333.33333); // b = 32767 (SHRT_MAX)
+ @endcode
+ Such clipping is done when the target type is unsigned char, signed char, unsigned short or
+ signed short. For 32-bit integers, no clipping is done.
+
+ When the parameter is a floating-point value and the target type is an integer (8-, 16- or 32-bit),
+ the floating-point value is first rounded to the nearest integer and then clipped if needed (when
+ the target type is 8- or 16-bit).
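+
+ For example, with the default rounding mode (to the nearest integer, ties to even):
+ @code
+ uchar c = saturate_cast<uchar>(257.7); // rounded to 258, then clipped: c = 255
+ int   d = saturate_cast<int>(2.5f);    // d = 2 (the tie goes to the even integer)
+ @endcode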
+
+ This operation is used in many image processing functions in OpenCV, from the simplest to the most complex.
+
+ @param v Function parameter.
+ @sa add, subtract, multiply, divide, Mat::convertTo
+ */
+template<typename _Tp> static inline _Tp saturate_cast(uchar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(schar v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(ushort v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(short v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(unsigned v) { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int v)      { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(float v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(double v)   { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(int64 v)    { return _Tp(v); }
+/** @overload */
+template<typename _Tp> static inline _Tp saturate_cast(uint64 v)   { return _Tp(v); }
+
+template<> inline uchar saturate_cast<uchar>(schar v)        { return (uchar)std::max((int)v, 0); }
+template<> inline uchar saturate_cast<uchar>(ushort v)       { return (uchar)std::min((unsigned)v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(int v)          { return (uchar)((unsigned)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(short v)        { return saturate_cast<uchar>((int)v); }
+template<> inline uchar saturate_cast<uchar>(unsigned v)     { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
+template<> inline uchar saturate_cast<uchar>(float v)        { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(double v)       { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
+template<> inline uchar saturate_cast<uchar>(int64 v)        { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
+template<> inline uchar saturate_cast<uchar>(uint64 v)       { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
+
+template<> inline schar saturate_cast<schar>(uchar v)        { return (schar)std::min((int)v, SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(ushort v)       { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(int v)          { return (schar)((unsigned)(v-SCHAR_MIN) <= (unsigned)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(short v)        { return saturate_cast<schar>((int)v); }
+template<> inline schar saturate_cast<schar>(unsigned v)     { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(float v)        { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(double v)       { int iv = cvRound(v); return saturate_cast<schar>(iv); }
+template<> inline schar saturate_cast<schar>(int64 v)        { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
+template<> inline schar saturate_cast<schar>(uint64 v)       { return (schar)std::min(v, (uint64)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(schar v)      { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(short v)      { return (ushort)std::max((int)v, 0); }
+template<> inline ushort saturate_cast<ushort>(int v)        { return (ushort)((unsigned)v <= (unsigned)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(unsigned v)   { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
+template<> inline ushort saturate_cast<ushort>(float v)      { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(double v)     { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
+template<> inline ushort saturate_cast<ushort>(int64 v)      { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
+template<> inline ushort saturate_cast<ushort>(uint64 v)     { return (ushort)std::min(v, (uint64)USHRT_MAX); }
+
+template<> inline short saturate_cast<short>(ushort v)       { return (short)std::min((int)v, SHRT_MAX); }
+template<> inline short saturate_cast<short>(int v)          { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(unsigned v)     { return (short)std::min(v, (unsigned)SHRT_MAX); }
+template<> inline short saturate_cast<short>(float v)        { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(double v)       { int iv = cvRound(v); return saturate_cast<short>(iv); }
+template<> inline short saturate_cast<short>(int64 v)        { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
+template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(unsigned v)         { return (int)std::min(v, (unsigned)INT_MAX); }
+template<> inline int saturate_cast<int>(int64 v)            { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); }
+template<> inline int saturate_cast<int>(uint64 v)           { return (int)std::min(v, (uint64)INT_MAX); }
+template<> inline int saturate_cast<int>(float v)            { return cvRound(v); }
+template<> inline int saturate_cast<int>(double v)           { return cvRound(v); }
+
+template<> inline unsigned saturate_cast<unsigned>(schar v)  { return (unsigned)std::max(v, (schar)0); }
+template<> inline unsigned saturate_cast<unsigned>(short v)  { return (unsigned)std::max(v, (short)0); }
+template<> inline unsigned saturate_cast<unsigned>(int v)    { return (unsigned)std::max(v, (int)0); }
+template<> inline unsigned saturate_cast<unsigned>(int64 v)  { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
+template<> inline unsigned saturate_cast<unsigned>(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); }
+// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+template<> inline unsigned saturate_cast<unsigned>(float v)  { return static_cast<unsigned>(cvRound(v)); }
+template<> inline unsigned saturate_cast<unsigned>(double v) { return static_cast<unsigned>(cvRound(v)); }
+
+template<> inline uint64 saturate_cast<uint64>(schar v)      { return (uint64)std::max(v, (schar)0); }
+template<> inline uint64 saturate_cast<uint64>(short v)      { return (uint64)std::max(v, (short)0); }
+template<> inline uint64 saturate_cast<uint64>(int v)        { return (uint64)std::max(v, (int)0); }
+template<> inline uint64 saturate_cast<uint64>(int64 v)      { return (uint64)std::max(v, (int64)0); }
+
+template<> inline int64 saturate_cast<int64>(uint64 v)       { return (int64)std::min(v, (uint64)LLONG_MAX); }
+
+//! @}
+
+} // cv
+
+#endif // OPENCV_CORE_SATURATE_HPP

+ 514 - 0
ThresholdTools/include/opencv2/core/softfloat.hpp

@@ -0,0 +1,514 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This file is based on files from package issued with the following license:
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3c, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#pragma once
+#ifndef softfloat_h
+#define softfloat_h 1
+
+#include "cvdef.h"
+
+namespace cv
+{
+
+/** @addtogroup core_utils_softfloat
+
+  [SoftFloat](http://www.jhauser.us/arithmetic/SoftFloat.html) is a software implementation
+  of floating-point calculations according to IEEE 754 standard.
+  All calculations are done in integers, which makes them machine-independent and bit-exact.
+  This library can be useful in accuracy-critical parts like look-up tables generation, tests, etc.
+  OpenCV contains a subset of SoftFloat partially rewritten to C++.
+
+  ### Types
+
+  There are two basic types: @ref softfloat and @ref softdouble.
+  These types are binary compatible with float and double types respectively
+  and support conversions to/from them.
+  Other types from the original SoftFloat library, like fp16 or fp128, were dropped,
+  as were quiet/signaling NaN support, on-the-fly rounding mode switching
+  and exception flags (though exceptions could be implemented in the future).
+
+  ### Operations
+
+  Both types support the following:
+  - Construction from signed and unsigned 32-bit and 64-bit integers,
+  float/double or raw binary representation
+  - Conversions between each other, to float or double and to int
+  using @ref cvRound, @ref cvTrunc, @ref cvFloor, @ref cvCeil or the family of
+  saturate_cast functions
+  - Add, subtract, multiply, divide, remainder, square root, FMA with absolute precision
+  - Comparison operations
+  - Explicit sign, exponent and significand manipulation through get/set methods,
+  number state indicators (isInf, isNaN, isSubnormal)
+  - Type-specific constants like eps, minimum/maximum value, best pi approximation, etc.
+  - min(), max(), abs(), exp(), log() and pow() functions
+
+*/
+//! @{
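+
+/*  A minimal usage sketch (illustrative only):
+
+        softdouble acc = softdouble::zero();
+        for (int i = 0; i < 100; i++)
+            acc += softdouble(1) / softdouble(3); // bit-exact on every platform
+        double d = (double)acc;                   // binary-compatible conversion
+*/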
+
+struct softfloat;
+struct softdouble;
+
+struct CV_EXPORTS softfloat
+{
+public:
+    /** @brief Default constructor */
+    softfloat() { v = 0; }
+    /** @brief Copy constructor */
+    softfloat( const softfloat& c) { v = c.v; }
+    /** @brief Assignment operator */
+    softfloat& operator=( const softfloat& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static const softfloat fromRaw( const uint32_t a ) { softfloat x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softfloat( const uint32_t );
+    explicit softfloat( const uint64_t );
+    explicit softfloat( const int32_t );
+    explicit softfloat( const int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softfloat( const int a ) { *this = softfloat(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from float */
+    explicit softfloat( const float a ) { Cv32suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts  */
+    operator softdouble() const;
+    operator float() const { Cv32suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetic */
+    softfloat operator + (const softfloat&) const;
+    softfloat operator - (const softfloat&) const;
+    softfloat operator * (const softfloat&) const;
+    softfloat operator / (const softfloat&) const;
+    softfloat operator - () const { softfloat x; x.v = v ^ (1U << 31); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
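+
+    For example, softfloat(7) % softfloat(2) yields -1: a/b = 3.5 is exactly
+    halfway between 3 and 4, so n = 4 (the even choice) and a - n*b = 7 - 8 = -1.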
+    */
+    softfloat operator % (const softfloat&) const;
+
+    softfloat& operator += (const softfloat& a) { *this = *this + a; return *this; }
+    softfloat& operator -= (const softfloat& a) { *this = *this - a; return *this; }
+    softfloat& operator *= (const softfloat& a) { *this = *this * a; return *this; }
+    softfloat& operator /= (const softfloat& a) { *this = *this / a; return *this; }
+    softfloat& operator %= (const softfloat& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any comparison with NaN produces false
+       + The only exception: x != y is true whenever either operand is NaN.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softfloat& ) const;
+    bool operator != ( const softfloat& ) const;
+    bool operator >  ( const softfloat& ) const;
+    bool operator >= ( const softfloat& ) const;
+    bool operator <  ( const softfloat& ) const;
+    bool operator <= ( const softfloat& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffff)  > 0x7f800000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffff) == 0x7f800000; }
+    /** @brief Subnormal number indicator */
+    inline bool isSubnormal() const { return ((v >> 23) & 0xFF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 31) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    inline softfloat setSign(bool sign) const { softfloat x; x.v = (v & ((1U << 31) - 1)) | ((uint32_t)sign << 31); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 23) & 0xFF) - 127; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softfloat setExp(int e) const { softfloat x; x.v = (v & 0x807fffff) | (((e + 127) & 0xFF) << 23 ); return x; }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softfloat getFrac() const
+    {
+        uint_fast32_t vv = (v & 0x007fffff) | (127 << 23);
+        return softfloat::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softfloat setFrac(const softfloat& s) const
+    {
+        softfloat x;
+        x.v = (v & 0xff800000) | (s.v & 0x007fffff);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softfloat zero() { return softfloat::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softfloat  inf() { return softfloat::fromRaw( 0xFF << 23 ); }
+    /** @brief Default NaN constant */
+    static softfloat  nan() { return softfloat::fromRaw( 0x7fffffff ); }
+    /** @brief One constant */
+    static softfloat  one() { return softfloat::fromRaw(  127 << 23 ); }
+    /** @brief Smallest normalized value */
+    static softfloat  min() { return softfloat::fromRaw( 0x01 << 23 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softfloat  eps() { return softfloat::fromRaw( (127 - 23) << 23 ); }
+    /** @brief Biggest finite value */
+    static softfloat  max() { return softfloat::fromRaw( (0xFF << 23) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softfloat   pi() { return softfloat::fromRaw( 0x40490fdb ); }
+
+    uint32_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+struct CV_EXPORTS softdouble
+{
+public:
+    /** @brief Default constructor */
+    softdouble() : v(0) { }
+    /** @brief Copy constructor */
+    softdouble( const softdouble& c) { v = c.v; }
+    /** @brief Assignment operator */
+    softdouble& operator=( const softdouble& c )
+    {
+        if(&c != this) v = c.v;
+        return *this;
+    }
+    /** @brief Construct from raw
+
+    Builds new value from raw binary representation
+    */
+    static softdouble fromRaw( const uint64_t a ) { softdouble x; x.v = a; return x; }
+
+    /** @brief Construct from integer */
+    explicit softdouble( const uint32_t );
+    explicit softdouble( const uint64_t );
+    explicit softdouble( const  int32_t );
+    explicit softdouble( const  int64_t );
+
+#ifdef CV_INT32_T_IS_LONG_INT
+    // for platforms with int32_t = long int
+    explicit softdouble( const int a ) { *this = softdouble(static_cast<int32_t>(a)); }
+#endif
+
+    /** @brief Construct from double */
+    explicit softdouble( const double a ) { Cv64suf s; s.f = a; v = s.u; }
+
+    /** @brief Type casts  */
+    operator softfloat() const;
+    operator double() const { Cv64suf s; s.u = v; return s.f; }
+
+    /** @brief Basic arithmetic */
+    softdouble operator + (const softdouble&) const;
+    softdouble operator - (const softdouble&) const;
+    softdouble operator * (const softdouble&) const;
+    softdouble operator / (const softdouble&) const;
+    softdouble operator - () const { softdouble x; x.v = v ^ (1ULL << 63); return x; }
+
+    /** @brief Remainder operator
+
+    A quote from original SoftFloat manual:
+
+    > The IEEE Standard remainder operation computes the value
+    > a - n * b, where n is the integer closest to a / b.
+    > If a / b is exactly halfway between two integers, n is the even integer
+    > closest to a / b. The IEEE Standard’s remainder operation is always exact and so requires no rounding.
+    > Depending on the relative magnitudes of the operands, the remainder functions
+    > can take considerably longer to execute than the other SoftFloat functions.
+    > This is an inherent characteristic of the remainder operation itself and is not a flaw
+    > in the SoftFloat implementation.
+    */
+    softdouble operator % (const softdouble&) const;
+
+    softdouble& operator += (const softdouble& a) { *this = *this + a; return *this; }
+    softdouble& operator -= (const softdouble& a) { *this = *this - a; return *this; }
+    softdouble& operator *= (const softdouble& a) { *this = *this * a; return *this; }
+    softdouble& operator /= (const softdouble& a) { *this = *this / a; return *this; }
+    softdouble& operator %= (const softdouble& a) { *this = *this % a; return *this; }
+
+    /** @brief Comparison operations
+
+     - Any comparison with NaN produces false
+       + The only exception: x != y is true whenever either operand is NaN.
+     - Positive and negative zeros are equal
+    */
+    bool operator == ( const softdouble& ) const;
+    bool operator != ( const softdouble& ) const;
+    bool operator >  ( const softdouble& ) const;
+    bool operator >= ( const softdouble& ) const;
+    bool operator <  ( const softdouble& ) const;
+    bool operator <= ( const softdouble& ) const;
+
+    /** @brief NaN state indicator */
+    inline bool isNaN() const { return (v & 0x7fffffffffffffff)  > 0x7ff0000000000000; }
+    /** @brief Inf state indicator */
+    inline bool isInf() const { return (v & 0x7fffffffffffffff) == 0x7ff0000000000000; }
+    /** @brief Subnormal number indicator */
+    inline bool isSubnormal() const { return ((v >> 52) & 0x7FF) == 0; }
+
+    /** @brief Get sign bit */
+    inline bool getSign() const { return (v >> 63) != 0; }
+    /** @brief Construct a copy with new sign bit */
+    softdouble setSign(bool sign) const { softdouble x; x.v = (v & ((1ULL << 63) - 1)) | ((uint_fast64_t)(sign) << 63); return x; }
+    /** @brief Get 0-based exponent */
+    inline int getExp() const { return ((v >> 52) & 0x7FF) - 1023; }
+    /** @brief Construct a copy with new 0-based exponent */
+    inline softdouble setExp(int e) const
+    {
+        softdouble x;
+        x.v = (v & 0x800FFFFFFFFFFFFF) | ((uint_fast64_t)((e + 1023) & 0x7FF) << 52);
+        return x;
+    }
+
+    /** @brief Get a fraction part
+
+    Returns a number 1 <= x < 2 with the same significand
+    */
+    inline softdouble getFrac() const
+    {
+        uint_fast64_t vv = (v & 0x000FFFFFFFFFFFFF) | ((uint_fast64_t)(1023) << 52);
+        return softdouble::fromRaw(vv);
+    }
+    /** @brief Construct a copy with provided significand
+
+    Constructs a copy of a number with significand taken from parameter
+    */
+    inline softdouble setFrac(const softdouble& s) const
+    {
+        softdouble x;
+        x.v = (v & 0xFFF0000000000000) | (s.v & 0x000FFFFFFFFFFFFF);
+        return x;
+    }
+
+    /** @brief Zero constant */
+    static softdouble zero() { return softdouble::fromRaw( 0 ); }
+    /** @brief Positive infinity constant */
+    static softdouble  inf() { return softdouble::fromRaw( (uint_fast64_t)(0x7FF) << 52 ); }
+    /** @brief Default NaN constant */
+    static softdouble  nan() { return softdouble::fromRaw( CV_BIG_INT(0x7FFFFFFFFFFFFFFF) ); }
+    /** @brief One constant */
+    static softdouble  one() { return softdouble::fromRaw( (uint_fast64_t)( 1023) << 52 ); }
+    /** @brief Smallest normalized value */
+    static softdouble  min() { return softdouble::fromRaw( (uint_fast64_t)( 0x01) << 52 ); }
+    /** @brief Difference between 1 and next representable value */
+    static softdouble  eps() { return softdouble::fromRaw( (uint_fast64_t)( 1023 - 52 ) << 52 ); }
+    /** @brief Biggest finite value */
+    static softdouble  max() { return softdouble::fromRaw( ((uint_fast64_t)(0x7FF) << 52) - 1 ); }
+    /** @brief Correct pi approximation */
+    static softdouble   pi() { return softdouble::fromRaw( CV_BIG_INT(0x400921FB54442D18) ); }
+
+    uint64_t v;
+};
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+
+/** @brief Fused Multiplication and Addition
+
+Computes (a*b)+c with single rounding
+*/
+CV_EXPORTS softfloat  mulAdd( const softfloat&  a, const softfloat&  b, const softfloat & c);
+CV_EXPORTS softdouble mulAdd( const softdouble& a, const softdouble& b, const softdouble& c);
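+
+// Note: mulAdd(a, b, c) can differ from a*b + c whenever rounding the
+// intermediate product would lose bits; the fused form rounds only once.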
+
+/** @brief Square root */
+CV_EXPORTS softfloat  sqrt( const softfloat&  a );
+CV_EXPORTS softdouble sqrt( const softdouble& a );
+}
+
+/*----------------------------------------------------------------------------
+| Ported from OpenCV and added for usability
+*----------------------------------------------------------------------------*/
+
+/** @brief Truncates a number to an integer (rounds toward zero) */
+CV_EXPORTS int cvTrunc(const cv::softfloat&  a);
+CV_EXPORTS int cvTrunc(const cv::softdouble& a);
+
+/** @brief Rounds a number to the nearest integer (ties go to even) */
+CV_EXPORTS int cvRound(const cv::softfloat&  a);
+CV_EXPORTS int cvRound(const cv::softdouble& a);
+
+/** @brief Rounds a number to the nearest 64-bit integer (ties go to even) */
+CV_EXPORTS int64_t cvRound64(const cv::softdouble& a);
+
+/** @brief Rounds a number down to integer */
+CV_EXPORTS int cvFloor(const cv::softfloat&  a);
+CV_EXPORTS int cvFloor(const cv::softdouble& a);
+
+/** @brief Rounds a number up to integer */
+CV_EXPORTS int  cvCeil(const cv::softfloat&  a);
+CV_EXPORTS int  cvCeil(const cv::softdouble& a);
+
+namespace cv
+{
+/** @brief Saturate casts */
+template<typename _Tp> static inline _Tp saturate_cast(softfloat  a) { return _Tp(a); }
+template<typename _Tp> static inline _Tp saturate_cast(softdouble a) { return _Tp(a); }
+
+template<> inline uchar saturate_cast<uchar>(softfloat  a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+template<> inline uchar saturate_cast<uchar>(softdouble a) { return (uchar)std::max(std::min(cvRound(a), (int)UCHAR_MAX), 0); }
+
+template<> inline schar saturate_cast<schar>(softfloat  a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+template<> inline schar saturate_cast<schar>(softdouble a) { return (schar)std::min(std::max(cvRound(a), (int)SCHAR_MIN), (int)SCHAR_MAX); }
+
+template<> inline ushort saturate_cast<ushort>(softfloat  a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+template<> inline ushort saturate_cast<ushort>(softdouble a) { return (ushort)std::max(std::min(cvRound(a), (int)USHRT_MAX), 0); }
+
+template<> inline short saturate_cast<short>(softfloat  a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+template<> inline short saturate_cast<short>(softdouble a) { return (short)std::min(std::max(cvRound(a), (int)SHRT_MIN), (int)SHRT_MAX); }
+
+template<> inline int saturate_cast<int>(softfloat  a) { return cvRound(a); }
+template<> inline int saturate_cast<int>(softdouble a) { return cvRound(a); }
+
+template<> inline int64_t saturate_cast<int64_t>(softfloat  a) { return cvRound(a); }
+template<> inline int64_t saturate_cast<int64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Saturate cast to unsigned integer and unsigned long long integer
+We intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
+*/
+template<> inline unsigned saturate_cast<unsigned>(softfloat  a) { return cvRound(a); }
+template<> inline unsigned saturate_cast<unsigned>(softdouble a) { return cvRound(a); }
+
+template<> inline uint64_t saturate_cast<uint64_t>(softfloat  a) { return cvRound(a); }
+template<> inline uint64_t saturate_cast<uint64_t>(softdouble a) { return cvRound64(a); }
+
+/** @brief Min and Max functions */
+inline softfloat  min(const softfloat&  a, const softfloat&  b) { return (a > b) ? b : a; }
+inline softdouble min(const softdouble& a, const softdouble& b) { return (a > b) ? b : a; }
+
+inline softfloat  max(const softfloat&  a, const softfloat&  b) { return (a > b) ? a : b; }
+inline softdouble max(const softdouble& a, const softdouble& b) { return (a > b) ? a : b; }
+
+/** @brief Absolute value */
+inline softfloat  abs( softfloat  a) { softfloat  x; x.v = a.v & ((1U   << 31) - 1); return x; }
+inline softdouble abs( softdouble a) { softdouble x; x.v = a.v & ((1ULL << 63) - 1); return x; }
+
+/** @brief Exponent
+
+Special cases:
+- exp(NaN) is NaN
+- exp(-Inf) == 0
+- exp(+Inf) == +Inf
+*/
+CV_EXPORTS softfloat  exp( const softfloat&  a);
+CV_EXPORTS softdouble exp( const softdouble& a);
+
+/** @brief Natural logarithm
+
+Special cases:
+- log(NaN), log(x < 0) are NaN
+- log(0) == -Inf
+*/
+CV_EXPORTS softfloat  log( const softfloat&  a );
+CV_EXPORTS softdouble log( const softdouble& a );
+
+/** @brief Raising to the power
+
+Special cases:
+- x**NaN is NaN for any x
+- ( |x| == 1 )**Inf is NaN
+- ( |x|  > 1 )**+Inf or ( |x| < 1 )**-Inf is +Inf
+- ( |x|  > 1 )**-Inf or ( |x| < 1 )**+Inf is 0
+- x ** 0 == 1 for any x
+- x ** 1 == x for any x
+- NaN ** y is NaN for any other y
+- Inf**(y < 0) == 0
+- Inf ** y is +Inf for any other y
+- (x < 0)**y is NaN for any other y if y can't be correctly rounded to integer
+- 0 ** 0 == 1
+- 0 ** (y < 0) is +Inf
+- 0 ** (y > 0) is 0
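+
+For example, under these rules pow(softfloat(-2), softfloat(3.f)) == softfloat(-8.f),
+while pow(softfloat(-2), softfloat(0.5f)) is NaN (negative base, non-integer exponent).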
+*/
+CV_EXPORTS softfloat  pow( const softfloat&  a, const softfloat&  b);
+CV_EXPORTS softdouble pow( const softdouble& a, const softdouble& b);
+
+/** @brief Cube root
+
+Special cases:
+- cbrt(NaN) is NaN
+- cbrt(+/-Inf) is +/-Inf
+*/
+CV_EXPORTS softfloat cbrt( const softfloat& a );
+
+/** @brief Sine
+
+Special cases:
+- sin(Inf) or sin(NaN) is NaN
+- sin(x) == x when sin(x) is close to zero
+*/
+CV_EXPORTS softdouble sin( const softdouble& a );
+
+/** @brief Cosine
+ *
+Special cases:
+- cos(Inf) or cos(NaN) is NaN
+- cos(x) == +/- 1 when cos(x) is close to +/- 1
+*/
+CV_EXPORTS softdouble cos( const softdouble& a );
+
+}
+
+//! @}
+
+#endif

+ 652 - 0
ThresholdTools/include/opencv2/core/sse_utils.hpp

@@ -0,0 +1,652 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_SSE_UTILS_HPP
+#define OPENCV_CORE_SSE_UTILS_HPP
+
+#ifndef __cplusplus
+#  error sse_utils.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_sse
+//! @{
+
+#if CV_SSE2
+
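+// The _mm_deinterleave_* helpers below split interleaved 8-bit channel data
+// (e.g. RGRG... or RGBRGB...) held in SSE registers into per-channel registers
+// via repeated unpacklo/unpackhi passes; the _mm_interleave_* counterparts
+// invert the transform with mask-and-pack plus shift-and-pack passes.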
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_g1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_g1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk3);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk3);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk3);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk3);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk2);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk2);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk3);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk3);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk2);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk2);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk3);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk3);
+}
+
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                  __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_g1);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_g1);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b0);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b0);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_b1);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_b1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk5);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk5);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk5);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk5);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk3);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk3);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk4);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk4);
+    __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk5);
+    __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk5);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk3);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk3);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk4);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk4);
+    v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk5);
+    v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk5);
+}
+
+inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                  __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi8(v_r0, v_b0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi8(v_r0, v_b0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi8(v_r1, v_b1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi8(v_r1, v_b1);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi8(v_g0, v_a0);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi8(v_g0, v_a0);
+    __m128i layer1_chunk6 = _mm_unpacklo_epi8(v_g1, v_a1);
+    __m128i layer1_chunk7 = _mm_unpackhi_epi8(v_g1, v_a1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi8(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi8(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi8(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi8(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi8(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi8(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk6 = _mm_unpacklo_epi8(layer1_chunk3, layer1_chunk7);
+    __m128i layer2_chunk7 = _mm_unpackhi_epi8(layer1_chunk3, layer1_chunk7);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi8(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi8(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi8(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi8(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi8(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi8(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk6 = _mm_unpacklo_epi8(layer2_chunk3, layer2_chunk7);
+    __m128i layer3_chunk7 = _mm_unpackhi_epi8(layer2_chunk3, layer2_chunk7);
+
+    __m128i layer4_chunk0 = _mm_unpacklo_epi8(layer3_chunk0, layer3_chunk4);
+    __m128i layer4_chunk1 = _mm_unpackhi_epi8(layer3_chunk0, layer3_chunk4);
+    __m128i layer4_chunk2 = _mm_unpacklo_epi8(layer3_chunk1, layer3_chunk5);
+    __m128i layer4_chunk3 = _mm_unpackhi_epi8(layer3_chunk1, layer3_chunk5);
+    __m128i layer4_chunk4 = _mm_unpacklo_epi8(layer3_chunk2, layer3_chunk6);
+    __m128i layer4_chunk5 = _mm_unpackhi_epi8(layer3_chunk2, layer3_chunk6);
+    __m128i layer4_chunk6 = _mm_unpacklo_epi8(layer3_chunk3, layer3_chunk7);
+    __m128i layer4_chunk7 = _mm_unpackhi_epi8(layer3_chunk3, layer3_chunk7);
+
+    v_r0 = _mm_unpacklo_epi8(layer4_chunk0, layer4_chunk4);
+    v_r1 = _mm_unpackhi_epi8(layer4_chunk0, layer4_chunk4);
+    v_g0 = _mm_unpacklo_epi8(layer4_chunk1, layer4_chunk5);
+    v_g1 = _mm_unpackhi_epi8(layer4_chunk1, layer4_chunk5);
+    v_b0 = _mm_unpacklo_epi8(layer4_chunk2, layer4_chunk6);
+    v_b1 = _mm_unpackhi_epi8(layer4_chunk2, layer4_chunk6);
+    v_a0 = _mm_unpacklo_epi8(layer4_chunk3, layer4_chunk7);
+    v_a1 = _mm_unpackhi_epi8(layer4_chunk3, layer4_chunk7);
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+    v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
+}
+
+inline void _mm_interleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i v_mask = _mm_set1_epi16(0x00ff);
+
+    __m128i layer4_chunk0 = _mm_packus_epi16(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer4_chunk4 = _mm_packus_epi16(_mm_srli_epi16(v_r0, 8), _mm_srli_epi16(v_r1, 8));
+    __m128i layer4_chunk1 = _mm_packus_epi16(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer4_chunk5 = _mm_packus_epi16(_mm_srli_epi16(v_g0, 8), _mm_srli_epi16(v_g1, 8));
+    __m128i layer4_chunk2 = _mm_packus_epi16(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer4_chunk6 = _mm_packus_epi16(_mm_srli_epi16(v_b0, 8), _mm_srli_epi16(v_b1, 8));
+    __m128i layer4_chunk3 = _mm_packus_epi16(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
+    __m128i layer4_chunk7 = _mm_packus_epi16(_mm_srli_epi16(v_a0, 8), _mm_srli_epi16(v_a1, 8));
+
+    __m128i layer3_chunk0 = _mm_packus_epi16(_mm_and_si128(layer4_chunk0, v_mask), _mm_and_si128(layer4_chunk1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk0, 8), _mm_srli_epi16(layer4_chunk1, 8));
+    __m128i layer3_chunk1 = _mm_packus_epi16(_mm_and_si128(layer4_chunk2, v_mask), _mm_and_si128(layer4_chunk3, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk2, 8), _mm_srli_epi16(layer4_chunk3, 8));
+    __m128i layer3_chunk2 = _mm_packus_epi16(_mm_and_si128(layer4_chunk4, v_mask), _mm_and_si128(layer4_chunk5, v_mask));
+    __m128i layer3_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk4, 8), _mm_srli_epi16(layer4_chunk5, 8));
+    __m128i layer3_chunk3 = _mm_packus_epi16(_mm_and_si128(layer4_chunk6, v_mask), _mm_and_si128(layer4_chunk7, v_mask));
+    __m128i layer3_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer4_chunk6, 8), _mm_srli_epi16(layer4_chunk7, 8));
+
+    __m128i layer2_chunk0 = _mm_packus_epi16(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk0, 8), _mm_srli_epi16(layer3_chunk1, 8));
+    __m128i layer2_chunk1 = _mm_packus_epi16(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk2, 8), _mm_srli_epi16(layer3_chunk3, 8));
+    __m128i layer2_chunk2 = _mm_packus_epi16(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk4, 8), _mm_srli_epi16(layer3_chunk5, 8));
+    __m128i layer2_chunk3 = _mm_packus_epi16(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
+    __m128i layer2_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer3_chunk6, 8), _mm_srli_epi16(layer3_chunk7, 8));
+
+    __m128i layer1_chunk0 = _mm_packus_epi16(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk0, 8), _mm_srli_epi16(layer2_chunk1, 8));
+    __m128i layer1_chunk1 = _mm_packus_epi16(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk2, 8), _mm_srli_epi16(layer2_chunk3, 8));
+    __m128i layer1_chunk2 = _mm_packus_epi16(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk6 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk4, 8), _mm_srli_epi16(layer2_chunk5, 8));
+    __m128i layer1_chunk3 = _mm_packus_epi16(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
+    __m128i layer1_chunk7 = _mm_packus_epi16(_mm_srli_epi16(layer2_chunk6, 8), _mm_srli_epi16(layer2_chunk7, 8));
+
+    v_r0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_b0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk0, 8), _mm_srli_epi16(layer1_chunk1, 8));
+    v_r1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk2, 8), _mm_srli_epi16(layer1_chunk3, 8));
+    v_g0 = _mm_packus_epi16(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_a0 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk4, 8), _mm_srli_epi16(layer1_chunk5, 8));
+    v_g1 = _mm_packus_epi16(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
+    v_a1 = _mm_packus_epi16(_mm_srli_epi16(layer1_chunk6, 8), _mm_srli_epi16(layer1_chunk7, 8));
+}
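+
+// A minimal usage sketch (not part of the upstream header): merging four byte
+// planes of 32 bytes each into 128 bytes of interleaved R G B A data. As in
+// the two-channel case, the registers map to consecutive 16-byte blocks in
+// argument order once the call returns; dst is a hypothetical uchar pointer.
+//
+//     _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
+//     _mm_storeu_si128((__m128i *)dst,        v_r0);
+//     _mm_storeu_si128((__m128i *)(dst + 16), v_r1);
+//     // ... and so on through v_a1 at dst + 112.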
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_g1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_g1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk2);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk3);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk3);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk2);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk3);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk3);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk2);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk2);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk3);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk3);
+}
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                   __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_g1);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_g1);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b0);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b0);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_b1);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_b1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk3);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk4);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk5);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk5);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk3);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk4);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk5);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk5);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk3);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk3);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk4);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk4);
+    v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk5);
+    v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk5);
+}
+
+inline void _mm_deinterleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                   __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i layer1_chunk0 = _mm_unpacklo_epi16(v_r0, v_b0);
+    __m128i layer1_chunk1 = _mm_unpackhi_epi16(v_r0, v_b0);
+    __m128i layer1_chunk2 = _mm_unpacklo_epi16(v_r1, v_b1);
+    __m128i layer1_chunk3 = _mm_unpackhi_epi16(v_r1, v_b1);
+    __m128i layer1_chunk4 = _mm_unpacklo_epi16(v_g0, v_a0);
+    __m128i layer1_chunk5 = _mm_unpackhi_epi16(v_g0, v_a0);
+    __m128i layer1_chunk6 = _mm_unpacklo_epi16(v_g1, v_a1);
+    __m128i layer1_chunk7 = _mm_unpackhi_epi16(v_g1, v_a1);
+
+    __m128i layer2_chunk0 = _mm_unpacklo_epi16(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk1 = _mm_unpackhi_epi16(layer1_chunk0, layer1_chunk4);
+    __m128i layer2_chunk2 = _mm_unpacklo_epi16(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk3 = _mm_unpackhi_epi16(layer1_chunk1, layer1_chunk5);
+    __m128i layer2_chunk4 = _mm_unpacklo_epi16(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk5 = _mm_unpackhi_epi16(layer1_chunk2, layer1_chunk6);
+    __m128i layer2_chunk6 = _mm_unpacklo_epi16(layer1_chunk3, layer1_chunk7);
+    __m128i layer2_chunk7 = _mm_unpackhi_epi16(layer1_chunk3, layer1_chunk7);
+
+    __m128i layer3_chunk0 = _mm_unpacklo_epi16(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk1 = _mm_unpackhi_epi16(layer2_chunk0, layer2_chunk4);
+    __m128i layer3_chunk2 = _mm_unpacklo_epi16(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk3 = _mm_unpackhi_epi16(layer2_chunk1, layer2_chunk5);
+    __m128i layer3_chunk4 = _mm_unpacklo_epi16(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk5 = _mm_unpackhi_epi16(layer2_chunk2, layer2_chunk6);
+    __m128i layer3_chunk6 = _mm_unpacklo_epi16(layer2_chunk3, layer2_chunk7);
+    __m128i layer3_chunk7 = _mm_unpackhi_epi16(layer2_chunk3, layer2_chunk7);
+
+    v_r0 = _mm_unpacklo_epi16(layer3_chunk0, layer3_chunk4);
+    v_r1 = _mm_unpackhi_epi16(layer3_chunk0, layer3_chunk4);
+    v_g0 = _mm_unpacklo_epi16(layer3_chunk1, layer3_chunk5);
+    v_g1 = _mm_unpackhi_epi16(layer3_chunk1, layer3_chunk5);
+    v_b0 = _mm_unpacklo_epi16(layer3_chunk2, layer3_chunk6);
+    v_b1 = _mm_unpackhi_epi16(layer3_chunk2, layer3_chunk6);
+    v_a0 = _mm_unpacklo_epi16(layer3_chunk3, layer3_chunk7);
+    v_a1 = _mm_unpackhi_epi16(layer3_chunk3, layer3_chunk7);
+}
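+
+// A minimal usage sketch (not part of the upstream header): splitting 16
+// interleaved RGBA pixels of ushort data into planes. src is a hypothetical
+// const ushort pointer with 64 valid elements, loaded in argument order:
+//
+//     __m128i v_r0 = _mm_loadu_si128((const __m128i *)src);
+//     __m128i v_r1 = _mm_loadu_si128((const __m128i *)(src + 8));
+//     __m128i v_g0 = _mm_loadu_si128((const __m128i *)(src + 16));
+//     __m128i v_g1 = _mm_loadu_si128((const __m128i *)(src + 24));
+//     __m128i v_b0 = _mm_loadu_si128((const __m128i *)(src + 32));
+//     __m128i v_b1 = _mm_loadu_si128((const __m128i *)(src + 40));
+//     __m128i v_a0 = _mm_loadu_si128((const __m128i *)(src + 48));
+//     __m128i v_a1 = _mm_loadu_si128((const __m128i *)(src + 56));
+//     _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
+//     // v_r0:v_r1 now hold R0..R15, v_g0:v_g1 hold G0..G15, and so on.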
+
+#if CV_SSE4_1
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+}
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
+                                 __m128i & v_g1, __m128i & v_b0, __m128i & v_b1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_g1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+    v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
+}
+
+inline void _mm_interleave_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1,
+                                 __m128i & v_b0, __m128i & v_b1, __m128i & v_a0, __m128i & v_a1)
+{
+    __m128i v_mask = _mm_set1_epi32(0x0000ffff);
+
+    __m128i layer3_chunk0 = _mm_packus_epi32(_mm_and_si128(v_r0, v_mask), _mm_and_si128(v_r1, v_mask));
+    __m128i layer3_chunk4 = _mm_packus_epi32(_mm_srli_epi32(v_r0, 16), _mm_srli_epi32(v_r1, 16));
+    __m128i layer3_chunk1 = _mm_packus_epi32(_mm_and_si128(v_g0, v_mask), _mm_and_si128(v_g1, v_mask));
+    __m128i layer3_chunk5 = _mm_packus_epi32(_mm_srli_epi32(v_g0, 16), _mm_srli_epi32(v_g1, 16));
+    __m128i layer3_chunk2 = _mm_packus_epi32(_mm_and_si128(v_b0, v_mask), _mm_and_si128(v_b1, v_mask));
+    __m128i layer3_chunk6 = _mm_packus_epi32(_mm_srli_epi32(v_b0, 16), _mm_srli_epi32(v_b1, 16));
+    __m128i layer3_chunk3 = _mm_packus_epi32(_mm_and_si128(v_a0, v_mask), _mm_and_si128(v_a1, v_mask));
+    __m128i layer3_chunk7 = _mm_packus_epi32(_mm_srli_epi32(v_a0, 16), _mm_srli_epi32(v_a1, 16));
+
+    __m128i layer2_chunk0 = _mm_packus_epi32(_mm_and_si128(layer3_chunk0, v_mask), _mm_and_si128(layer3_chunk1, v_mask));
+    __m128i layer2_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk0, 16), _mm_srli_epi32(layer3_chunk1, 16));
+    __m128i layer2_chunk1 = _mm_packus_epi32(_mm_and_si128(layer3_chunk2, v_mask), _mm_and_si128(layer3_chunk3, v_mask));
+    __m128i layer2_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk2, 16), _mm_srli_epi32(layer3_chunk3, 16));
+    __m128i layer2_chunk2 = _mm_packus_epi32(_mm_and_si128(layer3_chunk4, v_mask), _mm_and_si128(layer3_chunk5, v_mask));
+    __m128i layer2_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk4, 16), _mm_srli_epi32(layer3_chunk5, 16));
+    __m128i layer2_chunk3 = _mm_packus_epi32(_mm_and_si128(layer3_chunk6, v_mask), _mm_and_si128(layer3_chunk7, v_mask));
+    __m128i layer2_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer3_chunk6, 16), _mm_srli_epi32(layer3_chunk7, 16));
+
+    __m128i layer1_chunk0 = _mm_packus_epi32(_mm_and_si128(layer2_chunk0, v_mask), _mm_and_si128(layer2_chunk1, v_mask));
+    __m128i layer1_chunk4 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk0, 16), _mm_srli_epi32(layer2_chunk1, 16));
+    __m128i layer1_chunk1 = _mm_packus_epi32(_mm_and_si128(layer2_chunk2, v_mask), _mm_and_si128(layer2_chunk3, v_mask));
+    __m128i layer1_chunk5 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk2, 16), _mm_srli_epi32(layer2_chunk3, 16));
+    __m128i layer1_chunk2 = _mm_packus_epi32(_mm_and_si128(layer2_chunk4, v_mask), _mm_and_si128(layer2_chunk5, v_mask));
+    __m128i layer1_chunk6 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk4, 16), _mm_srli_epi32(layer2_chunk5, 16));
+    __m128i layer1_chunk3 = _mm_packus_epi32(_mm_and_si128(layer2_chunk6, v_mask), _mm_and_si128(layer2_chunk7, v_mask));
+    __m128i layer1_chunk7 = _mm_packus_epi32(_mm_srli_epi32(layer2_chunk6, 16), _mm_srli_epi32(layer2_chunk7, 16));
+
+    v_r0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk0, v_mask), _mm_and_si128(layer1_chunk1, v_mask));
+    v_b0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk0, 16), _mm_srli_epi32(layer1_chunk1, 16));
+    v_r1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk2, v_mask), _mm_and_si128(layer1_chunk3, v_mask));
+    v_b1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk2, 16), _mm_srli_epi32(layer1_chunk3, 16));
+    v_g0 = _mm_packus_epi32(_mm_and_si128(layer1_chunk4, v_mask), _mm_and_si128(layer1_chunk5, v_mask));
+    v_a0 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk4, 16), _mm_srli_epi32(layer1_chunk5, 16));
+    v_g1 = _mm_packus_epi32(_mm_and_si128(layer1_chunk6, v_mask), _mm_and_si128(layer1_chunk7, v_mask));
+    v_a1 = _mm_packus_epi32(_mm_srli_epi32(layer1_chunk6, 16), _mm_srli_epi32(layer1_chunk7, 16));
+}
+
+#endif // CV_SSE4_1
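+
+// Note: the _mm_interleave_epi16 helpers above are guarded by CV_SSE4_1
+// because _mm_packus_epi32 (PACKUSDW) was introduced with SSE4.1; the
+// surrounding helpers in this block require only SSE2.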
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g0);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g0);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_g1);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_g1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk2);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk2);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk3);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk3);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk2);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk2);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk3);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk3);
+}
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
+                                __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_g1);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_g1);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b0);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b0);
+    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_b1);
+    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_b1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk3);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk3);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk4);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk4);
+    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk5);
+    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk5);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk3);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk3);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk4);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk4);
+    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk5);
+    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk5);
+}
+
+inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
+                                __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
+{
+    __m128 layer1_chunk0 = _mm_unpacklo_ps(v_r0, v_b0);
+    __m128 layer1_chunk1 = _mm_unpackhi_ps(v_r0, v_b0);
+    __m128 layer1_chunk2 = _mm_unpacklo_ps(v_r1, v_b1);
+    __m128 layer1_chunk3 = _mm_unpackhi_ps(v_r1, v_b1);
+    __m128 layer1_chunk4 = _mm_unpacklo_ps(v_g0, v_a0);
+    __m128 layer1_chunk5 = _mm_unpackhi_ps(v_g0, v_a0);
+    __m128 layer1_chunk6 = _mm_unpacklo_ps(v_g1, v_a1);
+    __m128 layer1_chunk7 = _mm_unpackhi_ps(v_g1, v_a1);
+
+    __m128 layer2_chunk0 = _mm_unpacklo_ps(layer1_chunk0, layer1_chunk4);
+    __m128 layer2_chunk1 = _mm_unpackhi_ps(layer1_chunk0, layer1_chunk4);
+    __m128 layer2_chunk2 = _mm_unpacklo_ps(layer1_chunk1, layer1_chunk5);
+    __m128 layer2_chunk3 = _mm_unpackhi_ps(layer1_chunk1, layer1_chunk5);
+    __m128 layer2_chunk4 = _mm_unpacklo_ps(layer1_chunk2, layer1_chunk6);
+    __m128 layer2_chunk5 = _mm_unpackhi_ps(layer1_chunk2, layer1_chunk6);
+    __m128 layer2_chunk6 = _mm_unpacklo_ps(layer1_chunk3, layer1_chunk7);
+    __m128 layer2_chunk7 = _mm_unpackhi_ps(layer1_chunk3, layer1_chunk7);
+
+    v_r0 = _mm_unpacklo_ps(layer2_chunk0, layer2_chunk4);
+    v_r1 = _mm_unpackhi_ps(layer2_chunk0, layer2_chunk4);
+    v_g0 = _mm_unpacklo_ps(layer2_chunk1, layer2_chunk5);
+    v_g1 = _mm_unpackhi_ps(layer2_chunk1, layer2_chunk5);
+    v_b0 = _mm_unpacklo_ps(layer2_chunk2, layer2_chunk6);
+    v_b1 = _mm_unpackhi_ps(layer2_chunk2, layer2_chunk6);
+    v_a0 = _mm_unpacklo_ps(layer2_chunk3, layer2_chunk7);
+    v_a1 = _mm_unpackhi_ps(layer2_chunk3, layer2_chunk7);
+}
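+
+// A minimal usage sketch (not part of the upstream header): splitting eight
+// packed RGBA float pixels into planes, with src a hypothetical const float
+// pointer holding 32 valid values, loaded in argument order:
+//
+//     __m128 v_r0 = _mm_loadu_ps(src);      __m128 v_r1 = _mm_loadu_ps(src + 4);
+//     __m128 v_g0 = _mm_loadu_ps(src + 8);  __m128 v_g1 = _mm_loadu_ps(src + 12);
+//     __m128 v_b0 = _mm_loadu_ps(src + 16); __m128 v_b1 = _mm_loadu_ps(src + 20);
+//     __m128 v_a0 = _mm_loadu_ps(src + 24); __m128 v_a1 = _mm_loadu_ps(src + 28);
+//     _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
+//     // v_r0 = {R0..R3}, v_r1 = {R4..R7}, v_g0 = {G0..G3}, ...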
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_g0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_g1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+}
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
+                              __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk4 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
+    __m128 layer2_chunk5 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
+    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_g1 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_b0 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
+    v_b1 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
+}
+
+inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
+                              __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
+{
+    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
+
+    __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
+    __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
+    __m128 layer2_chunk1 = _mm_shuffle_ps(v_g0, v_g1, mask_lo);
+    __m128 layer2_chunk5 = _mm_shuffle_ps(v_g0, v_g1, mask_hi);
+    __m128 layer2_chunk2 = _mm_shuffle_ps(v_b0, v_b1, mask_lo);
+    __m128 layer2_chunk6 = _mm_shuffle_ps(v_b0, v_b1, mask_hi);
+    __m128 layer2_chunk3 = _mm_shuffle_ps(v_a0, v_a1, mask_lo);
+    __m128 layer2_chunk7 = _mm_shuffle_ps(v_a0, v_a1, mask_hi);
+
+    __m128 layer1_chunk0 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_lo);
+    __m128 layer1_chunk4 = _mm_shuffle_ps(layer2_chunk0, layer2_chunk1, mask_hi);
+    __m128 layer1_chunk1 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_lo);
+    __m128 layer1_chunk5 = _mm_shuffle_ps(layer2_chunk2, layer2_chunk3, mask_hi);
+    __m128 layer1_chunk2 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_lo);
+    __m128 layer1_chunk6 = _mm_shuffle_ps(layer2_chunk4, layer2_chunk5, mask_hi);
+    __m128 layer1_chunk3 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_lo);
+    __m128 layer1_chunk7 = _mm_shuffle_ps(layer2_chunk6, layer2_chunk7, mask_hi);
+
+    v_r0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_lo);
+    v_b0 = _mm_shuffle_ps(layer1_chunk0, layer1_chunk1, mask_hi);
+    v_r1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_lo);
+    v_b1 = _mm_shuffle_ps(layer1_chunk2, layer1_chunk3, mask_hi);
+    v_g0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_lo);
+    v_a0 = _mm_shuffle_ps(layer1_chunk4, layer1_chunk5, mask_hi);
+    v_g1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_lo);
+    v_a1 = _mm_shuffle_ps(layer1_chunk6, layer1_chunk7, mask_hi);
+}
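+
+// A minimal usage sketch (not part of the upstream header): the inverse of the
+// deinterleave above. After the call the registers map to consecutive memory
+// in argument order, so four float planes are written back as packed RGBA;
+// dst is a hypothetical float pointer with room for 32 values:
+//
+//     _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);
+//     _mm_storeu_ps(dst,      v_r0);  _mm_storeu_ps(dst + 4,  v_r1);
+//     _mm_storeu_ps(dst + 8,  v_g0);  _mm_storeu_ps(dst + 12, v_g1);
+//     _mm_storeu_ps(dst + 16, v_b0);  _mm_storeu_ps(dst + 20, v_b1);
+//     _mm_storeu_ps(dst + 24, v_a0);  _mm_storeu_ps(dst + 28, v_a1);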
+
+#endif // CV_SSE2
+
+//! @}
+
+#endif //OPENCV_CORE_SSE_UTILS_HPP

+ 397 - 0
ThresholdTools/include/opencv2/core/traits.hpp

@@ -0,0 +1,397 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TRAITS_HPP
+#define OPENCV_CORE_TRAITS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+namespace cv
+{
+
+//#define OPENCV_TRAITS_ENABLE_DEPRECATED
+
+//! @addtogroup core_basic
+//! @{
+
+/** @brief Template "trait" class for OpenCV primitive data types.
+
+@note Deprecated. This is replaced by "single purpose" traits: traits::Type and traits::Depth
+
+A primitive OpenCV data type is one of unsigned char, bool, signed char, unsigned short, signed
+short, int, float, double, or a tuple of values of one of these types, where all the values in the
+tuple have the same type. Any primitive type from the list can be defined by an identifier in the
+form CV_\<bit-depth\>{U|S|F}C(\<number_of_channels\>), for example: uchar \~ CV_8UC1, 3-element
+floating-point tuple \~ CV_32FC3, and so on. A universal OpenCV structure that is able to store a
+single instance of such a primitive data type is Vec. Multiple instances of such a type can be
+stored in a std::vector, Mat, Mat_, SparseMat, SparseMat_, or any other container that is able to
+store Vec instances.
+
+The DataType class is basically used to provide a description of such primitive data types without
+adding any fields or methods to the corresponding classes (and it is actually impossible to add
+anything to primitive C/C++ data types). This technique is known in C++ as class traits. It is not
+DataType itself that is used but its specialized versions, such as:
+@code
+    template<> class DataType<uchar>
+    {
+        typedef uchar value_type;
+        typedef int work_type;
+        typedef uchar channel_type;
+        enum { depth = CV_8U, channels = 1, fmt='u', type = CV_8U };
+    };
+    ...
+    template<typename _Tp> class DataType<std::complex<_Tp> >
+    {
+        typedef std::complex<_Tp> value_type;
+        typedef std::complex<_Tp> work_type;
+        typedef _Tp channel_type;
+        // DataDepth is another helper trait class
+        enum { depth = DataDepth<_Tp>::value, channels=2,
+            fmt=(channels-1)*256+DataDepth<_Tp>::fmt,
+            type=CV_MAKETYPE(depth, channels) };
+    };
+    ...
+@endcode
+The main purpose of this class is to convert compile-time type information to an
+OpenCV-compatible data type identifier, for example:
+@code
+    // allocates a 30x40 floating-point matrix
+    Mat A(30, 40, DataType<float>::type);
+
+    Mat B = Mat_<std::complex<double> >(3, 3);
+    // the statement below will print 6, 2, that is, depth == CV_64F and channels == 2
+    cout << B.depth() << ", " << B.channels() << endl;
+@endcode
+So, such traits are used to tell OpenCV which data type you are working with, even if such a type is
+not native to OpenCV. For example, the matrix B initialization above compiles because OpenCV
+defines the proper specialized template class DataType\<complex\<_Tp\> \>. This mechanism is also
+useful (and used in OpenCV this way) for implementing generic algorithms.
+
+@note Default values were dropped to stop confusing developers about the use of unsupported types (see #7599)
+*/
+template<typename _Tp> class DataType
+{
+public:
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+    typedef _Tp         value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 1,
+           depth        = -1,
+           channels     = 1,
+           fmt          = 0,
+           type = CV_MAKETYPE(depth, channels)
+         };
+#endif
+};
+
+template<> class DataType<bool>
+{
+public:
+    typedef bool        value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8U,
+           channels     = 1,
+           fmt          = (int)'u',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<uchar>
+{
+public:
+    typedef uchar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8U,
+           channels     = 1,
+           fmt          = (int)'u',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<schar>
+{
+public:
+    typedef schar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8S,
+           channels     = 1,
+           fmt          = (int)'c',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<char>
+{
+public:
+    typedef schar       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_8S,
+           channels     = 1,
+           fmt          = (int)'c',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<ushort>
+{
+public:
+    typedef ushort      value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_16U,
+           channels     = 1,
+           fmt          = (int)'w',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<short>
+{
+public:
+    typedef short       value_type;
+    typedef int         work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_16S,
+           channels     = 1,
+           fmt          = (int)'s',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<int>
+{
+public:
+    typedef int         value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_32S,
+           channels     = 1,
+           fmt          = (int)'i',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<float>
+{
+public:
+    typedef float       value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_32F,
+           channels     = 1,
+           fmt          = (int)'f',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+template<> class DataType<double>
+{
+public:
+    typedef double      value_type;
+    typedef value_type  work_type;
+    typedef value_type  channel_type;
+    typedef value_type  vec_type;
+    enum { generic_type = 0,
+           depth        = CV_64F,
+           channels     = 1,
+           fmt          = (int)'d',
+           type         = CV_MAKETYPE(depth, channels)
+         };
+};
+
+
+/** @brief A helper class for cv::DataType
+
+The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
+the DataDepth<T>::value constant.
+*/
+template<typename _Tp> class DataDepth
+{
+public:
+    enum
+    {
+        value = DataType<_Tp>::depth,
+        fmt   = DataType<_Tp>::fmt
+    };
+};
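+
+// For example (a sketch; both constants follow from the specializations above):
+//
+//     int d = DataDepth<float>::value;  // CV_32F
+//     int f = DataDepth<uchar>::fmt;    // 'u'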
+
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+
+template<int _depth> class TypeDepth
+{
+#ifdef OPENCV_TRAITS_ENABLE_LEGACY_DEFAULTS
+    enum { depth = CV_USRTYPE1 };
+    typedef void value_type;
+#endif
+};
+
+template<> class TypeDepth<CV_8U>
+{
+    enum { depth = CV_8U };
+    typedef uchar value_type;
+};
+
+template<> class TypeDepth<CV_8S>
+{
+    enum { depth = CV_8S };
+    typedef schar value_type;
+};
+
+template<> class TypeDepth<CV_16U>
+{
+    enum { depth = CV_16U };
+    typedef ushort value_type;
+};
+
+template<> class TypeDepth<CV_16S>
+{
+    enum { depth = CV_16S };
+    typedef short value_type;
+};
+
+template<> class TypeDepth<CV_32S>
+{
+    enum { depth = CV_32S };
+    typedef int value_type;
+};
+
+template<> class TypeDepth<CV_32F>
+{
+    enum { depth = CV_32F };
+    typedef float value_type;
+};
+
+template<> class TypeDepth<CV_64F>
+{
+    enum { depth = CV_64F };
+    typedef double value_type;
+};
+
+#endif
+
+//! @}
+
+namespace traits {
+
+namespace internal {
+#define CV_CREATE_MEMBER_CHECK(X) \
+template<typename T> class CheckMember_##X { \
+    struct Fallback { int X; }; \
+    struct Derived : T, Fallback { }; \
+    template<typename U, U> struct Check; \
+    typedef char CV_NO[1]; \
+    typedef char CV_YES[2]; \
+    template<typename U> static CV_NO & func(Check<int Fallback::*, &U::X> *); \
+    template<typename U> static CV_YES & func(...); \
+public: \
+    typedef CheckMember_##X type; \
+    enum { value = sizeof(func<Derived>(0)) == sizeof(CV_YES) }; \
+};
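+
+// How the check works: CheckMember_X<T>::value is 1 exactly when T declares
+// (or inherits) a member named X. If T has X, &U::X is ambiguous between T::X
+// and Fallback::X, the Check overload is rejected by SFINAE and the CV_YES
+// overload is picked; if T lacks X, &U::X is unambiguously Fallback::X and the
+// CV_NO overload wins.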
+
+CV_CREATE_MEMBER_CHECK(fmt)
+CV_CREATE_MEMBER_CHECK(type)
+
+} // namespace internal
+
+
+template<typename T>
+struct Depth
+{ enum { value = DataType<T>::depth }; };
+
+template<typename T>
+struct Type
+{ enum { value = DataType<T>::type }; };
+
+/** Similar to traits::Type<T> but has value = -1 in case of unknown type (instead of compiler error) */
+template<typename T, bool available = internal::CheckMember_type< DataType<T> >::value >
+struct SafeType {};
+
+template<typename T>
+struct SafeType<T, false>
+{ enum { value = -1 }; };
+
+template<typename T>
+struct SafeType<T, true>
+{ enum { value = Type<T>::value }; };
+
+
+template<typename T, bool available = internal::CheckMember_fmt< DataType<T> >::value >
+struct SafeFmt {};
+
+template<typename T>
+struct SafeFmt<T, false>
+{ enum { fmt = 0 }; };
+
+template<typename T>
+struct SafeFmt<T, true>
+{ enum { fmt = DataType<T>::fmt }; };
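+
+// A short sketch of the intended behavior, assuming OPENCV_TRAITS_ENABLE_DEPRECATED
+// is not defined and Foo is a hypothetical type with no DataType specialization:
+//
+//     struct Foo {};
+//     int t1 = cv::traits::SafeType<float>::value; // CV_32FC1
+//     int t2 = cv::traits::SafeType<Foo>::value;   // -1: DataType<Foo> has no type member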
+
+
+} // namespace
+
+} // cv
+
+#endif // OPENCV_CORE_TRAITS_HPP

+ 2372 - 0
ThresholdTools/include/opencv2/core/types.hpp

@@ -0,0 +1,2372 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_HPP
+#define OPENCV_CORE_TYPES_HPP
+
+#ifndef __cplusplus
+#  error types.hpp header must be compiled as C++
+#endif
+
+#include <climits>
+#include <cfloat>
+#include <vector>
+#include <limits>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/matx.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_basic
+//! @{
+
+//////////////////////////////// Complex //////////////////////////////
+
+/** @brief  A complex number class.
+
+  The template class is similar to and compatible with std::complex; however, it provides slightly
+  more convenient access to the real and imaginary parts through simple field access, as opposed
+  to std::complex::real() and std::complex::imag().
+*/
+template<typename _Tp> class Complex
+{
+public:
+
+    //! default constructor
+    Complex();
+    Complex( _Tp _re, _Tp _im = 0 );
+
+    //! conversion to another data type
+    template<typename T2> operator Complex<T2>() const;
+    //! conjugation
+    Complex conj() const;
+
+    _Tp re, im; //!< the real and the imaginary parts
+};
+
+typedef Complex<float> Complexf;
+typedef Complex<double> Complexd;
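+
+// A minimal usage sketch:
+//
+//     Complexf z(1.f, -2.f);
+//     Complexf c = z.conj();   // c.re == 1.f, c.im == 2.f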
+
+template<typename _Tp> class DataType< Complex<_Tp> >
+{
+public:
+    typedef Complex<_Tp> value_type;
+    typedef value_type   work_type;
+    typedef _Tp          channel_type;
+
+    enum { generic_type = 0,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+    };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Complex<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Complex<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Point_ ////////////////////////////////
+
+/** @brief Template class for 2D points specified by their coordinates `x` and `y`.
+
+An instance of the class is interchangeable with the C structures CvPoint and CvPoint2D32f. There is
+also a cast operator to convert point coordinates to the specified type. The conversion from
+floating-point coordinates to integer coordinates is done by rounding each coordinate. Besides the
+class members listed in the declaration above, the following operations on points are implemented:
+@code
+    pt1 = pt2 + pt3;
+    pt1 = pt2 - pt3;
+    pt1 = pt2 * a;
+    pt1 = a * pt2;
+    pt1 = pt2 / a;
+    pt1 += pt2;
+    pt1 -= pt2;
+    pt1 *= a;
+    pt1 /= a;
+    double value = norm(pt); // L2 norm
+    pt1 == pt2;
+    pt1 != pt2;
+@endcode
+For your convenience, the following type aliases are defined:
+@code
+    typedef Point_<int> Point2i;
+    typedef Point2i Point;
+    typedef Point_<float> Point2f;
+    typedef Point_<double> Point2d;
+@endcode
+Example:
+@code
+    Point2f a(0.3f, 0.f), b(0.f, 0.4f);
+    Point pt = (a + b)*10.f;
+    cout << pt.x << ", " << pt.y << endl;
+@endcode
+*/
+template<typename _Tp> class Point_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Point_();
+    Point_(_Tp _x, _Tp _y);
+    Point_(const Point_& pt);
+    Point_(const Size_<_Tp>& sz);
+    Point_(const Vec<_Tp, 2>& v);
+
+    Point_& operator = (const Point_& pt);
+    //! conversion to another data type
+    template<typename _Tp2> operator Point_<_Tp2>() const;
+
+    //! conversion to cv::Vec<>
+    operator Vec<_Tp, 2>() const;
+
+    //! dot product
+    _Tp dot(const Point_& pt) const;
+    //! dot product computed in double-precision arithmetic
+    double ddot(const Point_& pt) const;
+    //! cross-product
+    double cross(const Point_& pt) const;
+    //! checks whether the point is inside the specified rectangle
+    bool inside(const Rect_<_Tp>& r) const;
+    _Tp x; //!< x coordinate of the point
+    _Tp y; //!< y coordinate of the point
+};
+
+typedef Point_<int> Point2i;
+typedef Point_<int64> Point2l;
+typedef Point_<float> Point2f;
+typedef Point_<double> Point2d;
+typedef Point2i Point;
+
+template<typename _Tp> class DataType< Point_<_Tp> >
+{
+public:
+    typedef Point_<_Tp>                               value_type;
+    typedef Point_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                       channel_type;
+
+    enum { generic_type = 0,
+           channels     = 2,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Point_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Point3_ ////////////////////////////////
+
+/** @brief Template class for 3D points specified by its coordinates `x`, `y` and `z`.
+
+An instance of the class is interchangeable with the C structure CvPoint3D32f. Similarly to
+Point_ , the coordinates of 3D points can be converted to another type. The vector arithmetic and
+comparison operations are also supported.
+
+The following Point3_\<\> aliases are available:
+@code
+    typedef Point3_<int> Point3i;
+    typedef Point3_<float> Point3f;
+    typedef Point3_<double> Point3d;
+@endcode
+@see cv::Point3i, cv::Point3f and cv::Point3d
+*/
+template<typename _Tp> class Point3_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Point3_();
+    Point3_(_Tp _x, _Tp _y, _Tp _z);
+    Point3_(const Point3_& pt);
+    explicit Point3_(const Point_<_Tp>& pt);
+    Point3_(const Vec<_Tp, 3>& v);
+
+    Point3_& operator = (const Point3_& pt);
+    //! conversion to another data type
+    template<typename _Tp2> operator Point3_<_Tp2>() const;
+    //! conversion to cv::Vec<>
+#if OPENCV_ABI_COMPATIBILITY > 300
+    template<typename _Tp2> operator Vec<_Tp2, 3>() const;
+#else
+    operator Vec<_Tp, 3>() const;
+#endif
+
+    //! dot product
+    _Tp dot(const Point3_& pt) const;
+    //! dot product computed in double-precision arithmetic
+    double ddot(const Point3_& pt) const;
+    //! cross product of the two 3D points
+    Point3_ cross(const Point3_& pt) const;
+    _Tp x; //!< x coordinate of the 3D point
+    _Tp y; //!< y coordinate of the 3D point
+    _Tp z; //!< z coordinate of the 3D point
+};
+
+typedef Point3_<int> Point3i;
+typedef Point3_<float> Point3f;
+typedef Point3_<double> Point3d;
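+
+// A minimal usage sketch, using the unit axes:
+//
+//     Point3f ex(1, 0, 0), ey(0, 1, 0);
+//     Point3f ez = ex.cross(ey);   // (0, 0, 1)
+//     float d = ex.dot(ey);        // 0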
+
+template<typename _Tp> class DataType< Point3_<_Tp> >
+{
+public:
+    typedef Point3_<_Tp>                               value_type;
+    typedef Point3_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                        channel_type;
+
+    enum { generic_type = 0,
+           channels     = 3,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Point3_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Point3_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 3) }; };
+} // namespace
+
+//////////////////////////////// Size_ ////////////////////////////////
+
+/** @brief Template class for specifying the size of an image or rectangle.
+
+The class includes two members called width and height. The structure can be converted to and from
+the old OpenCV structures CvSize and CvSize2D32f . The same set of arithmetic and comparison
+operations as for Point_ is available.
+
+OpenCV defines the following Size_\<\> aliases:
+@code
+    typedef Size_<int> Size2i;
+    typedef Size2i Size;
+    typedef Size_<float> Size2f;
+@endcode
+*/
+template<typename _Tp> class Size_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Size_();
+    Size_(_Tp _width, _Tp _height);
+    Size_(const Size_& sz);
+    Size_(const Point_<_Tp>& pt);
+
+    Size_& operator = (const Size_& sz);
+    //! the area (width*height)
+    _Tp area() const;
+    //! true if empty
+    bool empty() const;
+
+    //! conversion to another data type
+    template<typename _Tp2> operator Size_<_Tp2>() const;
+
+    _Tp width; //!< the width
+    _Tp height; //!< the height
+};
+
+typedef Size_<int> Size2i;
+typedef Size_<int64> Size2l;
+typedef Size_<float> Size2f;
+typedef Size_<double> Size2d;
+typedef Size2i Size;
+
+template<typename _Tp> class DataType< Size_<_Tp> >
+{
+public:
+    typedef Size_<_Tp>                               value_type;
+    typedef Size_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                      channel_type;
+
+    enum { generic_type = 0,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Size_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Size_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 2) }; };
+} // namespace
+
+//////////////////////////////// Rect_ ////////////////////////////////
+
+/** @brief Template class for 2D rectangles
+
+described by the following parameters:
+-   Coordinates of the top-left corner. This is the default interpretation of Rect_::x and Rect_::y
+    in OpenCV, though your algorithms may count x and y from the bottom-left corner.
+-   Rectangle width and height.
+
+OpenCV typically assumes that the top and left boundary of the rectangle are inclusive, while the
+right and bottom boundaries are not. For example, the method Rect_::contains returns true if
+
+\f[x  \leq pt.x < x+width,
+      y  \leq pt.y < y+height\f]
+
+Virtually every loop over an image ROI in OpenCV (where ROI is specified by Rect_\<int\> ) is
+implemented as:
+@code
+    for(int y = roi.y; y < roi.y + roi.height; y++)
+        for(int x = roi.x; x < roi.x + roi.width; x++)
+        {
+            // ...
+        }
+@endcode
+In addition to the class members, the following operations on rectangles are implemented:
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{point}\f$ (shifting a rectangle by a certain offset)
+-   \f$\texttt{rect} = \texttt{rect} \pm \texttt{size}\f$ (expanding or shrinking a rectangle by a
+    certain amount)
+-   rect += point, rect -= point, rect += size, rect -= size (augmenting operations)
+-   rect = rect1 & rect2 (rectangle intersection)
+-   rect = rect1 | rect2 (minimum area rectangle containing rect1 and rect2 )
+-   rect &= rect1, rect |= rect1 (and the corresponding augmenting operations)
+-   rect == rect1, rect != rect1 (rectangle comparison)
+
+This is an example of how a partial ordering on rectangles can be established (rect1 \f$\subseteq\f$
+rect2):
+@code
+    template<typename _Tp> inline bool
+    operator <= (const Rect_<_Tp>& r1, const Rect_<_Tp>& r2)
+    {
+        return (r1 & r2) == r1;
+    }
+@endcode
+For your convenience, the Rect_\<\> alias is available: cv::Rect
+*/
+template<typename _Tp> class Rect_
+{
+public:
+    typedef _Tp value_type;
+
+    //! default constructor
+    Rect_();
+    Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height);
+    Rect_(const Rect_& r);
+    Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz);
+    Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2);
+
+    Rect_& operator = ( const Rect_& r );
+    //! the top-left corner
+    Point_<_Tp> tl() const;
+    //! the bottom-right corner
+    Point_<_Tp> br() const;
+
+    //! size (width, height) of the rectangle
+    Size_<_Tp> size() const;
+    //! area (width*height) of the rectangle
+    _Tp area() const;
+    //! true if empty
+    bool empty() const;
+
+    //! conversion to another data type
+    template<typename _Tp2> operator Rect_<_Tp2>() const;
+
+    //! checks whether the rectangle contains the point
+    bool contains(const Point_<_Tp>& pt) const;
+
+    _Tp x; //!< x coordinate of the top-left corner
+    _Tp y; //!< y coordinate of the top-left corner
+    _Tp width; //!< width of the rectangle
+    _Tp height; //!< height of the rectangle
+};
+
+typedef Rect_<int> Rect2i;
+typedef Rect_<float> Rect2f;
+typedef Rect_<double> Rect2d;
+typedef Rect2i Rect;
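+
+/* Editor's example: a minimal sketch of the rectangle operations listed in the
+   class description above (assumes the cv namespace; the values in the
+   comments follow from the definitions in this header):
+@code
+    Rect a(0, 0, 100, 100), b(50, 50, 100, 100);
+    Rect isect = a & b;                    // (50, 50, 50, 50)
+    Rect hull  = a | b;                    // (0, 0, 150, 150)
+    a += Point(10, 10);                    // shift: a becomes (10, 10, 100, 100)
+    a -= Size(20, 20);                     // shrink: a becomes (10, 10, 80, 80)
+    bool hit = b.contains(Point(60, 60));  // true
+@endcode
+*/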
+
+template<typename _Tp> class DataType< Rect_<_Tp> >
+{
+public:
+    typedef Rect_<_Tp>                               value_type;
+    typedef Rect_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                      channel_type;
+
+    enum { generic_type = 0,
+           channels     = 4,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Rect_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Rect_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
+
+///////////////////////////// RotatedRect /////////////////////////////
+
+/** @brief The class represents rotated (i.e. not up-right) rectangles on a plane.
+
+Each rectangle is specified by the center point (mass center), the length of each side (represented
+by the #Size2f structure), and the rotation angle in degrees.
+
+The sample below demonstrates how to use RotatedRect:
+@snippet snippets/core_various.cpp RotatedRect_demo
+![image](pics/rotatedrect.png)
+
+@sa CamShift, fitEllipse, minAreaRect, CvBox2D
+*/
+class CV_EXPORTS RotatedRect
+{
+public:
+    //! default constructor
+    RotatedRect();
+    /** full constructor
+    @param center The rectangle mass center.
+    @param size Width and height of the rectangle.
+    @param angle The rotation angle in a clockwise direction. When the angle is 0, 90, 180, 270 etc.,
+    the rectangle becomes an up-right rectangle.
+    */
+    RotatedRect(const Point2f& center, const Size2f& size, float angle);
+    /**
+    Any 3 end points of the RotatedRect, given in order (either clockwise or
+    anticlockwise).
+     */
+    RotatedRect(const Point2f& point1, const Point2f& point2, const Point2f& point3);
+
+    /** returns 4 vertices of the rectangle
+    @param pts The points array for storing rectangle vertices. The order is bottomLeft, topLeft, topRight, bottomRight.
+    */
+    void points(Point2f pts[]) const;
+    //! returns the minimal up-right integer rectangle containing the rotated rectangle
+    Rect boundingRect() const;
+    //! returns the minimal (exact) floating point rectangle containing the rotated rectangle, not intended for use with images
+    Rect_<float> boundingRect2f() const;
+    //! returns the rectangle mass center
+    Point2f center;
+    //! returns width and height of the rectangle
+    Size2f size;
+    //! returns the rotation angle. When the angle is 0, 90, 180, 270 etc., the rectangle becomes an up-right rectangle.
+    float angle;
+};
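+
+/* Editor's example: a short sketch of typical RotatedRect use (assumes the cv
+   namespace):
+@code
+    RotatedRect rr(Point2f(100.f, 100.f), Size2f(100.f, 50.f), 30.f);
+    Point2f corners[4];
+    rr.points(corners);             // bottomLeft, topLeft, topRight, bottomRight
+    Rect bbox = rr.boundingRect();  // minimal up-right integer bounding box
+@endcode
+*/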
+
+template<> class DataType< RotatedRect >
+{
+public:
+    typedef RotatedRect  value_type;
+    typedef value_type   work_type;
+    typedef float        channel_type;
+
+    enum { generic_type = 0,
+           channels     = (int)sizeof(value_type)/sizeof(channel_type), // 5
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< RotatedRect > { enum { value = Depth<float>::value }; };
+template<>
+struct Type< RotatedRect > { enum { value = CV_MAKETYPE(Depth<float>::value, (int)sizeof(RotatedRect)/sizeof(float)) }; };
+} // namespace
+
+
+//////////////////////////////// Range /////////////////////////////////
+
+/** @brief Template class specifying a continuous subsequence (slice) of a sequence.
+
+The class is used to specify a row or a column span in a matrix ( Mat ) and for many other purposes.
+Range(a,b) is basically the same as a:b in Matlab or range(a, b) in Python. As in Python, start is an
+inclusive left boundary of the range and end is an exclusive right boundary of the range. Such a
+half-open interval is usually denoted as \f$[start,end)\f$ .
+
+The static method Range::all() returns a special variable that means "the whole sequence" or "the
+whole range", just like " : " in Matlab or " ... " in Python. All the methods and functions in
+OpenCV that take Range support this special Range::all() value. But, of course, in the case of your
+own custom processing, you will probably have to check and handle it explicitly:
+@code
+    void my_function(..., const Range& r, ....)
+    {
+        if(r == Range::all()) {
+            // process all the data
+        }
+        else {
+            // process [r.start, r.end)
+        }
+    }
+@endcode
+*/
+class CV_EXPORTS Range
+{
+public:
+    Range();
+    Range(int _start, int _end);
+    int size() const;
+    bool empty() const;
+    static Range all();
+
+    int start, end;
+};
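+
+/* Editor's example: a minimal sketch of the half-open Range semantics (assumes
+   the cv namespace):
+@code
+    Range r(2, 7);               // covers elements 2, 3, 4, 5, 6
+    int  n = r.size();           // 5
+    bool e = r.empty();          // false
+    Range whole = Range::all();  // "the whole sequence"
+@endcode
+*/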
+
+template<> class DataType<Range>
+{
+public:
+    typedef Range      value_type;
+    typedef value_type work_type;
+    typedef int        channel_type;
+
+    enum { generic_type = 0,
+           channels     = 2,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< Range > { enum { value = Depth<int>::value }; };
+template<>
+struct Type< Range > { enum { value = CV_MAKETYPE(Depth<int>::value, 2) }; };
+} // namespace
+
+
+//////////////////////////////// Scalar_ ///////////////////////////////
+
+/** @brief Template class for a 4-element vector derived from Vec.
+
+Being derived from Vec\<_Tp, 4\>, Scalar\_ and Scalar can be used just as typical 4-element
+vectors. In addition, they can be converted to/from CvScalar . The type Scalar is widely used in
+OpenCV to pass pixel values.
+*/
+template<typename _Tp> class Scalar_ : public Vec<_Tp, 4>
+{
+public:
+    //! default constructor
+    Scalar_();
+    Scalar_(_Tp v0, _Tp v1, _Tp v2=0, _Tp v3=0);
+    Scalar_(_Tp v0);
+
+    template<typename _Tp2, int cn>
+    Scalar_(const Vec<_Tp2, cn>& v);
+
+    //! returns a scalar with all elements set to v0
+    static Scalar_<_Tp> all(_Tp v0);
+
+    //! conversion to another data type
+    template<typename T2> operator Scalar_<T2>() const;
+
+    //! per-element product
+    Scalar_<_Tp> mul(const Scalar_<_Tp>& a, double scale=1) const;
+
+    //! returns (v0, -v1, -v2, -v3)
+    Scalar_<_Tp> conj() const;
+
+    //! returns true iff v1 == v2 == v3 == 0
+    bool isReal() const;
+};
+
+typedef Scalar_<double> Scalar;
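+
+/* Editor's example: a sketch of Scalar as a pixel value and as a 4-element
+   vector (assumes the cv namespace):
+@code
+    Scalar green(0, 255, 0);              // B, G, R; val[3] defaults to 0
+    Scalar ones = Scalar::all(1);         // (1, 1, 1, 1)
+    Scalar half = green.mul(ones, 0.5);   // per-element product: (0, 127.5, 0, 0)
+    bool real = green.isReal();           // false, since val[1] != 0
+@endcode
+*/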
+
+template<typename _Tp> class DataType< Scalar_<_Tp> >
+{
+public:
+    typedef Scalar_<_Tp>                               value_type;
+    typedef Scalar_<typename DataType<_Tp>::work_type> work_type;
+    typedef _Tp                                        channel_type;
+
+    enum { generic_type = 0,
+           channels     = 4,
+           fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<typename _Tp>
+struct Depth< Scalar_<_Tp> > { enum { value = Depth<_Tp>::value }; };
+template<typename _Tp>
+struct Type< Scalar_<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 4) }; };
+} // namespace
+
+
+/////////////////////////////// KeyPoint ////////////////////////////////
+
+/** @brief Data structure for salient point detectors.
+
+The class instance stores a keypoint, i.e. a point feature found by one of many available keypoint
+detectors, such as the Harris corner detector, #FAST, %StarDetector, %SURF, %SIFT, etc.
+
+The keypoint is characterized by the 2D position, scale (proportional to the diameter of the
+neighborhood that needs to be taken into account), orientation and some other parameters. The
+keypoint neighborhood is then analyzed by another algorithm that builds a descriptor (usually
+represented as a feature vector). The keypoints representing the same object in different images
+can then be matched using %KDTree or another method.
+*/
+class CV_EXPORTS_W_SIMPLE KeyPoint
+{
+public:
+    //! the default constructor
+    CV_WRAP KeyPoint();
+    /**
+    @param _pt x & y coordinates of the keypoint
+    @param _size keypoint diameter
+    @param _angle keypoint orientation
+    @param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param _octave pyramid octave in which the keypoint has been detected
+    @param _class_id object id
+     */
+    KeyPoint(Point2f _pt, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
+    /**
+    @param x x-coordinate of the keypoint
+    @param y y-coordinate of the keypoint
+    @param _size keypoint diameter
+    @param _angle keypoint orientation
+    @param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param _octave pyramid octave in which the keypoint has been detected
+    @param _class_id object id
+     */
+    CV_WRAP KeyPoint(float x, float y, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
+
+    size_t hash() const;
+
+    /**
+    This method converts a vector of keypoints to a vector of points or the reverse, where each
+    keypoint is assigned the same size and the same orientation.
+
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypointIndexes Array of indexes of keypoints to be converted to points. (Acts like a mask to
+    convert only specified keypoints)
+    */
+    CV_WRAP static void convert(const std::vector<KeyPoint>& keypoints,
+                                CV_OUT std::vector<Point2f>& points2f,
+                                const std::vector<int>& keypointIndexes=std::vector<int>());
+    /** @overload
+    @param points2f Array of (x,y) coordinates of each keypoint
+    @param keypoints Keypoints obtained from any feature detection algorithm like SIFT/SURF/ORB
+    @param size keypoint diameter
+    @param response keypoint detector response on the keypoint (that is, strength of the keypoint)
+    @param octave pyramid octave in which the keypoint has been detected
+    @param class_id object id
+    */
+    CV_WRAP static void convert(const std::vector<Point2f>& points2f,
+                                CV_OUT std::vector<KeyPoint>& keypoints,
+                                float size=1, float response=1, int octave=0, int class_id=-1);
+
+    /**
+    This method computes overlap for a pair of keypoints. Overlap is the ratio between the area of the
+    keypoint regions' intersection and the area of the keypoint regions' union (considering a keypoint
+    region as a circle). If they don't overlap, we get zero. If they coincide at the same location with
+    the same size, we get 1.
+    @param kp1 First keypoint
+    @param kp2 Second keypoint
+    */
+    CV_WRAP static float overlap(const KeyPoint& kp1, const KeyPoint& kp2);
+
+    CV_PROP_RW Point2f pt; //!< coordinates of the keypoint
+    CV_PROP_RW float size; //!< diameter of the meaningful keypoint neighborhood
+    CV_PROP_RW float angle; //!< computed orientation of the keypoint (-1 if not applicable);
+                            //!< it is measured in degrees in [0,360), relative to the
+                            //!< image coordinate system, i.e. clockwise.
+    CV_PROP_RW float response; //!< the response by which the strongest keypoints have been selected. Can be used for further sorting or subsampling
+    CV_PROP_RW int octave; //!< octave (pyramid layer) from which the keypoint has been extracted
+    CV_PROP_RW int class_id; //!< object class (if the keypoints need to be clustered by an object they belong to)
+};
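+
+/* Editor's example: a sketch of KeyPoint::convert and KeyPoint::overlap; the
+   keypoint values are made up for illustration (assumes the cv namespace):
+@code
+    std::vector<KeyPoint> kps;
+    kps.push_back(KeyPoint(10.f, 20.f, 3.f));   // x, y, diameter
+    kps.push_back(KeyPoint(40.f, 50.f, 3.f));
+
+    std::vector<Point2f> pts;
+    KeyPoint::convert(kps, pts);                // pts = {(10,20), (40,50)}
+
+    float ov = KeyPoint::overlap(kps[0], kps[1]); // 0.f: the regions are disjoint
+@endcode
+*/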
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+template<> class DataType<KeyPoint>
+{
+public:
+    typedef KeyPoint      value_type;
+    typedef float         work_type;
+    typedef float         channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 7
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+#endif
+
+
+//////////////////////////////// DMatch /////////////////////////////////
+
+/** @brief Class for matching keypoint descriptors
+
+It stores the query descriptor index, train descriptor index, train image index, and the distance
+between descriptors.
+*/
+class CV_EXPORTS_W_SIMPLE DMatch
+{
+public:
+    CV_WRAP DMatch();
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, float _distance);
+    CV_WRAP DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance);
+
+    CV_PROP_RW int queryIdx; //!< query descriptor index
+    CV_PROP_RW int trainIdx; //!< train descriptor index
+    CV_PROP_RW int imgIdx;   //!< train image index
+
+    CV_PROP_RW float distance;
+
+    // less is better
+    bool operator<(const DMatch &m) const;
+};
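+
+/* Editor's example: ranking matches with the operator< defined above ("less is
+   better"); assumes the cv namespace and <algorithm>:
+@code
+    std::vector<DMatch> matches;
+    matches.push_back(DMatch(0, 3, 0.9f));     // queryIdx, trainIdx, distance
+    matches.push_back(DMatch(1, 7, 0.2f));
+    std::sort(matches.begin(), matches.end()); // best (smallest distance) first
+@endcode
+*/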
+
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+template<> class DataType<DMatch>
+{
+public:
+    typedef DMatch      value_type;
+    typedef int         work_type;
+    typedef int         channel_type;
+
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 4
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels)
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+#endif
+
+
+///////////////////////////// TermCriteria //////////////////////////////
+
+/** @brief The class defining termination criteria for iterative algorithms.
+
+You can initialize it with the default constructor and then override any parameters, or fully
+initialize it with the advanced variant of the constructor.
+*/
+class CV_EXPORTS TermCriteria
+{
+public:
+    /**
+      Criteria type, can be one of: COUNT, EPS or COUNT + EPS
+    */
+    enum Type
+    {
+        COUNT=1, //!< the maximum number of iterations or elements to compute
+        MAX_ITER=COUNT, //!< ditto
+        EPS=2 //!< the desired accuracy or change in parameters at which the iterative algorithm stops
+    };
+
+    //! default constructor
+    TermCriteria();
+    /**
+    @param type The type of termination criteria, one of TermCriteria::Type
+    @param maxCount The maximum number of iterations or elements to compute.
+    @param epsilon The desired accuracy or change in parameters at which the iterative algorithm stops.
+    */
+    TermCriteria(int type, int maxCount, double epsilon);
+
+    int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
+    int maxCount; //!< the maximum number of iterations/elements
+    double epsilon; //!< the desired accuracy
+};
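+
+/* Editor's example: the common "iteration count OR accuracy" criteria used by
+   iterative algorithms such as cv::kmeans (a sketch, assumes the cv namespace):
+@code
+    TermCriteria tc(TermCriteria::COUNT + TermCriteria::EPS, 30, 1e-3);
+    // stop after 30 iterations, or once the parameter change drops below 1e-3,
+    // whichever happens first
+@endcode
+*/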
+
+
+//! @} core_basic
+
+///////////////////////// raster image moments //////////////////////////
+
+//! @addtogroup imgproc_shape
+//! @{
+
+/** @brief struct returned by cv::moments
+
+The spatial moments \f$\texttt{Moments::m}_{ji}\f$ are computed as:
+
+\f[\texttt{m} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot x^j  \cdot y^i \right )\f]
+
+The central moments \f$\texttt{Moments::mu}_{ji}\f$ are computed as:
+
+\f[\texttt{mu} _{ji}= \sum _{x,y}  \left ( \texttt{array} (x,y)  \cdot (x -  \bar{x} )^j  \cdot (y -  \bar{y} )^i \right )\f]
+
+where \f$(\bar{x}, \bar{y})\f$ is the mass center:
+
+\f[\bar{x} = \frac{\texttt{m}_{10}}{\texttt{m}_{00}} , \; \bar{y} = \frac{\texttt{m}_{01}}{\texttt{m}_{00}}\f]
+
+The normalized central moments \f$\texttt{Moments::nu}_{ji}\f$ are computed as:
+
+\f[\texttt{nu} _{ji}= \frac{\texttt{mu}_{ji}}{\texttt{m}_{00}^{(i+j)/2+1}} .\f]
+
+@note
+\f$\texttt{mu}_{00}=\texttt{m}_{00}\f$, \f$\texttt{nu}_{00}=1\f$,
+\f$\texttt{nu}_{10}=\texttt{mu}_{10}=\texttt{mu}_{01}=\texttt{nu}_{01}=0\f$, hence these values are not
+stored.
+
+The moments of a contour are defined in the same way but computed using Green's formula (see
+<http://en.wikipedia.org/wiki/Green_theorem>). So, due to the limited raster resolution, the moments
+computed for a contour are slightly different from the moments computed for the same rasterized
+contour.
+
+@note
+Since the contour moments are computed using Green's formula, you may get seemingly odd results for
+contours with self-intersections, e.g. a zero area (m00) for butterfly-shaped contours.
+ */
+class CV_EXPORTS_W_MAP Moments
+{
+public:
+    //! the default constructor
+    Moments();
+    //! the full constructor
+    Moments(double m00, double m10, double m01, double m20, double m11,
+            double m02, double m30, double m21, double m12, double m03 );
+    ////! the conversion from CvMoments
+    //Moments( const CvMoments& moments );
+    ////! the conversion to CvMoments
+    //operator CvMoments() const;
+
+    //! @name spatial moments
+    //! @{
+    CV_PROP_RW double  m00, m10, m01, m20, m11, m02, m30, m21, m12, m03;
+    //! @}
+
+    //! @name central moments
+    //! @{
+    CV_PROP_RW double  mu20, mu11, mu02, mu30, mu21, mu12, mu03;
+    //! @}
+
+    //! @name central normalized moments
+    //! @{
+    CV_PROP_RW double  nu20, nu11, nu02, nu30, nu21, nu12, nu03;
+    //! @}
+};
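+
+/* Editor's example: recovering the mass center from the spatial moments, as in
+   the formulas above (a sketch; assumes cv::moments from opencv2/imgproc.hpp,
+   a non-degenerate contour with m00 != 0, and the cv namespace):
+@code
+    Moments m = moments(contour);          // contour: std::vector<Point>
+    Point2f centroid((float)(m.m10 / m.m00),
+                     (float)(m.m01 / m.m00));
+@endcode
+*/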
+
+template<> class DataType<Moments>
+{
+public:
+    typedef Moments     value_type;
+    typedef double      work_type;
+    typedef double      channel_type;
+
+    enum { generic_type = 0,
+           channels     = (int)(sizeof(value_type)/sizeof(channel_type)), // 24
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+           ,depth        = DataType<channel_type>::depth
+           ,type         = CV_MAKETYPE(depth, channels)
+#endif
+         };
+
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+namespace traits {
+template<>
+struct Depth< Moments > { enum { value = Depth<double>::value }; };
+template<>
+struct Type< Moments > { enum { value = CV_MAKETYPE(Depth<double>::value, (int)(sizeof(Moments)/sizeof(double))) }; };
+} // namespace
+
+//! @} imgproc_shape
+
+//! @cond IGNORED
+
+/////////////////////////////////////////////////////////////////////////
+///////////////////////////// Implementation ////////////////////////////
+/////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////// Complex ////////////////////////////////
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex()
+    : re(0), im(0) {}
+
+template<typename _Tp> inline
+Complex<_Tp>::Complex( _Tp _re, _Tp _im )
+    : re(_re), im(_im) {}
+
+template<typename _Tp> template<typename T2> inline
+Complex<_Tp>::operator Complex<T2>() const
+{
+    return Complex<T2>(saturate_cast<T2>(re), saturate_cast<T2>(im));
+}
+
+template<typename _Tp> inline
+Complex<_Tp> Complex<_Tp>::conj() const
+{
+    return Complex<_Tp>(re, -im);
+}
+
+
+template<typename _Tp> static inline
+bool operator == (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re == b.re && a.im == b.im;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return a.re != b.re || a.im != b.im;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re + b.re, a.im + b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re += b.re; a.im += b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re - b.re, a.im - b.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a.re -= b.re; a.im -= b.im;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a)
+{
+    return Complex<_Tp>(-a.re, -a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    return Complex<_Tp>( a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator * (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re*b, a.im*b );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (const Complex<_Tp>& a, _Tp b)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (const Complex<_Tp>& a, _Tp b)
+{ return Complex<_Tp>( a.re - b, a.im ); }
+
+template<typename _Tp> static inline
+Complex<_Tp> operator + (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( a.re + b, a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator - (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>( b - a.re, -a.im );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator += (Complex<_Tp>& a, _Tp b)
+{
+    a.re += b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator -= (Complex<_Tp>& a, _Tp b)
+{
+    a.re -= b; return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator *= (Complex<_Tp>& a, _Tp b)
+{
+    a.re *= b; a.im *= b; return a;
+}
+
+template<typename _Tp> static inline
+double abs(const Complex<_Tp>& a)
+{
+    return std::sqrt( (double)a.re*a.re + (double)a.im*a.im);
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    double t = 1./((double)b.re*b.re + (double)b.im*b.im);
+    return Complex<_Tp>( (_Tp)((a.re*b.re + a.im*b.im)*t),
+                        (_Tp)((-a.re*b.im + a.im*b.re)*t) );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator /= (Complex<_Tp>& a, const Complex<_Tp>& b)
+{
+    a = a / b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (const Complex<_Tp>& a, _Tp b)
+{
+    _Tp t = (_Tp)1/b;
+    return Complex<_Tp>( a.re*t, a.im*t );
+}
+
+template<typename _Tp> static inline
+Complex<_Tp> operator / (_Tp b, const Complex<_Tp>& a)
+{
+    return Complex<_Tp>(b)/a;
+}
+
+template<typename _Tp> static inline
+Complex<_Tp>& operator /= (Complex<_Tp>& a, _Tp b)
+{
+    _Tp t = (_Tp)1/b;
+    a.re *= t; a.im *= t; return a;
+}
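+
+/* Editor's example: a minimal sketch exercising the Complex arithmetic defined
+   above (Complexd is the Complex<double> alias declared earlier in this header;
+   assumes the cv namespace):
+@code
+    Complexd a(1, 2), b(3, -1);
+    Complexd s = a + b;        // (4, 1)
+    Complexd p = a * b;        // (1*3 - 2*(-1), 1*(-1) + 2*3) = (5, 5)
+    Complexd c = a.conj();     // (1, -2)
+    double   m = abs(a);       // sqrt(5) ~= 2.236
+@endcode
+*/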
+
+
+
+//////////////////////////////// 2D Point ///////////////////////////////
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_()
+    : x(0), y(0) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(_Tp _x, _Tp _y)
+    : x(_x), y(_y) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Point_& pt)
+    : x(pt.x), y(pt.y) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Size_<_Tp>& sz)
+    : x(sz.width), y(sz.height) {}
+
+template<typename _Tp> inline
+Point_<_Tp>::Point_(const Vec<_Tp,2>& v)
+    : x(v[0]), y(v[1]) {}
+
+template<typename _Tp> inline
+Point_<_Tp>& Point_<_Tp>::operator = (const Point_& pt)
+{
+    x = pt.x; y = pt.y;
+    return *this;
+}
+
+template<typename _Tp> template<typename _Tp2> inline
+Point_<_Tp>::operator Point_<_Tp2>() const
+{
+    return Point_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y));
+}
+
+template<typename _Tp> inline
+Point_<_Tp>::operator Vec<_Tp, 2>() const
+{
+    return Vec<_Tp, 2>(x, y);
+}
+
+template<typename _Tp> inline
+_Tp Point_<_Tp>::dot(const Point_& pt) const
+{
+    return saturate_cast<_Tp>(x*pt.x + y*pt.y);
+}
+
+template<typename _Tp> inline
+double Point_<_Tp>::ddot(const Point_& pt) const
+{
+    return (double)x*pt.x + (double)y*pt.y;
+}
+
+template<typename _Tp> inline
+double Point_<_Tp>::cross(const Point_& pt) const
+{
+    return (double)x*pt.y - (double)y*pt.x;
+}
+
+template<typename _Tp> inline bool
+Point_<_Tp>::inside( const Rect_<_Tp>& r ) const
+{
+    return r.contains(*this);
+}
+
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator += (Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator -= (Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator *= (Point_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp>& operator /= (Point_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+double norm(const Point_<_Tp>& pt)
+{
+    return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y);
+}
+
+template<typename _Tp> static inline
+bool operator == (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator + (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator - (const Point_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator - (const Point_<_Tp>& a)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, int b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (int a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, float b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (float a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Point_<_Tp>& a, double b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (double a, const Point_<_Tp>& b)
+{
+    return Point_<_Tp>( saturate_cast<_Tp>(b.x*a), saturate_cast<_Tp>(b.y*a) );
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator * (const Matx<_Tp, 2, 2>& a, const Point_<_Tp>& b)
+{
+    Matx<_Tp, 2, 1> tmp = a * Vec<_Tp,2>(b.x, b.y);
+    return Point_<_Tp>(tmp.val[0], tmp.val[1]);
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point_<_Tp>& b)
+{
+    Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, 1);
+    return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]);
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, int b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, float b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point_<_Tp> operator / (const Point_<_Tp>& a, double b)
+{
+    Point_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<int>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<int64>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<float>& pt);
+template<typename _AccTp> static inline _AccTp normL2Sqr(const Point_<double>& pt);
+
+template<> inline int normL2Sqr<int>(const Point_<int>& pt) { return pt.dot(pt); }
+template<> inline int64 normL2Sqr<int64>(const Point_<int64>& pt) { return pt.dot(pt); }
+template<> inline float normL2Sqr<float>(const Point_<float>& pt) { return pt.dot(pt); }
+template<> inline double normL2Sqr<double>(const Point_<int>& pt) { return pt.dot(pt); }
+
+template<> inline double normL2Sqr<double>(const Point_<float>& pt) { return pt.ddot(pt); }
+template<> inline double normL2Sqr<double>(const Point_<double>& pt) { return pt.ddot(pt); }
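+
+/* Editor's example: squared vs. plain Euclidean norm of a point, using the
+   specializations above (a sketch, assumes the cv namespace):
+@code
+    Point pt(3, 4);
+    int    d2 = normL2Sqr<int>(pt);  // 25, no square root taken
+    double d  = norm(pt);            // 5.0
+@endcode
+*/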
+
+
+
+//////////////////////////////// 3D Point ///////////////////////////////
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_()
+    : x(0), y(0), z(0) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(_Tp _x, _Tp _y, _Tp _z)
+    : x(_x), y(_y), z(_z) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Point3_& pt)
+    : x(pt.x), y(pt.y), z(pt.z) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Point_<_Tp>& pt)
+    : x(pt.x), y(pt.y), z(_Tp()) {}
+
+template<typename _Tp> inline
+Point3_<_Tp>::Point3_(const Vec<_Tp, 3>& v)
+    : x(v[0]), y(v[1]), z(v[2]) {}
+
+template<typename _Tp> template<typename _Tp2> inline
+Point3_<_Tp>::operator Point3_<_Tp2>() const
+{
+    return Point3_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(z));
+}
+
+#if OPENCV_ABI_COMPATIBILITY > 300
+template<typename _Tp> template<typename _Tp2> inline
+Point3_<_Tp>::operator Vec<_Tp2, 3>() const
+{
+    return Vec<_Tp2, 3>(x, y, z);
+}
+#else
+template<typename _Tp> inline
+Point3_<_Tp>::operator Vec<_Tp, 3>() const
+{
+    return Vec<_Tp, 3>(x, y, z);
+}
+#endif
+
+template<typename _Tp> inline
+Point3_<_Tp>& Point3_<_Tp>::operator = (const Point3_& pt)
+{
+    x = pt.x; y = pt.y; z = pt.z;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp Point3_<_Tp>::dot(const Point3_& pt) const
+{
+    return saturate_cast<_Tp>(x*pt.x + y*pt.y + z*pt.z);
+}
+
+template<typename _Tp> inline
+double Point3_<_Tp>::ddot(const Point3_& pt) const
+{
+    return (double)x*pt.x + (double)y*pt.y + (double)z*pt.z;
+}
+
+template<typename _Tp> inline
+Point3_<_Tp> Point3_<_Tp>::cross(const Point3_<_Tp>& pt) const
+{
+    return Point3_<_Tp>(y*pt.z - z*pt.y, z*pt.x - x*pt.z, x*pt.y - y*pt.x);
+}
+
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator += (Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator -= (Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator *= (Point3_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x * b);
+    a.y = saturate_cast<_Tp>(a.y * b);
+    a.z = saturate_cast<_Tp>(a.z * b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, int b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, float b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp>& operator /= (Point3_<_Tp>& a, double b)
+{
+    a.x = saturate_cast<_Tp>(a.x / b);
+    a.y = saturate_cast<_Tp>(a.y / b);
+    a.z = saturate_cast<_Tp>(a.z / b);
+    return a;
+}
+
+template<typename _Tp> static inline
+double norm(const Point3_<_Tp>& pt)
+{
+    return std::sqrt((double)pt.x*pt.x + (double)pt.y*pt.y + (double)pt.z*pt.z);
+}
+
+template<typename _Tp> static inline
+bool operator == (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y && a.z == b.z;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y || a.z != b.z;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator + (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x + b.x), saturate_cast<_Tp>(a.y + b.y), saturate_cast<_Tp>(a.z + b.z));
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator - (const Point3_<_Tp>& a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x - b.x), saturate_cast<_Tp>(a.y - b.y), saturate_cast<_Tp>(a.z - b.z));
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator - (const Point3_<_Tp>& a)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(-a.x), saturate_cast<_Tp>(-a.y), saturate_cast<_Tp>(-a.z) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, int b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x*b), saturate_cast<_Tp>(a.y*b), saturate_cast<_Tp>(a.z*b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (int a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, float b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (float a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Point3_<_Tp>& a, double b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(a.x * b), saturate_cast<_Tp>(a.y * b), saturate_cast<_Tp>(a.z * b) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (double a, const Point3_<_Tp>& b)
+{
+    return Point3_<_Tp>( saturate_cast<_Tp>(b.x * a), saturate_cast<_Tp>(b.y * a), saturate_cast<_Tp>(b.z * a) );
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator * (const Matx<_Tp, 3, 3>& a, const Point3_<_Tp>& b)
+{
+    Matx<_Tp, 3, 1> tmp = a * Vec<_Tp,3>(b.x, b.y, b.z);
+    return Point3_<_Tp>(tmp.val[0], tmp.val[1], tmp.val[2]);
+}
+
+template<typename _Tp> static inline
+Matx<_Tp, 4, 1> operator * (const Matx<_Tp, 4, 4>& a, const Point3_<_Tp>& b)
+{
+    return a * Matx<_Tp, 4, 1>(b.x, b.y, b.z, 1);
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, int b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, float b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Point3_<_Tp> operator / (const Point3_<_Tp>& a, double b)
+{
+    Point3_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+
+
+////////////////////////////////// Size /////////////////////////////////
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_()
+    : width(0), height(0) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(_Tp _width, _Tp _height)
+    : width(_width), height(_height) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(const Size_& sz)
+    : width(sz.width), height(sz.height) {}
+
+template<typename _Tp> inline
+Size_<_Tp>::Size_(const Point_<_Tp>& pt)
+    : width(pt.x), height(pt.y) {}
+
+template<typename _Tp> template<typename _Tp2> inline
+Size_<_Tp>::operator Size_<_Tp2>() const
+{
+    return Size_<_Tp2>(saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+}
+
+template<typename _Tp> inline
+Size_<_Tp>& Size_<_Tp>::operator = (const Size_<_Tp>& sz)
+{
+    width = sz.width; height = sz.height;
+    return *this;
+}
+
+template<typename _Tp> inline
+_Tp Size_<_Tp>::area() const
+{
+    const _Tp result = width * height;
+    CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer
+        || width == 0 || result / width == height); // make sure the result fits in the return value
+    return result;
+}
+
+template<typename _Tp> inline
+bool Size_<_Tp>::empty() const
+{
+    return width <= 0 || height <= 0;
+}
+
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator *= (Size_<_Tp>& a, _Tp b)
+{
+    a.width *= b;
+    a.height *= b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator * (const Size_<_Tp>& a, _Tp b)
+{
+    Size_<_Tp> tmp(a);
+    tmp *= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator /= (Size_<_Tp>& a, _Tp b)
+{
+    a.width /= b;
+    a.height /= b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator / (const Size_<_Tp>& a, _Tp b)
+{
+    Size_<_Tp> tmp(a);
+    tmp /= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator += (Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    a.width += b.width;
+    a.height += b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator + (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    Size_<_Tp> tmp(a);
+    tmp += b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp>& operator -= (Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    a.width -= b.width;
+    a.height -= b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Size_<_Tp> operator - (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    Size_<_Tp> tmp(a);
+    tmp -= b;
+    return tmp;
+}
+
+template<typename _Tp> static inline
+bool operator == (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return a.width == b.width && a.height == b.height;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Size_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return !(a == b);
+}
+
+
+
+////////////////////////////////// Rect /////////////////////////////////
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_()
+    : x(0), y(0), width(0), height(0) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(_Tp _x, _Tp _y, _Tp _width, _Tp _height)
+    : x(_x), y(_y), width(_width), height(_height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Rect_<_Tp>& r)
+    : x(r.x), y(r.y), width(r.width), height(r.height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Point_<_Tp>& org, const Size_<_Tp>& sz)
+    : x(org.x), y(org.y), width(sz.width), height(sz.height) {}
+
+template<typename _Tp> inline
+Rect_<_Tp>::Rect_(const Point_<_Tp>& pt1, const Point_<_Tp>& pt2)
+{
+    x = std::min(pt1.x, pt2.x);
+    y = std::min(pt1.y, pt2.y);
+    width = std::max(pt1.x, pt2.x) - x;
+    height = std::max(pt1.y, pt2.y) - y;
+}
+
+template<typename _Tp> inline
+Rect_<_Tp>& Rect_<_Tp>::operator = ( const Rect_<_Tp>& r )
+{
+    x = r.x;
+    y = r.y;
+    width = r.width;
+    height = r.height;
+    return *this;
+}
+
+template<typename _Tp> inline
+Point_<_Tp> Rect_<_Tp>::tl() const
+{
+    return Point_<_Tp>(x,y);
+}
+
+template<typename _Tp> inline
+Point_<_Tp> Rect_<_Tp>::br() const
+{
+    return Point_<_Tp>(x + width, y + height);
+}
+
+template<typename _Tp> inline
+Size_<_Tp> Rect_<_Tp>::size() const
+{
+    return Size_<_Tp>(width, height);
+}
+
+template<typename _Tp> inline
+_Tp Rect_<_Tp>::area() const
+{
+    const _Tp result = width * height;
+    CV_DbgAssert(!std::numeric_limits<_Tp>::is_integer
+        || width == 0 || result / width == height); // make sure the result fits in the return value
+    return result;
+}
+
+template<typename _Tp> inline
+bool Rect_<_Tp>::empty() const
+{
+    return width <= 0 || height <= 0;
+}
+
+template<typename _Tp> template<typename _Tp2> inline
+Rect_<_Tp>::operator Rect_<_Tp2>() const
+{
+    return Rect_<_Tp2>(saturate_cast<_Tp2>(x), saturate_cast<_Tp2>(y), saturate_cast<_Tp2>(width), saturate_cast<_Tp2>(height));
+}
+
+template<typename _Tp> inline
+bool Rect_<_Tp>::contains(const Point_<_Tp>& pt) const
+{
+    return x <= pt.x && pt.x < x + width && y <= pt.y && pt.y < y + height;
+}
+
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Point_<_Tp>& b )
+{
+    a.x += b.x;
+    a.y += b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Point_<_Tp>& b )
+{
+    a.x -= b.x;
+    a.y -= b.y;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator += ( Rect_<_Tp>& a, const Size_<_Tp>& b )
+{
+    a.width += b.width;
+    a.height += b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator -= ( Rect_<_Tp>& a, const Size_<_Tp>& b )
+{
+    a.width -= b.width;
+    a.height -= b.height;
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator &= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
+{
+    _Tp x1 = std::max(a.x, b.x);
+    _Tp y1 = std::max(a.y, b.y);
+    a.width = std::min(a.x + a.width, b.x + b.width) - x1;
+    a.height = std::min(a.y + a.height, b.y + b.height) - y1;
+    a.x = x1;
+    a.y = y1;
+    if( a.width <= 0 || a.height <= 0 )
+        a = Rect();
+    return a;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp>& operator |= ( Rect_<_Tp>& a, const Rect_<_Tp>& b )
+{
+    if (a.empty()) {
+        a = b;
+    }
+    else if (!b.empty()) {
+        _Tp x1 = std::min(a.x, b.x);
+        _Tp y1 = std::min(a.y, b.y);
+        a.width = std::max(a.x + a.width, b.x + b.width) - x1;
+        a.height = std::max(a.y + a.height, b.y + b.height) - y1;
+        a.x = x1;
+        a.y = y1;
+    }
+    return a;
+}
+
+template<typename _Tp> static inline
+bool operator == (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x == b.x && a.y == b.y && a.width == b.width && a.height == b.height;
+}
+
+template<typename _Tp> static inline
+bool operator != (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    return a.x != b.x || a.y != b.y || a.width != b.width || a.height != b.height;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x + b.x, a.y + b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator - (const Rect_<_Tp>& a, const Point_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x - b.x, a.y - b.y, a.width, a.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator + (const Rect_<_Tp>& a, const Size_<_Tp>& b)
+{
+    return Rect_<_Tp>( a.x, a.y, a.width + b.width, a.height + b.height );
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator & (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c &= b;
+}
+
+template<typename _Tp> static inline
+Rect_<_Tp> operator | (const Rect_<_Tp>& a, const Rect_<_Tp>& b)
+{
+    Rect_<_Tp> c = a;
+    return c |= b;
+}
+
+/**
+ * @brief Measures dissimilarity between two sample sets.
+ *
+ * Computes the complement of the Jaccard index as described in <https://en.wikipedia.org/wiki/Jaccard_index>.
+ * For rectangles this reduces to one minus the intersection over the union (IoU).
+ */
+template<typename _Tp> static inline
+double jaccardDistance(const Rect_<_Tp>& a, const Rect_<_Tp>& b) {
+    _Tp Aa = a.area();
+    _Tp Ab = b.area();
+
+    if ((Aa + Ab) <= std::numeric_limits<_Tp>::epsilon()) {
+        // jaccard_index = 1 -> distance = 0
+        return 0.0;
+    }
+
+    double Aab = (a & b).area();
+    // distance = 1 - jaccard_index
+    return 1.0 - Aab / (Aa + Ab - Aab);
+}
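+
+/* Editor's example: Jaccard distance of two overlapping rectangles (a sketch;
+   intersection area 2500, union area 17500, so IoU = 1/7):
+@code
+    Rect a(0, 0, 100, 100), b(50, 50, 100, 100);
+    double d = jaccardDistance(a, b);  // 1 - 2500/17500 ~= 0.857
+@endcode
+*/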
+
+////////////////////////////// RotatedRect //////////////////////////////
+
+inline
+RotatedRect::RotatedRect()
+    : center(), size(), angle(0) {}
+
+inline
+RotatedRect::RotatedRect(const Point2f& _center, const Size2f& _size, float _angle)
+    : center(_center), size(_size), angle(_angle) {}
+
+
+
+///////////////////////////////// Range /////////////////////////////////
+
+inline
+Range::Range()
+    : start(0), end(0) {}
+
+inline
+Range::Range(int _start, int _end)
+    : start(_start), end(_end) {}
+
+inline
+int Range::size() const
+{
+    return end - start;
+}
+
+inline
+bool Range::empty() const
+{
+    return start == end;
+}
+
+inline
+Range Range::all()
+{
+    return Range(INT_MIN, INT_MAX);
+}
+
+
+static inline
+bool operator == (const Range& r1, const Range& r2)
+{
+    return r1.start == r2.start && r1.end == r2.end;
+}
+
+static inline
+bool operator != (const Range& r1, const Range& r2)
+{
+    return !(r1 == r2);
+}
+
+static inline
+bool operator !(const Range& r)
+{
+    return r.start == r.end;
+}
+
+static inline
+Range operator & (const Range& r1, const Range& r2)
+{
+    Range r(std::max(r1.start, r2.start), std::min(r1.end, r2.end));
+    r.end = std::max(r.end, r.start);
+    return r;
+}
+
+static inline
+Range& operator &= (Range& r1, const Range& r2)
+{
+    r1 = r1 & r2;
+    return r1;
+}
+
+static inline
+Range operator + (const Range& r1, int delta)
+{
+    return Range(r1.start + delta, r1.end + delta);
+}
+
+static inline
+Range operator + (int delta, const Range& r1)
+{
+    return Range(r1.start + delta, r1.end + delta);
+}
+
+static inline
+Range operator - (const Range& r1, int delta)
+{
+    return r1 + (-delta);
+}
+
+
+
+///////////////////////////////// Scalar ////////////////////////////////
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_()
+{
+    this->val[0] = this->val[1] = this->val[2] = this->val[3] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(_Tp v0, _Tp v1, _Tp v2, _Tp v3)
+{
+    this->val[0] = v0;
+    this->val[1] = v1;
+    this->val[2] = v2;
+    this->val[3] = v3;
+}
+
+template<typename _Tp> template<typename _Tp2, int cn> inline
+Scalar_<_Tp>::Scalar_(const Vec<_Tp2, cn>& v)
+{
+    int i;
+    for( i = 0; i < (cn < 4 ? cn : 4); i++ )
+        this->val[i] = cv::saturate_cast<_Tp>(v.val[i]);
+    for( ; i < 4; i++ )
+        this->val[i] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp>::Scalar_(_Tp v0)
+{
+    this->val[0] = v0;
+    this->val[1] = this->val[2] = this->val[3] = 0;
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::all(_Tp v0)
+{
+    return Scalar_<_Tp>(v0, v0, v0, v0);
+}
+
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::mul(const Scalar_<_Tp>& a, double scale ) const
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(this->val[0] * a.val[0] * scale),
+                        saturate_cast<_Tp>(this->val[1] * a.val[1] * scale),
+                        saturate_cast<_Tp>(this->val[2] * a.val[2] * scale),
+                        saturate_cast<_Tp>(this->val[3] * a.val[3] * scale));
+}
+
+template<typename _Tp> inline
+Scalar_<_Tp> Scalar_<_Tp>::conj() const
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>( this->val[0]),
+                        saturate_cast<_Tp>(-this->val[1]),
+                        saturate_cast<_Tp>(-this->val[2]),
+                        saturate_cast<_Tp>(-this->val[3]));
+}
+
+template<typename _Tp> inline
+bool Scalar_<_Tp>::isReal() const
+{
+    return this->val[1] == 0 && this->val[2] == 0 && this->val[3] == 0;
+}
+
+
+template<typename _Tp> template<typename T2> inline
+Scalar_<_Tp>::operator Scalar_<T2>() const
+{
+    return Scalar_<T2>(saturate_cast<T2>(this->val[0]),
+                       saturate_cast<T2>(this->val[1]),
+                       saturate_cast<T2>(this->val[2]),
+                       saturate_cast<T2>(this->val[3]));
+}
+
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator += (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a.val[0] += b.val[0];
+    a.val[1] += b.val[1];
+    a.val[2] += b.val[2];
+    a.val[3] += b.val[3];
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator -= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a.val[0] -= b.val[0];
+    a.val[1] -= b.val[1];
+    a.val[2] -= b.val[2];
+    a.val[3] -= b.val[3];
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator *= ( Scalar_<_Tp>& a, _Tp v )
+{
+    a.val[0] *= v;
+    a.val[1] *= v;
+    a.val[2] *= v;
+    a.val[3] *= v;
+    return a;
+}
+
+template<typename _Tp> static inline
+bool operator == ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b )
+{
+    return a.val[0] == b.val[0] && a.val[1] == b.val[1] &&
+           a.val[2] == b.val[2] && a.val[3] == b.val[3];
+}
+
+template<typename _Tp> static inline
+bool operator != ( const Scalar_<_Tp>& a, const Scalar_<_Tp>& b )
+{
+    return a.val[0] != b.val[0] || a.val[1] != b.val[1] ||
+           a.val[2] != b.val[2] || a.val[3] != b.val[3];
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator + (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(a.val[0] + b.val[0],
+                        a.val[1] + b.val[1],
+                        a.val[2] + b.val[2],
+                        a.val[3] + b.val[3]);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator - (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(a.val[0] - b.val[0]),
+                        saturate_cast<_Tp>(a.val[1] - b.val[1]),
+                        saturate_cast<_Tp>(a.val[2] - b.val[2]),
+                        saturate_cast<_Tp>(a.val[3] - b.val[3]));
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, _Tp alpha)
+{
+    return Scalar_<_Tp>(a.val[0] * alpha,
+                        a.val[1] * alpha,
+                        a.val[2] * alpha,
+                        a.val[3] * alpha);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (_Tp alpha, const Scalar_<_Tp>& a)
+{
+    return a*alpha;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator - (const Scalar_<_Tp>& a)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(-a.val[0]),
+                        saturate_cast<_Tp>(-a.val[1]),
+                        saturate_cast<_Tp>(-a.val[2]),
+                        saturate_cast<_Tp>(-a.val[3]));
+}
+
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator * (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return Scalar_<_Tp>(saturate_cast<_Tp>(a[0]*b[0] - a[1]*b[1] - a[2]*b[2] - a[3]*b[3]),
+                        saturate_cast<_Tp>(a[0]*b[1] + a[1]*b[0] + a[2]*b[3] - a[3]*b[2]),
+                        saturate_cast<_Tp>(a[0]*b[2] - a[1]*b[3] + a[2]*b[0] + a[3]*b[1]),
+                        saturate_cast<_Tp>(a[0]*b[3] + a[1]*b[2] - a[2]*b[1] + a[3]*b[0]));
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator *= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a = a * b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, _Tp alpha)
+{
+    return Scalar_<_Tp>(a.val[0] / alpha,
+                        a.val[1] / alpha,
+                        a.val[2] / alpha,
+                        a.val[3] / alpha);
+}
+
+template<typename _Tp> static inline
+Scalar_<float> operator / (const Scalar_<float>& a, float alpha)
+{
+    float s = 1 / alpha;
+    return Scalar_<float>(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s);
+}
+
+template<typename _Tp> static inline
+Scalar_<double> operator / (const Scalar_<double>& a, double alpha)
+{
+    double s = 1 / alpha;
+    return Scalar_<double>(a.val[0] * s, a.val[1] * s, a.val[2] * s, a.val[3] * s);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, _Tp alpha)
+{
+    a = a / alpha;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (_Tp a, const Scalar_<_Tp>& b)
+{
+    _Tp s = a / (b[0]*b[0] + b[1]*b[1] + b[2]*b[2] + b[3]*b[3]);
+    return b.conj() * s;
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp> operator / (const Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    return a * ((_Tp)1 / b);
+}
+
+template<typename _Tp> static inline
+Scalar_<_Tp>& operator /= (Scalar_<_Tp>& a, const Scalar_<_Tp>& b)
+{
+    a = a / b;
+    return a;
+}
+
+template<typename _Tp> static inline
+Scalar operator * (const Matx<_Tp, 4, 4>& a, const Scalar& b)
+{
+    Matx<double, 4, 1> c((Matx<double, 4, 4>)a, b, Matx_MatMulOp());
+    return reinterpret_cast<const Scalar&>(c);
+}
+
+template<> inline
+Scalar operator * (const Matx<double, 4, 4>& a, const Scalar& b)
+{
+    Matx<double, 4, 1> c(a, b, Matx_MatMulOp());
+    return reinterpret_cast<const Scalar&>(c);
+}
+
+
+
+//////////////////////////////// KeyPoint ///////////////////////////////
+
+inline
+KeyPoint::KeyPoint()
+    : pt(0,0), size(0), angle(-1), response(0), octave(0), class_id(-1) {}
+
+inline
+KeyPoint::KeyPoint(Point2f _pt, float _size, float _angle, float _response, int _octave, int _class_id)
+    : pt(_pt), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {}
+
+inline
+KeyPoint::KeyPoint(float x, float y, float _size, float _angle, float _response, int _octave, int _class_id)
+    : pt(x, y), size(_size), angle(_angle), response(_response), octave(_octave), class_id(_class_id) {}
+
+
+
+///////////////////////////////// DMatch ////////////////////////////////
+
+inline
+DMatch::DMatch()
+    : queryIdx(-1), trainIdx(-1), imgIdx(-1), distance(FLT_MAX) {}
+
+inline
+DMatch::DMatch(int _queryIdx, int _trainIdx, float _distance)
+    : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(-1), distance(_distance) {}
+
+inline
+DMatch::DMatch(int _queryIdx, int _trainIdx, int _imgIdx, float _distance)
+    : queryIdx(_queryIdx), trainIdx(_trainIdx), imgIdx(_imgIdx), distance(_distance) {}
+
+inline
+bool DMatch::operator < (const DMatch &m) const
+{
+    return distance < m.distance;
+}
+
+
+
+////////////////////////////// TermCriteria /////////////////////////////
+
+inline
+TermCriteria::TermCriteria()
+    : type(0), maxCount(0), epsilon(0) {}
+
+inline
+TermCriteria::TermCriteria(int _type, int _maxCount, double _epsilon)
+    : type(_type), maxCount(_maxCount), epsilon(_epsilon) {}
+
+//! @endcond
+
+} // cv
+
+#endif //OPENCV_CORE_TYPES_HPP

+ 1837 - 0
ThresholdTools/include/opencv2/core/types_c.h

@@ -0,0 +1,1837 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_TYPES_H
+#define OPENCV_CORE_TYPES_H
+
+#ifdef HAVE_IPL
+#  ifndef __IPL_H__
+#    if defined _WIN32
+#      include <ipl.h>
+#    else
+#      include <ipl/ipl.h>
+#    endif
+#  endif
+#elif defined __IPL_H__
+#  define HAVE_IPL
+#endif
+
+#include "opencv2/core/cvdef.h"
+
+#ifndef SKIP_INCLUDES
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#endif // SKIP_INCLUDES
+
+#if defined _WIN32
+#  define CV_CDECL __cdecl
+#  define CV_STDCALL __stdcall
+#else
+#  define CV_CDECL
+#  define CV_STDCALL
+#endif
+
+#ifndef CV_DEFAULT
+#  ifdef __cplusplus
+#    define CV_DEFAULT(val) = val
+#  else
+#    define CV_DEFAULT(val)
+#  endif
+#endif
+
+#ifndef CV_EXTERN_C_FUNCPTR
+#  ifdef __cplusplus
+#    define CV_EXTERN_C_FUNCPTR(x) extern "C" { typedef x; }
+#  else
+#    define CV_EXTERN_C_FUNCPTR(x) typedef x
+#  endif
+#endif
+
+#ifndef CVAPI
+#  define CVAPI(rettype) CV_EXTERN_C CV_EXPORTS rettype CV_CDECL
+#endif
+
+#ifndef CV_IMPL
+#  define CV_IMPL CV_EXTERN_C
+#endif
+
+#ifdef __cplusplus
+#  include "opencv2/core.hpp"
+#endif
+
+/** @addtogroup core_c
+    @{
+*/
+
+/** @brief This is the "metatype" used *only* as a function parameter.
+
+It denotes that the function accepts arrays of multiple types, such as IplImage*, CvMat*, or
+sometimes even CvSeq*. The particular array type is determined at runtime by analyzing the first 4
+bytes of the header. In the C++ interface, the role of CvArr is played by InputArray and OutputArray.
+ */
+typedef void CvArr;
+
+typedef int CVStatus;
+
+/** @see cv::Error::Code */
+enum {
+ CV_StsOk=                       0,  /**< everything is ok                */
+ CV_StsBackTrace=               -1,  /**< pseudo error for back trace     */
+ CV_StsError=                   -2,  /**< unknown /unspecified error      */
+ CV_StsInternal=                -3,  /**< internal error (bad state)      */
+ CV_StsNoMem=                   -4,  /**< insufficient memory             */
+ CV_StsBadArg=                  -5,  /**< function arg/param is bad       */
+ CV_StsBadFunc=                 -6,  /**< unsupported function            */
+ CV_StsNoConv=                  -7,  /**< iter. didn't converge           */
+ CV_StsAutoTrace=               -8,  /**< tracing                         */
+ CV_HeaderIsNull=               -9,  /**< image header is NULL            */
+ CV_BadImageSize=              -10,  /**< image size is invalid           */
+ CV_BadOffset=                 -11,  /**< offset is invalid               */
+ CV_BadDataPtr=                -12,  /**/
+ CV_BadStep=                   -13,  /**< image step is wrong, this may happen for a non-continuous matrix */
+ CV_BadModelOrChSeq=           -14,  /**/
+ CV_BadNumChannels=            -15,  /**< bad number of channels, for example, some functions accept only single channel matrices */
+ CV_BadNumChannel1U=           -16,  /**/
+ CV_BadDepth=                  -17,  /**< input image depth is not supported by the function */
+ CV_BadAlphaChannel=           -18,  /**/
+ CV_BadOrder=                  -19,  /**< number of dimensions is out of range */
+ CV_BadOrigin=                 -20,  /**< incorrect input origin               */
+ CV_BadAlign=                  -21,  /**< incorrect input align                */
+ CV_BadCallBack=               -22,  /**/
+ CV_BadTileSize=               -23,  /**/
+ CV_BadCOI=                    -24,  /**< input COI is not supported           */
+ CV_BadROISize=                -25,  /**< incorrect input roi                  */
+ CV_MaskIsTiled=               -26,  /**/
+ CV_StsNullPtr=                -27,  /**< null pointer */
+ CV_StsVecLengthErr=           -28,  /**< incorrect vector length */
+ CV_StsFilterStructContentErr= -29,  /**< incorrect filter structure content */
+ CV_StsKernelStructContentErr= -30,  /**< incorrect transform kernel content */
+ CV_StsFilterOffsetErr=        -31,  /**< incorrect filter offset value */
+ CV_StsBadSize=                -201, /**< the input/output structure size is incorrect  */
+ CV_StsDivByZero=              -202, /**< division by zero */
+ CV_StsInplaceNotSupported=    -203, /**< in-place operation is not supported */
+ CV_StsObjectNotFound=         -204, /**< request can't be completed */
+ CV_StsUnmatchedFormats=       -205, /**< formats of input/output arrays differ */
+ CV_StsBadFlag=                -206, /**< flag is wrong or not supported */
+ CV_StsBadPoint=               -207, /**< bad CvPoint */
+ CV_StsBadMask=                -208, /**< bad format of mask (neither 8uC1 nor 8sC1)*/
+ CV_StsUnmatchedSizes=         -209, /**< sizes of input/output structures do not match */
+ CV_StsUnsupportedFormat=      -210, /**< the data format/type is not supported by the function*/
+ CV_StsOutOfRange=             -211, /**< some of parameters are out of range */
+ CV_StsParseError=             -212, /**< invalid syntax/structure of the parsed file */
+ CV_StsNotImplemented=         -213, /**< the requested function/feature is not implemented */
+ CV_StsBadMemBlock=            -214, /**< an allocated block has been corrupted */
+ CV_StsAssert=                 -215, /**< assertion failed   */
+ CV_GpuNotSupported=           -216, /**< no CUDA support    */
+ CV_GpuApiCallError=           -217, /**< GPU API call error */
+ CV_OpenGlNotSupported=        -218, /**< no OpenGL support  */
+ CV_OpenGlApiCallError=        -219, /**< OpenGL API call error */
+ CV_OpenCLApiCallError=        -220, /**< OpenCL API call error */
+ CV_OpenCLDoubleNotSupported=  -221,
+ CV_OpenCLInitError=           -222, /**< OpenCL initialization error */
+ CV_OpenCLNoAMDBlasFft=        -223
+};
+
+/****************************************************************************************\
+*                             Common macros and inline functions                         *
+\****************************************************************************************/
+
+#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t))
+
+/** min & max without jumps */
+#define  CV_IMIN(a, b)  ((a) ^ (((a)^(b)) & (((a) < (b)) - 1)))
+
+#define  CV_IMAX(a, b)  ((a) ^ (((a)^(b)) & (((a) > (b)) - 1)))
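+
+/* How the branchless min/max works: (a < b) evaluates to 0 or 1, so
+   ((a < b) - 1) is either all-ones (when a >= b) or zero (when a < b).
+   ANDing it with (a^b) then selects b (a ^ (a^b) == b) or a (a ^ 0 == a).
+   Worked example: CV_IMIN(3, 7) -> (3 < 7) - 1 == 0, so 3 ^ ((3^7) & 0) == 3. */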
+
+/** absolute value without jumps */
+#ifndef __cplusplus
+#  define  CV_IABS(a)     (((a) ^ ((a) < 0 ? -1 : 0)) - ((a) < 0 ? -1 : 0))
+#else
+#  define  CV_IABS(a)     abs(a)
+#endif
+#define  CV_CMP(a,b)    (((a) > (b)) - ((a) < (b)))
+#define  CV_SIGN(a)     CV_CMP((a),0)
+
+#define cvInvSqrt(value) ((float)(1./sqrt(value)))
+#define cvSqrt(value)  ((float)sqrt(value))
+
+
+/*************** Random number generation *******************/
+
+typedef uint64 CvRNG;
+
+#define CV_RNG_COEFF 4164903690U
+
+/** @brief Initializes a random number generator state.
+
+The function initializes a random number generator and returns the state. The pointer to the state
+can be then passed to the cvRandInt, cvRandReal and cvRandArr functions. In the current
+implementation a multiply-with-carry generator is used.
+@param seed 64-bit value used to initiate a random sequence
+@sa the C++ class RNG, which has replaced CvRNG.
+ */
+CV_INLINE CvRNG cvRNG( int64 seed CV_DEFAULT(-1))
+{
+    CvRNG rng = seed ? (uint64)seed : (uint64)(int64)-1;
+    return rng;
+}
+
+/** @brief Returns a 32-bit unsigned integer and updates RNG.
+
+The function returns a uniformly-distributed random 32-bit unsigned integer and updates the RNG
+state. It is similar to the rand() function from the C runtime library, except that OpenCV
+always generates a 32-bit random number, regardless of the platform.
+@param rng CvRNG state initialized by cvRNG.
+ */
+CV_INLINE unsigned cvRandInt( CvRNG* rng )
+{
+    uint64 temp = *rng;
+    temp = (uint64)(unsigned)temp*CV_RNG_COEFF + (temp >> 32);
+    *rng = temp;
+    return (unsigned)temp;
+}
+
+/** @brief Returns a floating-point random number and updates RNG.
+
+The function returns a uniformly-distributed random floating-point number between 0 and 1 (1 is not
+included).
+@param rng RNG state initialized by cvRNG
+ */
+CV_INLINE double cvRandReal( CvRNG* rng )
+{
+    return cvRandInt(rng)*2.3283064365386962890625e-10 /* 2^-32 */;
+}
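+
+/* A minimal usage sketch for the legacy RNG trio above (the sequence of
+   values drawn is fully determined by the seed):
+
+       CvRNG rng = cvRNG(0x12345678);
+       unsigned u = cvRandInt(&rng);   // uniform 32-bit unsigned integer
+       double   x = cvRandReal(&rng);  // uniform in [0, 1)
+*/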
+
+/****************************************************************************************\
+*                                  Image type (IplImage)                                 *
+\****************************************************************************************/
+
+#ifndef HAVE_IPL
+
+/*
+ * The following definitions (until #endif)
+ * are an extract from the IPL headers.
+ * Copyright (c) 1995 Intel Corporation.
+ */
+#define IPL_DEPTH_SIGN 0x80000000
+
+#define IPL_DEPTH_1U     1
+#define IPL_DEPTH_8U     8
+#define IPL_DEPTH_16U   16
+#define IPL_DEPTH_32F   32
+
+#define IPL_DEPTH_8S  (IPL_DEPTH_SIGN| 8)
+#define IPL_DEPTH_16S (IPL_DEPTH_SIGN|16)
+#define IPL_DEPTH_32S (IPL_DEPTH_SIGN|32)
+
+#define IPL_DATA_ORDER_PIXEL  0
+#define IPL_DATA_ORDER_PLANE  1
+
+#define IPL_ORIGIN_TL 0
+#define IPL_ORIGIN_BL 1
+
+#define IPL_ALIGN_4BYTES   4
+#define IPL_ALIGN_8BYTES   8
+#define IPL_ALIGN_16BYTES 16
+#define IPL_ALIGN_32BYTES 32
+
+#define IPL_ALIGN_DWORD   IPL_ALIGN_4BYTES
+#define IPL_ALIGN_QWORD   IPL_ALIGN_8BYTES
+
+#define IPL_BORDER_CONSTANT   0
+#define IPL_BORDER_REPLICATE  1
+#define IPL_BORDER_REFLECT    2
+#define IPL_BORDER_WRAP       3
+
+/** The IplImage is taken from the Intel Image Processing Library, in which the format is native. OpenCV
+only supports a subset of possible IplImage formats, as outlined in the parameter list above.
+
+In addition to the above restrictions, OpenCV handles ROIs differently. OpenCV functions require
+that the image size or ROI size of all source and destination images match exactly. On the other
+hand, the Intel Image Processing Library processes the area of intersection between the source and
+destination images (or ROIs), allowing them to vary independently.
+*/
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+_IplImage
+{
+    int  nSize;             /**< sizeof(IplImage) */
+    int  ID;                /**< version (=0)*/
+    int  nChannels;         /**< Most of OpenCV functions support 1,2,3 or 4 channels */
+    int  alphaChannel;      /**< Ignored by OpenCV */
+    int  depth;             /**< Pixel depth in bits: IPL_DEPTH_8U, IPL_DEPTH_8S, IPL_DEPTH_16S,
+                               IPL_DEPTH_32S, IPL_DEPTH_32F and IPL_DEPTH_64F are supported.  */
+    char colorModel[4];     /**< Ignored by OpenCV */
+    char channelSeq[4];     /**< ditto */
+    int  dataOrder;         /**< 0 - interleaved color channels, 1 - separate color channels.
+                               cvCreateImage can only create interleaved images */
+    int  origin;            /**< 0 - top-left origin,
+                               1 - bottom-left origin (Windows bitmaps style).  */
+    int  align;             /**< Alignment of image rows (4 or 8).
+                               OpenCV ignores it and uses widthStep instead.    */
+    int  width;             /**< Image width in pixels.                           */
+    int  height;            /**< Image height in pixels.                          */
+    struct _IplROI *roi;    /**< Image ROI. If NULL, the whole image is selected. */
+    struct _IplImage *maskROI;      /**< Must be NULL. */
+    void  *imageId;                 /**< "           " */
+    struct _IplTileInfo *tileInfo;  /**< "           " */
+    int  imageSize;         /**< Image data size in bytes
+                               (==image->height*image->widthStep
+                               in case of interleaved data)*/
+    char *imageData;        /**< Pointer to aligned image data.         */
+    int  widthStep;         /**< Size of aligned image row in bytes.    */
+    int  BorderMode[4];     /**< Ignored by OpenCV.                     */
+    int  BorderConst[4];    /**< Ditto.                                 */
+    char *imageDataOrigin;  /**< Pointer to very origin of image data
+                               (not necessarily aligned) -
+                               needed for correct deallocation */
+
+#ifdef __cplusplus
+    _IplImage() {}
+    _IplImage(const cv::Mat& m);
+#endif
+}
+IplImage;
+
+typedef struct _IplTileInfo IplTileInfo;
+
+typedef struct _IplROI
+{
+    int  coi; /**< 0 - no COI (all channels are selected), 1 - 0th channel is selected ...*/
+    int  xOffset;
+    int  yOffset;
+    int  width;
+    int  height;
+}
+IplROI;
+
+typedef struct _IplConvKernel
+{
+    int  nCols;
+    int  nRows;
+    int  anchorX;
+    int  anchorY;
+    int *values;
+    int  nShiftR;
+}
+IplConvKernel;
+
+typedef struct _IplConvKernelFP
+{
+    int  nCols;
+    int  nRows;
+    int  anchorX;
+    int  anchorY;
+    float *values;
+}
+IplConvKernelFP;
+
+#define IPL_IMAGE_HEADER 1
+#define IPL_IMAGE_DATA   2
+#define IPL_IMAGE_ROI    4
+
+#endif/*HAVE_IPL*/
+
+/** extra border mode */
+#define IPL_BORDER_REFLECT_101    4
+#define IPL_BORDER_TRANSPARENT    5
+
+#define IPL_IMAGE_MAGIC_VAL  ((int)sizeof(IplImage))
+#define CV_TYPE_NAME_IMAGE "opencv-image"
+
+#define CV_IS_IMAGE_HDR(img) \
+    ((img) != NULL && ((const IplImage*)(img))->nSize == sizeof(IplImage))
+
+#define CV_IS_IMAGE(img) \
+    (CV_IS_IMAGE_HDR(img) && ((IplImage*)img)->imageData != NULL)
+
+/** for storing double-precision
+   floating-point data in IplImages */
+#define IPL_DEPTH_64F  64
+
+/** get reference to pixel at (col,row),
+   for multi-channel images (col) should be multiplied by number of channels */
+#define CV_IMAGE_ELEM( image, elemtype, row, col )       \
+    (((elemtype*)((image)->imageData + (image)->widthStep*(row)))[(col)])
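+
+/* Usage sketch: for multi-channel images the column index must be
+   pre-multiplied by the channel count, e.g. the green channel of the pixel
+   at row 10, column 20 of an 8-bit BGR image (cvCreateImage/cvReleaseImage
+   are declared in core_c.h):
+
+       IplImage* img = cvCreateImage(cvSize(640, 480), IPL_DEPTH_8U, 3);
+       uchar g = CV_IMAGE_ELEM(img, uchar, 10, 20*3 + 1);
+       cvReleaseImage(&img);
+*/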
+
+/****************************************************************************************\
+*                                  Matrix type (CvMat)                                   *
+\****************************************************************************************/
+
+#define CV_AUTO_STEP  0x7fffffff
+#define CV_WHOLE_ARR  cvSlice( 0, 0x3fffffff )
+
+#define CV_MAGIC_MASK       0xFFFF0000
+#define CV_MAT_MAGIC_VAL    0x42420000
+#define CV_TYPE_NAME_MAT    "opencv-matrix"
+
+/** Matrix elements are stored row by row. Element (i, j) (i - 0-based row index, j - 0-based column
+index) of a matrix can be retrieved or modified using CV_MAT_ELEM macro:
+
+    uchar pixval = CV_MAT_ELEM(grayimg, uchar, i, j)
+    CV_MAT_ELEM(cameraMatrix, float, 0, 2) = image.width*0.5f;
+
+To access multiple-channel matrices, you can use
+CV_MAT_ELEM(matrix, type, i, j\*nchannels + channel_idx).
+
+@deprecated CvMat is now obsolete; consider using Mat instead.
+ */
+typedef struct CvMat
+{
+    int type;
+    int step;
+
+    /* for internal use only */
+    int* refcount;
+    int hdr_refcount;
+
+    union
+    {
+        uchar* ptr;
+        short* s;
+        int* i;
+        float* fl;
+        double* db;
+    } data;
+
+#ifdef __cplusplus
+    union
+    {
+        int rows;
+        int height;
+    };
+
+    union
+    {
+        int cols;
+        int width;
+    };
+#else
+    int rows;
+    int cols;
+#endif
+
+
+#ifdef __cplusplus
+    CvMat() {}
+    CvMat(const CvMat& m) { memcpy(this, &m, sizeof(CvMat));}
+    CvMat(const cv::Mat& m);
+#endif
+
+}
+CvMat;
+
+
+#define CV_IS_MAT_HDR(mat) \
+    ((mat) != NULL && \
+    (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \
+    ((const CvMat*)(mat))->cols > 0 && ((const CvMat*)(mat))->rows > 0)
+
+#define CV_IS_MAT_HDR_Z(mat) \
+    ((mat) != NULL && \
+    (((const CvMat*)(mat))->type & CV_MAGIC_MASK) == CV_MAT_MAGIC_VAL && \
+    ((const CvMat*)(mat))->cols >= 0 && ((const CvMat*)(mat))->rows >= 0)
+
+#define CV_IS_MAT(mat) \
+    (CV_IS_MAT_HDR(mat) && ((const CvMat*)(mat))->data.ptr != NULL)
+
+#define CV_IS_MASK_ARR(mat) \
+    (((mat)->type & (CV_MAT_TYPE_MASK & ~CV_8SC1)) == 0)
+
+#define CV_ARE_TYPES_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_TYPE_MASK) == 0)
+
+#define CV_ARE_CNS_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_CN_MASK) == 0)
+
+#define CV_ARE_DEPTHS_EQ(mat1, mat2) \
+    ((((mat1)->type ^ (mat2)->type) & CV_MAT_DEPTH_MASK) == 0)
+
+#define CV_ARE_SIZES_EQ(mat1, mat2) \
+    ((mat1)->rows == (mat2)->rows && (mat1)->cols == (mat2)->cols)
+
+#define CV_IS_MAT_CONST(mat)  \
+    (((mat)->rows|(mat)->cols) == 1)
+
+#define IPL2CV_DEPTH(depth) \
+    ((((CV_8U)+(CV_16U<<4)+(CV_32F<<8)+(CV_64F<<16)+(CV_8S<<20)+ \
+    (CV_16S<<24)+(CV_32S<<28)) >> ((((depth) & 0xF0) >> 2) + \
+    (((depth) & IPL_DEPTH_SIGN) ? 20 : 0))) & 15)
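+
+/* The lookup above packs the CV_* depth codes into one integer, 4 bits each,
+   indexed by ((depth) & 0xF0) >> 2, plus 20 when IPL_DEPTH_SIGN is set.
+   E.g. IPL2CV_DEPTH(IPL_DEPTH_16S) reads bits 24..27 of the table: CV_16S. */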
+
+/** Inline constructor. No data is allocated internally!!!
+ * (Use together with cvCreateData, or use cvCreateMat instead to
+ * get a matrix with allocated data):
+ */
+CV_INLINE CvMat cvMat( int rows, int cols, int type, void* data CV_DEFAULT(NULL))
+{
+    CvMat m;
+
+    assert( (unsigned)CV_MAT_DEPTH(type) <= CV_64F );
+    type = CV_MAT_TYPE(type);
+    m.type = CV_MAT_MAGIC_VAL | CV_MAT_CONT_FLAG | type;
+    m.cols = cols;
+    m.rows = rows;
+    m.step = m.cols*CV_ELEM_SIZE(type);
+    m.data.ptr = (uchar*)data;
+    m.refcount = NULL;
+    m.hdr_refcount = 0;
+
+    return m;
+}
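+
+/* Usage sketch: wrap an existing buffer as a 2x2 CV_64FC1 header; no data is
+   copied and nothing has to be released:
+
+       double buf[] = { 1, 2, 3, 4 };
+       CvMat m = cvMat(2, 2, CV_64FC1, buf);
+       // CV_MAT_ELEM(m, double, 1, 0) == 3
+*/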
+
+#ifdef __cplusplus
+inline CvMat::CvMat(const cv::Mat& m)
+{
+    CV_DbgAssert(m.dims <= 2);
+    *this = cvMat(m.rows, m.dims == 1 ? 1 : m.cols, m.type(), m.data);
+    step = (int)m.step[0];
+    type = (type & ~cv::Mat::CONTINUOUS_FLAG) | (m.flags & cv::Mat::CONTINUOUS_FLAG);
+}
+#endif
+
+
+#define CV_MAT_ELEM_PTR_FAST( mat, row, col, pix_size )  \
+    (assert( (unsigned)(row) < (unsigned)(mat).rows &&   \
+             (unsigned)(col) < (unsigned)(mat).cols ),   \
+     (mat).data.ptr + (size_t)(mat).step*(row) + (pix_size)*(col))
+
+#define CV_MAT_ELEM_PTR( mat, row, col )                 \
+    CV_MAT_ELEM_PTR_FAST( mat, row, col, CV_ELEM_SIZE((mat).type) )
+
+#define CV_MAT_ELEM( mat, elemtype, row, col )           \
+    (*(elemtype*)CV_MAT_ELEM_PTR_FAST( mat, row, col, sizeof(elemtype)))
+
+/** @brief Returns the particular element of single-channel floating-point matrix.
+
+The function is a fast replacement for cvGetReal2D in the case of single-channel floating-point
+matrices. It is faster because it is inline, it does fewer checks for array type and array element
+type, and it checks for the row and column ranges only in debug mode.
+@param mat Input matrix
+@param row The zero-based index of row
+@param col The zero-based index of column
+ */
+CV_INLINE  double  cvmGet( const CvMat* mat, int row, int col )
+{
+    int type;
+
+    type = CV_MAT_TYPE(mat->type);
+    assert( (unsigned)row < (unsigned)mat->rows &&
+            (unsigned)col < (unsigned)mat->cols );
+
+    if( type == CV_32FC1 )
+        return ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col];
+    else
+    {
+        assert( type == CV_64FC1 );
+        return ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col];
+    }
+}
+
+/** @brief Sets a specific element of a single-channel floating-point matrix.
+
+The function is a fast replacement for cvSetReal2D in the case of single-channel floating-point
+matrices. It is faster because it is inline, it does fewer checks for array type and array element
+type, and it checks for the row and column ranges only in debug mode.
+@param mat The matrix
+@param row The zero-based index of row
+@param col The zero-based index of column
+@param value The new value of the matrix element
+ */
+CV_INLINE  void  cvmSet( CvMat* mat, int row, int col, double value )
+{
+    int type;
+    type = CV_MAT_TYPE(mat->type);
+    assert( (unsigned)row < (unsigned)mat->rows &&
+            (unsigned)col < (unsigned)mat->cols );
+
+    if( type == CV_32FC1 )
+        ((float*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = (float)value;
+    else
+    {
+        assert( type == CV_64FC1 );
+        ((double*)(void*)(mat->data.ptr + (size_t)mat->step*row))[col] = value;
+    }
+}
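+
+/* Usage sketch for the two accessors above (cvCreateMat/cvReleaseMat are
+   declared in core_c.h):
+
+       CvMat* K = cvCreateMat(3, 3, CV_64FC1);
+       cvmSet(K, 0, 2, 320.0);       // e.g. principal point x
+       double cx = cvmGet(K, 0, 2);  // == 320.0
+       cvReleaseMat(&K);
+*/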
+
+
+CV_INLINE int cvIplDepth( int type )
+{
+    int depth = CV_MAT_DEPTH(type);
+    return CV_ELEM_SIZE1(depth)*8 | (depth == CV_8S || depth == CV_16S ||
+           depth == CV_32S ? IPL_DEPTH_SIGN : 0);
+}
+
+
+/****************************************************************************************\
+*                       Multi-dimensional dense array (CvMatND)                          *
+\****************************************************************************************/
+
+#define CV_MATND_MAGIC_VAL    0x42430000
+#define CV_TYPE_NAME_MATND    "opencv-nd-matrix"
+
+#define CV_MAX_DIM            32
+#define CV_MAX_DIM_HEAP       1024
+
+/**
+  @deprecated consider using cv::Mat instead
+  */
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+CvMatND
+{
+    int type;
+    int dims;
+
+    int* refcount;
+    int hdr_refcount;
+
+    union
+    {
+        uchar* ptr;
+        float* fl;
+        double* db;
+        int* i;
+        short* s;
+    } data;
+
+    struct
+    {
+        int size;
+        int step;
+    }
+    dim[CV_MAX_DIM];
+
+#ifdef __cplusplus
+    CvMatND() {}
+    CvMatND(const cv::Mat& m);
+#endif
+}
+CvMatND;
+
+#define CV_IS_MATND_HDR(mat) \
+    ((mat) != NULL && (((const CvMatND*)(mat))->type & CV_MAGIC_MASK) == CV_MATND_MAGIC_VAL)
+
+#define CV_IS_MATND(mat) \
+    (CV_IS_MATND_HDR(mat) && ((const CvMatND*)(mat))->data.ptr != NULL)
+
+
+/****************************************************************************************\
+*                      Multi-dimensional sparse array (CvSparseMat)                      *
+\****************************************************************************************/
+
+#define CV_SPARSE_MAT_MAGIC_VAL    0x42440000
+#define CV_TYPE_NAME_SPARSE_MAT    "opencv-sparse-matrix"
+
+struct CvSet;
+
+typedef struct
+#ifdef __cplusplus
+  CV_EXPORTS
+#endif
+CvSparseMat
+{
+    int type;
+    int dims;
+    int* refcount;
+    int hdr_refcount;
+
+    struct CvSet* heap;
+    void** hashtable;
+    int hashsize;
+    int valoffset;
+    int idxoffset;
+    int size[CV_MAX_DIM];
+
+#ifdef __cplusplus
+    void copyToSparseMat(cv::SparseMat& m) const;
+#endif
+}
+CvSparseMat;
+
+#ifdef __cplusplus
+    CV_EXPORTS CvSparseMat* cvCreateSparseMat(const cv::SparseMat& m);
+#endif
+
+#define CV_IS_SPARSE_MAT_HDR(mat) \
+    ((mat) != NULL && \
+    (((const CvSparseMat*)(mat))->type & CV_MAGIC_MASK) == CV_SPARSE_MAT_MAGIC_VAL)
+
+#define CV_IS_SPARSE_MAT(mat) \
+    CV_IS_SPARSE_MAT_HDR(mat)
+
+/**************** iteration through a sparse array *****************/
+
+typedef struct CvSparseNode
+{
+    unsigned hashval;
+    struct CvSparseNode* next;
+}
+CvSparseNode;
+
+typedef struct CvSparseMatIterator
+{
+    CvSparseMat* mat;
+    CvSparseNode* node;
+    int curidx;
+}
+CvSparseMatIterator;
+
+#define CV_NODE_VAL(mat,node)   ((void*)((uchar*)(node) + (mat)->valoffset))
+#define CV_NODE_IDX(mat,node)   ((int*)((uchar*)(node) + (mat)->idxoffset))
+
+/****************************************************************************************\
+*                                         Histogram                                      *
+\****************************************************************************************/
+
+typedef int CvHistType;
+
+#define CV_HIST_MAGIC_VAL     0x42450000
+#define CV_HIST_UNIFORM_FLAG  (1 << 10)
+
+/** indicates whether bin ranges are set already or not */
+#define CV_HIST_RANGES_FLAG   (1 << 11)
+
+#define CV_HIST_ARRAY         0
+#define CV_HIST_SPARSE        1
+#define CV_HIST_TREE          CV_HIST_SPARSE
+
+/** should be used as a parameter only;
+   it is converted to CV_HIST_UNIFORM_FLAG of hist->type */
+#define CV_HIST_UNIFORM       1
+
+typedef struct CvHistogram
+{
+    int     type;
+    CvArr*  bins;
+    float   thresh[CV_MAX_DIM][2];  /**< For uniform histograms.                      */
+    float** thresh2;                /**< For non-uniform histograms.                  */
+    CvMatND mat;                    /**< Embedded matrix header for array histograms. */
+}
+CvHistogram;
+
+#define CV_IS_HIST( hist ) \
+    ((hist) != NULL  && \
+     (((CvHistogram*)(hist))->type & CV_MAGIC_MASK) == CV_HIST_MAGIC_VAL && \
+     (hist)->bins != NULL)
+
+#define CV_IS_UNIFORM_HIST( hist ) \
+    (((hist)->type & CV_HIST_UNIFORM_FLAG) != 0)
+
+#define CV_IS_SPARSE_HIST( hist ) \
+    CV_IS_SPARSE_MAT((hist)->bins)
+
+#define CV_HIST_HAS_RANGES( hist ) \
+    (((hist)->type & CV_HIST_RANGES_FLAG) != 0)
+
+/****************************************************************************************\
+*                      Other supplementary data type definitions                         *
+\****************************************************************************************/
+
+/*************************************** CvRect *****************************************/
+/** @sa Rect_ */
+typedef struct CvRect
+{
+    int x;
+    int y;
+    int width;
+    int height;
+
+#ifdef __cplusplus
+    CvRect(int _x = 0, int _y = 0, int w = 0, int h = 0): x(_x), y(_y), width(w), height(h) {}
+    template<typename _Tp>
+    CvRect(const cv::Rect_<_Tp>& r): x(cv::saturate_cast<int>(r.x)), y(cv::saturate_cast<int>(r.y)), width(cv::saturate_cast<int>(r.width)), height(cv::saturate_cast<int>(r.height)) {}
+    template<typename _Tp>
+    operator cv::Rect_<_Tp>() const { return cv::Rect_<_Tp>((_Tp)x, (_Tp)y, (_Tp)width, (_Tp)height); }
+#endif
+}
+CvRect;
+
+/** constructs CvRect structure. */
+CV_INLINE  CvRect  cvRect( int x, int y, int width, int height )
+{
+    CvRect r;
+
+    r.x = x;
+    r.y = y;
+    r.width = width;
+    r.height = height;
+
+    return r;
+}
+
+
+CV_INLINE  IplROI  cvRectToROI( CvRect rect, int coi )
+{
+    IplROI roi;
+    roi.xOffset = rect.x;
+    roi.yOffset = rect.y;
+    roi.width = rect.width;
+    roi.height = rect.height;
+    roi.coi = coi;
+
+    return roi;
+}
+
+
+CV_INLINE  CvRect  cvROIToRect( IplROI roi )
+{
+    return cvRect( roi.xOffset, roi.yOffset, roi.width, roi.height );
+}
+
+/*********************************** CvTermCriteria *************************************/
+
+#define CV_TERMCRIT_ITER    1
+#define CV_TERMCRIT_NUMBER  CV_TERMCRIT_ITER
+#define CV_TERMCRIT_EPS     2
+
+/** @sa TermCriteria
+ */
+typedef struct CvTermCriteria
+{
+    int    type;  /**< may be combination of
+                     CV_TERMCRIT_ITER
+                     CV_TERMCRIT_EPS */
+    int    max_iter;
+    double epsilon;
+
+#ifdef __cplusplus
+    CvTermCriteria(int _type = 0, int _iter = 0, double _eps = 0) : type(_type), max_iter(_iter), epsilon(_eps)  {}
+    CvTermCriteria(const cv::TermCriteria& t) : type(t.type), max_iter(t.maxCount), epsilon(t.epsilon)  {}
+    operator cv::TermCriteria() const { return cv::TermCriteria(type, max_iter, epsilon); }
+#endif
+
+}
+CvTermCriteria;
+
+CV_INLINE  CvTermCriteria  cvTermCriteria( int type, int max_iter, double epsilon )
+{
+    CvTermCriteria t;
+
+    t.type = type;
+    t.max_iter = max_iter;
+    t.epsilon = (float)epsilon;
+
+    return t;
+}
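+
+/* Usage sketch: stop after 100 iterations or once the change drops below
+   1e-3, whichever happens first:
+
+       CvTermCriteria tc = cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS,
+                                          100, 1e-3);
+*/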
+
+
+/******************************* CvPoint and variants ***********************************/
+
+typedef struct CvPoint
+{
+    int x;
+    int y;
+
+#ifdef __cplusplus
+    CvPoint(int _x = 0, int _y = 0): x(_x), y(_y) {}
+    template<typename _Tp>
+    CvPoint(const cv::Point_<_Tp>& pt): x((int)pt.x), y((int)pt.y) {}
+    template<typename _Tp>
+    operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
+#endif
+}
+CvPoint;
+
+/** constructs CvPoint structure. */
+CV_INLINE  CvPoint  cvPoint( int x, int y )
+{
+    CvPoint p;
+
+    p.x = x;
+    p.y = y;
+
+    return p;
+}
+
+
+typedef struct CvPoint2D32f
+{
+    float x;
+    float y;
+
+#ifdef __cplusplus
+    CvPoint2D32f(float _x = 0, float _y = 0): x(_x), y(_y) {}
+    template<typename _Tp>
+    CvPoint2D32f(const cv::Point_<_Tp>& pt): x((float)pt.x), y((float)pt.y) {}
+    template<typename _Tp>
+    operator cv::Point_<_Tp>() const { return cv::Point_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y)); }
+#endif
+}
+CvPoint2D32f;
+
+/** constructs CvPoint2D32f structure. */
+CV_INLINE  CvPoint2D32f  cvPoint2D32f( double x, double y )
+{
+    CvPoint2D32f p;
+
+    p.x = (float)x;
+    p.y = (float)y;
+
+    return p;
+}
+
+/** converts CvPoint to CvPoint2D32f. */
+CV_INLINE  CvPoint2D32f  cvPointTo32f( CvPoint point )
+{
+    return cvPoint2D32f( (float)point.x, (float)point.y );
+}
+
+/** converts CvPoint2D32f to CvPoint. */
+CV_INLINE  CvPoint  cvPointFrom32f( CvPoint2D32f point )
+{
+    CvPoint ipt;
+    ipt.x = cvRound(point.x);
+    ipt.y = cvRound(point.y);
+
+    return ipt;
+}
+
+
+typedef struct CvPoint3D32f
+{
+    float x;
+    float y;
+    float z;
+
+#ifdef __cplusplus
+    CvPoint3D32f(float _x = 0, float _y = 0, float _z = 0): x(_x), y(_y), z(_z) {}
+    template<typename _Tp>
+    CvPoint3D32f(const cv::Point3_<_Tp>& pt): x((float)pt.x), y((float)pt.y), z((float)pt.z) {}
+    template<typename _Tp>
+    operator cv::Point3_<_Tp>() const { return cv::Point3_<_Tp>(cv::saturate_cast<_Tp>(x), cv::saturate_cast<_Tp>(y), cv::saturate_cast<_Tp>(z)); }
+#endif
+}
+CvPoint3D32f;
+
+/** constructs CvPoint3D32f structure. */
+CV_INLINE  CvPoint3D32f  cvPoint3D32f( double x, double y, double z )
+{
+    CvPoint3D32f p;
+
+    p.x = (float)x;
+    p.y = (float)y;
+    p.z = (float)z;
+
+    return p;
+}
+
+
+typedef struct CvPoint2D64f
+{
+    double x;
+    double y;
+}
+CvPoint2D64f;
+
+/** constructs CvPoint2D64f structure.*/
+CV_INLINE  CvPoint2D64f  cvPoint2D64f( double x, double y )
+{
+    CvPoint2D64f p;
+
+    p.x = x;
+    p.y = y;
+
+    return p;
+}
+
+
+typedef struct CvPoint3D64f
+{
+    double x;
+    double y;
+    double z;
+}
+CvPoint3D64f;
+
+/** constructs CvPoint3D64f structure. */
+CV_INLINE  CvPoint3D64f  cvPoint3D64f( double x, double y, double z )
+{
+    CvPoint3D64f p;
+
+    p.x = x;
+    p.y = y;
+    p.z = z;
+
+    return p;
+}
+
+
+/******************************** CvSize's & CvBox **************************************/
+
+typedef struct CvSize
+{
+    int width;
+    int height;
+
+#ifdef __cplusplus
+    CvSize(int w = 0, int h = 0): width(w), height(h) {}
+    template<typename _Tp>
+    CvSize(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<int>(sz.width)), height(cv::saturate_cast<int>(sz.height)) {}
+    template<typename _Tp>
+    operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
+#endif
+}
+CvSize;
+
+/** constructs CvSize structure. */
+CV_INLINE  CvSize  cvSize( int width, int height )
+{
+    CvSize s;
+
+    s.width = width;
+    s.height = height;
+
+    return s;
+}
+
+typedef struct CvSize2D32f
+{
+    float width;
+    float height;
+
+#ifdef __cplusplus
+    CvSize2D32f(float w = 0, float h = 0): width(w), height(h) {}
+    template<typename _Tp>
+    CvSize2D32f(const cv::Size_<_Tp>& sz): width(cv::saturate_cast<float>(sz.width)), height(cv::saturate_cast<float>(sz.height)) {}
+    template<typename _Tp>
+    operator cv::Size_<_Tp>() const { return cv::Size_<_Tp>(cv::saturate_cast<_Tp>(width), cv::saturate_cast<_Tp>(height)); }
+#endif
+}
+CvSize2D32f;
+
+/** constructs CvSize2D32f structure. */
+CV_INLINE  CvSize2D32f  cvSize2D32f( double width, double height )
+{
+    CvSize2D32f s;
+
+    s.width = (float)width;
+    s.height = (float)height;
+
+    return s;
+}
+
+/** @sa RotatedRect
+ */
+typedef struct CvBox2D
+{
+    CvPoint2D32f center;  /**< Center of the box.                          */
+    CvSize2D32f  size;    /**< Box width and length.                       */
+    float angle;          /**< Angle between the horizontal axis           */
+                          /**< and the first side (i.e. length) in degrees */
+
+#ifdef __cplusplus
+    CvBox2D(CvPoint2D32f c = CvPoint2D32f(), CvSize2D32f s = CvSize2D32f(), float a = 0) : center(c), size(s), angle(a) {}
+    CvBox2D(const cv::RotatedRect& rr) : center(rr.center), size(rr.size), angle(rr.angle) {}
+    operator cv::RotatedRect() const { return cv::RotatedRect(center, size, angle); }
+#endif
+}
+CvBox2D;
+
+
+/** Line iterator state: */
+typedef struct CvLineIterator
+{
+    /** Pointer to the current point: */
+    uchar* ptr;
+
+    /* Bresenham algorithm state: */
+    int  err;
+    int  plus_delta;
+    int  minus_delta;
+    int  plus_step;
+    int  minus_step;
+}
+CvLineIterator;
+
+
+
+/************************************* CvSlice ******************************************/
+#define CV_WHOLE_SEQ_END_INDEX 0x3fffffff
+#define CV_WHOLE_SEQ  cvSlice(0, CV_WHOLE_SEQ_END_INDEX)
+
+typedef struct CvSlice
+{
+    int  start_index, end_index;
+
+#if defined(__cplusplus) && !defined(__CUDACC__)
+    CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {}
+    CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); }
+    operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); }
+#endif
+}
+CvSlice;
+
+CV_INLINE  CvSlice  cvSlice( int start, int end )
+{
+    CvSlice slice;
+    slice.start_index = start;
+    slice.end_index = end;
+
+    return slice;
+}
+
+
+
+/************************************* CvScalar *****************************************/
+/** @sa Scalar_
+ */
+typedef struct CvScalar
+{
+    double val[4];
+
+#ifdef __cplusplus
+    CvScalar() {}
+    CvScalar(double d0, double d1 = 0, double d2 = 0, double d3 = 0) { val[0] = d0; val[1] = d1; val[2] = d2; val[3] = d3; }
+    template<typename _Tp>
+    CvScalar(const cv::Scalar_<_Tp>& s) { val[0] = s.val[0]; val[1] = s.val[1]; val[2] = s.val[2]; val[3] = s.val[3]; }
+    template<typename _Tp>
+    operator cv::Scalar_<_Tp>() const { return cv::Scalar_<_Tp>(cv::saturate_cast<_Tp>(val[0]), cv::saturate_cast<_Tp>(val[1]), cv::saturate_cast<_Tp>(val[2]), cv::saturate_cast<_Tp>(val[3])); }
+    template<typename _Tp, int cn>
+    CvScalar(const cv::Vec<_Tp, cn>& v)
+    {
+        int i;
+        for( i = 0; i < (cn < 4 ? cn : 4); i++ ) val[i] = v.val[i];
+        for( ; i < 4; i++ ) val[i] = 0;
+    }
+#endif
+}
+CvScalar;
+
+CV_INLINE  CvScalar  cvScalar( double val0, double val1 CV_DEFAULT(0),
+                               double val2 CV_DEFAULT(0), double val3 CV_DEFAULT(0))
+{
+    CvScalar scalar;
+    scalar.val[0] = val0; scalar.val[1] = val1;
+    scalar.val[2] = val2; scalar.val[3] = val3;
+    return scalar;
+}
+
+
+CV_INLINE  CvScalar  cvRealScalar( double val0 )
+{
+    CvScalar scalar;
+    scalar.val[0] = val0;
+    scalar.val[1] = scalar.val[2] = scalar.val[3] = 0;
+    return scalar;
+}
+
+CV_INLINE  CvScalar  cvScalarAll( double val0123 )
+{
+    CvScalar scalar;
+    scalar.val[0] = val0123;
+    scalar.val[1] = val0123;
+    scalar.val[2] = val0123;
+    scalar.val[3] = val0123;
+    return scalar;
+}
+
+/****************************************************************************************\
+*                                   Dynamic Data structures                              *
+\****************************************************************************************/
+
+/******************************** Memory storage ****************************************/
+
+typedef struct CvMemBlock
+{
+    struct CvMemBlock*  prev;
+    struct CvMemBlock*  next;
+}
+CvMemBlock;
+
+#define CV_STORAGE_MAGIC_VAL    0x42890000
+
+typedef struct CvMemStorage
+{
+    int signature;
+    CvMemBlock* bottom;           /**< First allocated block.                   */
+    CvMemBlock* top;              /**< Current memory block - top of the stack. */
+    struct  CvMemStorage* parent; /**< We get new blocks from parent as needed. */
+    int block_size;               /**< Block size.                              */
+    int free_space;               /**< Remaining free space in current block.   */
+}
+CvMemStorage;
+
+#define CV_IS_STORAGE(storage)  \
+    ((storage) != NULL &&       \
+    (((CvMemStorage*)(storage))->signature & CV_MAGIC_MASK) == CV_STORAGE_MAGIC_VAL)
+
+
+typedef struct CvMemStoragePos
+{
+    CvMemBlock* top;
+    int free_space;
+}
+CvMemStoragePos;
+
+
+/*********************************** Sequence *******************************************/
+
+typedef struct CvSeqBlock
+{
+    struct CvSeqBlock*  prev; /**< Previous sequence block.                   */
+    struct CvSeqBlock*  next; /**< Next sequence block.                       */
+    int    start_index;       /**< Index of the first element in the block +  */
+                              /**< sequence->first->start_index.              */
+    int    count;             /**< Number of elements in the block.           */
+    schar* data;              /**< Pointer to the first element of the block. */
+}
+CvSeqBlock;
+
+
+#define CV_TREE_NODE_FIELDS(node_type)                               \
+    int       flags;             /**< Miscellaneous flags.     */      \
+    int       header_size;       /**< Size of sequence header. */      \
+    struct    node_type* h_prev; /**< Previous sequence.       */      \
+    struct    node_type* h_next; /**< Next sequence.           */      \
+    struct    node_type* v_prev; /**< 2nd previous sequence.   */      \
+    struct    node_type* v_next  /**< 2nd next sequence.       */
+
+/**
+   Read/Write sequence.
+   Elements can be dynamically inserted into or deleted from the sequence.
+*/
+#define CV_SEQUENCE_FIELDS()                                              \
+    CV_TREE_NODE_FIELDS(CvSeq);                                           \
+    int       total;          /**< Total number of elements.            */  \
+    int       elem_size;      /**< Size of sequence element in bytes.   */  \
+    schar*    block_max;      /**< Maximal bound of the last block.     */  \
+    schar*    ptr;            /**< Current write pointer.               */  \
+    int       delta_elems;    /**< Grow seq this many at a time.        */  \
+    CvMemStorage* storage;    /**< Where the seq is stored.             */  \
+    CvSeqBlock* free_blocks;  /**< Free blocks list.                    */  \
+    CvSeqBlock* first;        /**< Pointer to the first sequence block. */
+
+typedef struct CvSeq
+{
+    CV_SEQUENCE_FIELDS()
+}
+CvSeq;
+
+#define CV_TYPE_NAME_SEQ             "opencv-sequence"
+#define CV_TYPE_NAME_SEQ_TREE        "opencv-sequence-tree"
+
+/*************************************** Set ********************************************/
+/** @brief Set
+  Order is not preserved. There can be gaps between sequence elements.
+  Once an element has been inserted, it stays in the same place for its whole lifetime.
+  The MSB (most-significant, i.e. sign, bit) of the first field (flags) is 0 iff the element exists.
+*/
+#define CV_SET_ELEM_FIELDS(elem_type)   \
+    int  flags;                         \
+    struct elem_type* next_free;
+
+typedef struct CvSetElem
+{
+    CV_SET_ELEM_FIELDS(CvSetElem)
+}
+CvSetElem;
+
+#define CV_SET_FIELDS()      \
+    CV_SEQUENCE_FIELDS()     \
+    CvSetElem* free_elems;   \
+    int active_count;
+
+typedef struct CvSet
+{
+    CV_SET_FIELDS()
+}
+CvSet;
+
+
+#define CV_SET_ELEM_IDX_MASK   ((1 << 26) - 1)
+#define CV_SET_ELEM_FREE_FLAG  (1 << (sizeof(int)*8-1))
+
+/** Checks whether the element pointed by ptr belongs to a set or not */
+#define CV_IS_SET_ELEM( ptr )  (((CvSetElem*)(ptr))->flags >= 0)
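+
+/* Usage sketch: a dense index scan over a set, skipping freed slots
+   (cvGetSetElem is declared in core_c.h and returns NULL for free slots;
+   CV_IS_SET_ELEM is the raw check when walking node memory directly):
+
+       int i;
+       for( i = 0; i < set->total; i++ )
+       {
+           CvSetElem* elem = cvGetSetElem(set, i);
+           if( elem )
+               ;  // process the occupied element
+       }
+*/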
+
+/************************************* Graph ********************************************/
+
+/** @name Graph
+
+We represent a graph as a set of vertices. Vertices contain their adjacency lists (more exactly,
+pointers to the first incoming or outgoing edge, or 0 for an isolated vertex). Edges are stored in
+another set. There is a singly-linked list of incoming/outgoing edges for each vertex.
+
+Each edge consists of:
+
+- Two pointers to the starting and ending vertices (vtx[0] and vtx[1] respectively).
+
+    A graph may be oriented or not. In the latter case, edges between vertex i and vertex j are not
+distinguished during search operations.
+
+- Two pointers to next edges for the starting and ending vertices, where next[0] points to the
+next edge in the vtx[0] adjacency list and next[1] points to the next edge in the vtx[1]
+adjacency list.
+
+@see CvGraphEdge, CvGraphVtx, CvGraphVtx2D, CvGraph
+@{
+*/
+#define CV_GRAPH_EDGE_FIELDS()      \
+    int flags;                      \
+    float weight;                   \
+    struct CvGraphEdge* next[2];    \
+    struct CvGraphVtx* vtx[2];
+
+
+#define CV_GRAPH_VERTEX_FIELDS()    \
+    int flags;                      \
+    struct CvGraphEdge* first;
+
+
+typedef struct CvGraphEdge
+{
+    CV_GRAPH_EDGE_FIELDS()
+}
+CvGraphEdge;
+
+typedef struct CvGraphVtx
+{
+    CV_GRAPH_VERTEX_FIELDS()
+}
+CvGraphVtx;
+
+typedef struct CvGraphVtx2D
+{
+    CV_GRAPH_VERTEX_FIELDS()
+    CvPoint2D32f* ptr;
+}
+CvGraphVtx2D;
+
+/**
+   A graph is "derived" from a set (this is the set of vertices)
+   and includes another set (the edges)
+*/
+#define  CV_GRAPH_FIELDS()   \
+    CV_SET_FIELDS()          \
+    CvSet* edges;
+
+typedef struct CvGraph
+{
+    CV_GRAPH_FIELDS()
+}
+CvGraph;
+
+#define CV_TYPE_NAME_GRAPH "opencv-graph"
+
+/** @} */
+
+/*********************************** Chain/Contour *************************************/
+
+typedef struct CvChain
+{
+    CV_SEQUENCE_FIELDS()
+    CvPoint  origin;
+}
+CvChain;
+
+#define CV_CONTOUR_FIELDS()  \
+    CV_SEQUENCE_FIELDS()     \
+    CvRect rect;             \
+    int color;               \
+    int reserved[3];
+
+typedef struct CvContour
+{
+    CV_CONTOUR_FIELDS()
+}
+CvContour;
+
+typedef CvContour CvPoint2DSeq;
+
+/****************************************************************************************\
+*                                    Sequence types                                      *
+\****************************************************************************************/
+
+#define CV_SEQ_MAGIC_VAL             0x42990000
+
+#define CV_IS_SEQ(seq) \
+    ((seq) != NULL && (((CvSeq*)(seq))->flags & CV_MAGIC_MASK) == CV_SEQ_MAGIC_VAL)
+
+#define CV_SET_MAGIC_VAL             0x42980000
+#define CV_IS_SET(set) \
+    ((set) != NULL && (((CvSeq*)(set))->flags & CV_MAGIC_MASK) == CV_SET_MAGIC_VAL)
+
+#define CV_SEQ_ELTYPE_BITS           12
+#define CV_SEQ_ELTYPE_MASK           ((1 << CV_SEQ_ELTYPE_BITS) - 1)
+
+#define CV_SEQ_ELTYPE_POINT          CV_32SC2  /**< (x,y) */
+#define CV_SEQ_ELTYPE_CODE           CV_8UC1   /**< freeman code: 0..7 */
+#define CV_SEQ_ELTYPE_GENERIC        0
+#define CV_SEQ_ELTYPE_PTR            CV_USRTYPE1
+#define CV_SEQ_ELTYPE_PPOINT         CV_SEQ_ELTYPE_PTR  /**< &(x,y) */
+#define CV_SEQ_ELTYPE_INDEX          CV_32SC1  /**< #(x,y) */
+#define CV_SEQ_ELTYPE_GRAPH_EDGE     0  /**< &next_o, &next_d, &vtx_o, &vtx_d */
+#define CV_SEQ_ELTYPE_GRAPH_VERTEX   0  /**< first_edge, &(x,y) */
+#define CV_SEQ_ELTYPE_TRIAN_ATR      0  /**< vertex of the binary tree   */
+#define CV_SEQ_ELTYPE_CONNECTED_COMP 0  /**< connected component  */
+#define CV_SEQ_ELTYPE_POINT3D        CV_32FC3  /**< (x,y,z)  */
+
+#define CV_SEQ_KIND_BITS        2
+#define CV_SEQ_KIND_MASK        (((1 << CV_SEQ_KIND_BITS) - 1)<<CV_SEQ_ELTYPE_BITS)
+
+/** types of sequences */
+#define CV_SEQ_KIND_GENERIC     (0 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_CURVE       (1 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_BIN_TREE    (2 << CV_SEQ_ELTYPE_BITS)
+
+/** types of sparse sequences (sets) */
+#define CV_SEQ_KIND_GRAPH       (1 << CV_SEQ_ELTYPE_BITS)
+#define CV_SEQ_KIND_SUBDIV2D    (2 << CV_SEQ_ELTYPE_BITS)
+
+#define CV_SEQ_FLAG_SHIFT       (CV_SEQ_KIND_BITS + CV_SEQ_ELTYPE_BITS)
+
+/** flags for curves */
+#define CV_SEQ_FLAG_CLOSED     (1 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_SIMPLE     (0 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_CONVEX     (0 << CV_SEQ_FLAG_SHIFT)
+#define CV_SEQ_FLAG_HOLE       (2 << CV_SEQ_FLAG_SHIFT)
+
+/** flags for graphs */
+#define CV_GRAPH_FLAG_ORIENTED (1 << CV_SEQ_FLAG_SHIFT)
+
+#define CV_GRAPH               CV_SEQ_KIND_GRAPH
+#define CV_ORIENTED_GRAPH      (CV_SEQ_KIND_GRAPH|CV_GRAPH_FLAG_ORIENTED)
+
+/** point sets */
+#define CV_SEQ_POINT_SET       (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT)
+#define CV_SEQ_POINT3D_SET     (CV_SEQ_KIND_GENERIC| CV_SEQ_ELTYPE_POINT3D)
+#define CV_SEQ_POLYLINE        (CV_SEQ_KIND_CURVE  | CV_SEQ_ELTYPE_POINT)
+#define CV_SEQ_POLYGON         (CV_SEQ_FLAG_CLOSED | CV_SEQ_POLYLINE )
+#define CV_SEQ_CONTOUR         CV_SEQ_POLYGON
+#define CV_SEQ_SIMPLE_POLYGON  (CV_SEQ_FLAG_SIMPLE | CV_SEQ_POLYGON  )
+
+/** chain-coded curves */
+#define CV_SEQ_CHAIN           (CV_SEQ_KIND_CURVE  | CV_SEQ_ELTYPE_CODE)
+#define CV_SEQ_CHAIN_CONTOUR   (CV_SEQ_FLAG_CLOSED | CV_SEQ_CHAIN)
+
+/** binary tree for the contour */
+#define CV_SEQ_POLYGON_TREE    (CV_SEQ_KIND_BIN_TREE  | CV_SEQ_ELTYPE_TRIAN_ATR)
+
+/** sequence of the connected components */
+#define CV_SEQ_CONNECTED_COMP  (CV_SEQ_KIND_GENERIC  | CV_SEQ_ELTYPE_CONNECTED_COMP)
+
+/** sequence of the integer numbers */
+#define CV_SEQ_INDEX           (CV_SEQ_KIND_GENERIC  | CV_SEQ_ELTYPE_INDEX)
+
+#define CV_SEQ_ELTYPE( seq )   ((seq)->flags & CV_SEQ_ELTYPE_MASK)
+#define CV_SEQ_KIND( seq )     ((seq)->flags & CV_SEQ_KIND_MASK )
+
+/** flag checking */
+#define CV_IS_SEQ_INDEX( seq )      ((CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_INDEX) && \
+                                     (CV_SEQ_KIND(seq) == CV_SEQ_KIND_GENERIC))
+
+#define CV_IS_SEQ_CURVE( seq )      (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE)
+#define CV_IS_SEQ_CLOSED( seq )     (((seq)->flags & CV_SEQ_FLAG_CLOSED) != 0)
+#define CV_IS_SEQ_CONVEX( seq )     0
+#define CV_IS_SEQ_HOLE( seq )       (((seq)->flags & CV_SEQ_FLAG_HOLE) != 0)
+#define CV_IS_SEQ_SIMPLE( seq )     1
+
+/** type checking macros */
+#define CV_IS_SEQ_POINT_SET( seq ) \
+    ((CV_SEQ_ELTYPE(seq) == CV_32SC2 || CV_SEQ_ELTYPE(seq) == CV_32FC2))
+
+#define CV_IS_SEQ_POINT_SUBSET( seq ) \
+    (CV_IS_SEQ_INDEX( seq ) || CV_SEQ_ELTYPE(seq) == CV_SEQ_ELTYPE_PPOINT)
+
+#define CV_IS_SEQ_POLYLINE( seq )   \
+    (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && CV_IS_SEQ_POINT_SET(seq))
+
+#define CV_IS_SEQ_POLYGON( seq )   \
+    (CV_IS_SEQ_POLYLINE(seq) && CV_IS_SEQ_CLOSED(seq))
+
+#define CV_IS_SEQ_CHAIN( seq )   \
+    (CV_SEQ_KIND(seq) == CV_SEQ_KIND_CURVE && (seq)->elem_size == 1)
+
+#define CV_IS_SEQ_CONTOUR( seq )   \
+    (CV_IS_SEQ_CLOSED(seq) && (CV_IS_SEQ_POLYLINE(seq) || CV_IS_SEQ_CHAIN(seq)))
+
+#define CV_IS_SEQ_CHAIN_CONTOUR( seq ) \
+    (CV_IS_SEQ_CHAIN( seq ) && CV_IS_SEQ_CLOSED( seq ))
+
+#define CV_IS_SEQ_POLYGON_TREE( seq ) \
+    (CV_SEQ_ELTYPE (seq) ==  CV_SEQ_ELTYPE_TRIAN_ATR &&    \
+    CV_SEQ_KIND( seq ) ==  CV_SEQ_KIND_BIN_TREE )
+
+#define CV_IS_GRAPH( seq )    \
+    (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_GRAPH)
+
+#define CV_IS_GRAPH_ORIENTED( seq )   \
+    (((seq)->flags & CV_GRAPH_FLAG_ORIENTED) != 0)
+
+#define CV_IS_SUBDIV2D( seq )  \
+    (CV_IS_SET(seq) && CV_SEQ_KIND((CvSet*)(seq)) == CV_SEQ_KIND_SUBDIV2D)
+
+/****************************************************************************************/
+/*                            Sequence writer & reader                                  */
+/****************************************************************************************/
+
+#define CV_SEQ_WRITER_FIELDS()                                     \
+    int          header_size;                                      \
+    CvSeq*       seq;        /**< the sequence written */            \
+    CvSeqBlock*  block;      /**< current block */                   \
+    schar*       ptr;        /**< pointer to free space */           \
+    schar*       block_min;  /**< pointer to the beginning of block*/\
+    schar*       block_max;  /**< pointer to the end of block */
+
+typedef struct CvSeqWriter
+{
+    CV_SEQ_WRITER_FIELDS()
+}
+CvSeqWriter;
+
+
+#define CV_SEQ_READER_FIELDS()                                      \
+    int          header_size;                                       \
+    CvSeq*       seq;        /**< sequence being read */              \
+    CvSeqBlock*  block;      /**< current block */                    \
+    schar*       ptr;        /**< pointer to the next element to be read */ \
+    schar*       block_min;  /**< pointer to the beginning of block */\
+    schar*       block_max;  /**< pointer to the end of block */      \
+    int          delta_index;/**< = seq->first->start_index   */      \
+    schar*       prev_elem;  /**< pointer to previous element */
+
+typedef struct CvSeqReader
+{
+    CV_SEQ_READER_FIELDS()
+}
+CvSeqReader;
+
+/****************************************************************************************/
+/*                                Operations on sequences                               */
+/****************************************************************************************/
+
+#define  CV_SEQ_ELEM( seq, elem_type, index )                    \
+/** the assert gives some guarantee that the <seq> parameter is valid */ \
+(   assert(sizeof((seq)->first[0]) == sizeof(CvSeqBlock) &&      \
+    (seq)->elem_size == sizeof(elem_type)),                      \
+    (elem_type*)((seq)->first && (unsigned)index <               \
+    (unsigned)((seq)->first->count) ?                            \
+    (seq)->first->data + (index) * sizeof(elem_type) :           \
+    cvGetSeqElem( (CvSeq*)(seq), (index) )))
+#define CV_GET_SEQ_ELEM( elem_type, seq, index ) CV_SEQ_ELEM( (seq), elem_type, (index) )
+
+/** Add element to sequence: */
+#define CV_WRITE_SEQ_ELEM_VAR( elem_ptr, writer )     \
+{                                                     \
+    if( (writer).ptr >= (writer).block_max )          \
+    {                                                 \
+        cvCreateSeqBlock( &writer);                   \
+    }                                                 \
+    memcpy((writer).ptr, elem_ptr, (writer).seq->elem_size);\
+    (writer).ptr += (writer).seq->elem_size;          \
+}
+
+#define CV_WRITE_SEQ_ELEM( elem, writer )             \
+{                                                     \
+    assert( (writer).seq->elem_size == sizeof(elem)); \
+    if( (writer).ptr >= (writer).block_max )          \
+    {                                                 \
+        cvCreateSeqBlock( &writer);                   \
+    }                                                 \
+    assert( (writer).ptr <= (writer).block_max - sizeof(elem));\
+    memcpy((writer).ptr, &(elem), sizeof(elem));      \
+    (writer).ptr += sizeof(elem);                     \
+}
+
+
+/** Move reader position forward: */
+#define CV_NEXT_SEQ_ELEM( elem_size, reader )                 \
+{                                                             \
+    if( ((reader).ptr += (elem_size)) >= (reader).block_max ) \
+    {                                                         \
+        cvChangeSeqBlock( &(reader), 1 );                     \
+    }                                                         \
+}
+
+
+/** Move reader position backward: */
+#define CV_PREV_SEQ_ELEM( elem_size, reader )                \
+{                                                            \
+    if( ((reader).ptr -= (elem_size)) < (reader).block_min ) \
+    {                                                        \
+        cvChangeSeqBlock( &(reader), -1 );                   \
+    }                                                        \
+}
+
+/** Read element and move read position forward: */
+#define CV_READ_SEQ_ELEM( elem, reader )                       \
+{                                                              \
+    assert( (reader).seq->elem_size == sizeof(elem));          \
+    memcpy( &(elem), (reader).ptr, sizeof((elem)));            \
+    CV_NEXT_SEQ_ELEM( sizeof(elem), reader )                   \
+}
+
+/** Read element and move read position backward: */
+#define CV_REV_READ_SEQ_ELEM( elem, reader )                     \
+{                                                                \
+    assert( (reader).seq->elem_size == sizeof(elem));            \
+    memcpy(&(elem), (reader).ptr, sizeof((elem)));               \
+    CV_PREV_SEQ_ELEM( sizeof(elem), reader )                     \
+}
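+
+/* A write/read round-trip sketch with the macros above (all helpers are
+   declared in core_c.h):
+
+       CvMemStorage* storage = cvCreateMemStorage(0);
+       CvSeqWriter writer;
+       CvSeqReader reader;
+       CvSeq* seq;
+       int i, val;
+
+       cvStartWriteSeq(CV_SEQ_ELTYPE_INDEX, sizeof(CvSeq), sizeof(int),
+                       storage, &writer);
+       for( i = 0; i < 10; i++ )
+           CV_WRITE_SEQ_ELEM(i, writer);
+       seq = cvEndWriteSeq(&writer);
+
+       cvStartReadSeq(seq, &reader, 0);
+       for( i = 0; i < seq->total; i++ )
+           CV_READ_SEQ_ELEM(val, reader);  // val == i on each pass
+       cvReleaseMemStorage(&storage);
+*/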
+
+
+#define CV_READ_CHAIN_POINT( _pt, reader )                              \
+{                                                                       \
+    (_pt) = (reader).pt;                                                \
+    if( (reader).ptr )                                                  \
+    {                                                                   \
+        CV_READ_SEQ_ELEM( (reader).code, (reader));                     \
+        assert( ((reader).code & ~7) == 0 );                            \
+        (reader).pt.x += (reader).deltas[(int)(reader).code][0];        \
+        (reader).pt.y += (reader).deltas[(int)(reader).code][1];        \
+    }                                                                   \
+}
+
+#define CV_CURRENT_POINT( reader )  (*((CvPoint*)((reader).ptr)))
+#define CV_PREV_POINT( reader )     (*((CvPoint*)((reader).prev_elem)))
+
+#define CV_READ_EDGE( pt1, pt2, reader )               \
+{                                                      \
+    assert( sizeof(pt1) == sizeof(CvPoint) &&          \
+            sizeof(pt2) == sizeof(CvPoint) &&          \
+            reader.seq->elem_size == sizeof(CvPoint)); \
+    (pt1) = CV_PREV_POINT( reader );                   \
+    (pt2) = CV_CURRENT_POINT( reader );                \
+    (reader).prev_elem = (reader).ptr;                 \
+    CV_NEXT_SEQ_ELEM( sizeof(CvPoint), (reader));      \
+}
+
+/************ Graph macros ************/
+
+/** Return next graph edge for given vertex: */
+#define  CV_NEXT_GRAPH_EDGE( edge, vertex )                              \
+     (assert((edge)->vtx[0] == (vertex) || (edge)->vtx[1] == (vertex)),  \
+      (edge)->next[(edge)->vtx[1] == (vertex)])
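+
+/* Usage sketch: walk every edge incident to a vertex, following whichever
+   next[] slot belongs to this vertex (vtx is assumed to be a CvGraphVtx*
+   that lives in the graph):
+
+       CvGraphEdge* e;
+       for( e = vtx->first; e != 0; e = CV_NEXT_GRAPH_EDGE(e, vtx) )
+       {
+           // either e->vtx[0] or e->vtx[1] equals vtx here
+       }
+*/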
+
+
+
+/****************************************************************************************\
+*             Data structures for persistence (a.k.a. serialization) functionality       *
+\****************************************************************************************/
+
+/** "black box" file storage */
+typedef struct CvFileStorage CvFileStorage;
+
+/** Storage flags: */
+#define CV_STORAGE_READ          0
+#define CV_STORAGE_WRITE         1
+#define CV_STORAGE_WRITE_TEXT    CV_STORAGE_WRITE
+#define CV_STORAGE_WRITE_BINARY  CV_STORAGE_WRITE
+#define CV_STORAGE_APPEND        2
+#define CV_STORAGE_MEMORY        4
+#define CV_STORAGE_FORMAT_MASK   (7<<3)
+#define CV_STORAGE_FORMAT_AUTO   0
+#define CV_STORAGE_FORMAT_XML    8
+#define CV_STORAGE_FORMAT_YAML  16
+#define CV_STORAGE_FORMAT_JSON  24
+#define CV_STORAGE_BASE64       64
+#define CV_STORAGE_WRITE_BASE64  (CV_STORAGE_BASE64 | CV_STORAGE_WRITE)
+
+/** @brief List of attributes.
+
+In the current implementation, attributes are used to pass extra parameters when writing user
+objects (see cvWrite). XML attributes inside tags are not supported, aside from the object type
+specification (type_id attribute).
+@see cvAttrList, cvAttrValue
+ */
+typedef struct CvAttrList
+{
+    const char** attr;         /**< NULL-terminated array of (attribute_name,attribute_value) pairs. */
+    struct CvAttrList* next;   /**< Pointer to next chunk of the attributes list.                    */
+}
+CvAttrList;
+
+/** initializes CvAttrList structure */
+CV_INLINE CvAttrList cvAttrList( const char** attr CV_DEFAULT(NULL),
+                                 CvAttrList* next CV_DEFAULT(NULL) )
+{
+    CvAttrList l;
+    l.attr = attr;
+    l.next = next;
+
+    return l;
+}
+
+struct CvTypeInfo;
+
+#define CV_NODE_NONE        0
+#define CV_NODE_INT         1
+#define CV_NODE_INTEGER     CV_NODE_INT
+#define CV_NODE_REAL        2
+#define CV_NODE_FLOAT       CV_NODE_REAL
+#define CV_NODE_STR         3
+#define CV_NODE_STRING      CV_NODE_STR
+#define CV_NODE_REF         4 /**< not used */
+#define CV_NODE_SEQ         5
+#define CV_NODE_MAP         6
+#define CV_NODE_TYPE_MASK   7
+
+#define CV_NODE_TYPE(flags)  ((flags) & CV_NODE_TYPE_MASK)
+
+/** file node flags */
+#define CV_NODE_FLOW        8 /**< Used only for writing structures in YAML format. */
+#define CV_NODE_USER        16
+#define CV_NODE_EMPTY       32
+#define CV_NODE_NAMED       64
+
+#define CV_NODE_IS_INT(flags)        (CV_NODE_TYPE(flags) == CV_NODE_INT)
+#define CV_NODE_IS_REAL(flags)       (CV_NODE_TYPE(flags) == CV_NODE_REAL)
+#define CV_NODE_IS_STRING(flags)     (CV_NODE_TYPE(flags) == CV_NODE_STRING)
+#define CV_NODE_IS_SEQ(flags)        (CV_NODE_TYPE(flags) == CV_NODE_SEQ)
+#define CV_NODE_IS_MAP(flags)        (CV_NODE_TYPE(flags) == CV_NODE_MAP)
+#define CV_NODE_IS_COLLECTION(flags) (CV_NODE_TYPE(flags) >= CV_NODE_SEQ)
+#define CV_NODE_IS_FLOW(flags)       (((flags) & CV_NODE_FLOW) != 0)
+#define CV_NODE_IS_EMPTY(flags)      (((flags) & CV_NODE_EMPTY) != 0)
+#define CV_NODE_IS_USER(flags)       (((flags) & CV_NODE_USER) != 0)
+#define CV_NODE_HAS_NAME(flags)      (((flags) & CV_NODE_NAMED) != 0)
+
+#define CV_NODE_SEQ_SIMPLE 256
+#define CV_NODE_SEQ_IS_SIMPLE(seq) (((seq)->flags & CV_NODE_SEQ_SIMPLE) != 0)
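+
+/* A usage sketch: dispatch on the type of a node (a CvFileNode, defined below)
+   that was looked up in a file storage, e.g. via cvGetFileNodeByName:
+
+       if( CV_NODE_IS_INT(node->tag) )
+       {
+           int value = node->data.i;
+           // ...
+       }
+       else if( CV_NODE_IS_SEQ(node->tag) )
+       {
+           CvSeq* elements = node->data.seq;
+           // ...
+       }
+*/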
+
+typedef struct CvString
+{
+    int len;
+    char* ptr;
+}
+CvString;
+
+/** All the keys (names) of elements in the file storage being read
+   are stored in a hash table to speed up lookup operations: */
+typedef struct CvStringHashNode
+{
+    unsigned hashval;
+    CvString str;
+    struct CvStringHashNode* next;
+}
+CvStringHashNode;
+
+typedef struct CvGenericHash CvFileNodeHash;
+
+/** Basic element of the file storage - scalar or collection: */
+typedef struct CvFileNode
+{
+    int tag;
+    struct CvTypeInfo* info; /**< type information
+            (only for user-defined objects; for others it is 0) */
+    union
+    {
+        double f; /**< scalar floating-point number */
+        int i;    /**< scalar integer number */
+        CvString str; /**< text string */
+        CvSeq* seq; /**< sequence (ordered collection of file nodes) */
+        CvFileNodeHash* map; /**< map (collection of named file nodes) */
+    } data;
+}
+CvFileNode;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef int (CV_CDECL *CvIsInstanceFunc)( const void* struct_ptr );
+typedef void (CV_CDECL *CvReleaseFunc)( void** struct_dblptr );
+typedef void* (CV_CDECL *CvReadFunc)( CvFileStorage* storage, CvFileNode* node );
+typedef void (CV_CDECL *CvWriteFunc)( CvFileStorage* storage, const char* name,
+                                      const void* struct_ptr, CvAttrList attributes );
+typedef void* (CV_CDECL *CvCloneFunc)( const void* struct_ptr );
+#ifdef __cplusplus
+}
+#endif
+
+/** @brief Type information
+
+The structure contains information about one of the standard or user-defined types. Instances of the
+type may or may not contain a pointer to the corresponding CvTypeInfo structure. In any case, there
+is a way to find the type info structure for a given object using the cvTypeOf function.
+Alternatively, type info can be found by type name using cvFindType, which is used when an object
+is read from file storage. The user can register a new type with cvRegisterType that adds the type
+information structure into the beginning of the type list. Thus, it is possible to create
+specialized types from generic standard types and override the basic methods.
+ */
+typedef struct CvTypeInfo
+{
+    int flags; /**< not used */
+    int header_size; /**< sizeof(CvTypeInfo) */
+    struct CvTypeInfo* prev; /**< previous registered type in the list */
+    struct CvTypeInfo* next; /**< next registered type in the list */
+    const char* type_name; /**< type name, written to file storage */
+    CvIsInstanceFunc is_instance; /**< checks if the passed object belongs to the type */
+    CvReleaseFunc release; /**< releases object (memory etc.) */
+    CvReadFunc read; /**< reads object from file storage */
+    CvWriteFunc write; /**< writes object to file storage */
+    CvCloneFunc clone; /**< creates a copy of the object */
+}
+CvTypeInfo;
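+
+/* A registration sketch for a hypothetical user type "my_type"; the callback
+   functions are assumed to be implemented elsewhere. cvRegisterType prepends
+   the entry to the global type list:
+
+       static CvTypeInfo my_type_info = { 0, sizeof(CvTypeInfo), 0, 0, "my_type",
+           my_is_instance, my_release, my_read, my_write, my_clone };
+       cvRegisterType( &my_type_info );
+*/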
+
+
+/**** System data types ******/
+
+typedef struct CvPluginFuncInfo
+{
+    void** func_addr;
+    void* default_func_addr;
+    const char* func_names;
+    int search_modules;
+    int loaded_from;
+}
+CvPluginFuncInfo;
+
+typedef struct CvModuleInfo
+{
+    struct CvModuleInfo* next;
+    const char* name;
+    const char* version;
+    CvPluginFuncInfo* func_tab;
+}
+CvModuleInfo;
+
+/** @} */
+
+#endif /*OPENCV_CORE_TYPES_H*/
+
+/* End of file. */

+ 1258 - 0
ThresholdTools/include/opencv2/core/utility.hpp

@@ -0,0 +1,1258 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_UTILITY_H
+#define OPENCV_CORE_UTILITY_H
+
+#ifndef __cplusplus
+#  error utility.hpp header must be compiled as C++
+#endif
+
+#if defined(check)
+#  warning Detected Apple 'check' macro definition, it can cause build conflicts. Please, include this header before any Apple headers.
+#endif
+
+#include "opencv2/core.hpp"
+#include <ostream>
+
+#ifdef CV_CXX11
+#include <functional>
+#endif
+
+namespace cv
+{
+
+#ifdef CV_COLLECT_IMPL_DATA
+CV_EXPORTS void setImpl(int flags); // set implementation flags and reset storage arrays
+CV_EXPORTS void addImpl(int flag, const char* func = 0); // add implementation and function name to storage arrays
+// Get stored implementation flags and functions names arrays
+// Each implementation entry correspond to function name entry, so you can find which implementation was executed in which function
+CV_EXPORTS int getImpl(std::vector<int> &impl, std::vector<String> &funName);
+
+CV_EXPORTS bool useCollection(); // return implementation collection state
+CV_EXPORTS void setUseCollection(bool flag); // set implementation collection state
+
+#define CV_IMPL_PLAIN  0x01 // native CPU OpenCV implementation
+#define CV_IMPL_OCL    0x02 // OpenCL implementation
+#define CV_IMPL_IPP    0x04 // IPP implementation
+#define CV_IMPL_MT     0x10 // multithreaded implementation
+
+#define CV_IMPL_ADD(impl)                                                   \
+    if(cv::useCollection())                                                 \
+    {                                                                       \
+        cv::addImpl(impl, CV_Func);                                         \
+    }
+#else
+#define CV_IMPL_ADD(impl)
+#endif
+
+//! @addtogroup core_utils
+//! @{
+
+/** @brief  Automatically Allocated Buffer Class
+
+ The class is used for temporary buffers in functions and methods.
+ If a temporary buffer is usually small (a few kilobytes of memory),
+ but its size depends on the parameters, it makes sense to create a small
+ fixed-size array on the stack and use it if it's large enough. If the required buffer size
+ is larger than the fixed size, another buffer of sufficient size is allocated dynamically
+ and released after the processing. Therefore, in typical cases, when the buffer size is small,
+ there is no overhead associated with malloc()/free().
+ At the same time, there is no limit on the size of processed data.
+
+ This is what AutoBuffer does. The template takes two parameters: the type of the buffer elements and
+ the number of stack-allocated elements. Here is how the class is used:
+
+ \code
+ void my_func(const cv::Mat& m)
+ {
+    cv::AutoBuffer<float> buf(1000); // create automatic buffer containing 1000 floats
+
+    buf.allocate(m.rows); // if m.rows <= 1000, the pre-allocated buffer is used,
+                          // otherwise the buffer of "m.rows" floats will be allocated
+                          // dynamically and deallocated in cv::AutoBuffer destructor
+    ...
+ }
+ \endcode
+*/
+template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
+{
+public:
+    typedef _Tp value_type;
+
+    //! the default constructor
+    AutoBuffer();
+    //! constructor taking the real buffer size
+    explicit AutoBuffer(size_t _size);
+
+    //! the copy constructor
+    AutoBuffer(const AutoBuffer<_Tp, fixed_size>& buf);
+    //! the assignment operator
+    AutoBuffer<_Tp, fixed_size>& operator = (const AutoBuffer<_Tp, fixed_size>& buf);
+
+    //! destructor. calls deallocate()
+    ~AutoBuffer();
+
+    //! allocates a new buffer of size _size; if _size is small enough, the stack-allocated buffer is used
+    void allocate(size_t _size);
+    //! deallocates the buffer if it was dynamically allocated
+    void deallocate();
+    //! resizes the buffer and preserves the content
+    void resize(size_t _size);
+    //! returns the current buffer size
+    size_t size() const;
+    //! returns pointer to the real buffer, stack-allocated or heap-allocated
+    operator _Tp* ();
+    //! returns read-only pointer to the real buffer, stack-allocated or heap-allocated
+    operator const _Tp* () const;
+
+protected:
+    //! pointer to the real buffer, can point to buf if the buffer is small enough
+    _Tp* ptr;
+    //! size of the real buffer
+    size_t sz;
+    //! pre-allocated buffer. At least 1 element to satisfy C++ standard requirements
+    _Tp buf[(fixed_size > 0) ? fixed_size : 1];
+};
+
+/**  @brief Sets/resets the break-on-error mode.
+
+When the break-on-error mode is set, the default error handler issues a hardware exception, which
+can make debugging more convenient.
+
+\return the previous state
+ */
+CV_EXPORTS bool setBreakOnError(bool flag);
+
+extern "C" typedef int (*ErrorCallback)( int status, const char* func_name,
+                                       const char* err_msg, const char* file_name,
+                                       int line, void* userdata );
+
+
+/** @brief Sets the new error handler and the optional user data.
+
+  The function sets the new error handler, called from cv::error().
+
+  \param errCallback the new error handler. If NULL, the default error handler is used.
+  \param userdata the optional user data pointer, passed to the callback.
+  \param prevUserdata the optional output parameter where the previous user data pointer is stored
+
+  \return the previous error handler
+*/
+CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdata=0, void** prevUserdata=0);
+
+CV_EXPORTS String tempfile( const char* suffix = 0);
+CV_EXPORTS void glob(String pattern, std::vector<String>& result, bool recursive = false);
+
+/** @brief OpenCV will try to set the number of threads for the next parallel region.
+
+If threads == 0, OpenCV will disable threading optimizations and run all its functions
+sequentially. Passing threads \< 0 will reset the number of threads to the system default. This
+function must be called outside of any parallel region.
+
+OpenCV will try to run its functions with the specified number of threads, but the exact behaviour
+depends on the framework:
+-   `TBB` - User-defined parallel constructions will run with the same number of threads, unless
+    another number is specified. If the user later creates their own scheduler, OpenCV will use it.
+-   `OpenMP` - No special defined behaviour.
+-   `Concurrency` - If threads == 1, OpenCV will disable threading optimizations and run its
+    functions sequentially.
+-   `GCD` - Supports only values \<= 0.
+-   `C=` - No special defined behaviour.
+@param nthreads Number of threads used by OpenCV.
+@sa getNumThreads, getThreadNum
+ */
+CV_EXPORTS_W void setNumThreads(int nthreads);
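+
+// A minimal sketch of the semantics documented above: limit OpenCV to four
+// worker threads, then restore the system default:
+//
+//     cv::setNumThreads(4);
+//     // ... run parallel OpenCV functions ...
+//     cv::setNumThreads(-1);  // threads < 0 resets to the system default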
+
+/** @brief Returns the number of threads used by OpenCV for parallel regions.
+
+Always returns 1 if OpenCV is built without threading support.
+
+The exact meaning of return value depends on the threading framework used by OpenCV library:
+- `TBB` - The number of threads that OpenCV will try to use for parallel regions. If there is
+  any tbb::thread_scheduler_init in user code conflicting with OpenCV, then the function returns
+  the default number of threads used by the TBB library.
+- `OpenMP` - An upper bound on the number of threads that could be used to form a new team.
+- `Concurrency` - The number of threads that OpenCV will try to use for parallel regions.
+- `GCD` - Unsupported; returns the GCD thread pool limit (512) for compatibility.
+- `C=` - The number of threads that OpenCV will try to use for parallel regions, if
+  setNumThreads was called before with threads \> 0; otherwise returns the number of logical
+  CPUs available for the process.
+@sa setNumThreads, getThreadNum
+ */
+CV_EXPORTS_W int getNumThreads();
+
+/** @brief Returns the index of the currently executed thread within the current parallel region. Always
+returns 0 if called outside of parallel region.
+
+@deprecated The current implementation does not correspond to this documentation.
+
+The exact meaning of the return value depends on the threading framework used by OpenCV library:
+- `TBB` - Unsupported with the current 4.1 TBB release. It may be supported in the future.
+- `OpenMP` - The thread number, within the current team, of the calling thread.
+- `Concurrency` - An ID for the virtual processor that the current context is executing on (0
+  for the master thread and a unique number for other threads, but not necessarily 1,2,3,...).
+- `GCD` - The system calling thread's ID. Never returns 0 inside a parallel region.
+- `C=` - The index of the current parallel task.
+@sa setNumThreads, getNumThreads
+ */
+CV_EXPORTS_W int getThreadNum();
+
+/** @brief Returns full configuration time cmake output.
+
+Returned value is raw cmake output including version control system revision, compiler version,
+compiler flags, enabled modules and third party libraries, etc. Output format depends on target
+architecture.
+ */
+CV_EXPORTS_W const String& getBuildInformation();
+
+/** @brief Returns library version string
+
+For example "3.4.1-dev".
+
+@sa getMajorVersion, getMinorVersion, getRevisionVersion
+*/
+CV_EXPORTS_W String getVersionString();
+
+/** @brief Returns major library version */
+CV_EXPORTS_W int getVersionMajor();
+
+/** @brief Returns minor library version */
+CV_EXPORTS_W int getVersionMinor();
+
+/** @brief Returns revision field of the library version */
+CV_EXPORTS_W int getVersionRevision();
+
+/** @brief Returns the number of ticks.
+
+The function returns the number of ticks after a certain event (for example, when the machine was
+turned on). It can be used to initialize an RNG or to measure a function's execution time by reading
+the tick count before and after the function call.
+@sa getTickFrequency, TickMeter
+ */
+CV_EXPORTS_W int64 getTickCount();
+
+/** @brief Returns the number of ticks per second.
+
+The function returns the number of ticks per second. That is, the following code computes the
+execution time in seconds:
+@code
+    double t = (double)getTickCount();
+    // do something ...
+    t = ((double)getTickCount() - t)/getTickFrequency();
+@endcode
+@sa getTickCount, TickMeter
+ */
+CV_EXPORTS_W double getTickFrequency();
+
+/** @brief A class to measure elapsed time.
+
+The class measures elapsed time by counting clock ticks. For example, the following code computes the
+execution time in seconds:
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm.getTimeSec();
+@endcode
+
+It is also possible to compute the average time over multiple runs:
+@code
+TickMeter tm;
+for (int i = 0; i < 100; i++)
+{
+    tm.start();
+    // do something ...
+    tm.stop();
+}
+double average_time = tm.getTimeSec() / tm.getCounter();
+std::cout << "Average time in second per iteration is: " << average_time << std::endl;
+@endcode
+@sa getTickCount, getTickFrequency
+*/
+
+class CV_EXPORTS_W TickMeter
+{
+public:
+    //! the default constructor
+    CV_WRAP TickMeter()
+    {
+        reset();
+    }
+
+    /**
+    starts counting ticks.
+    */
+    CV_WRAP void start()
+    {
+        startTime = cv::getTickCount();
+    }
+
+    /**
+    stops counting ticks.
+    */
+    CV_WRAP void stop()
+    {
+        int64 time = cv::getTickCount();
+        if (startTime == 0)
+            return;
+        ++counter;
+        sumTime += (time - startTime);
+        startTime = 0;
+    }
+
+    /**
+    returns counted ticks.
+    */
+    CV_WRAP int64 getTimeTicks() const
+    {
+        return sumTime;
+    }
+
+    /**
+    returns passed time in microseconds.
+    */
+    CV_WRAP double getTimeMicro() const
+    {
+        return getTimeMilli()*1e3;
+    }
+
+    /**
+    returns passed time in milliseconds.
+    */
+    CV_WRAP double getTimeMilli() const
+    {
+        return getTimeSec()*1e3;
+    }
+
+    /**
+    returns passed time in seconds.
+    */
+    CV_WRAP double getTimeSec() const
+    {
+        return (double)getTimeTicks() / getTickFrequency();
+    }
+
+    /**
+    returns internal counter value.
+    */
+    CV_WRAP int64 getCounter() const
+    {
+        return counter;
+    }
+
+    /**
+    resets internal values.
+    */
+    CV_WRAP void reset()
+    {
+        startTime = 0;
+        sumTime = 0;
+        counter = 0;
+    }
+
+private:
+    int64 counter;
+    int64 sumTime;
+    int64 startTime;
+};
+
+/** @brief output operator
+@code
+TickMeter tm;
+tm.start();
+// do something ...
+tm.stop();
+std::cout << tm;
+@endcode
+*/
+
+static inline
+std::ostream& operator << (std::ostream& out, const TickMeter& tm)
+{
+    return out << tm.getTimeSec() << "sec";
+}
+
+/** @brief Returns the number of CPU ticks.
+
+The function returns the current number of CPU ticks on some architectures (such as x86, x64,
+PowerPC). On other platforms the function is equivalent to getTickCount. It can also be used for
+very accurate time measurements, as well as for RNG initialization. Note that in the case of multi-CPU
+systems a thread from which getCPUTickCount is called can be suspended and resumed on another CPU
+with its own counter. So, theoretically (and practically) subsequent calls to the function do
+not necessarily return monotonically increasing values. Also, since a modern CPU varies the CPU
+frequency depending on the load, the number of CPU clocks spent in some code cannot be directly
+converted to time units. Therefore, getTickCount is generally a preferable solution for measuring
+execution time.
+ */
+CV_EXPORTS_W int64 getCPUTickCount();
+
+/** @brief Returns true if the specified feature is supported by the host hardware.
+
+The function returns true if the host hardware supports the specified feature. When user calls
+setUseOptimized(false), the subsequent calls to checkHardwareSupport() will return false until
+setUseOptimized(true) is called. This way user can dynamically switch on and off the optimized code
+in OpenCV.
+@param feature The feature of interest, one of cv::CpuFeatures
+ */
+CV_EXPORTS_W bool checkHardwareSupport(int feature);
+
+/** @brief Returns feature name by ID
+
+Returns empty string if feature is not defined
+*/
+CV_EXPORTS_W String getHardwareFeatureName(int feature);
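+
+// A usage sketch (CV_CPU_SSE2 is one of the cv::CpuFeatures constants):
+//
+//     if (cv::checkHardwareSupport(CV_CPU_SSE2))
+//         std::cout << cv::getHardwareFeatureName(CV_CPU_SSE2) << " is available\n";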
+
+/** @brief Returns the number of logical CPUs available for the process.
+ */
+CV_EXPORTS_W int getNumberOfCPUs();
+
+
+/** @brief Aligns a pointer to the specified number of bytes.
+
+The function returns the aligned pointer of the same type as the input pointer:
+\f[\texttt{(_Tp*)(((size_t)ptr + n-1) & -n)}\f]
+@param ptr Pointer to align.
+@param n Alignment size that must be a power of two.
+ */
+template<typename _Tp> static inline _Tp* alignPtr(_Tp* ptr, int n=(int)sizeof(_Tp))
+{
+    CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
+    return (_Tp*)(((size_t)ptr + n-1) & -n);
+}
+
+/** @brief Aligns a buffer size to the specified number of bytes.
+
+The function returns the minimum number that is greater than or equal to sz and is divisible by n:
+\f[\texttt{(sz + n-1) & -n}\f]
+@param sz Buffer size to align.
+@param n Alignment size that must be a power of two.
+ */
+static inline size_t alignSize(size_t sz, int n)
+{
+    CV_DbgAssert((n & (n - 1)) == 0); // n is a power of 2
+    return (sz + n-1) & -n;
+}
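+
+// A usage sketch combining both helpers: carve a 16-byte aligned float area of
+// `count` elements (a hypothetical size) out of a raw byte buffer:
+//
+//     std::vector<uchar> raw(alignSize(count * sizeof(float), 16) + 16);
+//     float* data = alignPtr(reinterpret_cast<float*>(raw.data()), 16);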
+
+/** @brief Integer division with the result rounded up.
+
+Use this function instead of `ceil((float)a / b)` expressions.
+
+@sa alignSize
+*/
+static inline int divUp(int a, unsigned int b)
+{
+    CV_DbgAssert(a >= 0);
+    return (a + b - 1) / b;
+}
+/** @overload */
+static inline size_t divUp(size_t a, unsigned int b)
+{
+    return (a + b - 1) / b;
+}
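+
+// For example (rounding up, as opposed to plain integer division):
+//
+//     divUp(10, 4) == 3;  // whereas 10 / 4 == 2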
+
+/** @brief Enables or disables the optimized code.
+
+The function can be used to dynamically turn on and off optimized code (code that uses SSE2, AVX,
+and other instructions on the platforms that support it). It sets a global flag that is further
+checked by OpenCV functions. Since the flag is not checked in the inner OpenCV loops, it is only
+safe to call the function on the very top level in your application where you can be sure that no
+other OpenCV function is currently executed.
+
+By default, the optimized code is enabled unless you disable it in CMake. The current status can be
+retrieved using useOptimized.
+@param onoff The boolean flag specifying whether the optimized code should be used (onoff=true)
+or not (onoff=false).
+ */
+CV_EXPORTS_W void setUseOptimized(bool onoff);
+
+/** @brief Returns the status of optimized code usage.
+
+The function returns true if the optimized code is enabled. Otherwise, it returns false.
+ */
+CV_EXPORTS_W bool useOptimized();
+
+static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); }
+
+/////////////////////////////// Parallel Primitives //////////////////////////////////
+
+/** @brief Base class for parallel data processors
+*/
+class CV_EXPORTS ParallelLoopBody
+{
+public:
+    virtual ~ParallelLoopBody();
+    virtual void operator() (const Range& range) const = 0;
+};
+
+/** @brief Parallel data processor
+*/
+CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
+
+#ifdef CV_CXX11
+class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody
+{
+private:
+    std::function<void(const Range&)> m_functor;
+public:
+    ParallelLoopBodyLambdaWrapper(std::function<void(const Range&)> functor) :
+        m_functor(functor)
+    { }
+
+    virtual void operator() (const cv::Range& range) const CV_OVERRIDE
+    {
+        m_functor(range);
+    }
+};
+
+inline void parallel_for_(const Range& range, std::function<void(const Range&)> functor, double nstripes=-1.)
+{
+    parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes);
+}
+#endif
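+
+// A minimal usage sketch for the C++11 lambda overload above. Assumes a
+// CV_8UC1 cv::Mat `img`; rows are inverted in parallel:
+//
+//     cv::parallel_for_(cv::Range(0, img.rows), [&](const cv::Range& range)
+//     {
+//         for (int r = range.start; r < range.end; r++)
+//         {
+//             uchar* p = img.ptr<uchar>(r);
+//             for (int c = 0; c < img.cols; c++)
+//                 p[c] = (uchar)(255 - p[c]);
+//         }
+//     });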
+
+/////////////////////////////// forEach method of cv::Mat ////////////////////////////
+template<typename _Tp, typename Functor> inline
+void Mat::forEach_impl(const Functor& operation) {
+    if (false) {
+        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(0));
+        // If your compiler fails on this line, please check that your functor
+        // signature is:
+        //     (_Tp&, const int*)   <- multi-dimensional
+        //  or (_Tp&, void*)        <- in case you don't need the current index
+    }
+
+    CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
+    const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
+
+    class PixelOperationWrapper :public ParallelLoopBody
+    {
+    public:
+        PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
+            : mat(frame), op(_operation) {}
+        virtual ~PixelOperationWrapper(){}
+        //! Overloaded virtual operator:
+        //  converts a range call into row calls.
+        virtual void operator()(const Range &range) const CV_OVERRIDE
+        {
+            const int DIMS = mat->dims;
+            const int COLS = mat->size[DIMS - 1];
+            if (DIMS <= 2) {
+                for (int row = range.start; row < range.end; ++row) {
+                    this->rowCall2(row, COLS);
+                }
+            } else {
+                std::vector<int> idx(DIMS); /// idx is modified in this->rowCall
+                idx[DIMS - 2] = range.start - 1;
+
+                for (int line_num = range.start; line_num < range.end; ++line_num) {
+                    idx[DIMS - 2]++;
+                    for (int i = DIMS - 2; i >= 0; --i) {
+                        if (idx[i] >= mat->size[i]) {
+                            idx[i - 1] += idx[i] / mat->size[i];
+                            idx[i] %= mat->size[i];
+                            continue; // carry-over;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    this->rowCall(&idx[0], COLS, DIMS);
+                }
+            }
+        }
+    private:
+        Mat_<_Tp>* const mat;
+        const Functor op;
+        //! Calls the operator for each element in this row.
+        inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
+            int &col = idx[DIMS - 1];
+            col = 0;
+            _Tp* pixel = &(mat->template at<_Tp>(idx));
+
+            while (col < COLS) {
+                op(*pixel, const_cast<const int*>(idx));
+                pixel++; col++;
+            }
+            col = 0;
+        }
+        //! Calls the operator for each element in this row. Special version for 2d Mat.
+        inline void rowCall2(const int row, const int COLS) const {
+            union Index{
+                int body[2];
+                operator const int*() const {
+                    return reinterpret_cast<const int*>(this);
+                }
+                int& operator[](const int i) {
+                    return body[i];
+                }
+            } idx = {{row, 0}};
+            // The special union is needed to avoid
+            // "error: array subscript is above array bounds [-Werror=array-bounds]"
+            // when the functor `op` is called in a way that accesses idx[3].
+
+            _Tp* pixel = &(mat->template at<_Tp>(idx));
+            const _Tp* const pixel_end = pixel + COLS;
+            while(pixel < pixel_end) {
+                op(*pixel++, static_cast<const int*>(idx));
+                idx[1]++;
+            }
+        }
+        PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
+            CV_Assert(false);
+            // We cannot remove this implementation because of Visual Studio warning C4822.
+            return *this;
+        }
+    };
+
+    parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
+}
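+
+// A usage sketch for the public Mat::forEach wrapper, which dispatches to
+// forEach_impl above. Assumes a CV_8UC1 cv::Mat `img`:
+//
+//     img.forEach<uchar>([](uchar& pixel, const int* pos)
+//     {
+//         pixel = (uchar)(255 - pixel);  // pos[0], pos[1] hold the row and column
+//     });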
+
+/////////////////////////// Synchronization Primitives ///////////////////////////////
+
+class CV_EXPORTS Mutex
+{
+public:
+    Mutex();
+    ~Mutex();
+    Mutex(const Mutex& m);
+    Mutex& operator = (const Mutex& m);
+
+    void lock();
+    bool trylock();
+    void unlock();
+
+    struct Impl;
+protected:
+    Impl* impl;
+};
+
+class CV_EXPORTS AutoLock
+{
+public:
+    AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
+    ~AutoLock() { mutex->unlock(); }
+protected:
+    Mutex* mutex;
+private:
+    AutoLock(const AutoLock&);
+    AutoLock& operator = (const AutoLock&);
+};
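+
+// A usage sketch: guard a critical section with a scoped lock; `g_mutex` is a
+// hypothetical global:
+//
+//     static cv::Mutex g_mutex;
+//     void updateSharedState()
+//     {
+//         cv::AutoLock lock(g_mutex);  // unlocked automatically on scope exit
+//         // ... critical section ...
+//     }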
+
+// TLS interface
+class CV_EXPORTS TLSDataContainer
+{
+protected:
+    TLSDataContainer();
+    virtual ~TLSDataContainer();
+
+    void  gatherData(std::vector<void*> &data) const;
+#if OPENCV_ABI_COMPATIBILITY > 300
+    void* getData() const;
+    void  release();
+
+private:
+#else
+    void  release();
+
+public:
+    void* getData() const;
+#endif
+    virtual void* createDataInstance() const = 0;
+    virtual void  deleteDataInstance(void* pData) const = 0;
+
+    int key_;
+
+public:
+    void cleanup(); //! Releases the created TLS data container objects. Similar to the release() call, but keeps the TLS container valid.
+};
+
+// Main TLS data class
+template <typename T>
+class TLSData : protected TLSDataContainer
+{
+public:
+    inline TLSData()        {}
+    inline ~TLSData()       { release();            } // Release key and delete associated data
+    inline T* get() const   { return (T*)getData(); } // Get data associated with key
+    inline T& getRef() const { T* ptr = (T*)getData(); CV_Assert(ptr); return *ptr; } // Get data associated with key
+
+    // Get data from all threads
+    inline void gather(std::vector<T*> &data) const
+    {
+        std::vector<void*> &dataVoid = reinterpret_cast<std::vector<void*>&>(data);
+        gatherData(dataVoid);
+    }
+
+    inline void cleanup() { TLSDataContainer::cleanup(); }
+
+private:
+    virtual void* createDataInstance() const CV_OVERRIDE {return new T;}                // Wrapper to allocate data by template
+    virtual void  deleteDataInstance(void* pData) const CV_OVERRIDE {delete (T*)pData;} // Wrapper to release data by template
+
+    // Disable TLS copy operations
+    TLSData(TLSData &) {}
+    TLSData& operator =(const TLSData &) {return *this;}
+};
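+
+// A usage sketch: one lazily created `Counter` instance per thread (`Counter`
+// is a hypothetical default-constructible type):
+//
+//     static cv::TLSData<Counter> tlsCounter;
+//     void onEvent()
+//     {
+//         Counter& c = tlsCounter.getRef();  // this thread's instance
+//         c.value++;
+//     }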
+
+/** @brief Designed for command line parsing
+
+The sample below demonstrates how to use CommandLineParser:
+@code
+    CommandLineParser parser(argc, argv, keys);
+    parser.about("Application name v1.0.0");
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    int N = parser.get<int>("N");
+    double fps = parser.get<double>("fps");
+    String path = parser.get<String>("path");
+
+    use_time_stamp = parser.has("timestamp");
+
+    String img1 = parser.get<String>(0);
+    String img2 = parser.get<String>(1);
+
+    int repeat = parser.get<int>(2);
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+@endcode
+
+### Keys syntax
+
+The keys parameter is a string containing several blocks, each one is enclosed in curly braces and
+describes one argument. Each argument contains three parts separated by the `|` symbol:
+
+-# argument names: a space-separated list of option synonyms (to mark an argument as positional, prefix it with the `@` symbol)
+-# default value: used if the argument was not provided (can be empty)
+-# help message (can be empty)
+
+For example:
+
+@code{.cpp}
+    const String keys =
+        "{help h usage ? |      | print this message   }"
+        "{@image1        |      | image1 for compare   }"
+        "{@image2        |<none>| image2 for compare   }"
+        "{@repeat        |1     | number               }"
+        "{path           |.     | path to file         }"
+        "{fps            | -1.0 | fps for output video }"
+        "{N count        |100   | count of objects     }"
+        "{ts timestamp   |      | use time stamp       }"
+        ;
+@endcode
+
+Note that there are no default values for `help` and `timestamp` so we can check their presence using the `has()` method.
+Arguments with default values are considered to be always present. Use the `get()` method in these cases to check their
+actual value instead.
+
+String keys like `get<String>("@image1")` return the empty string `""` by default, even when the default value is empty.
+Use the special `<none>` default value to enforce that the returned string must not be empty (as with `get<String>("@image2")`).
+
+### Usage
+
+For the described keys:
+
+@code{.sh}
+    # Good call (3 positional parameters: image1, image2 and repeat; N is 200, ts is true)
+    $ ./app -N=200 1.png 2.jpg 19 -ts
+
+    # Bad call
+    $ ./app -fps=aaa
+    ERRORS:
+    Parameter 'fps': can not convert: [aaa] to [double]
+@endcode
+ */
+class CV_EXPORTS CommandLineParser
+{
+public:
+
+    /** @brief Constructor
+
+    Initializes command line parser object
+
+    @param argc number of command line arguments (from main())
+    @param argv array of command line arguments (from main())
+    @param keys string describing acceptable command line parameters (see class description for syntax)
+    */
+    CommandLineParser(int argc, const char* const argv[], const String& keys);
+
+    /** @brief Copy constructor */
+    CommandLineParser(const CommandLineParser& parser);
+
+    /** @brief Assignment operator */
+    CommandLineParser& operator = (const CommandLineParser& parser);
+
+    /** @brief Destructor */
+    ~CommandLineParser();
+
+    /** @brief Returns application path
+
+    This method returns the path to the executable from the command line (`argv[0]`).
+
+    For example, if the application has been started with such a command:
+    @code{.sh}
+    $ ./bin/my-executable
+    @endcode
+    this method will return `./bin`.
+    */
+    String getPathToApplication() const;
+
+    /** @brief Access arguments by name
+
+    Returns the argument converted to the selected type. If the argument is not known or cannot be
+    converted to the selected type, the error flag is set (can be checked with @ref check).
+
+    For example, define:
+    @code{.cpp}
+    String keys = "{N count||}";
+    @endcode
+
+    Call:
+    @code{.sh}
+    $ ./my-app -N=20
+    # or
+    $ ./my-app --count=20
+    @endcode
+
+    Access:
+    @code{.cpp}
+    int N = parser.get<int>("N");
+    @endcode
+
+    @param name name of the argument
+    @param space_delete remove spaces from the left and right of the string
+    @tparam T the argument will be converted to this type if possible
+
+    @note You can access positional arguments by their `@`-prefixed name:
+    @code{.cpp}
+    parser.get<String>("@image");
+    @endcode
+     */
+    template <typename T>
+    T get(const String& name, bool space_delete = true) const
+    {
+        T val = T();
+        getByName(name, space_delete, ParamType<T>::type, (void*)&val);
+        return val;
+    }
+
+    /** @brief Access positional arguments by index
+
+    Returns the argument converted to the selected type. Indexes are counted from zero.
+
+    For example, define:
+    @code{.cpp}
+    String keys = "{@arg1||}{@arg2||}";
+    @endcode
+
+    Call:
+    @code{.sh}
+    ./my-app abc qwe
+    @endcode
+
+    Access arguments:
+    @code{.cpp}
+    String val_1 = parser.get<String>(0); // returns "abc", arg1
+    String val_2 = parser.get<String>(1); // returns "qwe", arg2
+    @endcode
+
+    @param index index of the argument
+    @param space_delete remove spaces from the left and right of the string
+    @tparam T the argument will be converted to this type if possible
+     */
+    template <typename T>
+    T get(int index, bool space_delete = true) const
+    {
+        T val = T();
+        getByIndex(index, space_delete, ParamType<T>::type, (void*)&val);
+        return val;
+    }
+
+    /** @brief Check whether the field was provided in the command line
+
+    @param name argument name to check
+    */
+    bool has(const String& name) const;
+
+    /** @brief Check for parsing errors
+
+    Returns false if an error occurred while accessing the parameters (bad conversion, missing arguments,
+    etc.). Call @ref printErrors to print the list of error messages.
+     */
+    bool check() const;
+
+    /** @brief Set the about message
+
+    The about message will be shown when @ref printMessage is called, right before the arguments table.
+     */
+    void about(const String& message);
+
+    /** @brief Print help message
+
+    This method will print standard help message containing the about message and arguments description.
+
+    @sa about
+    */
+    void printMessage() const;
+
+    /** @brief Print the list of errors that occurred
+
+    @sa check
+    */
+    void printErrors() const;
+
+protected:
+    void getByName(const String& name, bool space_delete, int type, void* dst) const;
+    void getByIndex(int index, bool space_delete, int type, void* dst) const;
+
+    struct Impl;
+    Impl* impl;
+};
+
+//! @} core_utils
+
+//! @cond IGNORED
+
+/////////////////////////////// AutoBuffer implementation ////////////////////////////////////////
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer()
+{
+    ptr = buf;
+    sz = fixed_size;
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer(size_t _size)
+{
+    ptr = buf;
+    sz = fixed_size;
+    allocate(_size);
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::AutoBuffer(const AutoBuffer<_Tp, fixed_size>& abuf )
+{
+    ptr = buf;
+    sz = fixed_size;
+    allocate(abuf.size());
+    for( size_t i = 0; i < sz; i++ )
+        ptr[i] = abuf.ptr[i];
+}
+
+template<typename _Tp, size_t fixed_size> inline AutoBuffer<_Tp, fixed_size>&
+AutoBuffer<_Tp, fixed_size>::operator = (const AutoBuffer<_Tp, fixed_size>& abuf)
+{
+    if( this != &abuf )
+    {
+        deallocate();
+        allocate(abuf.size());
+        for( size_t i = 0; i < sz; i++ )
+            ptr[i] = abuf.ptr[i];
+    }
+    return *this;
+}
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::~AutoBuffer()
+{ deallocate(); }
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::allocate(size_t _size)
+{
+    if(_size <= sz)
+    {
+        sz = _size;
+        return;
+    }
+    deallocate();
+    sz = _size;
+    if(_size > fixed_size)
+    {
+        ptr = new _Tp[_size];
+    }
+}
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::deallocate()
+{
+    if( ptr != buf )
+    {
+        delete[] ptr;
+        ptr = buf;
+        sz = fixed_size;
+    }
+}
+
+template<typename _Tp, size_t fixed_size> inline void
+AutoBuffer<_Tp, fixed_size>::resize(size_t _size)
+{
+    if(_size <= sz)
+    {
+        sz = _size;
+        return;
+    }
+    size_t i, prevsize = sz, minsize = MIN(prevsize, _size);
+    _Tp* prevptr = ptr;
+
+    ptr = _size > fixed_size ? new _Tp[_size] : buf;
+    sz = _size;
+
+    if( ptr != prevptr )
+        for( i = 0; i < minsize; i++ )
+            ptr[i] = prevptr[i];
+    for( i = prevsize; i < _size; i++ )
+        ptr[i] = _Tp();
+
+    if( prevptr != buf )
+        delete[] prevptr;
+}
+
+template<typename _Tp, size_t fixed_size> inline size_t
+AutoBuffer<_Tp, fixed_size>::size() const
+{ return sz; }
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::operator _Tp* ()
+{ return ptr; }
+
+template<typename _Tp, size_t fixed_size> inline
+AutoBuffer<_Tp, fixed_size>::operator const _Tp* () const
+{ return ptr; }
+
+template<> inline std::string CommandLineParser::get<std::string>(int index, bool space_delete) const
+{
+    return get<String>(index, space_delete);
+}
+template<> inline std::string CommandLineParser::get<std::string>(const String& name, bool space_delete) const
+{
+    return get<String>(name, space_delete);
+}
+
+//! @endcond
+
+
+// Basic Node class for tree building
+template<class OBJECT>
+class CV_EXPORTS Node
+{
+public:
+    Node()
+    {
+        m_pParent  = 0;
+    }
+    Node(OBJECT& payload) : m_payload(payload)
+    {
+        m_pParent  = 0;
+    }
+    ~Node()
+    {
+        removeChilds();
+        if (m_pParent)
+        {
+            int idx = m_pParent->findChild(this);
+            if (idx >= 0)
+                m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx);
+        }
+    }
+
+    Node<OBJECT>* findChild(OBJECT& payload) const
+    {
+        for(size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i]->m_payload == payload)
+                return this->m_childs[i];
+        }
+        return NULL;
+    }
+
+    int findChild(Node<OBJECT> *pNode) const
+    {
+        for (size_t i = 0; i < this->m_childs.size(); i++)
+        {
+            if(this->m_childs[i] == pNode)
+                return (int)i;
+        }
+        return -1;
+    }
+
+    void addChild(Node<OBJECT> *pNode)
+    {
+        if(!pNode)
+            return;
+
+        CV_Assert(pNode->m_pParent == 0);
+        pNode->m_pParent = this;
+        this->m_childs.push_back(pNode);
+    }
+
+    void removeChilds()
+    {
+        for(size_t i = 0; i < m_childs.size(); i++)
+        {
+            m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming
+            delete m_childs[i];
+        }
+        m_childs.clear();
+    }
+
+    int getDepth()
+    {
+        int   count   = 0;
+        Node *pParent = m_pParent;
+        while(pParent) count++, pParent = pParent->m_pParent;
+        return count;
+    }
+
+public:
+    OBJECT                     m_payload;
+    Node<OBJECT>*              m_pParent;
+    std::vector<Node<OBJECT>*> m_childs;
+};
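+
+// A usage sketch: build a two-level tree of int payloads. The destructor of
+// `root` deletes the heap-allocated child via removeChilds():
+//
+//     int a = 1, b = 2;
+//     cv::Node<int> root(a);
+//     root.addChild(new cv::Node<int>(b));
+//     int depth = root.m_childs[0]->getDepth();  // == 1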
+
+// Instrumentation external interface
+namespace instr
+{
+
+#if !defined OPENCV_ABI_CHECK
+
+enum TYPE
+{
+    TYPE_GENERAL = 0,   // OpenCV API function, e.g. exported function
+    TYPE_MARKER,        // Information marker
+    TYPE_WRAPPER,       // Wrapper function for implementation
+    TYPE_FUN,           // Simple function call
+};
+
+enum IMPL
+{
+    IMPL_PLAIN = 0,
+    IMPL_IPP,
+    IMPL_OPENCL,
+};
+
+struct NodeDataTls
+{
+    NodeDataTls()
+    {
+        m_ticksTotal = 0;
+    }
+    uint64      m_ticksTotal;
+};
+
+class CV_EXPORTS NodeData
+{
+public:
+    NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, void* retAddress = NULL, bool alwaysExpand = false, cv::instr::TYPE instrType = TYPE_GENERAL, cv::instr::IMPL implType = IMPL_PLAIN);
+    NodeData(NodeData &ref);
+    ~NodeData();
+    NodeData& operator=(const NodeData&);
+
+    cv::String          m_funName;
+    cv::instr::TYPE     m_instrType;
+    cv::instr::IMPL     m_implType;
+    const char*         m_fileName;
+    int                 m_lineNum;
+    void*               m_retAddress;
+    bool                m_alwaysExpand;
+    bool                m_funError;
+
+    volatile int         m_counter;
+    volatile uint64      m_ticksTotal;
+    TLSData<NodeDataTls> m_tls;
+    int                  m_threads;
+
+    // No synchronization
+    double getTotalMs()   const { return ((double)m_ticksTotal / cv::getTickFrequency()) * 1000; }
+    double getMeanMs()    const { return (((double)m_ticksTotal/m_counter) / cv::getTickFrequency()) * 1000; }
+};
+bool operator==(const NodeData& lhs, const NodeData& rhs);
+
+typedef Node<NodeData> InstrNode;
+
+CV_EXPORTS InstrNode* getTrace();
+
+#endif // !defined OPENCV_ABI_CHECK
+
+
+CV_EXPORTS bool       useInstrumentation();
+CV_EXPORTS void       setUseInstrumentation(bool flag);
+CV_EXPORTS void       resetTrace();
+
+enum FLAGS
+{
+    FLAGS_NONE              = 0,
+    FLAGS_MAPPING           = 0x01,
+    FLAGS_EXPAND_SAME_NAMES = 0x02,
+};
+
+CV_EXPORTS void       setFlags(FLAGS modeFlags);
+static inline void    setFlags(int modeFlags) { setFlags((FLAGS)modeFlags); }
+CV_EXPORTS FLAGS      getFlags();
+}
+
+namespace utils {
+
+CV_EXPORTS int getThreadID();
+
+} // namespace
+
+} //namespace cv
+
+#ifndef DISABLE_OPENCV_24_COMPATIBILITY
+#include "opencv2/core/core_c.h"
+#endif
+
+#endif //OPENCV_CORE_UTILITY_H

+ 71 - 0
ThresholdTools/include/opencv2/core/utils/filesystem.hpp

@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_UTILS_FILESYSTEM_HPP
+#define OPENCV_UTILS_FILESYSTEM_HPP
+
+namespace cv { namespace utils { namespace fs {
+
+
+CV_EXPORTS bool exists(const cv::String& path);
+CV_EXPORTS bool isDirectory(const cv::String& path);
+
+CV_EXPORTS void remove_all(const cv::String& path);
+
+
+CV_EXPORTS cv::String getcwd();
+
+/** Join path components */
+CV_EXPORTS cv::String join(const cv::String& base, const cv::String& path);
+
+/**
+ * Generate a list of all files that match the globbing pattern.
+ *
+ * Result entries are prefixed by base directory path.
+ *
+ * @param directory base directory
+ * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results
+ * @param[out] result result of globbing.
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob(const cv::String& directory, const cv::String& pattern,
+        CV_OUT std::vector<cv::String>& result,
+        bool recursive = false, bool includeDirectories = false);
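+
+// A usage sketch (the directory and pattern are hypothetical): collect all PNG
+// files under "images", scanning nested directories too:
+//
+//     std::vector<cv::String> files;
+//     cv::utils::fs::glob("images", "*.png", files, /*recursive=*/true);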
+
+/**
+ * Generate a list of all files that match the globbing pattern.
+ *
+ * @param directory base directory
+ * @param pattern filter pattern (based on '*'/'?' symbols). Use empty string to disable filtering and return all results
+ * @param[out] result globbing result with relative paths from base directory
+ * @param recursive scan nested directories too
+ * @param includeDirectories include directories into results list
+ */
+CV_EXPORTS void glob_relative(const cv::String& directory, const cv::String& pattern,
+        CV_OUT std::vector<cv::String>& result,
+        bool recursive = false, bool includeDirectories = false);
+
+
+CV_EXPORTS bool createDirectory(const cv::String& path);
+CV_EXPORTS bool createDirectories(const cv::String& path);
+
+#ifdef __OPENCV_BUILD
+// TODO
+//CV_EXPORTS cv::String getTempDirectory();
+
+/**
+ * @brief Returns the directory used to store OpenCV cache files.
+ * Creates a sub-directory in the common OpenCV cache directory if it doesn't exist.
+ * @param sub_directory_name name of the sub-directory. A NULL or "" value asks to return the root cache directory.
+ * @param configuration_name optional name of the configuration parameter that overrides the default behavior.
+ * @return Path to the cache directory. Returns an empty string if cache directory support is not available. Returns "disabled" if the cache was disabled by the user.
+ */
+CV_EXPORTS cv::String getCacheDirectory(const char* sub_directory_name, const char* configuration_name = NULL);
+
+#endif
+
+}}} // namespace
+
+#endif // OPENCV_UTILS_FILESYSTEM_HPP

+ 22 - 0
ThresholdTools/include/opencv2/core/utils/logger.defines.hpp

@@ -0,0 +1,22 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_DEFINES_HPP
+#define OPENCV_LOGGER_DEFINES_HPP
+
+//! @addtogroup core_logging
+//! @{
+
+// Supported logging levels and their semantic
+#define CV_LOG_LEVEL_SILENT 0          //!< for use in a setLogLevel() call
+#define CV_LOG_LEVEL_FATAL 1           //!< Fatal (critical) error (unrecoverable internal error)
+#define CV_LOG_LEVEL_ERROR 2           //!< Error message
+#define CV_LOG_LEVEL_WARN 3            //!< Warning message
+#define CV_LOG_LEVEL_INFO 4            //!< Info message
+#define CV_LOG_LEVEL_DEBUG 5           //!< Debug message. Disabled in the "Release" build.
+#define CV_LOG_LEVEL_VERBOSE 6         //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+
+//! @}
+
+#endif // OPENCV_LOGGER_DEFINES_HPP

+ 87 - 0
ThresholdTools/include/opencv2/core/utils/logger.hpp

@@ -0,0 +1,87 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_LOGGER_HPP
+#define OPENCV_LOGGER_HPP
+
+#include <iostream>
+#include <sstream>
+#include <limits.h> // INT_MAX
+
+#include "logger.defines.hpp"
+
+//! @addtogroup core_logging
+// This section describes OpenCV logging utilities.
+//
+//! @{
+
+namespace cv {
+namespace utils {
+namespace logging {
+
+//! Supported logging levels and their semantic
+enum LogLevel {
+    LOG_LEVEL_SILENT = 0,              //!< for use in a setLogLevel() call
+    LOG_LEVEL_FATAL = 1,               //!< Fatal (critical) error (unrecoverable internal error)
+    LOG_LEVEL_ERROR = 2,               //!< Error message
+    LOG_LEVEL_WARNING = 3,             //!< Warning message
+    LOG_LEVEL_INFO = 4,                //!< Info message
+    LOG_LEVEL_DEBUG = 5,               //!< Debug message. Disabled in the "Release" build.
+    LOG_LEVEL_VERBOSE = 6,             //!< Verbose (trace) messages. Requires verbosity level. Disabled in the "Release" build.
+#ifndef CV_DOXYGEN
+    ENUM_LOG_LEVEL_FORCE_INT = INT_MAX
+#endif
+};
+
+/** Set global logging level
+@return previous logging level
+*/
+CV_EXPORTS LogLevel setLogLevel(LogLevel logLevel);
+/** Get global logging level */
+CV_EXPORTS LogLevel getLogLevel();
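+
+// A usage sketch: silence everything more verbose than warnings for the rest
+// of the run:
+//
+//     cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_WARNING);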
+
+namespace internal {
+/** Write log message */
+CV_EXPORTS void writeLogMessage(LogLevel logLevel, const char* message);
+} // namespace
+
+/**
+ * \def CV_LOG_STRIP_LEVEL
+ *
+ * Define CV_LOG_STRIP_LEVEL=CV_LOG_LEVEL_[DEBUG|INFO|WARN|ERROR|FATAL|DISABLED] to compile out messages at that logging level and all more verbose levels
+ */
+#ifndef CV_LOG_STRIP_LEVEL
+# if defined NDEBUG
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG
+# else
+#   define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE
+# endif
+#endif
+
+
+#define CV_LOG_FATAL(tag, ...)   for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_FATAL) break; std::stringstream ss; ss << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_FATAL, ss.str().c_str()); break; }
+#define CV_LOG_ERROR(tag, ...)   for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_ERROR) break; std::stringstream ss; ss << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_ERROR, ss.str().c_str()); break; }
+#define CV_LOG_WARNING(tag, ...) for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_WARNING) break; std::stringstream ss; ss << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_WARNING, ss.str().c_str()); break; }
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_INFO
+#define CV_LOG_INFO(tag, ...)
+#else
+#define CV_LOG_INFO(tag, ...)    for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_INFO) break; std::stringstream ss; ss << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_INFO, ss.str().c_str()); break; }
+#endif
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_DEBUG
+#define CV_LOG_DEBUG(tag, ...)
+#else
+#define CV_LOG_DEBUG(tag, ...)   for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_DEBUG) break; std::stringstream ss; ss << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_DEBUG, ss.str().c_str()); break; }
+#endif
+#if CV_LOG_STRIP_LEVEL <= CV_LOG_LEVEL_VERBOSE
+#define CV_LOG_VERBOSE(tag, v, ...)
+#else
+#define CV_LOG_VERBOSE(tag, v, ...) for(;;) { if (cv::utils::logging::getLogLevel() < cv::utils::logging::LOG_LEVEL_VERBOSE) break; std::stringstream ss; ss << "[VERB" << v << ":" << cv::utils::getThreadID() << "] " << __VA_ARGS__; cv::utils::logging::internal::writeLogMessage(cv::utils::logging::LOG_LEVEL_VERBOSE, ss.str().c_str()); break; }
+#endif
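+
+// A usage sketch: the macros stream their arguments into a std::stringstream,
+// so operator<< chains are accepted; the tag argument is unused in this
+// implementation (`filename` and `n` are hypothetical variables):
+//
+//     CV_LOG_WARNING(NULL, "failed to open file: " << filename);
+//     CV_LOG_DEBUG(NULL, "iterations=" << n);  // compiled out in "Release" builds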
+
+
+}}} // namespace
+
+//! @}
+
+#endif // OPENCV_LOGGER_HPP

+ 250 - 0
ThresholdTools/include/opencv2/core/utils/trace.hpp

@@ -0,0 +1,250 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_TRACE_HPP
+#define OPENCV_TRACE_HPP
+
+#include <opencv2/core/cvdef.h>
+
+//! @addtogroup core_logging
+// This section describes OpenCV tracing utilities.
+//
+//! @{
+
+namespace cv {
+namespace utils {
+namespace trace {
+
+//! Macro to trace function
+#define CV_TRACE_FUNCTION()
+
+#define CV_TRACE_FUNCTION_SKIP_NESTED()
+
+//! Trace code scope.
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "initialize".
+#define CV_TRACE_REGION(name_as_static_string_literal)
+//! Marks the currently open region as completed and creates a new one
+//! @note Dynamic names are not supported in this macro (on stack or heap). Use string literals here only, like "step1".
+#define CV_TRACE_REGION_NEXT(name_as_static_string_literal)
+
+//! Macro to trace argument value
+#define CV_TRACE_ARG(arg_id)
+
+//! Macro to trace argument value (expanded version)
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value)
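+
+// A placement sketch for a hypothetical user function; here the macros expand
+// to no-ops, and instrumented builds replace them with real trace regions:
+//
+//     void process(cv::Mat& frame)
+//     {
+//         CV_TRACE_FUNCTION();
+//         CV_TRACE_REGION("preprocess");
+//         // ... step 1 ...
+//         CV_TRACE_REGION_NEXT("analyze");
+//         // ... step 2 ...
+//     }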
+
+//! @cond IGNORED
+#define CV_TRACE_NS cv::utils::trace
+
+namespace details {
+
+#ifndef __OPENCV_TRACE
+# if defined __OPENCV_BUILD && !defined __OPENCV_TESTS && !defined __OPENCV_APPS
+#   define __OPENCV_TRACE 1
+# else
+#   define __OPENCV_TRACE 0
+# endif
+#endif
+
+#ifndef CV_TRACE_FILENAME
+# define CV_TRACE_FILENAME __FILE__
+#endif
+
+#ifndef CV__TRACE_FUNCTION
+# if defined _MSC_VER
+#   define CV__TRACE_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__TRACE_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__TRACE_FUNCTION "<unknown>"
+# endif
+#endif
+
+//! Thread-local instance (usually allocated on stack)
+class CV_EXPORTS Region
+{
+public:
+    struct LocationExtraData;
+    struct LocationStaticStorage
+    {
+        LocationExtraData** ppExtra;   //< implementation specific data
+        const char* name;              //< region name (function name or other custom name)
+        const char* filename;          //< source code filename
+        int line;                      //< source code line
+        int flags;                     //< flags (implementation code path: Plain, IPP, OpenCL)
+    };
+
+    Region(const LocationStaticStorage& location);
+    inline ~Region()
+    {
+        if (implFlags != 0)
+            destroy();
+        CV_DbgAssert(implFlags == 0);
+        CV_DbgAssert(pImpl == NULL);
+    }
+
+    class Impl;
+    Impl* pImpl; // NULL if current region is not active
+    int implFlags; // see RegionFlag, 0 if region is ignored
+
+    bool isActive() const { return pImpl != NULL; }
+
+    void destroy();
+private:
+    Region(const Region&); // disabled
+    Region& operator= (const Region&); // disabled
+};
+
+//! Specify region flags
+enum RegionLocationFlag {
+    REGION_FLAG_FUNCTION = (1 << 0),             //< region is function (=1) / nested named region (=0)
+    REGION_FLAG_APP_CODE = (1 << 1),             //< region is Application code (=1) / OpenCV library code (=0)
+    REGION_FLAG_SKIP_NESTED = (1 << 2),          //< avoid processing of nested regions
+
+    REGION_FLAG_IMPL_IPP = (1 << 16),            //< region is part of IPP code path
+    REGION_FLAG_IMPL_OPENCL = (2 << 16),         //< region is part of OpenCL code path
+    REGION_FLAG_IMPL_OPENVX = (3 << 16),         //< region is part of OpenVX code path
+
+    REGION_FLAG_IMPL_MASK = (15 << 16),
+
+    REGION_FLAG_REGION_FORCE = (1 << 30),
+    REGION_FLAG_REGION_NEXT = (1 << 31),         //< close previous region (see #CV_TRACE_REGION_NEXT macro)
+
+    ENUM_REGION_FLAG_FORCE_INT = INT_MAX
+};
+
+struct CV_EXPORTS TraceArg {
+public:
+    struct ExtraData;
+    ExtraData** ppExtra;
+    const char* name;
+    int flags;
+};
+/** @brief Add meta information to current region (function)
+ * See CV_TRACE_ARG macro
+ * @param arg argument information structure (global static cache)
+ * @param value argument value (for strings, the value may be dynamically allocated; static allocation is not required)
+ */
+CV_EXPORTS void traceArg(const TraceArg& arg, const char* value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, int64 value);
+//! @overload
+CV_EXPORTS void traceArg(const TraceArg& arg, double value);
+
+#define CV__TRACE_LOCATION_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_, loc_id), __LINE__)
+#define CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_trace_location_extra_, loc_id) , __LINE__)
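+// (__LINE__ is appended to the variable names so that several trace locations can coexist in one function)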
+
+#define CV__TRACE_DEFINE_LOCATION_(loc_id, name, flags) \
+    static CV_TRACE_NS::details::Region::LocationExtraData* CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id) = 0; \
+    static const CV_TRACE_NS::details::Region::LocationStaticStorage \
+        CV__TRACE_LOCATION_VARNAME(loc_id) = { &(CV__TRACE_LOCATION_EXTRA_VARNAME(loc_id)), name, CV_TRACE_FILENAME, __LINE__, flags};
+
+#define CV__TRACE_DEFINE_LOCATION_FN(name, flags) CV__TRACE_DEFINE_LOCATION_(fn, name, (flags | CV_TRACE_NS::details::REGION_FLAG_FUNCTION))
+
+
+#define CV__TRACE_OPENCV_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, 0); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_NAME(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+
+#define CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_OPENCV_FUNCTION_NAME_SKIP_NESTED(name) \
+    CV__TRACE_DEFINE_LOCATION_FN(name, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+#define CV__TRACE_APP_FUNCTION_SKIP_NESTED() \
+    CV__TRACE_DEFINE_LOCATION_FN(CV__TRACE_FUNCTION, CV_TRACE_NS::details::REGION_FLAG_SKIP_NESTED | CV_TRACE_NS::details::REGION_FLAG_APP_CODE); \
+    const CV_TRACE_NS::details::Region __region_fn(CV__TRACE_LOCATION_VARNAME(fn));
+
+
+#define CV__TRACE_REGION_(name_as_static_string_literal, flags) \
+    CV__TRACE_DEFINE_LOCATION_(region, name_as_static_string_literal, flags); \
+    CV_TRACE_NS::details::Region CVAUX_CONCAT(__region_, __LINE__)(CV__TRACE_LOCATION_VARNAME(region));
+
+#define CV__TRACE_REGION(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, 0)
+#define CV__TRACE_REGION_NEXT(name_as_static_string_literal) CV__TRACE_REGION_(name_as_static_string_literal, CV_TRACE_NS::details::REGION_FLAG_REGION_NEXT)
+
+#define CV__TRACE_ARG_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_ ## arg_id, __LINE__)
+#define CV__TRACE_ARG_EXTRA_VARNAME(arg_id) CVAUX_CONCAT(__cv_trace_arg_extra_ ## arg_id, __LINE__)
+
+#define CV__TRACE_DEFINE_ARG_(arg_id, name, flags) \
+    static CV_TRACE_NS::details::TraceArg::ExtraData* CV__TRACE_ARG_EXTRA_VARNAME(arg_id) = 0; \
+    static const CV_TRACE_NS::details::TraceArg \
+        CV__TRACE_ARG_VARNAME(arg_id) = { &(CV__TRACE_ARG_EXTRA_VARNAME(arg_id)), name, flags };
+
+#define CV__TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        CV__TRACE_DEFINE_ARG_(arg_id, arg_name, 0); \
+        CV_TRACE_NS::details::traceArg((CV__TRACE_ARG_VARNAME(arg_id)), value);
+
+#define CV__TRACE_ARG(arg_id) CV_TRACE_ARG_VALUE(arg_id, #arg_id, (arg_id))
+
+} // namespace
+
+#ifndef OPENCV_DISABLE_TRACE
+#undef CV_TRACE_FUNCTION
+#undef CV_TRACE_FUNCTION_SKIP_NESTED
+#if __OPENCV_TRACE
+#define CV_TRACE_FUNCTION CV__TRACE_OPENCV_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_OPENCV_FUNCTION_SKIP_NESTED
+#else
+#define CV_TRACE_FUNCTION CV__TRACE_APP_FUNCTION
+#define CV_TRACE_FUNCTION_SKIP_NESTED CV__TRACE_APP_FUNCTION_SKIP_NESTED
+#endif
+
+#undef CV_TRACE_REGION
+#define CV_TRACE_REGION CV__TRACE_REGION
+
+#undef CV_TRACE_REGION_NEXT
+#define CV_TRACE_REGION_NEXT CV__TRACE_REGION_NEXT
+
+#undef CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VALUE(arg_id, arg_name, value) \
+        if (__region_fn.isActive()) \
+        { \
+            CV__TRACE_ARG_VALUE(arg_id, arg_name, value); \
+        }
+
+#undef CV_TRACE_ARG
+#define CV_TRACE_ARG CV__TRACE_ARG
+
+#endif // OPENCV_DISABLE_TRACE
+
+#ifdef OPENCV_TRACE_VERBOSE
+#define CV_TRACE_FUNCTION_VERBOSE CV_TRACE_FUNCTION
+#define CV_TRACE_REGION_VERBOSE CV_TRACE_REGION
+#define CV_TRACE_REGION_NEXT_VERBOSE CV_TRACE_REGION_NEXT
+#define CV_TRACE_ARG_VALUE_VERBOSE CV_TRACE_ARG_VALUE
+#define CV_TRACE_ARG_VERBOSE CV_TRACE_ARG
+#else
+#define CV_TRACE_FUNCTION_VERBOSE(...)
+#define CV_TRACE_REGION_VERBOSE(...)
+#define CV_TRACE_REGION_NEXT_VERBOSE(...)
+#define CV_TRACE_ARG_VALUE_VERBOSE(...)
+#define CV_TRACE_ARG_VERBOSE(...)
+#endif
+
+//! @endcond
+
+}}} // namespace
+
+//! @}
+
+#endif // OPENCV_TRACE_HPP

+ 77 - 0
ThresholdTools/include/opencv2/core/va_intel.hpp

@@ -0,0 +1,77 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2015, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_CORE_VA_INTEL_HPP
+#define OPENCV_CORE_VA_INTEL_HPP
+
+#ifndef __cplusplus
+#  error va_intel.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "ocl.hpp"
+
+#if defined(HAVE_VA)
+# include "va/va.h"
+#else  // HAVE_VA
+# if !defined(_VA_H_)
+    typedef void* VADisplay;
+    typedef unsigned int VASurfaceID;
+# endif // !_VA_H_
+#endif // HAVE_VA
+
+namespace cv { namespace va_intel {
+
+/** @addtogroup core_va_intel
+This section describes Intel VA-API/OpenCL (CL-VA) interoperability.
+
+To enable CL-VA interoperability support, configure OpenCV using CMake with WITH_VA_INTEL=ON . Currently VA-API is
+supported on Linux only. You should also install Intel Media Server Studio (MSS) to use this feature. You may
+have to specify the path(s) to MSS components for cmake in environment variables: VA_INTEL_MSDK_ROOT for Media SDK
+(default is "/opt/intel/mediasdk"), and VA_INTEL_IOCL_ROOT for Intel OpenCL (default is "/opt/intel/opencl").
+
+To use CL-VA interoperability you should first create VADisplay (libva), and then call initializeContextFromVA()
+function to create OpenCL context and set up interoperability.
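+
+A minimal usage sketch (the display, surface and size values are assumed to come from the
+application's own libva setup):
+@code
+    cv::va_intel::ocl::initializeContextFromVA(display);
+    cv::Mat frame;
+    cv::va_intel::convertFromVASurface(display, surface, size, frame);
+@endcode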
+*/
+//! @{
+
+/////////////////// CL-VA Interoperability Functions ///////////////////
+
+namespace ocl {
+using namespace cv::ocl;
+
+// TODO static functions in the Context class
+/** @brief Creates OpenCL context from VA.
+@param display    - VADisplay for which CL interop should be established.
+@param tryInterop - if true, try to set up for interoperability; if false, set up to use slow copying.
+@return Reference to the OpenCL Context
+ */
+CV_EXPORTS Context& initializeContextFromVA(VADisplay display, bool tryInterop = true);
+
+} // namespace cv::va_intel::ocl
+
+/** @brief Converts InputArray to VASurfaceID object.
+@param display - VADisplay object.
+@param src     - source InputArray.
+@param surface - destination VASurfaceID object.
+@param size    - size of image represented by VASurfaceID object.
+ */
+CV_EXPORTS void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size);
+
+/** @brief Converts VASurfaceID object to OutputArray.
+@param display - VADisplay object.
+@param surface - source VASurfaceID object.
+@param size    - size of image represented by VASurfaceID object.
+@param dst     - destination OutputArray.
+ */
+CV_EXPORTS void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, OutputArray dst);
+
+//! @}
+
+}} // namespace cv::va_intel
+
+#endif /* OPENCV_CORE_VA_INTEL_HPP */

+ 26 - 0
ThresholdTools/include/opencv2/core/version.hpp

@@ -0,0 +1,26 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VERSION_HPP
+#define OPENCV_VERSION_HPP
+
+#define CV_VERSION_MAJOR    3
+#define CV_VERSION_MINOR    4
+#define CV_VERSION_REVISION 2
+#define CV_VERSION_STATUS   ""
+
+#define CVAUX_STR_EXP(__A)  #__A
+#define CVAUX_STR(__A)      CVAUX_STR_EXP(__A)
+
+#define CVAUX_STRW_EXP(__A)  L ## #__A
+#define CVAUX_STRW(__A)      CVAUX_STRW_EXP(__A)
+
+#define CV_VERSION          CVAUX_STR(CV_VERSION_MAJOR) "." CVAUX_STR(CV_VERSION_MINOR) "." CVAUX_STR(CV_VERSION_REVISION) CV_VERSION_STATUS
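+// e.g. with the values above, CV_VERSION expands to the string literal "3.4.2";
+// the two-level CVAUX_STR indirection ensures the numeric macros are expanded before stringification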
+
+/* old-style version constants */
+#define CV_MAJOR_VERSION    CV_VERSION_MAJOR
+#define CV_MINOR_VERSION    CV_VERSION_MINOR
+#define CV_SUBMINOR_VERSION CV_VERSION_REVISION
+
+#endif // OPENCV_VERSION_HPP

+ 1009 - 0
ThresholdTools/include/opencv2/core/vsx_utils.hpp

@@ -0,0 +1,1009 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_VSX_UTILS_HPP
+#define OPENCV_HAL_VSX_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#ifndef SKIP_INCLUDES
+#   include <assert.h>
+#endif
+
+//! @addtogroup core_utils_vsx
+//! @{
+#if CV_VSX
+
+#define __VSX_S16__(c, v) (c){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}
+#define __VSX_S8__(c, v)  (c){v, v, v, v, v, v, v, v}
+#define __VSX_S4__(c, v)  (c){v, v, v, v}
+#define __VSX_S2__(c, v)  (c){v, v}
+
+typedef __vector unsigned char vec_uchar16;
+#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
+#define vec_uchar16_sp(c)    (__VSX_S16__(vec_uchar16, c))
+#define vec_uchar16_c(v)     ((vec_uchar16)(v))
+#define vec_uchar16_z        vec_uchar16_sp(0)
+
+typedef __vector signed char vec_char16;
+#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
+#define vec_char16_sp(c)    (__VSX_S16__(vec_char16, c))
+#define vec_char16_c(v)     ((vec_char16)(v))
+#define vec_char16_z        vec_char16_sp(0)
+
+typedef __vector unsigned short vec_ushort8;
+#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
+#define vec_ushort8_sp(c)    (__VSX_S8__(vec_ushort8, c))
+#define vec_ushort8_c(v)     ((vec_ushort8)(v))
+#define vec_ushort8_z        vec_ushort8_sp(0)
+
+typedef __vector signed short vec_short8;
+#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
+#define vec_short8_sp(c)    (__VSX_S8__(vec_short8, c))
+#define vec_short8_c(v)     ((vec_short8)(v))
+#define vec_short8_z        vec_short8_sp(0)
+
+typedef __vector unsigned int vec_uint4;
+#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
+#define vec_uint4_sp(c)    (__VSX_S4__(vec_uint4, c))
+#define vec_uint4_c(v)     ((vec_uint4)(v))
+#define vec_uint4_z        vec_uint4_sp(0)
+
+typedef __vector signed int vec_int4;
+#define vec_int4_set(...)  (vec_int4){__VA_ARGS__}
+#define vec_int4_sp(c)     (__VSX_S4__(vec_int4, c))
+#define vec_int4_c(v)      ((vec_int4)(v))
+#define vec_int4_z         vec_int4_sp(0)
+
+typedef __vector float vec_float4;
+#define vec_float4_set(...)  (vec_float4){__VA_ARGS__}
+#define vec_float4_sp(c)     (__VSX_S4__(vec_float4, c))
+#define vec_float4_c(v)      ((vec_float4)(v))
+#define vec_float4_z         vec_float4_sp(0)
+
+typedef __vector unsigned long long vec_udword2;
+#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
+#define vec_udword2_sp(c)    (__VSX_S2__(vec_udword2, c))
+#define vec_udword2_c(v)     ((vec_udword2)(v))
+#define vec_udword2_z        vec_udword2_sp(0)
+
+typedef __vector signed long long vec_dword2;
+#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
+#define vec_dword2_sp(c)    (__VSX_S2__(vec_dword2, c))
+#define vec_dword2_c(v)     ((vec_dword2)(v))
+#define vec_dword2_z        vec_dword2_sp(0)
+
+typedef  __vector double vec_double2;
+#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
+#define vec_double2_c(v)     ((vec_double2)(v))
+#define vec_double2_sp(c)    (__VSX_S2__(vec_double2, c))
+#define vec_double2_z        vec_double2_sp(0)
+
+#define vec_bchar16           __vector __bool char
+#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
+#define vec_bchar16_c(v)     ((vec_bchar16)(v))
+
+#define vec_bshort8           __vector __bool short
+#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
+#define vec_bshort8_c(v)     ((vec_bshort8)(v))
+
+#define vec_bint4             __vector __bool int
+#define vec_bint4_set(...)   (vec_bint4){__VA_ARGS__}
+#define vec_bint4_c(v)       ((vec_bint4)(v))
+
+#define vec_bdword2            __vector __bool long long
+#define vec_bdword2_set(...)  (vec_bdword2){__VA_ARGS__}
+#define vec_bdword2_c(v)      ((vec_bdword2)(v))
+
+#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline))
+
+#define VSX_REDIRECT_1RG(rt, rg, fnm, fn2)   \
+VSX_FINLINE(rt) fnm(const rg& a) { return fn2(a); }
+
+#define VSX_REDIRECT_2RG(rt, rg, fnm, fn2)   \
+VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
+
+/*
+ * GCC VSX compatibility
+**/
+#if defined(__GNUG__) && !defined(__clang__)
+
+// inline asm helper
+#define VSX_IMPL_1RG(rt, rto, rg, rgo, opc, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "="#rto (rs) : #rgo (a)); return rs; }
+
+#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a)        \
+{ rt rs; __asm__ __volatile__(#opc" %0,%1" : "=v" (rs) : "v" (a)); return rs; }
+
+#define VSX_IMPL_2VRG_F(rt, rg, fopc, fnm)     \
+VSX_FINLINE(rt) fnm(const rg& a, const rg& b)  \
+{ rt rs; __asm__ __volatile__(fopc : "=v" (rs) : "v" (a), "v" (b)); return rs; }
+
+#define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm)
+
+#if __GNUG__ < 7
+// up to GCC 6, vec_mul only supports floating-point types and long long
+#   ifdef vec_mul
+#       undef vec_mul
+#   endif
+/*
+ * There is no direct instruction for 16-bit multiplication in ISA 2.07;
+ * like XLC, we implement it using the "multiply even", "multiply odd" and "permute" instructions.
+ * TODO: do we need to support 8-bit?
+**/
+#   define VSX_IMPL_MULH(Tvec, Tcast)                                               \
+    VSX_FINLINE(Tvec) vec_mul(const Tvec& a, const Tvec& b)                         \
+    {                                                                               \
+        static const vec_uchar16 even_perm = {0, 1, 16, 17, 4, 5, 20, 21,           \
+                                              8, 9, 24, 25, 12, 13, 28, 29};        \
+        return vec_perm(Tcast(vec_mule(a, b)), Tcast(vec_mulo(a, b)), even_perm);   \
+    }
+    VSX_IMPL_MULH(vec_short8,  vec_short8_c)
+    VSX_IMPL_MULH(vec_ushort8, vec_ushort8_c)
+    // vmuluwm can be used for both unsigned and signed integers, per the ISA documentation
+    VSX_IMPL_2VRG(vec_int4,  vec_int4,  vmuluwm, vec_mul)
+    VSX_IMPL_2VRG(vec_uint4, vec_uint4, vmuluwm, vec_mul)
+    // redirect to the GCC builtin vec_mul, since it already supports floating-point types and long long
+    VSX_REDIRECT_2RG(vec_float4,  vec_float4,  vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mul, __builtin_vec_mul)
+    VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mul, __builtin_vec_mul)
+#endif // __GNUG__ < 7
+
+#if __GNUG__ < 6
+/*
+ * Instruction "compare greater than or equal" in ISA 2.07 only supports single
+ * and double precision.
+ * XLC and newer versions of GCC implement the integer variants using "compare greater than" followed by NOR.
+**/
+#   ifdef vec_cmpge
+#       undef vec_cmpge
+#   endif
+#   ifdef vec_cmple
+#       undef vec_cmple
+#   endif
+#   define vec_cmple(a, b) vec_cmpge(b, a)
+#   define VSX_IMPL_CMPGE(rt, rg, opc, fnm) \
+    VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%2,%1\n\t xxlnor %x0,%x0,%x0", fnm)
+
+    VSX_IMPL_CMPGE(vec_bchar16, vec_char16,  vcmpgtsb, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bchar16, vec_uchar16, vcmpgtub, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_short8,  vcmpgtsh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bshort8, vec_ushort8, vcmpgtuh, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4,   vec_int4,    vcmpgtsw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bint4,   vec_uint4,   vcmpgtuw, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_dword2,  vcmpgtsd, vec_cmpge)
+    VSX_IMPL_CMPGE(vec_bdword2, vec_udword2, vcmpgtud, vec_cmpge)
+
+// redirect to the GCC builtin vec_cmpge, since it already supports floating-point types
+    VSX_REDIRECT_2RG(vec_bint4,   vec_float4,  vec_cmpge, __builtin_vec_cmpge)
+    VSX_REDIRECT_2RG(vec_bdword2, vec_double2, vec_cmpge, __builtin_vec_cmpge)
+
+// up to GCC 5, vec_nor doesn't support bool long long
+#   undef vec_nor
+    template<typename T>
+    VSX_REDIRECT_2RG(T, T, vec_nor, __builtin_vec_nor)
+
+    VSX_FINLINE(vec_bdword2) vec_nor(const vec_bdword2& a, const vec_bdword2& b)
+    { return vec_bdword2_c(__builtin_vec_nor(vec_dword2_c(a), vec_dword2_c(b))); }
+
+// vec_packs doesn't support doublewords in GCC 4 and early versions of GCC 5
+#   undef vec_packs
+    VSX_REDIRECT_2RG(vec_char16,  vec_short8,  vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_uchar16, vec_ushort8, vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_short8,  vec_int4,    vec_packs, __builtin_vec_packs)
+    VSX_REDIRECT_2RG(vec_ushort8, vec_uint4,   vec_packs, __builtin_vec_packs)
+
+    VSX_IMPL_2VRG_F(vec_int4,  vec_dword2,  "vpksdss %0,%2,%1", vec_packs)
+    VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs)
+#endif // __GNUG__ < 6
+
+#if __GNUG__ < 5
+// vec_xxpermdi in GCC 4 lacks little-endian support, just like clang 4
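+// (the operands are reversed, and the two selector bits are swapped and complemented,
+//  to map big-endian xxpermdi semantics onto little-endian element order)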
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ ((c & 1) << 1 | c >> 1)))
+#else
+#   define vec_permi vec_xxpermdi
+#endif // __GNUG__ < 5
+
+// shift left double by word immediate
+#ifndef vec_sldw
+#   define vec_sldw __builtin_vsx_xxsldwi
+#endif
+
+// vector population count
+VSX_IMPL_1VRG(vec_uchar16, vec_uchar16, vpopcntb, vec_popcntu)
+VSX_IMPL_1VRG(vec_uchar16, vec_char16,  vpopcntb, vec_popcntu)
+VSX_IMPL_1VRG(vec_ushort8, vec_ushort8, vpopcnth, vec_popcntu)
+VSX_IMPL_1VRG(vec_ushort8, vec_short8,  vpopcnth, vec_popcntu)
+VSX_IMPL_1VRG(vec_uint4,   vec_uint4,   vpopcntw, vec_popcntu)
+VSX_IMPL_1VRG(vec_uint4,   vec_int4,    vpopcntw, vec_popcntu)
+VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu)
+VSX_IMPL_1VRG(vec_udword2, vec_dword2,  vpopcntd, vec_popcntu)
+
+// converts between single and double-precision
+VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
+VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
+
+// converts word and doubleword to double-precision
+#ifdef vec_ctd
+#   undef vec_ctd
+#endif
+VSX_IMPL_1RG(vec_double2, wd, vec_int4,    wa, xvcvsxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, wd, vec_uint4,   wa, xvcvuxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, wd, vec_dword2,  wi, xvcvsxddp, vec_ctd)
+VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, vec_ctd)
+
+// converts word and doubleword to single-precision
+#undef vec_ctf
+VSX_IMPL_1RG(vec_float4, wf, vec_int4,    wa, xvcvsxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, wf, vec_uint4,   wa, xvcvuxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, wf, vec_dword2,  wi, xvcvsxdsp, vec_ctfo)
+VSX_IMPL_1RG(vec_float4, wf, vec_udword2, wi, xvcvuxdsp, vec_ctfo)
+
+// converts single and double precision to signed word
+#undef vec_cts
+VSX_IMPL_1RG(vec_int4,  wa, vec_double2, wd, xvcvdpsxws, vec_ctso)
+VSX_IMPL_1RG(vec_int4,  wa, vec_float4,  wf, xvcvspsxws, vec_cts)
+
+// converts single and double precision to unsigned word
+#undef vec_ctu
+VSX_IMPL_1RG(vec_uint4, wa, vec_double2, wd, xvcvdpuxws, vec_ctuo)
+VSX_IMPL_1RG(vec_uint4, wa, vec_float4,  wf, xvcvspuxws, vec_ctu)
+
+// converts single and double precision to signed doubleword
+#ifdef vec_ctsl
+#   undef vec_ctsl
+#endif
+VSX_IMPL_1RG(vec_dword2, wi, vec_double2, wd, xvcvdpsxds, vec_ctsl)
+VSX_IMPL_1RG(vec_dword2, wi, vec_float4,  wf, xvcvspsxds, vec_ctslo)
+
+// converts single and double precision to unsigned doubleword
+#ifdef vec_ctul
+#   undef vec_ctul
+#endif
+VSX_IMPL_1RG(vec_udword2, wi, vec_double2, wd, xvcvdpuxds, vec_ctul)
+VSX_IMPL_1RG(vec_udword2, wi, vec_float4,  wf, xvcvspuxds, vec_ctulo)
+
+// just in case GCC doesn't define it
+#ifndef vec_xl
+#   define vec_xl vec_vsx_ld
+#   define vec_xst vec_vsx_st
+#endif
+
+#endif // GCC VSX compatibility
+
+/*
+ * CLANG VSX compatibility
+**/
+#if defined(__clang__) && !defined(__IBMCPP__)
+
+/*
+ * CLANG doesn't support the %x<n> modifier in inline asm templates, which fixes the
+ * register number when using any of the register constraints wa, wd, wf.
+ *
+ * For more explanation, see "PowerPC and IBM RS6000" in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
+ * There's also an open bug: https://bugs.llvm.org/show_bug.cgi?id=31837
+ *
+ * So we cannot use inline asm here; we use only built-in functions that CLANG supports,
+ * falling back to __builtin_convertvector where CLANG is missing a vector-conversion built-in.
+*/
+
+// convert vector helper
+#define VSX_IMPL_CONVERT(rt, rg, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
+
+#if __clang_major__ < 5
+// implement vec_permi in a dirty way
+#   define VSX_IMPL_CLANG_4_PERMI(Tvec)                                                 \
+    VSX_FINLINE(Tvec) vec_permi(const Tvec& a, const Tvec& b, unsigned const char c)    \
+    {                                                                                   \
+        switch (c)                                                                      \
+        {                                                                               \
+        case 0:                                                                         \
+            return vec_mergeh(a, b);                                                    \
+        case 1:                                                                         \
+            return vec_mergel(vec_mergeh(a, a), b);                                     \
+        case 2:                                                                         \
+            return vec_mergeh(vec_mergel(a, a), b);                                     \
+        default:                                                                        \
+            return vec_mergel(a, b);                                                    \
+        }                                                                               \
+    }
+    VSX_IMPL_CLANG_4_PERMI(vec_udword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_dword2)
+    VSX_IMPL_CLANG_4_PERMI(vec_double2)
+
+// vec_xxsldwi is missing in clang 4
+#   define vec_xxsldwi(a, b, c) vec_sld(a, b, (c) * 4)
+#else
+// vec_xxpermdi lacks little-endian support in clang 4, just like GCC 4
+#   define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ ((c & 1) << 1 | c >> 1)))
+#endif // __clang_major__ < 5
+
+// shift left double by word immediate
+#ifndef vec_sldw
+#   define vec_sldw vec_xxsldwi
+#endif
+
+// Implement vec_rsqrt since clang only supports vec_rsqrte
+#ifndef vec_rsqrt
+    VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a)
+    { return vec_div(vec_float4_sp(1), vec_sqrt(a)); }
+
+    VSX_FINLINE(vec_double2) vec_rsqrt(const vec_double2& a)
+    { return vec_div(vec_double2_sp(1), vec_sqrt(a)); }
+#endif
+
+// vec_promote lacks doubleword support
+VSX_FINLINE(vec_dword2) vec_promote(long long a, int b)
+{
+    vec_dword2 ret = vec_dword2_z;
+    ret[b & 1] = a;
+    return ret;
+}
+
+VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b)
+{
+    vec_udword2 ret = vec_udword2_z;
+    ret[b & 1] = a;
+    return ret;
+}
+
+// vec_popcnt should return unsigned, but clang disagrees, just like GCC does with vec_vpopcnt
+#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast)   \
+VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a)  \
+{ return ucast(vec_popcnt(a)); }
+VSX_IMPL_POPCNTU(vec_uchar16, vec_char16, vec_uchar16_c);
+VSX_IMPL_POPCNTU(vec_ushort8, vec_short8, vec_ushort8_c);
+VSX_IMPL_POPCNTU(vec_uint4,   vec_int4,   vec_uint4_c);
+// redirect unsigned types
+VSX_REDIRECT_1RG(vec_uchar16, vec_uchar16, vec_popcntu, vec_popcnt)
+VSX_REDIRECT_1RG(vec_ushort8, vec_ushort8, vec_popcntu, vec_popcnt)
+VSX_REDIRECT_1RG(vec_uint4,   vec_uint4,   vec_popcntu, vec_popcnt)
+
+// converts between single and double precision
+VSX_REDIRECT_1RG(vec_float4,  vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
+VSX_REDIRECT_1RG(vec_double2, vec_float4,  vec_cvfo, __builtin_vsx_xvcvspdp)
+
+// converts word and doubleword to double-precision
+#ifdef vec_ctd
+#   undef vec_ctd
+#endif
+VSX_REDIRECT_1RG(vec_double2, vec_int4,  vec_ctdo, __builtin_vsx_xvcvsxwdp)
+VSX_REDIRECT_1RG(vec_double2, vec_uint4, vec_ctdo, __builtin_vsx_xvcvuxwdp)
+
+VSX_IMPL_CONVERT(vec_double2, vec_dword2,  vec_ctd)
+VSX_IMPL_CONVERT(vec_double2, vec_udword2, vec_ctd)
+
+// converts word and doubleword to single-precision
+#if __clang_major__ > 4
+#   undef vec_ctf
+#endif
+VSX_IMPL_CONVERT(vec_float4, vec_int4,    vec_ctf)
+VSX_IMPL_CONVERT(vec_float4, vec_uint4,   vec_ctf)
+VSX_REDIRECT_1RG(vec_float4, vec_dword2,  vec_ctfo, __builtin_vsx_xvcvsxdsp)
+VSX_REDIRECT_1RG(vec_float4, vec_udword2, vec_ctfo, __builtin_vsx_xvcvuxdsp)
+
+// converts single and double precision to signed word
+#if __clang_major__ > 4
+#   undef vec_cts
+#endif
+VSX_REDIRECT_1RG(vec_int4,  vec_double2, vec_ctso, __builtin_vsx_xvcvdpsxws)
+VSX_IMPL_CONVERT(vec_int4,  vec_float4,  vec_cts)
+
+// converts single and double precision to unsigned word
+#if __clang_major__ > 4
+#   undef vec_ctu
+#endif
+VSX_REDIRECT_1RG(vec_uint4, vec_double2, vec_ctuo, __builtin_vsx_xvcvdpuxws)
+VSX_IMPL_CONVERT(vec_uint4, vec_float4,  vec_ctu)
+
+// converts single and double precision to signed doubleword
+#ifdef vec_ctsl
+#   undef vec_ctsl
+#endif
+VSX_IMPL_CONVERT(vec_dword2, vec_double2, vec_ctsl)
+// __builtin_convertvector cannot perform this conversion; xvcvspsxds is missing from it
+VSX_FINLINE(vec_dword2) vec_ctslo(const vec_float4& a)
+{ return vec_ctsl(vec_cvfo(a)); }
+
+// converts single and double precision to unsigned doubleword
+#ifdef vec_ctul
+#   undef vec_ctul
+#endif
+VSX_IMPL_CONVERT(vec_udword2, vec_double2, vec_ctul)
+// __builtin_convertvector cannot perform this conversion; xvcvspuxds is missing from it
+VSX_FINLINE(vec_udword2) vec_ctulo(const vec_float4& a)
+{ return vec_ctul(vec_cvfo(a)); }
+
+#endif // CLANG VSX compatibility
+
+/*
+ * Common GCC, CLANG compatibility
+**/
+#if defined(__GNUG__) && !defined(__IBMCPP__)
+
+#ifdef vec_cvf
+#   undef vec_cvf
+#endif
+
+#define VSX_IMPL_CONV_EVEN_4_2(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{ return fn2(vec_sldw(a, a, 1)); }
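+// (shifting the source left by one word moves the even-numbered lanes into odd positions,
+//  so the odd-variant intrinsic yields the even-variant result)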
+
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_float4, vec_cvf,  vec_cvfo)
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_int4,   vec_ctd,  vec_ctdo)
+VSX_IMPL_CONV_EVEN_4_2(vec_double2, vec_uint4,  vec_ctd,  vec_ctdo)
+
+VSX_IMPL_CONV_EVEN_4_2(vec_dword2,  vec_float4, vec_ctsl, vec_ctslo)
+VSX_IMPL_CONV_EVEN_4_2(vec_udword2, vec_float4, vec_ctul, vec_ctulo)
+
+#define VSX_IMPL_CONV_EVEN_2_4(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{                                                \
+    rt v4 = fn2(a);                              \
+    return vec_sldw(v4, v4, 3);                  \
+}
+
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_double2, vec_cvf, vec_cvfo)
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_dword2,  vec_ctf, vec_ctfo)
+VSX_IMPL_CONV_EVEN_2_4(vec_float4, vec_udword2, vec_ctf, vec_ctfo)
+
+VSX_IMPL_CONV_EVEN_2_4(vec_int4,   vec_double2, vec_cts, vec_ctso)
+VSX_IMPL_CONV_EVEN_2_4(vec_uint4,  vec_double2, vec_ctu, vec_ctuo)
+
+// Only for Eigen!
+/*
+ * changing the behavior of the conversion intrinsics for GCC affects Eigen,
+ * so we redefine the old behavior again, only on GCC and CLANG
+*/
+#if !defined(__clang__) || __clang_major__ > 4
+    // ignoring second arg since Eigen only truncates toward zero
+#   define VSX_IMPL_CONV_2VARIANT(rt, rg, fnm, fn2)     \
+    VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \
+    {                                                   \
+        assert(only_truncate == 0);                     \
+        (void)only_truncate;                            \
+        return fn2(a);                                  \
+    }
+    VSX_IMPL_CONV_2VARIANT(vec_int4,   vec_float4,  vec_cts, vec_cts)
+    VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4,    vec_ctf, vec_ctf)
+    // define vec_cts for converting double precision to signed doubleword
+    // which isn't compatible with XLC, but that's okay since Eigen only uses it with GCC
+    VSX_IMPL_CONV_2VARIANT(vec_dword2, vec_double2, vec_cts, vec_ctsl)
+#endif // Eigen
+
+#endif // Common GCC, CLANG compatibility
+
+/*
+ * XLC VSX compatibility
+**/
+#if defined(__IBMCPP__)
+
+// vector population count
+#define vec_popcntu vec_popcnt
+
+// overload and redirect with setting second arg to zero
+// since we only support conversions without the second arg
+#define VSX_IMPL_OVERLOAD_Z2(rt, rg, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a) { return fnm(a, 0); }
+
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_int4,    vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_uint4,   vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_dword2,  vec_ctd)
+VSX_IMPL_OVERLOAD_Z2(vec_double2, vec_udword2, vec_ctd)
+
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_int4,    vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_uint4,   vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_dword2,  vec_ctf)
+VSX_IMPL_OVERLOAD_Z2(vec_float4,  vec_udword2, vec_ctf)
+
+VSX_IMPL_OVERLOAD_Z2(vec_int4,    vec_double2, vec_cts)
+VSX_IMPL_OVERLOAD_Z2(vec_int4,    vec_float4,  vec_cts)
+
+VSX_IMPL_OVERLOAD_Z2(vec_uint4,   vec_double2, vec_ctu)
+VSX_IMPL_OVERLOAD_Z2(vec_uint4,   vec_float4,  vec_ctu)
+
+VSX_IMPL_OVERLOAD_Z2(vec_dword2,  vec_double2, vec_ctsl)
+VSX_IMPL_OVERLOAD_Z2(vec_dword2,  vec_float4,  vec_ctsl)
+
+VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_double2, vec_ctul)
+VSX_IMPL_OVERLOAD_Z2(vec_udword2, vec_float4,  vec_ctul)
+
+// fixme: implement conversions of odd-numbered elements in a dirty way
+// since XLC doesn't support VSX register operands in inline asm.
+#define VSX_IMPL_CONV_ODD_4_2(rt, rg, fnm, fn2) \
+VSX_FINLINE(rt) fnm(const rg& a) { return fn2(vec_sldw(a, a, 3)); }
+
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_float4, vec_cvfo,  vec_cvf)
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_int4,   vec_ctdo,  vec_ctd)
+VSX_IMPL_CONV_ODD_4_2(vec_double2, vec_uint4,  vec_ctdo,  vec_ctd)
+
+VSX_IMPL_CONV_ODD_4_2(vec_dword2,  vec_float4, vec_ctslo, vec_ctsl)
+VSX_IMPL_CONV_ODD_4_2(vec_udword2, vec_float4, vec_ctulo, vec_ctul)
+
+#define VSX_IMPL_CONV_ODD_2_4(rt, rg, fnm, fn2)  \
+VSX_FINLINE(rt) fnm(const rg& a)                 \
+{                                                \
+    rt v4 = fn2(a);                              \
+    return vec_sldw(v4, v4, 1);                  \
+}
+
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_double2, vec_cvfo, vec_cvf)
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_dword2,  vec_ctfo, vec_ctf)
+VSX_IMPL_CONV_ODD_2_4(vec_float4, vec_udword2, vec_ctfo, vec_ctf)
+
+VSX_IMPL_CONV_ODD_2_4(vec_int4,   vec_double2, vec_ctso, vec_cts)
+VSX_IMPL_CONV_ODD_2_4(vec_uint4,  vec_double2, vec_ctuo, vec_ctu)
+
+#endif // XLC VSX compatibility
+
+// ignore the GCC warning caused by -Wunused-but-set-variable in rare cases
+#if defined(__GNUG__) && !defined(__clang__)
+#   define VSX_UNUSED(Tvec) Tvec __attribute__((__unused__))
+#else // CLANG, XLC
+#   define VSX_UNUSED(Tvec) Tvec
+#endif
+
+// GCC can figure out the cast from long int on its own, while XLC and CLANG find it ambiguous
+#if defined(__clang__) || defined(__IBMCPP__)
+    VSX_FINLINE(vec_udword2) vec_splats(uint64 v)
+    { return vec_splats((unsigned long long) v); }
+
+    VSX_FINLINE(vec_dword2) vec_splats(int64 v)
+    { return vec_splats((long long) v); }
+
+    VSX_FINLINE(vec_udword2) vec_promote(uint64 a, int b)
+    { return vec_promote((unsigned long long) a, b); }
+
+    VSX_FINLINE(vec_dword2) vec_promote(int64 a, int b)
+    { return vec_promote((long long) a, b); }
+#endif
+
+/*
+ * implement vsx_ld(offset, pointer), vsx_st(vector, offset, pointer)
+ * load and store using an offset that depends on the pointer's element type
+ *
+ * implement vsx_ldf(offset, pointer), vsx_stf(vector, offset, pointer)
+ * load and store using an offset in fixed byte units
+ *
+ * Note: in CLANG, vec_xl and vec_xst fail on unaligned addresses,
+ * so we use vec_vsx_ld, vec_vsx_st instead
+*/
+
+#if defined(__clang__) && !defined(__IBMCPP__)
+#   define vsx_ldf  vec_vsx_ld
+#   define vsx_stf  vec_vsx_st
+#else // GCC , XLC
+#   define vsx_ldf  vec_xl
+#   define vsx_stf  vec_xst
+#endif
+
+#define VSX_OFFSET(o, p) ((o) * sizeof(*(p)))
+#define vsx_ld(o, p) vsx_ldf(VSX_OFFSET(o, p), p)
+#define vsx_st(v, o, p) vsx_stf(v, VSX_OFFSET(o, p), p)
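+// e.g. vsx_ld(2, (const float*)ptr) loads 16 bytes starting at byte offset 2 * sizeof(float) = 8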
+
+/*
+ * implement vsx_ld2(offset, pointer), vsx_st2(vector, offset, pointer) to load and store double words
+ * In GCC, vec_xl and vec_xst map to vec_vsx_ld, vec_vsx_st, which don't support long long,
+ * and in CLANG we use vec_vsx_ld, vec_vsx_st because vec_xl, vec_xst fail on unaligned addresses
+ *
+ * In XLC, vec_xl and vec_xst fail to cast int64 (long int) to long long
+*/
+#if (defined(__GNUG__) || defined(__clang__)) && !defined(__IBMCPP__)
+    VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vec_udword2_c(vsx_ldf(VSX_OFFSET(o, p), (unsigned int*)p)); }
+
+    VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vec_dword2_c(vsx_ldf(VSX_OFFSET(o, p), (int*)p)); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec_uint4_c(vec), VSX_OFFSET(o, p), (unsigned int*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec_int4_c(vec), VSX_OFFSET(o, p), (int*)p); }
+#else // XLC
+    VSX_FINLINE(vec_udword2) vsx_ld2(long o, const uint64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    VSX_FINLINE(vec_dword2) vsx_ld2(long o, const int64* p)
+    { return vsx_ldf(VSX_OFFSET(o, p), (long long*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_udword2& vec, long o, uint64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (unsigned long long*)p); }
+
+    VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
+    { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
+#endif
+
+// Store the lower 8 bytes
+#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
+
+// Store the upper 8 bytes
+#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
+
+// Load 64 bits of integer data into the lower part
+#define VSX_IMPL_LOAD_L8(Tvec, Tp)                  \
+VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p)            \
+{ return ((Tvec)vec_promote(*((uint64*)p), 0)); }
+
+VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
+VSX_IMPL_LOAD_L8(vec_char16,  schar)
+VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
+VSX_IMPL_LOAD_L8(vec_short8,  short)
+VSX_IMPL_LOAD_L8(vec_uint4,   uint)
+VSX_IMPL_LOAD_L8(vec_int4,    int)
+VSX_IMPL_LOAD_L8(vec_float4,  float)
+VSX_IMPL_LOAD_L8(vec_udword2, uint64)
+VSX_IMPL_LOAD_L8(vec_dword2,  int64)
+VSX_IMPL_LOAD_L8(vec_double2, double)
+
+// logical not
+#define vec_not(a) vec_nor(a, a)
+
+// emulation for POWER9 instructions on older ISAs
+// not equal
+#ifndef vec_cmpne
+#   define vec_cmpne(a, b) vec_not(vec_cmpeq(a, b))
+#endif
+
+// absolute difference
+#ifndef vec_absd
+#   define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
+#endif
+
+/*
+ * Implement vec_unpacklu and vec_unpackhu
+ * since vec_unpackl, vec_unpackh only support signed integers
+**/
+#define VSX_IMPL_UNPACKU(rt, rg, zero)      \
+VSX_FINLINE(rt) vec_unpacklu(const rg& a)   \
+{ return (rt)(vec_mergel(a, zero)); }       \
+VSX_FINLINE(rt) vec_unpackhu(const rg& a)   \
+{ return (rt)(vec_mergeh(a, zero));  }
+
+VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
+VSX_IMPL_UNPACKU(vec_uint4,   vec_ushort8, vec_ushort8_z)
+VSX_IMPL_UNPACKU(vec_udword2, vec_uint4,   vec_uint4_z)
+
+/*
+ * Implement vec_mergesqe and vec_mergesqo
+ * Merge the even-numbered and odd-numbered elements of two vectors, in sequence
+*/
+#define VSX_IMPL_PERM(rt, fnm, ...)            \
+VSX_FINLINE(rt) fnm(const rt& a, const rt& b)  \
+{ static const vec_uchar16 perm = {__VA_ARGS__}; return vec_perm(a, b, perm); }
+
+// 16
+#define perm16_mergesqe 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+#define perm16_mergesqo 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
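+// i.e. vec_mergesqe(a, b) yields the even-numbered lanes of a followed by those of b,
+//      and vec_mergesqo(a, b) yields the odd-numbered lanes of a followed by those of b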
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_uchar16, vec_mergesqo, perm16_mergesqo)
+VSX_IMPL_PERM(vec_char16,  vec_mergesqe, perm16_mergesqe)
+VSX_IMPL_PERM(vec_char16,  vec_mergesqo, perm16_mergesqo)
+// 8
+#define perm8_mergesqe 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+#define perm8_mergesqo 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_ushort8, vec_mergesqo, perm8_mergesqo)
+VSX_IMPL_PERM(vec_short8,  vec_mergesqe, perm8_mergesqe)
+VSX_IMPL_PERM(vec_short8,  vec_mergesqo, perm8_mergesqo)
+// 4
+#define perm4_mergesqe 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+#define perm4_mergesqo 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+VSX_IMPL_PERM(vec_uint4,  vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_uint4,  vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_int4,   vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_int4,   vec_mergesqo, perm4_mergesqo)
+VSX_IMPL_PERM(vec_float4, vec_mergesqe, perm4_mergesqe)
+VSX_IMPL_PERM(vec_float4, vec_mergesqo, perm4_mergesqo)
+// 2
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqo, vec_mergel)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqe, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqo, vec_mergel)
+
+/*
+ * Implement vec_mergesqh and vec_mergesql
+ * Merge the most-significant and least-significant halves of two vectors, in sequence
+*/
+#define VSX_IMPL_MERGESQHL(Tvec)                                    \
+VSX_FINLINE(Tvec) vec_mergesqh(const Tvec& a, const Tvec& b)        \
+{ return (Tvec)vec_mergeh(vec_udword2_c(a), vec_udword2_c(b)); }    \
+VSX_FINLINE(Tvec) vec_mergesql(const Tvec& a, const Tvec& b)        \
+{ return (Tvec)vec_mergel(vec_udword2_c(a), vec_udword2_c(b)); }
+VSX_IMPL_MERGESQHL(vec_uchar16)
+VSX_IMPL_MERGESQHL(vec_char16)
+VSX_IMPL_MERGESQHL(vec_ushort8)
+VSX_IMPL_MERGESQHL(vec_short8)
+VSX_IMPL_MERGESQHL(vec_uint4)
+VSX_IMPL_MERGESQHL(vec_int4)
+VSX_IMPL_MERGESQHL(vec_float4)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_udword2, vec_udword2, vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_dword2,  vec_dword2,  vec_mergesql, vec_mergel)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesqh, vec_mergeh)
+VSX_REDIRECT_2RG(vec_double2, vec_double2, vec_mergesql, vec_mergel)
+
+
+// 2 and 4 channels interleave for all types except 2 lanes
+#define VSX_IMPL_ST_INTERLEAVE(Tp, Tvec)                                    \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr)  \
+{                                                                           \
+    vsx_stf(vec_mergeh(a, b), 0, ptr);                                      \
+    vsx_stf(vec_mergel(a, b), 16, ptr);                                     \
+}                                                                           \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,           \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    Tvec ac = vec_mergeh(a, c);                                             \
+    Tvec bd = vec_mergeh(b, d);                                             \
+    vsx_stf(vec_mergeh(ac, bd), 0, ptr);                                    \
+    vsx_stf(vec_mergel(ac, bd), 16, ptr);                                   \
+    ac = vec_mergel(a, c);                                                  \
+    bd = vec_mergel(b, d);                                                  \
+    vsx_stf(vec_mergeh(ac, bd), 32, ptr);                                   \
+    vsx_stf(vec_mergel(ac, bd), 48, ptr);                                   \
+}
+VSX_IMPL_ST_INTERLEAVE(uchar,  vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE(schar,  vec_char16)
+VSX_IMPL_ST_INTERLEAVE(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE(short,  vec_short8)
+VSX_IMPL_ST_INTERLEAVE(uint,   vec_uint4)
+VSX_IMPL_ST_INTERLEAVE(int,    vec_int4)
+VSX_IMPL_ST_INTERLEAVE(float,  vec_float4)
+
+// 2 and 4 channels deinterleave for 16 lanes
+#define VSX_IMPL_ST_DINTERLEAVE_8(Tp, Tvec)                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(16, ptr);                                              \
+    Tvec v2 = vsx_ld(32, ptr);                                              \
+    Tvec v3 = vsx_ld(48, ptr);                                              \
+    Tvec m0 = vec_mergesqe(v0, v1);                                         \
+    Tvec m1 = vec_mergesqe(v2, v3);                                         \
+    a = vec_mergesqe(m0, m1);                                               \
+    c = vec_mergesqo(m0, m1);                                               \
+    m0 = vec_mergesqo(v0, v1);                                              \
+    m1 = vec_mergesqo(v2, v3);                                              \
+    b = vec_mergesqe(m0, m1);                                               \
+    d = vec_mergesqo(m0, m1);                                               \
+}
+VSX_IMPL_ST_DINTERLEAVE_8(uchar, vec_uchar16)
+VSX_IMPL_ST_DINTERLEAVE_8(schar, vec_char16)
+
+// 2 and 4 channels deinterleave for 8 lanes
+#define VSX_IMPL_ST_DINTERLEAVE_16(Tp, Tvec)                                \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    a = vec_mergesqe(v0, v1);                                               \
+    b = vec_mergesqo(v0, v1);                                               \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(8, ptr);                                               \
+    Tvec m0 = vec_mergeh(v0, v1);                                           \
+    Tvec m1 = vec_mergel(v0, v1);                                           \
+    Tvec ab0 = vec_mergeh(m0, m1);                                          \
+    Tvec cd0 = vec_mergel(m0, m1);                                          \
+    v0 = vsx_ld(16, ptr);                                                   \
+    v1 = vsx_ld(24, ptr);                                                   \
+    m0 = vec_mergeh(v0, v1);                                                \
+    m1 = vec_mergel(v0, v1);                                                \
+    Tvec ab1 = vec_mergeh(m0, m1);                                          \
+    Tvec cd1 = vec_mergel(m0, m1);                                          \
+    a = vec_mergesqh(ab0, ab1);                                             \
+    b = vec_mergesql(ab0, ab1);                                             \
+    c = vec_mergesqh(cd0, cd1);                                             \
+    d = vec_mergesql(cd0, cd1);                                             \
+}
+VSX_IMPL_ST_DINTERLEAVE_16(ushort, vec_ushort8)
+VSX_IMPL_ST_DINTERLEAVE_16(short,  vec_short8)
+
+// 2 and 4 channels deinterleave for 4 lanes
+#define VSX_IMPL_ST_DINTERLEAVE_32(Tp, Tvec)                                \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    a = vsx_ld(0, ptr);                                                     \
+    b = vsx_ld(4, ptr);                                                     \
+    Tvec m0 = vec_mergeh(a, b);                                             \
+    Tvec m1 = vec_mergel(a, b);                                             \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = vsx_ld(0, ptr);                                               \
+    Tvec v1 = vsx_ld(4, ptr);                                               \
+    Tvec v2 = vsx_ld(8, ptr);                                               \
+    Tvec v3 = vsx_ld(12, ptr);                                              \
+    Tvec m0 = vec_mergeh(v0, v2);                                           \
+    Tvec m1 = vec_mergeh(v1, v3);                                           \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+    m0 = vec_mergel(v0, v2);                                                \
+    m1 = vec_mergel(v1, v3);                                                \
+    c = vec_mergeh(m0, m1);                                                 \
+    d = vec_mergel(m0, m1);                                                 \
+}
+VSX_IMPL_ST_DINTERLEAVE_32(uint,  vec_uint4)
+VSX_IMPL_ST_DINTERLEAVE_32(int,   vec_int4)
+VSX_IMPL_ST_DINTERLEAVE_32(float, vec_float4)
+
+// 2 and 4 channels interleave and deinterleave for 2 lanes
+#define VSX_IMPL_ST_D_INTERLEAVE_64(Tp, Tvec, ld_func, st_func)             \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, Tp* ptr)  \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergel(a, b), 2, ptr);                                      \
+}                                                                           \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,           \
+                                     const Tvec& c, const Tvec& d, Tp* ptr) \
+{                                                                           \
+    st_func(vec_mergeh(a, b), 0, ptr);                                      \
+    st_func(vec_mergeh(c, d), 2, ptr);                                      \
+    st_func(vec_mergel(a, b), 4, ptr);                                      \
+    st_func(vec_mergel(c, d), 6, ptr);                                      \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b)      \
+{                                                                           \
+    Tvec m0 = ld_func(0, ptr);                                              \
+    Tvec m1 = ld_func(2, ptr);                                              \
+    a = vec_mergeh(m0, m1);                                                 \
+    b = vec_mergel(m0, m1);                                                 \
+}                                                                           \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b,      \
+                                       Tvec& c, Tvec& d)                    \
+{                                                                           \
+    Tvec v0 = ld_func(0, ptr);                                              \
+    Tvec v1 = ld_func(2, ptr);                                              \
+    Tvec v2 = ld_func(4, ptr);                                              \
+    Tvec v3 = ld_func(6, ptr);                                              \
+    a = vec_mergeh(v0, v2);                                                 \
+    b = vec_mergel(v0, v2);                                                 \
+    c = vec_mergeh(v1, v3);                                                 \
+    d = vec_mergel(v1, v3);                                                 \
+}
+VSX_IMPL_ST_D_INTERLEAVE_64(int64,  vec_dword2,  vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_D_INTERLEAVE_64(double, vec_double2, vsx_ld,  vsx_st)
+
+/* 3 channels */
+#define VSX_IMPL_ST_INTERLEAVE_3CH_16(Tp, Tvec)                                                   \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                 \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 16, 0, 1, 17, 0, 2, 18, 0, 3, 19, 0, 4, 20, 0, 5};         \
+    static const vec_uchar16 a123 = {0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {21, 0, 6, 22, 0, 7, 23, 0, 8, 24, 0, 9, 25, 0, 10, 26};       \
+    static const vec_uchar16 b123 = {0, 21, 2, 3, 22, 5, 6, 23, 8, 9, 24, 11, 12, 25, 14, 15};    \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 16, ptr);                                      \
+    static const vec_uchar16 c12 = {0, 11, 27, 0, 12, 28, 0, 13, 29, 0, 14, 30, 0, 15, 31, 0};    \
+    static const vec_uchar16 c123 = {26, 1, 2, 27, 4, 5, 28, 7, 8, 29, 10, 11, 30, 13, 14, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 32, ptr);                                      \
+}                                                                                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                   \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(16, ptr);                                                                    \
+    Tvec v3 = vsx_ld(32, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 0, 0, 0, 0, 0};  \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_16(uchar, vec_uchar16)
+VSX_IMPL_ST_INTERLEAVE_3CH_16(schar, vec_char16)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_8(Tp, Tvec)                                                    \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                 \
+                                     const Tvec& c, Tp* ptr)                                      \
+{                                                                                                 \
+    static const vec_uchar16 a12 = {0, 1, 16, 17, 0, 0, 2, 3, 18, 19, 0, 0, 4, 5, 20, 21};        \
+    static const vec_uchar16 a123 = {0, 1, 2, 3, 16, 17, 6, 7, 8, 9, 18, 19, 12, 13, 14, 15};     \
+    vsx_st(vec_perm(vec_perm(a, b, a12), c, a123), 0, ptr);                                       \
+    static const vec_uchar16 b12 = {0, 0, 6, 7, 22, 23, 0, 0, 8, 9, 24, 25, 0, 0, 10, 11};        \
+    static const vec_uchar16 b123 = {20, 21, 2, 3, 4, 5, 22, 23, 8, 9, 10, 11, 24, 25, 14, 15};   \
+    vsx_st(vec_perm(vec_perm(a, b, b12), c, b123), 8, ptr);                                       \
+    static const vec_uchar16 c12 = {26, 27, 0, 0, 12, 13, 28, 29, 0, 0, 14, 15, 30, 31, 0, 0};    \
+    static const vec_uchar16 c123 = {0, 1, 26, 27, 4, 5, 6, 7, 28, 29, 10, 11, 12, 13, 30, 31};   \
+    vsx_st(vec_perm(vec_perm(a, b, c12), c, c123), 16, ptr);                                      \
+}                                                                                                 \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                   \
+{                                                                                                 \
+    Tvec v1 = vsx_ld(0, ptr);                                                                     \
+    Tvec v2 = vsx_ld(8, ptr);                                                                     \
+    Tvec v3 = vsx_ld(16, ptr);                                                                    \
+    static const vec_uchar16 a12_perm = {0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 0, 0, 0, 0}; \
+    static const vec_uchar16 a123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27};  \
+    a = vec_perm(vec_perm(v1, v2, a12_perm), v3, a123_perm);                                      \
+    static const vec_uchar16 b12_perm = {2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 0, 0, 0, 0, 0};   \
+    static const vec_uchar16 b123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29};  \
+    b = vec_perm(vec_perm(v1, v2, b12_perm), v3, b123_perm);                                      \
+    static const vec_uchar16 c12_perm = {4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 0, 0, 0, 0, 0}; \
+    static const vec_uchar16 c123_perm = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31};  \
+    c = vec_perm(vec_perm(v1, v2, c12_perm), v3, c123_perm);                                      \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_8(ushort, vec_ushort8)
+VSX_IMPL_ST_INTERLEAVE_3CH_8(short,  vec_short8)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_4(Tp, Tvec)                                                     \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,                                  \
+                                     const Tvec& c, Tp* ptr)                                       \
+{                                                                                                  \
+    Tvec hbc = vec_mergeh(b, c);                                                                   \
+    static const vec_uchar16 ahbc = {0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 4, 5, 6, 7};      \
+    vsx_st(vec_perm(a, hbc, ahbc), 0, ptr);                                                        \
+    Tvec lab = vec_mergel(a, b);                                                                   \
+    vsx_st(vec_sld(lab, hbc, 8), 4, ptr);                                                          \
+    static const vec_uchar16 clab = {8, 9, 10, 11, 24, 25, 26, 27, 28, 29, 30, 31, 12, 13, 14, 15};\
+    vsx_st(vec_perm(c, lab, clab), 8, ptr);                                                        \
+}                                                                                                  \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, Tvec& c)                    \
+{                                                                                                  \
+    Tvec v1 = vsx_ld(0, ptr);                                                                      \
+    Tvec v2 = vsx_ld(4, ptr);                                                                      \
+    Tvec v3 = vsx_ld(8, ptr);                                                                      \
+    static const vec_uchar16 flp = {0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 28, 29, 30, 31};   \
+    a = vec_perm(v1, vec_sld(v3, v2, 8), flp);                                                     \
+    static const vec_uchar16 flp2 = {28, 29, 30, 31, 0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19};  \
+    b = vec_perm(v2, vec_sld(v1, v3, 8), flp2);                                                    \
+    c = vec_perm(vec_sld(v2, v1, 8), v3, flp);                                                     \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_4(uint,  vec_uint4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(int,   vec_int4)
+VSX_IMPL_ST_INTERLEAVE_3CH_4(float, vec_float4)
+
+#define VSX_IMPL_ST_INTERLEAVE_3CH_2(Tp, Tvec, ld_func, st_func)     \
+VSX_FINLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b,    \
+                                     const Tvec& c, Tp* ptr)         \
+{                                                                    \
+    st_func(vec_mergeh(a, b), 0, ptr);                               \
+    st_func(vec_permi(c, a, 1), 2, ptr);                             \
+    st_func(vec_mergel(b, c), 4, ptr);                               \
+}                                                                    \
+VSX_FINLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a,        \
+                                       Tvec& b, Tvec& c)             \
+{                                                                    \
+    Tvec v1 = ld_func(0, ptr);                                       \
+    Tvec v2 = ld_func(2, ptr);                                       \
+    Tvec v3 = ld_func(4, ptr);                                       \
+    a = vec_permi(v1, v2, 1);                                        \
+    b = vec_permi(v1, v3, 2);                                        \
+    c = vec_permi(v2, v3, 1);                                        \
+}
+VSX_IMPL_ST_INTERLEAVE_3CH_2(int64,  vec_dword2,  vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(uint64, vec_udword2, vsx_ld2, vsx_st2)
+VSX_IMPL_ST_INTERLEAVE_3CH_2(double, vec_double2, vsx_ld,  vsx_st)
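+
+// Usage sketch (illustrative, assumes a VSX-enabled build): round-tripping 16
+// interleaved RGB pixels through the planar form.
+//
+//     uchar rgb[16 * 3];                  // R0 G0 B0 R1 G1 B1 ...
+//     vec_uchar16 r, g, b;
+//     vec_ld_deinterleave(rgb, r, g, b);  // r = {R0..R15}, g = {G0..G15}, b = {B0..B15}
+//     vec_st_interleave(r, g, b, rgb);    // writes the pixels back interleaved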
+
+#endif // CV_VSX
+
+//! @}
+
+#endif // OPENCV_HAL_VSX_UTILS_HPP

+ 603 - 0
ThresholdTools/include/opencv2/core/wimage.hpp

@@ -0,0 +1,603 @@
+/*M//////////////////////////////////////////////////////////////////////////////
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to
+//  this license.  If you do not agree to this license, do not download,
+//  install, copy or use the software.
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2008, Google, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//  * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//  * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//  * The name of Intel Corporation or contributors may not be used to endorse
+//     or promote products derived from this software without specific
+//     prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and any express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular purpose
+// are disclaimed. In no event shall the Intel Corporation or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+/////////////////////////////////////////////////////////////////////////////////
+//M*/
+
+#ifndef OPENCV_CORE_WIMAGE_HPP
+#define OPENCV_CORE_WIMAGE_HPP
+
+#include "opencv2/core/core_c.h"
+
+#ifdef __cplusplus
+
+namespace cv {
+
+//! @addtogroup core
+//! @{
+
+template <typename T> class WImage;
+template <typename T> class WImageBuffer;
+template <typename T> class WImageView;
+
+template<typename T, int C> class WImageC;
+template<typename T, int C> class WImageBufferC;
+template<typename T, int C> class WImageViewC;
+
+// Commonly used typedefs.
+typedef WImage<uchar>            WImage_b;
+typedef WImageView<uchar>        WImageView_b;
+typedef WImageBuffer<uchar>      WImageBuffer_b;
+
+typedef WImageC<uchar, 1>        WImage1_b;
+typedef WImageViewC<uchar, 1>    WImageView1_b;
+typedef WImageBufferC<uchar, 1>  WImageBuffer1_b;
+
+typedef WImageC<uchar, 3>        WImage3_b;
+typedef WImageViewC<uchar, 3>    WImageView3_b;
+typedef WImageBufferC<uchar, 3>  WImageBuffer3_b;
+
+typedef WImage<float>            WImage_f;
+typedef WImageView<float>        WImageView_f;
+typedef WImageBuffer<float>      WImageBuffer_f;
+
+typedef WImageC<float, 1>        WImage1_f;
+typedef WImageViewC<float, 1>    WImageView1_f;
+typedef WImageBufferC<float, 1>  WImageBuffer1_f;
+
+typedef WImageC<float, 3>        WImage3_f;
+typedef WImageViewC<float, 3>    WImageView3_f;
+typedef WImageBufferC<float, 3>  WImageBuffer3_f;
+
+// There is no standard naming convention for signed and unsigned short, so
+// these typedefs spell out the bit depth explicitly.
+typedef WImage<short>            WImage_16s;
+typedef WImageView<short>        WImageView_16s;
+typedef WImageBuffer<short>      WImageBuffer_16s;
+
+typedef WImageC<short, 1>        WImage1_16s;
+typedef WImageViewC<short, 1>    WImageView1_16s;
+typedef WImageBufferC<short, 1>  WImageBuffer1_16s;
+
+typedef WImageC<short, 3>        WImage3_16s;
+typedef WImageViewC<short, 3>    WImageView3_16s;
+typedef WImageBufferC<short, 3>  WImageBuffer3_16s;
+
+typedef WImage<ushort>            WImage_16u;
+typedef WImageView<ushort>        WImageView_16u;
+typedef WImageBuffer<ushort>      WImageBuffer_16u;
+
+typedef WImageC<ushort, 1>        WImage1_16u;
+typedef WImageViewC<ushort, 1>    WImageView1_16u;
+typedef WImageBufferC<ushort, 1>  WImageBuffer1_16u;
+
+typedef WImageC<ushort, 3>        WImage3_16u;
+typedef WImageViewC<ushort, 3>    WImageView3_16u;
+typedef WImageBufferC<ushort, 3>  WImageBuffer3_16u;
+
+/** @brief Image class which provides a thin layer around an IplImage.
+
+The goals of the class design are:
+
+    -# All the data has explicit ownership to avoid memory leaks
+    -# No hidden allocations or copies for performance.
+    -# Easy access to OpenCV methods (which will access IPP if available)
+    -# Can easily treat external data as an image
+    -# Easy to create images which are subsets of other images
+    -# Fast pixel access which can take advantage of number of channels if known at compile time.
+
+The WImage class is the image class which provides the data accessors. The 'W' comes from the fact
+that it is also a wrapper around the popular but inconvenient IplImage class. A WImage can be
+constructed either using a WImageBuffer class which allocates and frees the data, or using a
+WImageView class which constructs a subimage or a view into external data. The view class does no
+memory management. Each class actually has two versions, one when the number of channels is known
+at compile time and one when it isn't. Using the one with the number of channels specified can
+provide some compile time optimizations by using the fact that the number of channels is a
+constant.
+
+We use the convention (c,r) to refer to column c and row r with (0,0) being the upper left corner.
+This is similar to standard Euclidean coordinates, with the first coordinate varying in the
+horizontal direction and the second varying in the vertical direction. Thus (c,r) is usually in
+the domain [0, width) X [0, height).
+
+Example usage:
+@code
+WImageBuffer3_b  im(5,7);  // Make a 5X7 3 channel image of type uchar
+WImageView3_b  sub_im(&im, 2,2, 3,3); // 3X3 submatrix (the view constructor takes a pointer)
+vector<float> vec(10, 3.0f);
+WImageView1_f user_im(&vec[0], 2, 5);  // 2X5 image w/ supplied data
+
+im.SetZero();  // same as cvSetZero(im.Ipl())
+*im(2, 3) = 15;  // Modify the element at column 2, row 3
+MySetRand(&sub_im);
+
+// Copy the second row into the first.  This can be done with no memory
+// allocation and will use SSE if IPP is available.
+int w = im.Width();
+im.View(0,0, w,1).CopyFrom(im.View(0,1, w,1));
+
+// Doesn't care about the source of the data since it goes through WImage
+void MySetRand(WImage_b* im) { // Works with any number of channels
+  for (int r = 0; r < im->Height(); ++r) {
+    uchar* row = im->Row(r);  // WImage_b rows hold uchar, not float
+    for (int c = 0; c < im->Width(); ++c) {
+      for (int ch = 0; ch < im->Channels(); ++ch, ++row) {
+        *row = uchar(rand() & 255);
+      }
+    }
+  }
+}
+@endcode
+
+Functions that are not part of the basic image allocation, viewing, and access should come from
+OpenCV; some useful functions that are not part of OpenCV can be found in wimage_util.h.
+*/
+template<typename T>
+class WImage
+{
+public:
+    typedef T BaseType;
+
+    // WImage is an abstract class with no other virtual methods so make the
+    // destructor virtual.
+    virtual ~WImage() = 0;
+
+    // Accessors
+    IplImage* Ipl() {return image_; }
+    const IplImage* Ipl() const {return image_; }
+    T* ImageData() { return reinterpret_cast<T*>(image_->imageData); }
+    const T* ImageData() const {
+        return reinterpret_cast<const T*>(image_->imageData);
+    }
+
+    int Width() const {return image_->width; }
+    int Height() const {return image_->height; }
+
+    // WidthStep is the number of bytes to go to the pixel with the next y coord
+    int WidthStep() const {return image_->widthStep; }
+
+    int Channels() const {return image_->nChannels; }
+    int ChannelSize() const {return sizeof(T); }  // number of bytes per channel
+
+    // Number of bytes per pixel
+    int PixelSize() const {return Channels() * ChannelSize(); }
+
+    // Return the depth type (e.g. IPL_DEPTH_8U, IPL_DEPTH_32F), which encodes the
+    // number of bits per channel and, for signed types, sets the sign bit.
+    // This is known at compile time using specializations.
+    int Depth() const;
+
+    inline const T* Row(int r) const {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
+    }
+
+    inline T* Row(int r) {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep);
+    }
+
+    // Pixel accessors which return a pointer to the start of the channel
+    inline T* operator() (int c, int r)  {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
+            c*Channels();
+    }
+
+    inline const T* operator() (int c, int r) const  {
+        return reinterpret_cast<T*>(image_->imageData + r*image_->widthStep) +
+            c*Channels();
+    }
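+    // For example, in a 3-channel image (*this)(c, r)[1] addresses the second
+    // channel of the pixel at column c, row r.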
+
+    // Copy the contents from another image; just a convenience wrapper around cvCopy
+    void CopyFrom(const WImage<T>& src) { cvCopy(src.Ipl(), image_); }
+
+    // Set the contents to zero; just a convenience wrapper around cvSetZero
+    void SetZero() { cvSetZero(image_); }
+
+    // Construct a view into a region of this image
+    WImageView<T> View(int c, int r, int width, int height);
+
+protected:
+    // Disallow copy and assignment
+    WImage(const WImage&);
+    void operator=(const WImage&);
+
+    explicit WImage(IplImage* img) : image_(img) {
+        assert(!img || img->depth == Depth());
+    }
+
+    void SetIpl(IplImage* image) {
+        assert(!image || image->depth == Depth());
+        image_ = image;
+    }
+
+    IplImage* image_;
+};
+
+
+/** Image class when both the pixel type and number of channels
+are known at compile time.  This wrapper will speed up some of the operations
+like accessing individual pixels using the () operator.
+*/
+template<typename T, int C>
+class WImageC : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    explicit WImageC(IplImage* img) : WImage<T>(img) {
+        assert(!img || img->nChannels == Channels());
+    }
+
+    // Construct a view into a region of this image
+    WImageViewC<T, C> View(int c, int r, int width, int height);
+
+    // Copy the contents from another image; just a convenience wrapper around cvCopy
+    void CopyFrom(const WImageC<T, C>& src) {
+        cvCopy(src.Ipl(), WImage<T>::image_);
+    }
+
+    // WImageC is an abstract class with no other virtual methods so make the
+    // destructor virtual.
+    virtual ~WImageC() = 0;
+
+    int Channels() const {return C; }
+
+protected:
+    // Disallow copy and assignment
+    WImageC(const WImageC&);
+    void operator=(const WImageC&);
+
+    void SetIpl(IplImage* image) {
+        assert(!image || image->depth == WImage<T>::Depth());
+        WImage<T>::SetIpl(image);
+    }
+};
+
+/** Image class which owns the data, so it can be allocated and is always
+freed.  It cannot be copied but can be explicitly cloned.
+*/
+template<typename T>
+class WImageBuffer : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+
+    // Default constructor which creates a null image that can be allocated
+    // later with Allocate() or attached to data with SetIpl().
+    WImageBuffer() : WImage<T>(0) {}
+
+    WImageBuffer(int width, int height, int nchannels) : WImage<T>(0) {
+        Allocate(width, height, nchannels);
+    }
+
+    // Constructor which takes ownership of a given IplImage so releases
+    // the image on destruction.
+    explicit WImageBuffer(IplImage* img) : WImage<T>(img) {}
+
+    // Allocate an image.  Does nothing if current size is the same as
+    // the new size.
+    void Allocate(int width, int height, int nchannels);
+
+    // Set the data to point to an image, releasing the old data
+    void SetIpl(IplImage* img) {
+        ReleaseImage();
+        WImage<T>::SetIpl(img);
+    }
+
+    // Clone an image which reallocates the image if of a different dimension.
+    void CloneFrom(const WImage<T>& src) {
+        Allocate(src.Width(), src.Height(), src.Channels());
+        CopyFrom(src);
+    }
+
+    ~WImageBuffer() {
+        ReleaseImage();
+    }
+
+    // Release the image if it isn't null.
+    void ReleaseImage() {
+        if (WImage<T>::image_) {
+            IplImage* image = WImage<T>::image_;
+            cvReleaseImage(&image);
+            WImage<T>::SetIpl(0);
+        }
+    }
+
+    bool IsNull() const {return WImage<T>::image_ == NULL; }
+
+private:
+    // Disallow copy and assignment
+    WImageBuffer(const WImageBuffer&);
+    void operator=(const WImageBuffer&);
+};
+
+/** Like a WImageBuffer class but when the number of channels is known at compile time.
+*/
+template<typename T, int C>
+class WImageBufferC : public WImageC<T, C>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    // Default constructor which creates a null image that can be allocated
+    // later with Allocate() or attached to data with SetIpl().
+    WImageBufferC() : WImageC<T, C>(0) {}
+
+    WImageBufferC(int width, int height) : WImageC<T, C>(0) {
+        Allocate(width, height);
+    }
+
+    // Constructor which takes ownership of a given IplImage so releases
+    // the image on destruction.
+    explicit WImageBufferC(IplImage* img) : WImageC<T, C>(img) {}
+
+    // Allocate an image.  Does nothing if current size is the same as
+    // the new size.
+    void Allocate(int width, int height);
+
+    // Set the data to point to an image, releasing the old data
+    void SetIpl(IplImage* img) {
+        ReleaseImage();
+        WImageC<T, C>::SetIpl(img);
+    }
+
+    // Clone an image which reallocates the image if of a different dimension.
+    void CloneFrom(const WImageC<T, C>& src) {
+        Allocate(src.Width(), src.Height());
+        CopyFrom(src);
+    }
+
+    ~WImageBufferC() {
+        ReleaseImage();
+    }
+
+    // Release the image if it isn't null.
+    void ReleaseImage() {
+        if (WImage<T>::image_) {
+            IplImage* image = WImage<T>::image_;
+            cvReleaseImage(&image);
+            WImageC<T, C>::SetIpl(0);
+        }
+    }
+
+    bool IsNull() const {return WImage<T>::image_ == NULL; }
+
+private:
+    // Disallow copy and assignment
+    WImageBufferC(const WImageBufferC&);
+    void operator=(const WImageBufferC&);
+};
+
+/** View into an image class which allows treating a subimage as an image or treating external data
+as an image
+*/
+template<typename T> class WImageView : public WImage<T>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+
+    // Construct a subimage.  No checks are done that the subimage lies
+    // completely inside the original image.
+    WImageView(WImage<T>* img, int c, int r, int width, int height);
+
+    // Refer to external data.
+    // If width_step is not given, the default row stride computed by
+    // cvInitImageHeader is used.
+    WImageView(T* data, int width, int height, int channels, int width_step = -1);
+
+    // Refer to external data.  This does NOT take ownership
+    // of the supplied IplImage.
+    WImageView(IplImage* img) : WImage<T>(img) {}
+
+    // Copy constructor
+    WImageView(const WImage<T>& img) : WImage<T>(0) {
+        header_ = *(img.Ipl());
+        WImage<T>::SetIpl(&header_);
+    }
+
+    WImageView& operator=(const WImage<T>& img) {
+        header_ = *(img.Ipl());
+        WImage<T>::SetIpl(&header_);
+        return *this;
+    }
+
+protected:
+    IplImage header_;
+};
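+
+// A minimal usage sketch (illustrative): wrapping externally owned data in a
+// view without copying; the view performs no memory management.
+//
+//     float data[2 * 5];
+//     WImageView_f view(data, 5, 2, 1);  // width=5, height=2, 1 channel
+//     view.SetZero();                    // writes through to `data`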
+
+
+template<typename T, int C>
+class WImageViewC : public WImageC<T, C>
+{
+public:
+    typedef typename WImage<T>::BaseType BaseType;
+    enum { kChannels = C };
+
+    // Default constructor needed for vectors of views.
+    WImageViewC();
+
+    virtual ~WImageViewC() {}
+
+    // Construct a subimage.  No checks are done that the subimage lies
+    // completely inside the original image.
+    WImageViewC(WImageC<T, C>* img,
+        int c, int r, int width, int height);
+
+    // Refer to external data
+    WImageViewC(T* data, int width, int height, int width_step = -1);
+
+    // Refer to external data.  This does NOT take ownership
+    // of the supplied IplImage.
+    WImageViewC(IplImage* img) : WImageC<T, C>(img) {}
+
+    // Copy constructor which does a shallow copy to allow multiple views
+    // of same data.  gcc-4.1.1 gets confused if both versions of
+    // the constructor and assignment operator are not provided.
+    WImageViewC(const WImageC<T, C>& img) : WImageC<T, C>(0) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+    }
+    WImageViewC(const WImageViewC<T, C>& img) : WImageC<T, C>(0) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+    }
+
+    WImageViewC& operator=(const WImageC<T, C>& img) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+        return *this;
+    }
+    WImageViewC& operator=(const WImageViewC<T, C>& img) {
+        header_ = *(img.Ipl());
+        WImageC<T, C>::SetIpl(&header_);
+        return *this;
+    }
+
+protected:
+    IplImage header_;
+};
+
+
+// Specializations for depth
+template<>
+inline int WImage<uchar>::Depth() const {return IPL_DEPTH_8U; }
+template<>
+inline int WImage<signed char>::Depth() const {return IPL_DEPTH_8S; }
+template<>
+inline int WImage<short>::Depth() const {return IPL_DEPTH_16S; }
+template<>
+inline int WImage<ushort>::Depth() const {return IPL_DEPTH_16U; }
+template<>
+inline int WImage<int>::Depth() const {return IPL_DEPTH_32S; }
+template<>
+inline int WImage<float>::Depth() const {return IPL_DEPTH_32F; }
+template<>
+inline int WImage<double>::Depth() const {return IPL_DEPTH_64F; }
+
+template<typename T> inline WImage<T>::~WImage() {}
+template<typename T, int C> inline WImageC<T, C>::~WImageC() {}
+
+template<typename T>
+inline void WImageBuffer<T>::Allocate(int width, int height, int nchannels)
+{
+    if (IsNull() || WImage<T>::Width() != width ||
+        WImage<T>::Height() != height || WImage<T>::Channels() != nchannels) {
+        ReleaseImage();
+        WImage<T>::image_ = cvCreateImage(cvSize(width, height),
+            WImage<T>::Depth(), nchannels);
+    }
+}
+
+template<typename T, int C>
+inline void WImageBufferC<T, C>::Allocate(int width, int height)
+{
+    if (IsNull() || WImage<T>::Width() != width || WImage<T>::Height() != height) {
+        ReleaseImage();
+        WImageC<T, C>::SetIpl(cvCreateImage(cvSize(width, height),WImage<T>::Depth(), C));
+    }
+}
+
+template<typename T>
+WImageView<T>::WImageView(WImage<T>* img, int c, int r, int width, int height)
+        : WImage<T>(0)
+{
+    header_ = *(img->Ipl());
+    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
+    header_.width = width;
+    header_.height = height;
+    WImage<T>::SetIpl(&header_);
+}
+
+template<typename T>
+WImageView<T>::WImageView(T* data, int width, int height, int nchannels, int width_step)
+          : WImage<T>(0)
+{
+    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), nchannels);
+    header_.imageData = reinterpret_cast<char*>(data);
+    if (width_step > 0) {
+        header_.widthStep = width_step;
+    }
+    WImage<T>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC(WImageC<T, C>* img, int c, int r, int width, int height)
+        : WImageC<T, C>(0)
+{
+    header_ = *(img->Ipl());
+    header_.imageData = reinterpret_cast<char*>((*img)(c, r));
+    header_.width = width;
+    header_.height = height;
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC() : WImageC<T, C>(0) {
+    cvInitImageHeader(&header_, cvSize(0, 0), WImage<T>::Depth(), C);
+    header_.imageData = reinterpret_cast<char*>(0);
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+template<typename T, int C>
+WImageViewC<T, C>::WImageViewC(T* data, int width, int height, int width_step)
+    : WImageC<T, C>(0)
+{
+    cvInitImageHeader(&header_, cvSize(width, height), WImage<T>::Depth(), C);
+    header_.imageData = reinterpret_cast<char*>(data);
+    if (width_step > 0) {
+        header_.widthStep = width_step;
+    }
+    WImageC<T, C>::SetIpl(&header_);
+}
+
+// Construct a view into a region of an image
+template<typename T>
+WImageView<T> WImage<T>::View(int c, int r, int width, int height) {
+    return WImageView<T>(this, c, r, width, height);
+}
+
+template<typename T, int C>
+WImageViewC<T, C> WImageC<T, C>::View(int c, int r, int width, int height) {
+    return WImageViewC<T, C>(this, c, r, width, height);
+}
+
+//! @} core
+
+}  // end of namespace
+
+#endif // __cplusplus
+
+#endif

+ 248 - 0
ThresholdTools/include/opencv2/cvconfig.h

@@ -0,0 +1,248 @@
+#ifndef OPENCV_CVCONFIG_H_INCLUDED
+#define OPENCV_CVCONFIG_H_INCLUDED
+
+/* OpenCV compiled as static or dynamic libs */
+#define BUILD_SHARED_LIBS
+
+/* OpenCV intrinsics optimized code */
+#define CV_ENABLE_INTRINSICS
+
+/* OpenCV additional optimized code */
+/* #undef CV_DISABLE_OPTIMIZATION */
+
+/* Compile for 'real' NVIDIA GPU architectures */
+#define CUDA_ARCH_BIN ""
+
+/* Create PTX or BIN for 1.0 compute capability */
+/* #undef CUDA_ARCH_BIN_OR_PTX_10 */
+
+/* NVIDIA GPU features are used */
+#define CUDA_ARCH_FEATURES ""
+
+/* Compile for 'virtual' NVIDIA PTX architectures */
+#define CUDA_ARCH_PTX ""
+
+/* AVFoundation video libraries */
+/* #undef HAVE_AVFOUNDATION */
+
+/* V4L capturing support */
+/* #undef HAVE_CAMV4L */
+
+/* V4L2 capturing support */
+/* #undef HAVE_CAMV4L2 */
+
+/* Carbon windowing environment */
+/* #undef HAVE_CARBON */
+
+/* AMD's Basic Linear Algebra Subprograms Library*/
+/* #undef HAVE_CLAMDBLAS */
+
+/* AMD's OpenCL Fast Fourier Transform Library*/
+/* #undef HAVE_CLAMDFFT */
+
+/* Clp support */
+/* #undef HAVE_CLP */
+
+/* Cocoa API */
+/* #undef HAVE_COCOA */
+
+/* C= */
+/* #undef HAVE_CSTRIPES */
+
+/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/
+/* #undef HAVE_CUBLAS */
+
+/* NVIDIA CUDA Runtime API*/
+/* #undef HAVE_CUDA */
+
+/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/
+/* #undef HAVE_CUFFT */
+
+/* IEEE1394 capturing support */
+/* #undef HAVE_DC1394 */
+
+/* IEEE1394 capturing support - libdc1394 v2.x */
+/* #undef HAVE_DC1394_2 */
+
+/* DirectX */
+#define HAVE_DIRECTX
+#define HAVE_DIRECTX_NV12
+#define HAVE_D3D11
+#define HAVE_D3D10
+#define HAVE_D3D9
+
+/* DirectShow Video Capture library */
+#define HAVE_DSHOW
+
+/* Eigen Matrix & Linear Algebra Library */
+/* #undef HAVE_EIGEN */
+
+/* FFMpeg video library */
+#define HAVE_FFMPEG
+
+/* Geospatial Data Abstraction Library */
+/* #undef HAVE_GDAL */
+
+/* GStreamer multimedia framework */
+/* #undef HAVE_GSTREAMER */
+
+/* GTK+ 2.0 Thread support */
+/* #undef HAVE_GTHREAD */
+
+/* GTK+ 2.x toolkit */
+/* #undef HAVE_GTK */
+
+/* Halide support */
+/* #undef HAVE_HALIDE */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Intel Perceptual Computing SDK library */
+/* #undef HAVE_INTELPERC */
+
+/* Intel Integrated Performance Primitives */
+#define HAVE_IPP
+#define HAVE_IPP_ICV
+#define HAVE_IPP_IW
+
+/* Intel IPP Async */
+/* #undef HAVE_IPP_A */
+
+/* JPEG-2000 codec */
+#define HAVE_JASPER
+
+/* IJG JPEG codec */
+#define HAVE_JPEG
+
+/* libpng/png.h needs to be included */
+/* #undef HAVE_LIBPNG_PNG_H */
+
+/* GDCM DICOM codec */
+/* #undef HAVE_GDCM */
+
+/* V4L/V4L2 capturing support via libv4l */
+/* #undef HAVE_LIBV4L */
+
+/* Microsoft Media Foundation Capture library */
+#define HAVE_MSMF
+
+/* NVIDIA Video Decoding API*/
+/* #undef HAVE_NVCUVID */
+
+/* NVIDIA Video Encoding API*/
+/* #undef HAVE_NVCUVENC */
+
+/* OpenCL Support */
+#define HAVE_OPENCL
+/* #undef HAVE_OPENCL_STATIC */
+/* #undef HAVE_OPENCL_SVM */
+
+/* OpenEXR codec */
+#define HAVE_OPENEXR
+
+/* OpenGL support*/
+/* #undef HAVE_OPENGL */
+
+/* OpenNI library */
+/* #undef HAVE_OPENNI */
+
+/* OpenNI library */
+/* #undef HAVE_OPENNI2 */
+
+/* PNG codec */
+#define HAVE_PNG
+
+/* Posix threads (pthreads) */
+/* #undef HAVE_PTHREAD */
+
+/* parallel_for with pthreads */
+/* #undef HAVE_PTHREADS_PF */
+
+/* Qt support */
+/* #undef HAVE_QT */
+
+/* Qt OpenGL support */
+/* #undef HAVE_QT_OPENGL */
+
+/* QuickTime video libraries */
+/* #undef HAVE_QUICKTIME */
+
+/* QTKit video libraries */
+/* #undef HAVE_QTKIT */
+
+/* Intel Threading Building Blocks */
+/* #undef HAVE_TBB */
+
+/* TIFF codec */
+#define HAVE_TIFF
+
+/* Unicap video capture library */
+/* #undef HAVE_UNICAP */
+
+/* Video for Windows support */
+#define HAVE_VFW
+
+/* V4L2 capturing support in videoio.h */
+/* #undef HAVE_VIDEOIO */
+
+/* Win32 UI */
+#define HAVE_WIN32UI
+
+/* XIMEA camera support */
+/* #undef HAVE_XIMEA */
+
+/* Xine video library */
+/* #undef HAVE_XINE */
+
+/* Define if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+/* #undef WORDS_BIGENDIAN */
+
+/* gPhoto2 library */
+/* #undef HAVE_GPHOTO2 */
+
+/* VA library (libva) */
+/* #undef HAVE_VA */
+
+/* Intel VA-API/OpenCL */
+/* #undef HAVE_VA_INTEL */
+
+/* Intel Media SDK */
+/* #undef HAVE_MFX */
+
+/* Lapack */
+/* #undef HAVE_LAPACK */
+
+/* Library was compiled with functions instrumentation */
+/* #undef ENABLE_INSTRUMENTATION */
+
+/* OpenVX */
+/* #undef HAVE_OPENVX */
+
+#if defined(HAVE_XINE)         || \
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
+    defined(HAVE_AVFOUNDATION) || \
+    /*defined(HAVE_OPENNI)     || too specialized */ \
+    defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)
+#define HAVE_VIDEO_INPUT
+#endif
+
+#if /*defined(HAVE_XINE)       || */\
+    defined(HAVE_GSTREAMER)    || \
+    defined(HAVE_QUICKTIME)    || \
+    defined(HAVE_QTKIT)        || \
+    defined(HAVE_AVFOUNDATION) || \
+    defined(HAVE_FFMPEG)       || \
+    defined(HAVE_MSMF)
+#define HAVE_VIDEO_OUTPUT
+#endif
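+
+/* Usage sketch (illustrative): downstream code can branch on the derived
+   macros above, e.g.
+
+       #ifdef HAVE_VIDEO_INPUT
+       // capture-dependent code goes here
+       #endif
+*/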
+
+/* OpenCV trace utilities */
+#define OPENCV_TRACE
+
+
+#endif // OPENCV_CVCONFIG_H_INCLUDED

+ 64 - 0
ThresholdTools/include/opencv2/dnn.hpp

@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_HPP
+#define OPENCV_DNN_HPP
+
+// This is an umbrella header to include into your project.
+// The layout of the headers in the dnn subfolder may change, so please include
+// this header for future compatibility.
+
+
+/** @defgroup dnn Deep Neural Network module
+  @{
+    This module contains:
+        - an API for creating new layers, the building bricks of neural networks;
+        - a set of the most useful built-in layers;
+        - an API to construct and modify comprehensive neural networks from layers;
+        - functionality for loading serialized network models from different frameworks.
+
+    The functionality of this module is designed only for forward-pass computations (i.e. network
+    inference). Network training is in principle not supported.
+  @}
+*/
+#include <opencv2/dnn/dnn.hpp>
+
+#endif /* OPENCV_DNN_HPP */

+ 623 - 0
ThresholdTools/include/opencv2/dnn/all_layers.hpp

@@ -0,0 +1,623 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
+#define OPENCV_DNN_DNN_ALL_LAYERS_HPP
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+
+/** @defgroup dnnLayerList Partial List of Implemented Layers
+  @{
+  This subsection of the dnn module describes the built-in layers.
+
+  The classes listed here provide a C++ API for creating instances of built-in layers.
+  In addition to this way of instantiating layers, there is a more generic factory API (see @ref dnnLayerFactory) that allows creating layers dynamically (by name) and registering new ones.
+  You can use both APIs (a brief sketch of the two routes follows this comment block), but the factory API is less convenient for native C++ programming and is mainly designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
+
+  Built-in layers partially reproduce the functionality of the corresponding Caffe and Torch7 layers.
+  In particular, the following layers and the Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
+  - Convolution
+  - Deconvolution
+  - Pooling
+  - InnerProduct
+  - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
+  - Softmax
+  - Reshape, Flatten, Slice, Split
+  - LRN
+  - MVN
+  - Dropout (since it does nothing on the forward pass)
+*/
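+
+    /* A brief sketch of the two creation routes (illustrative only; the
+       "negative_slope" parameter name follows the Caffe convention and is an
+       assumption here):
+
+           LayerParams params;
+           params.set("negative_slope", 0.01f);
+
+           // 1. Typed, layer-specific API:
+           Ptr<ReLULayer> relu = ReLULayer::create(params);
+
+           // 2. Factory API, resolving the layer by its registered name:
+           Ptr<Layer> relu2 = LayerFactory::createLayerInstance("ReLU", params);
+    */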
+
+    class CV_EXPORTS BlankLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams &params);
+    };
+
+    //! LSTM recurrent layer
+    class CV_EXPORTS LSTMLayer : public Layer
+    {
+    public:
+        /** Creates instance of LSTM layer */
+        static Ptr<LSTMLayer> create(const LayerParams& params);
+
+        /** @deprecated Use LayerParams::blobs instead.
+        @brief Set trained weights for LSTM layer.
+
+        LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
+
+        Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
+        Then the current output and current cell state are computed as follows:
+        @f{eqnarray*}{
+        h_t &= o_t \odot tanh(c_t),               \\
+        c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
+        @f}
+        where @f$\odot@f$ is the per-element multiplication and @f$i_t, f_t, o_t, g_t@f$ are internal gates that are computed from learned weights.
+
+        Gates are computed as follows:
+        @f{eqnarray*}{
+        i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
+        f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
+        o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
+        g_t &= tanh   &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
+        @f}
+        where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
+        @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
+
+        For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
+        (i.e. @f$W_x@f$ is the vertical concatenation of the @f$ W_{x?} @f$ matrices), @f$ W_x \in R^{4N_h \times N_x} @f$.
+        The same holds for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_h \times N_h} @f$
+        and for @f$ b = [b_i; b_f; b_o; b_g]@f$, @f$b \in R^{4N_h} @f$.
+
+        @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$)
+        @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$)
+        @param b  is bias vector (i.e. according to above mentioned notation is @f$ b @f$)
+        */
+        CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
+
+        /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
+          * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
+          * where `Wh` is parameter from setWeights().
+          */
+        virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
+
+        /** @deprecated Use flag `produce_cell_output` in LayerParams.
+          * @brief Specifies whether the first dimension of the input blob is interpreted as a timestamp dimension or as a sample dimension.
+          *
+          * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
+          * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
+          *
+          * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
+          * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
+          */
+        CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
+
+        /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
+         * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
+         * @details Shape of the second output is the same as first output.
+         */
+        CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
+
+        /* In the common case it uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
+         * @param input should contain packed values @f$x_t@f$
+         * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if the setProduceCellOutput() flag was set to true).
+         *
+         * If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
+         * where `T` specifies the number of timestamps and `N` is the number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
+         *
+         * If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp and its shape should have the form [`N`, `[data dims]`] with at least one dimension
+         * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
+        */
+
+        int inputNameToIndex(String inputName) CV_OVERRIDE;
+        int outputNameToIndex(const String& outputName) CV_OVERRIDE;
+    };
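+
+    /* A minimal configuration sketch (illustrative; the flag names come from the
+       deprecation notes above, and Wh, Wx, b are assumed pre-trained cv::Mat weights):
+
+           LayerParams lp;
+           lp.blobs.push_back(Wh);   // weight layouts are described in setWeights()
+           lp.blobs.push_back(Wx);
+           lp.blobs.push_back(b);
+           lp.set("use_timestamp_dim", true);
+           lp.set("produce_cell_output", false);
+           Ptr<LSTMLayer> lstm = LSTMLayer::create(lp);
+    */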
+
+    /** @brief Classical recurrent layer
+
+    Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
+
+    - input: should contain packed input @f$x_t@f$.
+    - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
+
+    input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
+
+    output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is the number of rows in the @f$ W_{ho} @f$ matrix.
+
+    If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is the number of rows in the @f$ W_{hh} @f$ matrix.
+    */
+    class CV_EXPORTS RNNLayer : public Layer
+    {
+    public:
+        /** Creates instance of RNNLayer */
+        static Ptr<RNNLayer> create(const LayerParams& params);
+
+        /** Sets up the learned weights.
+
+        Recurrent-layer behavior on each step is defined by the current input @f$ x_t @f$, the previous state @f$ h_{t-1} @f$ and the learned weights as follows:
+        @f{eqnarray*}{
+        h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
+        o_t &= tanh&(W_{ho} h_t + b_o),
+        @f}
+
+        @param Wxh is @f$ W_{xh} @f$ matrix
+        @param bh  is @f$ b_{h}  @f$ vector
+        @param Whh is @f$ W_{hh} @f$ matrix
+        @param Who is @f$ W_{ho} @f$ matrix
+        @param bo  is @f$ b_{o}  @f$ vector
+        */
+        virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
+
+        /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
+         * @details Shape of the second output is the same as first output.
+         */
+        virtual void setProduceHiddenOutput(bool produce = false) = 0;
+
+    };
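+
+    /* Shape summary (illustrative, following the notation above): with N_x-dimensional
+       inputs, N_h hidden units and N_o outputs, the weights passed to setWeights() are
+       Wxh: N_h x N_x,  Whh: N_h x N_h,  bh: N_h,  Who: N_o x N_h,  bo: N_o. */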
+
+    class CV_EXPORTS BaseConvolutionLayer : public Layer
+    {
+    public:
+        Size kernel, stride, pad, dilation, adjustPad;
+        String padMode;
+        int numOutput;
+    };
+
+    class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
+    {
+    public:
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
+    {
+    public:
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS LRNLayer : public Layer
+    {
+    public:
+        int type;
+
+        int size;
+        float alpha, beta, bias;
+        bool normBySize;
+
+        static Ptr<LRNLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS PoolingLayer : public Layer
+    {
+    public:
+        int type;
+        Size kernel, stride, pad;
+        bool globalPooling;
+        bool computeMaxIdx;
+        String padMode;
+        bool ceilMode;
+        // If true, for average pooling with padding every output region is divided
+        // by the whole kernel area. Otherwise zero-padded values are excluded and
+        // the division is by the number of real values.
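+        // (Example: a 3x3 average kernel that overlaps the border by one column
+        // covers 6 real values; the divisor is 9 when true and 6 when false.)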
+        bool avePoolPaddedArea;
+        // ROIPooling parameters.
+        Size pooledSize;
+        float spatialScale;
+        // PSROIPooling parameters.
+        int psRoiOutChannels;
+
+        static Ptr<PoolingLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS SoftmaxLayer : public Layer
+    {
+    public:
+        bool logSoftMax;
+
+        static Ptr<SoftmaxLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS InnerProductLayer : public Layer
+    {
+    public:
+        int axis;
+        static Ptr<InnerProductLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS MVNLayer : public Layer
+    {
+    public:
+        float eps;
+        bool normVariance, acrossChannels;
+
+        static Ptr<MVNLayer> create(const LayerParams& params);
+    };
+
+    /* Reshaping */
+
+    class CV_EXPORTS ReshapeLayer : public Layer
+    {
+    public:
+        MatShape newShapeDesc;
+        Range newShapeRange;
+
+        static Ptr<ReshapeLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS FlattenLayer : public Layer
+    {
+    public:
+        static Ptr<FlattenLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ConcatLayer : public Layer
+    {
+    public:
+        int axis;
+        /**
+         * @brief Add zero padding in case of concatenation of blobs with different
+         * spatial sizes.
+         *
+         * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
+         */
+        bool padding;
+
+        static Ptr<ConcatLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SplitLayer : public Layer
+    {
+    public:
+        int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
+
+        static Ptr<SplitLayer> create(const LayerParams &params);
+    };
+
+    /**
+     * Slice layer has several modes:
+     * 1. Caffe mode
+     * @param[in] axis Axis of split operation
+     * @param[in] slice_point Array of split points
+     *
+     * The number of output blobs equals the number of split points plus one. The
+     * first blob is a slice of the input from 0 to @p slice_point[0] - 1 by @p axis,
+     * the second output blob is a slice of input from @p slice_point[0] to
+     * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
+     * input from @p slice_point[-1] up to the end of @p axis size.
+     *
+     * 2. TensorFlow mode
+     * @param begin Vector of start indices
+     * @param size Vector of sizes
+     *
+     * A more convenient numpy-like slice. The one and only output blob
+     * is the slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
+     *
+     * 3. Torch mode
+     * @param axis Axis of split operation
+     *
+     * Splits the input blob into equal parts along @p axis.
+     */
+    class CV_EXPORTS SliceLayer : public Layer
+    {
+    public:
+        /**
+         * @brief Vector of slice ranges.
+         *
+         * The outer vector's size equals the number of output blobs.
+         * Each inner vector holds slice ranges for the leading dimensions of the input.
+         */
+        std::vector<std::vector<Range> > sliceRanges;
+        int axis;
+
+        static Ptr<SliceLayer> create(const LayerParams &params);
+    };
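+
+    /* Worked example (illustrative, Caffe mode): for an input of shape 1x4x6x6 with
+       axis = 1 and slice_point = {1, 3}, three output blobs are produced with shapes
+       1x1x6x6, 1x2x6x6 and 1x1x6x6 (channels [0], [1..2] and [3]). */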
+
+    class CV_EXPORTS PermuteLayer : public Layer
+    {
+    public:
+        static Ptr<PermuteLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * Permute channels of 4-dimensional input blob.
+     * @param group Number of groups the input channels are split into; the
+     *              output picks channels from the groups in turn.
+     *
+     * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
+     * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
+     * Read more at https://arxiv.org/pdf/1707.01083.pdf
+     */
+    class CV_EXPORTS ShuffleChannelLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+
+        int group;
+    };
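+
+    /* Worked example (illustrative): with 6 input channels and group = 2, groupSize = 3,
+       so the output channel order is {0, 3, 1, 4, 2, 5}. */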
+
+    /**
+     * @brief Adds extra values for specific axes.
+     * @param paddings Vector of paddings in format
+     *                 @code
+     *                 [ pad_before, pad_after,  // [0]th dimension
+     *                   pad_before, pad_after,  // [1]st dimension
+     *                   ...
+     *                   pad_before, pad_after ] // [n]th dimension
+     *                 @endcode
+     *                 that represents the number of padded values at every dimension
+     *                 starting from the first one. The remaining dimensions won't
+     *                 be padded.
+     * @param value Value to be padded. Defaults to zero.
+     * @param type Padding type: 'constant', 'reflect'
+     * @param input_dims Torch's parameter. If @p input_dims is not equal to the
+     *                   actual input dimensionality then the `[0]th` dimension
+     *                   is considered a batch dimension and @p paddings are shifted
+     *                   by one dimension. Defaults to `-1`, which means padding is
+     *                   applied exactly as specified by @p paddings.
+     */
+    class CV_EXPORTS PaddingLayer : public Layer
+    {
+    public:
+        static Ptr<PaddingLayer> create(const LayerParams& params);
+    };
+
+    /* Activations */
+    class CV_EXPORTS ActivationLayer : public Layer
+    {
+    public:
+        virtual void forwardSlice(const float* src, float* dst, int len,
+                                  size_t outPlaneSize, int cn0, int cn1) const = 0;
+    };
+
+    class CV_EXPORTS ReLULayer : public ActivationLayer
+    {
+    public:
+        float negativeSlope;
+
+        static Ptr<ReLULayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ReLU6Layer : public ActivationLayer
+    {
+    public:
+        float minValue, maxValue;
+
+        static Ptr<ReLU6Layer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ELULayer : public ActivationLayer
+    {
+    public:
+        static Ptr<ELULayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS TanHLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<TanHLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS SigmoidLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<SigmoidLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS BNLLLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<BNLLLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS AbsLayer : public ActivationLayer
+    {
+    public:
+        static Ptr<AbsLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS PowerLayer : public ActivationLayer
+    {
+    public:
+        float power, scale, shift;
+
+        static Ptr<PowerLayer> create(const LayerParams &params);
+    };
+
+    /* Layers used in semantic segmentation */
+
+    class CV_EXPORTS CropLayer : public Layer
+    {
+    public:
+        int startAxis;
+        std::vector<int> offset;
+
+        static Ptr<CropLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS EltwiseLayer : public Layer
+    {
+    public:
+        static Ptr<EltwiseLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS BatchNormLayer : public Layer
+    {
+    public:
+        bool hasWeights, hasBias;
+        float epsilon;
+
+        static Ptr<BatchNormLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS MaxUnpoolLayer : public Layer
+    {
+    public:
+        Size poolKernel;
+        Size poolPad;
+        Size poolStride;
+
+        static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ScaleLayer : public Layer
+    {
+    public:
+        bool hasBias;
+        int axis;
+
+        static Ptr<ScaleLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ShiftLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS PriorBoxLayer : public Layer
+    {
+    public:
+        static Ptr<PriorBoxLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ReorgLayer : public Layer
+    {
+    public:
+        static Ptr<ReorgLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS RegionLayer : public Layer
+    {
+    public:
+        static Ptr<RegionLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS DetectionOutputLayer : public Layer
+    {
+    public:
+        static Ptr<DetectionOutputLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief \f$ L_p \f$ - normalization layer.
+     * @param p Normalization factor. The most common values are `p = 1` for \f$ L_1 \f$
+     *          normalization, `p = 2` for \f$ L_2 \f$ normalization, or a custom one.
+     * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
+     * @param across_spatial If true, normalize the input across all non-batch dimensions.
+     *                       Otherwise normalize every channel separately.
+     *
+     * Across spatial:
+     * @f[
+     * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
+     * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
+     * @f]
+     *
+     * Channel wise normalization:
+     * @f[
+     * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
+     * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
+     * @f]
+     *
+     * Where `x, y` - spatial coordinates, `c` - channel.
+     *
+     * Every sample in the batch is normalized separately. Optionally,
+     * the output is scaled by the trained parameters.
+     */
+    class CV_EXPORTS NormalizeBBoxLayer : public Layer
+    {
+    public:
+        float pnorm, epsilon;
+        CV_DEPRECATED bool acrossSpatial;
+
+        static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief Resize the input 4-dimensional blob using a nearest-neighbor or bilinear strategy.
+     *
+     * The layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
+     */
+    class CV_EXPORTS ResizeLayer : public Layer
+    {
+    public:
+        static Ptr<ResizeLayer> create(const LayerParams& params);
+    };
+
+    /**
+     * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public
+     *
+     * It differs from @ref ResizeLayer in how the output shape and resize scales are computed.
+     */
+    class CV_EXPORTS InterpLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS ProposalLayer : public Layer
+    {
+    public:
+        static Ptr<ProposalLayer> create(const LayerParams& params);
+    };
+
+    class CV_EXPORTS CropAndResizeLayer : public Layer
+    {
+    public:
+        static Ptr<Layer> create(const LayerParams& params);
+    };
+
+//! @}
+//! @}
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+#endif

+ 156 - 0
ThresholdTools/include/opencv2/dnn/dict.hpp

@@ -0,0 +1,156 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <opencv2/core.hpp>
+#include <map>
+#include <ostream>
+
+#include <opencv2/dnn/dnn.hpp>
+
+#ifndef OPENCV_DNN_DNN_DICT_HPP
+#define OPENCV_DNN_DNN_DICT_HPP
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+
+/** @brief This struct stores a scalar value (or an array) of one of the following types: double, cv::String or int64.
+ *  @todo Maybe int64 is useless because the double type exactly represents all integers up to 2^53.
+ */
+struct CV_EXPORTS_W DictValue
+{
+    DictValue(const DictValue &r);
+    DictValue(int64 i = 0)      : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
+    CV_WRAP DictValue(int i)            : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }       //!< Constructs integer scalar
+    DictValue(unsigned p)       : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }       //!< Constructs integer scalar
+    CV_WRAP DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }     //!< Constructs floating point scalar
+    CV_WRAP DictValue(const String &s)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< Constructs string scalar
+    DictValue(const char *s)    : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }   //!< @overload
+
+    template<typename TypeIter>
+    static DictValue arrayInt(TypeIter begin, int size);    //!< Constructs integer array
+    template<typename TypeIter>
+    static DictValue arrayReal(TypeIter begin, int size);   //!< Constructs floating point array
+    template<typename TypeIter>
+    static DictValue arrayString(TypeIter begin, int size); //!< Constructs array of strings
+
+    template<typename T>
+    T get(int idx = -1) const; //!< Tries to convert the array element with the specified index to the requested type and returns it.
+
+    int size() const;
+
+    CV_WRAP bool isInt() const;
+    CV_WRAP bool isString() const;
+    CV_WRAP bool isReal() const;
+
+    CV_WRAP int getIntValue(int idx = -1) const;
+    CV_WRAP double getRealValue(int idx = -1) const;
+    CV_WRAP String getStringValue(int idx = -1) const;
+
+    DictValue &operator=(const DictValue &r);
+
+    friend std::ostream &operator<<(std::ostream &stream, const DictValue &dictv);
+
+    ~DictValue();
+
+private:
+
+    int type;
+
+    union
+    {
+        AutoBuffer<int64, 1> *pi;
+        AutoBuffer<double, 1> *pd;
+        AutoBuffer<String, 1> *ps;
+        void *pv;
+    };
+
+    DictValue(int _type, void *_p) : type(_type), pv(_p) {}
+    void release();
+};
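+
+/* A minimal usage sketch for DictValue (illustrative values):
+ * @code
+ * DictValue dv(3.5);
+ * CV_Assert(dv.isReal() && !dv.isString());
+ * double d = dv.get<double>();             // 3.5
+ * int arr[] = {1, 2, 3};
+ * DictValue a = DictValue::arrayInt(arr, 3);
+ * int second = a.get<int>(1);              // 2; a.size() == 3
+ * @endcode
+ */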
+
+/** @brief This class implements name-value dictionary, values are instances of DictValue. */
+class CV_EXPORTS Dict
+{
+    typedef std::map<String, DictValue> _Dict;
+    _Dict dict;
+
+public:
+
+    //! Checks for the presence of the @p key in the dictionary.
+    bool has(const String &key) const;
+
+    //! If the @p key is in the dictionary, returns a pointer to its value; otherwise returns NULL.
+    DictValue *ptr(const String &key);
+
+    /** @overload */
+    const DictValue *ptr(const String &key) const;
+
+    //! If the @p key is in the dictionary, returns its value; otherwise an error will be generated.
+    const DictValue &get(const String &key) const;
+
+    /** @overload */
+    template <typename T>
+    T get(const String &key) const;
+
+    //! If the @p key is in the dictionary, returns its value; otherwise returns @p defaultValue.
+    template <typename T>
+    T get(const String &key, const T &defaultValue) const;
+
+    //! Sets a new @p value for the @p key, or adds a new key-value pair to the dictionary.
+    template<typename T>
+    const T &set(const String &key, const T &value);
+
+    friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
+
+    std::map<String, DictValue>::const_iterator begin() const;
+
+    std::map<String, DictValue>::const_iterator end() const;
+};
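+
+/* A minimal usage sketch for Dict (key names are illustrative):
+ * @code
+ * Dict d;
+ * d.set("kernel_size", 3);
+ * CV_Assert(d.has("kernel_size"));
+ * int k = d.get<int>("kernel_size");       // 3
+ * int stride = d.get<int>("stride", 1);    // key is absent -> default value 1
+ * @endcode
+ */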
+
+//! @}
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+
+#endif

+ 860 - 0
ThresholdTools/include/opencv2/dnn/dnn.hpp

@@ -0,0 +1,860 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_HPP
+#define OPENCV_DNN_DNN_HPP
+
+#include <vector>
+#include <opencv2/core.hpp>
+
+#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v5 {
+#define CV__DNN_EXPERIMENTAL_NS_END }
+namespace cv { namespace dnn { namespace experimental_dnn_v5 { } using namespace experimental_dnn_v5; }}
+#else
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN
+#define CV__DNN_EXPERIMENTAL_NS_END
+#endif
+
+#include <opencv2/dnn/dict.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+
+    typedef std::vector<int> MatShape;
+
+    /**
+     * @brief Enum of computation backends supported by layers.
+     * @see Net::setPreferableBackend
+     */
+    enum Backend
+    {
+        //! DNN_BACKEND_DEFAULT equals DNN_BACKEND_INFERENCE_ENGINE if
+        //! OpenCV is built with Intel's Inference Engine library, or
+        //! DNN_BACKEND_OPENCV otherwise.
+        DNN_BACKEND_DEFAULT,
+        DNN_BACKEND_HALIDE,
+        DNN_BACKEND_INFERENCE_ENGINE,
+        DNN_BACKEND_OPENCV
+    };
+
+    /**
+     * @brief Enum of target devices for computations.
+     * @see Net::setPreferableTarget
+     */
+    enum Target
+    {
+        DNN_TARGET_CPU,
+        DNN_TARGET_OPENCL,
+        DNN_TARGET_OPENCL_FP16,
+        DNN_TARGET_MYRIAD
+    };
+
+    /** @brief This class provides all data needed to initialize a layer.
+     *
+     * It includes a dictionary with scalar params (which can be read via the Dict interface),
+     * blob params #blobs and optional meta information: the #name and #type of the layer instance.
+    */
+    class CV_EXPORTS LayerParams : public Dict
+    {
+    public:
+        //TODO: Add ability to name blob params
+        std::vector<Mat> blobs; //!< List of learned parameters stored as blobs.
+
+        String name; //!< Name of the layer instance (optional, can be used for internal purposes).
+        String type; //!< Type name which was used for creating layer by layer factory (optional).
+    };
+
+   /**
+    * @brief Derivatives of this class encapsulate functions of certain backends.
+    */
+    class BackendNode
+    {
+    public:
+        BackendNode(int backendId);
+
+        virtual ~BackendNode(); //!< Virtual destructor to enable polymorphism.
+
+        int backendId; //!< Backend identifier.
+    };
+
+    /**
+     * @brief Derivatives of this class wrap cv::Mat for different backends and targets.
+     */
+    class BackendWrapper
+    {
+    public:
+        BackendWrapper(int backendId, int targetId);
+
+        /**
+         * @brief Wraps cv::Mat for a specific backend and target.
+         * @param[in] targetId Target identifier.
+         * @param[in] m cv::Mat for wrapping.
+         *
+         * Makes a CPU->GPU data transfer if it is required by the target.
+         */
+        BackendWrapper(int targetId, const cv::Mat& m);
+
+        /**
+         * @brief Makes a wrapper for a reused cv::Mat.
+         * @param[in] base Wrapper of the cv::Mat that will be reused.
+         * @param[in] shape Specific shape.
+         *
+         * Initializes the wrapper from another one. It wraps the same host CPU
+         * memory and must not allocate new memory on the device (i.e. GPU). It may
+         * have a different shape. Use it when CPU memory is reused so that the
+         * associated device memory is reused as well.
+         */
+        BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape);
+
+        virtual ~BackendWrapper(); //!< Virtual destructor to enable polymorphism.
+
+        /**
+         * @brief Transfer data to CPU host memory.
+         */
+        virtual void copyToHost() = 0;
+
+        /**
+         * @brief Indicates that the actual data is on the CPU.
+         */
+        virtual void setHostDirty() = 0;
+
+        int backendId;  //!< Backend identifier.
+        int targetId;   //!< Target identifier.
+    };
+
+    class CV_EXPORTS ActivationLayer;
+    class CV_EXPORTS BatchNormLayer;
+    class CV_EXPORTS ScaleLayer;
+
+    /** @brief This interface class allows one to build new Layers, the building blocks of networks.
+     *
+     * Each class derived from Layer must implement the allocate() methods to declare its own outputs and forward() to compute outputs.
+     * Also, before using the new layer in networks, you must register it using one of the @ref dnnLayerFactory "LayerFactory" macros; a minimal sketch follows the class declaration below.
+     */
+    class CV_EXPORTS_W Layer : public Algorithm
+    {
+    public:
+
+        //! The list of learned parameters must be stored here to allow reading them via Net::getParam().
+        CV_PROP_RW std::vector<Mat> blobs;
+
+        /** @brief Computes and sets internal parameters according to inputs, outputs and blobs.
+         *  @param[in]  input  vector of already allocated input blobs
+         *  @param[out] output vector of already allocated output blobs
+         *
+         * This method is called after the network has allocated all memory for input and output blobs
+         * and before inferencing.
+         */
+        virtual void finalize(const std::vector<Mat*> &input, std::vector<Mat> &output);
+
+        /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @param[in]  input  the input blobs.
+         *  @param[out] output allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
+         */
+        virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output, std::vector<Mat> &internals) = 0;
+
+        /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @param[in]  inputs  the input blobs.
+         *  @param[out] outputs allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
+         */
+        virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
+
+        /** @brief Given the @p input blobs, computes the output @p blobs.
+         *  @param[in]  inputs  the input blobs.
+         *  @param[out] outputs allocated output blobs, which will store results of the computation.
+         *  @param[out] internals allocated internal blobs
+         */
+        void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
+
+        /** @brief @overload */
+        CV_WRAP void finalize(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
+
+        /** @brief @overload */
+        CV_WRAP std::vector<Mat> finalize(const std::vector<Mat> &inputs);
+
+        /** @brief Allocates the layer and computes the output. */
+        CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs,
+                         CV_IN_OUT std::vector<Mat> &internals);
+
+        /** @brief Returns the index of an input blob in the input array.
+         *  @param inputName label of input blob
+         *
+         * Each layer input and output can be labeled to easily identify them using "%<layer_name%>[.output_name]" notation.
+         * This method maps the label of an input blob to its index in the input vector.
+         */
+        virtual int inputNameToIndex(String inputName);
+        /** @brief Returns the index of an output blob in the output array.
+         *  @see inputNameToIndex()
+         */
+        CV_WRAP virtual int outputNameToIndex(const String& outputName);
+
+        /**
+         * @brief Asks the layer whether it supports a specific backend for computations.
+         * @param[in] backendId computation backend identifier.
+         * @see Backend
+         */
+        virtual bool supportBackend(int backendId);
+
+        /**
+         * @brief Returns Halide backend node.
+         * @param[in] inputs Input Halide buffers.
+         * @see BackendNode, BackendWrapper
+         *
+         * Input buffers should be exactly the same ones that will be used in forward invocations.
+         * Although we could use a Halide::ImageParam based on the input shape only,
+         * this helps prevent some memory management issues (if something is wrong,
+         * Halide tests will fail).
+         */
+        virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs);
+
+        virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &inputs);
+
+       /**
+        * @brief Automatic Halide scheduling based on layer hyper-parameters.
+        * @param[in] node Backend node with Halide functions.
+        * @param[in] inputs Blobs that will be used in forward invocations.
+        * @param[in] outputs Blobs that will be used in forward invocations.
+        * @param[in] targetId Target identifier
+        * @see BackendNode, Target
+        *
+        * The layer does not use its own Halide::Func members because layer
+        * fusing may have been applied; in that case the fused function should be scheduled.
+        */
+        virtual void applyHalideScheduler(Ptr<BackendNode>& node,
+                                          const std::vector<Mat*> &inputs,
+                                          const std::vector<Mat> &outputs,
+                                          int targetId) const;
+
+        /**
+         * @brief Implement layers fusing.
+         * @param[in] node Backend node of bottom layer.
+         * @see BackendNode
+         *
+         * Relevant for graph-based backends. If the layer is attached successfully,
+         * this returns a non-empty cv::Ptr to a node of the same backend.
+         * Fusion is performed only over the last function.
+         */
+        virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node);
+
+        /**
+         * @brief Tries to attach the subsequent activation layer to this layer, i.e. performs layer fusion in a partial case.
+         * @param[in] layer The subsequent activation layer.
+         *
+         * Returns true if the activation layer has been attached successfully.
+         */
+        virtual bool setActivation(const Ptr<ActivationLayer>& layer);
+
+        /**
+         * @brief Try to fuse current layer with a next one
+         * @param[in] top Next layer to be fused.
+         * @returns True if fusion was performed.
+         */
+        virtual bool tryFuse(Ptr<Layer>& top);
+
+        /**
+         * @brief Returns parameters of layers with channel-wise multiplication and addition.
+         * @param[out] scale Channel-wise multipliers. Total number of values should
+         *                   be equal to number of channels.
+         * @param[out] shift Channel-wise offsets. Total number of values should
+         *                   be equal to number of channels.
+         *
+         * Some layers can fuse their transformations with subsequent layers,
+         * for example convolution + batch normalization. In that case the base layer
+         * uses weights from the layer after it, and the fused layer is skipped.
+         * By default, @p scale and @p shift are empty, which means the layer has no
+         * element-wise multiplications or additions.
+         */
+        virtual void getScaleShift(Mat& scale, Mat& shift) const;
+
+        /**
+         * @brief "Deattaches" all the layers, attached to particular layer.
+         */
+        virtual void unsetAttached();
+
+        virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                                     const int requiredOutputs,
+                                     std::vector<MatShape> &outputs,
+                                     std::vector<MatShape> &internals) const;
+        virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
+                               const std::vector<MatShape> &outputs) const {(void)inputs; (void)outputs; return 0;}
+
+        CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
+        CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
+        CV_PROP int preferableTarget; //!< preferred target for layer forwarding
+
+        Layer();
+        explicit Layer(const LayerParams &params);      //!< Initializes only #name, #type and #blobs fields.
+        void setParamsFrom(const LayerParams &params);  //!< Initializes only #name, #type and #blobs fields.
+        virtual ~Layer();
+    };
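+
+    /* A minimal custom-layer sketch (illustrative, not a definitive recipe): this
+     * layer doubles its input and relies on forward_fallback() to route the
+     * InputArray-based overload to the Mat*-based one. The registration call is
+     * assumed to come from the LayerFactory machinery mentioned above.
+     * @code
+     * class DoubleLayer : public Layer
+     * {
+     * public:
+     *     DoubleLayer(const LayerParams &params) : Layer(params) {}
+     *     static Ptr<Layer> create(LayerParams &params) { return Ptr<Layer>(new DoubleLayer(params)); }
+     *     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &) CV_OVERRIDE
+     *     {
+     *         outputs[0] = *inputs[0] * 2;   // element-wise doubling
+     *     }
+     *     void forward(InputArrayOfArrays in, OutputArrayOfArrays out, OutputArrayOfArrays internals) CV_OVERRIDE
+     *     {
+     *         forward_fallback(in, out, internals);
+     *     }
+     * };
+     * // CV_DNN_REGISTER_LAYER_CLASS(Double, DoubleLayer);  // hypothetical registration call
+     * @endcode
+     */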
+
+    /** @brief This class allows one to create and manipulate comprehensive artificial neural networks.
+     *
+     * A neural network is represented as a directed acyclic graph (DAG), where vertices are Layer instances,
+     * and edges specify relationships between layer inputs and outputs.
+     *
+     * Each network layer has a unique integer id and a unique string name inside its network.
+     * LayerId can store either a layer name or a layer id.
+     *
+     * This class supports reference counting of its instances, i.e. copies point to the same instance.
+     */
+    class CV_EXPORTS_W_SIMPLE Net
+    {
+    public:
+
+        CV_WRAP Net();  //!< Default constructor.
+        CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
+
+        /** @brief Create a network from Intel's Model Optimizer intermediate representation.
+         *  @param[in] xml XML configuration file with network's topology.
+         *  @param[in] bin Binary file with trained weights.
+         *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+         *  backend.
+         */
+        CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
+
+        /** Returns true if there are no layers in the network. */
+        CV_WRAP bool empty() const;
+
+        /** @brief Adds a new layer to the net.
+         *  @param name   unique name of the layer being added.
+         *  @param type   typename of the layer being added (the type must be registered in LayerRegister).
+         *  @param params parameters which will be used to initialize the created layer.
+         *  @returns unique identifier of the created layer, or -1 on failure.
+         */
+        int addLayer(const String &name, const String &type, LayerParams &params);
+        /** @brief Adds a new layer and connects its first input to the first output of the previously added layer.
+         *  @see addLayer()
+         */
+        int addLayerToPrev(const String &name, const String &type, LayerParams &params);
+
+        /** @brief Converts the string name of the layer to the integer identifier.
+         *  @returns id of the layer, or -1 if the layer wasn't found.
+         */
+        CV_WRAP int getLayerId(const String &layer);
+
+        CV_WRAP std::vector<String> getLayerNames() const;
+
+        /** @brief Container for strings and integers. */
+        typedef DictValue LayerId;
+
+        /** @brief Returns a pointer to the layer with the specified id or name that the network uses. */
+        CV_WRAP Ptr<Layer> getLayer(LayerId layerId);
+
+        /** @brief Returns pointers to the input layers of a specific layer. */
+        std::vector<Ptr<Layer> > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP
+
+        /** @brief Deletes a layer from the network (not implemented yet). */
+        CV_WRAP void deleteLayer(LayerId layer);
+
+        /** @brief Connects output of the first layer to input of the second layer.
+         *  @param outPin descriptor of the first layer output.
+         *  @param inpPin descriptor of the second layer input.
+         *
+         * Descriptors have the following template <DFN>&lt;layer_name&gt;[.input_number]</DFN>:
+         * - the first part of the template, <DFN>layer_name</DFN>, is the string name of an added layer.
+         *   If this part is empty then the network input pseudo layer will be used;
+         * - the second optional part of the template, <DFN>input_number</DFN>,
+         *   is either the number of the layer input or its label.
+         *   If this part is omitted then the first layer input will be used.
+         *
+         *  @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
+         */
+        CV_WRAP void connect(String outPin, String inpPin);
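+
+        /* Descriptor sketch (layer names are hypothetical): connect the first
+         * output of "conv1" to the second input of "eltwise1".
+         * @code
+         * net.connect("conv1", "eltwise1.1");
+         * @endcode
+         */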
+
+        /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
+         *  @param outLayerId identifier of the first layer
+         *  @param outNum number of the first layer output
+         *  @param inpLayerId identifier of the second layer
+         *  @param inpNum number of the second layer input
+         */
+        void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
+
+        /** @brief Sets output names of the network input pseudo-layer.
+         *
+         * Each net always has its own special network input pseudo-layer with id=0.
+         * This layer stores the user blobs only and doesn't perform any computations.
+         * In fact, this layer provides the only way to pass user data into the network.
+         * As any other layer, this layer can label its outputs and this function provides an easy way to do this.
+         */
+        CV_WRAP void setInputsNames(const std::vector<String> &inputBlobNames);
+
+        /** @brief Runs a forward pass to compute the output of the layer with name @p outputName.
+         *  @param outputName name of the layer whose output is needed
+         *  @return blob for the first output of the specified layer.
+         *  @details By default runs a forward pass for the whole network.
+         */
+        CV_WRAP Mat forward(const String& outputName = String());
+
+        /** @brief Runs a forward pass to compute the output of the layer with name @p outputName.
+         *  @param outputBlobs contains all output blobs for the specified layer.
+         *  @param outputName name of the layer whose output is needed
+         *  @details If @p outputName is empty, runs a forward pass for the whole network.
+         */
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String());
+
+        /** @brief Runs a forward pass to compute the outputs of layers listed in @p outBlobNames.
+         *  @param outputBlobs contains blobs for the first outputs of the specified layers.
+         *  @param outBlobNames names of the layers whose outputs are needed
+         */
+        CV_WRAP void forward(OutputArrayOfArrays outputBlobs,
+                             const std::vector<String>& outBlobNames);
+
+        /** @brief Runs a forward pass to compute the outputs of layers listed in @p outBlobNames.
+         *  @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
+         *  @param outBlobNames names of the layers whose outputs are needed
+         */
+        CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector<std::vector<Mat> >& outputBlobs,
+                                                    const std::vector<String>& outBlobNames);
+
+        /**
+         * @brief Compile Halide layers.
+         * @param[in] scheduler Path to YAML file with scheduling directives.
+         * @see setPreferableBackend
+         *
+         * Schedules layers that support the Halide backend, then compiles them for a
+         * specific target. For layers not represented in the scheduling file,
+         * or if no manual scheduling is used at all, automatic scheduling will be applied.
+         */
+        CV_WRAP void setHalideScheduler(const String& scheduler);
+
+        /**
+         * @brief Asks the network to use a specific computation backend where it is supported.
+         * @param[in] backendId backend identifier.
+         * @see Backend
+         *
+         * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT
+         * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals DNN_BACKEND_OPENCV.
+         */
+        CV_WRAP void setPreferableBackend(int backendId);
+
+        /**
+         * @brief Asks the network to run computations on a specific target device.
+         * @param[in] targetId target identifier.
+         * @see Target
+         *
+         * List of supported combinations backend / target:
+         * |                        | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE |
+         * |------------------------|--------------------|------------------------------|--------------------|
+         * | DNN_TARGET_CPU         |                  + |                            + |                  + |
+         * | DNN_TARGET_OPENCL      |                  + |                            + |                  + |
+         * | DNN_TARGET_OPENCL_FP16 |                  + |                            + |                    |
+         * | DNN_TARGET_MYRIAD      |                    |                            + |                    |
+         */
+        CV_WRAP void setPreferableTarget(int targetId);
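+
+        /* A minimal backend/target selection sketch, using a supported
+         * combination from the table above:
+         * @code
+         * net.setPreferableBackend(DNN_BACKEND_OPENCV);
+         * net.setPreferableTarget(DNN_TARGET_CPU);
+         * @endcode
+         */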
+
+        /** @brief Sets the new value for the layer output blob.
+         *  @param name descriptor of the layer output blob being updated.
+         *  @param blob new blob.
+         *  @see connect(String, String) for the format of the descriptor.
+         *  @note If the blob being updated is not empty then @p blob must have the same shape,
+         *  because network reshaping is not implemented yet.
+         */
+        CV_WRAP void setInput(InputArray blob, const String& name = "");
+
+        /** @brief Sets the new value for the learned param of the layer.
+         *  @param layer name or id of the layer.
+         *  @param numParam index of the layer parameter in the Layer::blobs array.
+         *  @param blob the new value.
+         *  @see Layer::blobs
+         *  @note If shape of the new blob differs from the previous shape,
+         *  then the following forward pass may fail.
+        */
+        CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob);
+
+        /** @brief Returns parameter blob of the layer.
+         *  @param layer name or id of the layer.
+         *  @param numParam index of the layer parameter in the Layer::blobs array.
+         *  @see Layer::blobs
+         */
+        CV_WRAP Mat getParam(LayerId layer, int numParam = 0);
+
+        /** @brief Returns indexes of layers with unconnected outputs.
+         */
+        CV_WRAP std::vector<int> getUnconnectedOutLayers() const;
+        /** @brief Returns input and output shapes for all layers in the loaded model;
+         *  preliminary inferencing isn't necessary.
+         *  @param netInputShapes shapes for all input blobs in net input layer.
+         *  @param layersIds output parameter for layer IDs.
+         *  @param inLayersShapes output parameter for input layers shapes;
+         * order is the same as in layersIds
+         *  @param outLayersShapes output parameter for output layers shapes;
+         * order is the same as in layersIds
+         */
+        CV_WRAP void getLayersShapes(const std::vector<MatShape>& netInputShapes,
+                                     CV_OUT std::vector<int>& layersIds,
+                                     CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes,
+                                     CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const;
+
+        /** @overload */
+        CV_WRAP void getLayersShapes(const MatShape& netInputShape,
+                                     CV_OUT std::vector<int>& layersIds,
+                                     CV_OUT std::vector<std::vector<MatShape> >& inLayersShapes,
+                                     CV_OUT std::vector<std::vector<MatShape> >& outLayersShapes) const;
+
+        /** @brief Returns input and output shapes for the layer with the specified
+         * id in the loaded model; preliminary inferencing isn't necessary.
+         *  @param netInputShape shape of the input blob in the net input layer.
+         *  @param layerId id for layer.
+         *  @param inLayerShapes output parameter for input layers shapes;
+         * order is the same as in layersIds
+         *  @param outLayerShapes output parameter for output layers shapes;
+         * order is the same as in layersIds
+         */
+        void getLayerShapes(const MatShape& netInputShape,
+                                    const int layerId,
+                                    CV_OUT std::vector<MatShape>& inLayerShapes,
+                                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP
+
+        /** @overload */
+        void getLayerShapes(const std::vector<MatShape>& netInputShapes,
+                                    const int layerId,
+                                    CV_OUT std::vector<MatShape>& inLayerShapes,
+                                    CV_OUT std::vector<MatShape>& outLayerShapes) const; // FIXIT: CV_WRAP
+
+        /** @brief Computes FLOPs for the whole loaded model with the specified input shapes.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @returns computed FLOPs.
+         */
+        CV_WRAP int64 getFLOPS(const std::vector<MatShape>& netInputShapes) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const MatShape& netInputShape) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const int layerId,
+                               const std::vector<MatShape>& netInputShapes) const;
+        /** @overload */
+        CV_WRAP int64 getFLOPS(const int layerId,
+                               const MatShape& netInputShape) const;
+
+        /** @brief Returns the list of layer types used in the model.
+         * @param layersTypes output parameter for returning types.
+         */
+        CV_WRAP void getLayerTypes(CV_OUT std::vector<String>& layersTypes) const;
+
+        /** @brief Returns the count of layers of the specified type.
+         * @param layerType type.
+         * @returns count of layers
+         */
+        CV_WRAP int getLayersCount(const String& layerType) const;
+
+        /** @brief Computes the number of bytes required to store
+         * all weights and intermediate blobs for the model.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @param weights output parameter to store resulting bytes for weights.
+         * @param blobs output parameter to store resulting bytes for intermediate blobs.
+         */
+        void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const; // FIXIT: CV_WRAP
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const int layerId,
+                                          const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+        /** @overload */
+        CV_WRAP void getMemoryConsumption(const int layerId,
+                                          const MatShape& netInputShape,
+                                          CV_OUT size_t& weights, CV_OUT size_t& blobs) const;
+
+        /** @brief Computes the number of bytes required to store
+         * all weights and intermediate blobs for each layer.
+         * @param netInputShapes vector of shapes for all net inputs.
+         * @param layerIds output vector to save layer IDs.
+         * @param weights output parameter to store resulting bytes for weights.
+         * @param blobs output parameter to store resulting bytes for intermediate blobs.
+         */
+        void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
+                                          CV_OUT std::vector<int>& layerIds,
+                                          CV_OUT std::vector<size_t>& weights,
+                                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
+        /** @overload */
+        void getMemoryConsumption(const MatShape& netInputShape,
+                                          CV_OUT std::vector<int>& layerIds,
+                                          CV_OUT std::vector<size_t>& weights,
+                                          CV_OUT std::vector<size_t>& blobs) const; // FIXIT: CV_WRAP
+
+        /** @brief Enables or disables layer fusion in the network.
+         * @param fusion true to enable the fusion, false to disable. The fusion is enabled by default.
+         */
+        CV_WRAP void enableFusion(bool fusion);
+
+        /** @brief Returns overall time for inference and timings (in ticks) for layers.
+         * Indexes in the returned vector correspond to layer ids. Some layers can be fused with others;
+         * in that case a zero tick count is returned for the skipped layers.
+         * @param timings vector for tick timings for all layers.
+         * @return overall ticks for model inference.
+         */
+        CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);
+
+    private:
+        struct Impl;
+        Ptr<Impl> impl;
+    };
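+
+    /* A minimal end-to-end sketch of the Net API above (file names and mean
+     * values are placeholders; readNetFromCaffe and blobFromImage are declared
+     * later in this header):
+     * @code
+     * Net net = readNetFromCaffe("deploy.prototxt", "weights.caffemodel");
+     * Mat img = imread("image.jpg");
+     * Mat blob = blobFromImage(img, 1.0, Size(224, 224), Scalar(104, 117, 123));
+     * net.setInput(blob);
+     * Mat prob = net.forward();   // forward pass over the whole network
+     * @endcode
+     */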
+
+    /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
+    *  @param cfgFile      path to the .cfg file with text description of the network architecture.
+    *  @param darknetModel path to the .weights file with learned network.
+    *  @returns Net object that is ready to do a forward pass; throws an exception on failure.
+    */
+    CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
+
+    /** @brief Reads a network model stored in <a href="http://caffe.berkeleyvision.org">Caffe</a> framework's format.
+      * @param prototxt   path to the .prototxt file with text description of the network architecture.
+      * @param caffeModel path to the .caffemodel file with learned network.
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
+
+    /** @brief Reads a network model stored in Caffe model in memory.
+      * @details This is an overloaded member function, provided for convenience.
+      * It differs from the above function only in what argument(s) it accepts.
+      * @param bufferProto buffer containing the content of the .prototxt file
+      * @param lenProto length of bufferProto
+      * @param bufferModel buffer containing the content of the .caffemodel file
+      * @param lenModel length of bufferModel
+      * @returns Net object.
+      */
+    CV_EXPORTS Net readNetFromCaffe(const char *bufferProto, size_t lenProto,
+                                    const char *bufferModel = NULL, size_t lenModel = 0);
+
+    /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
+      * @param model  path to the .pb file with binary protobuf description of the network architecture
+      * @param config path to the .pbtxt file that contains text graph definition in protobuf format.
+      *               The resulting Net object is built from the text graph using weights from the binary one,
+      *               which makes it more flexible.
+      * @returns Net object.
+      */
+    CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
+
+    /** @brief Reads a network model stored in <a href="https://www.tensorflow.org/">TensorFlow</a> framework's format.
+      * @details This is an overloaded member function, provided for convenience.
+      * It differs from the above function only in what argument(s) it accepts.
+      * @param bufferModel buffer containing the content of the pb file
+      * @param lenModel length of bufferModel
+      * @param bufferConfig buffer containing the content of the pbtxt file
+      * @param lenConfig length of bufferConfig
+      */
+    CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel,
+                                         const char *bufferConfig = NULL, size_t lenConfig = 0);
+
+    /**
+     *  @brief Reads a network model stored in <a href="http://torch.ch">Torch7</a> framework's format.
+     *  @param model    path to the file, dumped from Torch by using torch.save() function.
+     *  @param isBinary specifies whether the network was serialized in ASCII mode or binary.
+     *  @returns Net object.
+     *
+     *  @note ASCII mode of the Torch serializer is preferable, because binary mode extensively uses the `long` type of the C language,
+     *  which has a different bit length on different systems.
+     *
+     * The loaded file must contain a serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
+     * with the imported network. Try to eliminate custom objects from the serialized data to avoid import errors.
+     *
+     * List of supported layers (i.e. object instances derived from Torch nn.Module class):
+     * - nn.Sequential
+     * - nn.Parallel
+     * - nn.Concat
+     * - nn.Linear
+     * - nn.SpatialConvolution
+     * - nn.SpatialMaxPooling, nn.SpatialAveragePooling
+     * - nn.ReLU, nn.TanH, nn.Sigmoid
+     * - nn.Reshape
+     * - nn.SoftMax, nn.LogSoftMax
+     *
+     * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
+     */
+     CV_EXPORTS_W Net readNetFromTorch(const String &model, bool isBinary = true);
+
+     /**
+      * @brief Read deep learning network represented in one of the supported formats.
+      * @param[in] model Binary file containing trained weights. The following file
+      *                  extensions are expected for models from different frameworks:
+      *                  * `*.caffemodel` (Caffe, http://caffe.berkeleyvision.org/)
+      *                  * `*.pb` (TensorFlow, https://www.tensorflow.org/)
+      *                  * `*.t7` | `*.net` (Torch, http://torch.ch/)
+      *                  * `*.weights` (Darknet, https://pjreddie.com/darknet/)
+      *                  * `*.bin` (DLDT, https://software.intel.com/openvino-toolkit)
+      * @param[in] config Text file containing the network configuration. It can be a
+      *                   file with one of the following extensions:
+      *                  * `*.prototxt` (Caffe, http://caffe.berkeleyvision.org/)
+      *                  * `*.pbtxt` (TensorFlow, https://www.tensorflow.org/)
+      *                  * `*.cfg` (Darknet, https://pjreddie.com/darknet/)
+      *                  * `*.xml` (DLDT, https://software.intel.com/openvino-toolkit)
+      * @param[in] framework Explicit framework name tag to determine a format.
+      * @returns Net object.
+      *
+      * This function automatically detects the origin framework of the trained model
+      * and calls an appropriate function such as @ref readNetFromCaffe, @ref readNetFromTensorflow,
+      * @ref readNetFromTorch or @ref readNetFromDarknet. The order of the @p model and @p config
+      * arguments does not matter.
+      */
+     CV_EXPORTS_W Net readNet(const String& model, const String& config = "", const String& framework = "");
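+
+     /* A minimal usage sketch (file names are placeholders); the origin
+      * framework is deduced from the file extensions:
+      * @code
+      * Net caffeNet = readNet("model.caffemodel", "config.prototxt");
+      * Net tfNet    = readNet("graph.pb");
+      * @endcode
+      */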
+
+    /** @brief Loads a blob which was serialized as a torch.Tensor object of the Torch7 framework.
+     *  @warning This function has the same limitations as readNetFromTorch().
+     */
+    CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] xml XML configuration file with network's topology.
+     *  @param[in] bin Binary file with trained weights.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
+     */
+    CV_EXPORTS_W Net readNetFromModelOptimizer(const String &xml, const String &bin);
+
+    /** @brief Creates a 4-dimensional blob from an image. Optionally resizes and crops @p image from the center,
+     *  subtracts @p mean values, scales values by @p scalefactor, and swaps Blue and Red channels.
+     *  @param image input image (with 1, 3 or 4 channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p image values.
+     *  @param swapRB flag which indicates that the first and last channels
+     *  of a 3-channel image should be swapped.
+     *  @param crop flag which indicates whether the image will be cropped after resize or not
+     *  @details if @p crop is true, the input image is resized so that one side after resize equals the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, a crop from the center is performed.
+     *  If @p crop is false, a direct resize without cropping and without preserving the aspect ratio is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
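+
+    /* A minimal usage sketch (file name and sizes are placeholders):
+     * @code
+     * Mat img = imread("input.jpg");
+     * // scalefactor 1/255, resize to 224x224, zero mean, swapRB=true, crop=false:
+     * Mat blob = blobFromImage(img, 1.0/255.0, Size(224, 224), Scalar(), true, false);
+     * // blob has dims 1 x 3 x 224 x 224 (NCHW) and type CV_32F
+     * @endcode
+     */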
+
+    /** @brief Creates 4-dimensional blob from image.
+     *  @details This is an overloaded member function, provided for convenience.
+     *           It differs from the above function only in what argument(s) it accepts.
+     */
+    CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
+                                  const Size& size = Size(), const Scalar& mean = Scalar(),
+                                  bool swapRB=true, bool crop=true);
+
+
+    /** @brief Creates a 4-dimensional blob from a series of images. Optionally resizes and
+     *  crops @p images from the center, subtracts @p mean values, scales values by @p scalefactor,
+     *  and swaps Blue and Red channels.
+     *  @param images input images (all with 1, 3 or 4 channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p images values.
+     *  @param swapRB flag which indicates that the first and last channels
+     *  of a 3-channel image should be swapped.
+     *  @param crop flag which indicates whether the images will be cropped after resize or not
+     *  @details if @p crop is true, the input image is resized so that one side after resize equals the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, a crop from the center is performed.
+     *  If @p crop is false, a direct resize without cropping and without preserving the aspect ratio is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
+                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
+
+    /** @brief Creates 4-dimensional blob from series of images.
+     *  @details This is an overloaded member function, provided for convenience.
+     *           It differs from the above function only in what argument(s) it accepts.
+     */
+    CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
+                                   double scalefactor=1.0, Size size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
+
+    /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
+     *  (std::vector<cv::Mat>).
+     *  @param[in] blob_ 4-dimensional array (images, channels, height, width) in floating point precision (CV_32F) from
+     *  which you would like to extract the images.
+     *  @param[out] images_ array of 2D Mat containing the images extracted from the blob in floating point precision
+     *  (CV_32F). They are neither normalized nor mean-added. The number of returned images equals the first dimension
+     *  of the blob (batch size). Every image has a number of channels equal to the second dimension of the blob (depth).
+     */
+    CV_EXPORTS_W void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_);
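+
+    /* A minimal round-trip sketch:
+     * @code
+     * std::vector<Mat> imgs(2, Mat(32, 32, CV_8UC3, Scalar::all(127)));
+     * Mat blob = blobFromImages(imgs);   // 2 x 3 x 32 x 32
+     * std::vector<Mat> restored;
+     * imagesFromBlob(blob, restored);    // restored.size() == 2, each 32x32 CV_32FC3
+     * @endcode
+     */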
+
+    /** @brief Converts all weights of a Caffe network to half-precision floating point.
+     * @param src Path to the original model from the Caffe framework containing single-
+     *            precision floating point weights (usually with a `.caffemodel` extension).
+     * @param dst Path to destination model with updated weights.
+     * @param layersTypes Set of layers types which parameters will be converted.
+     *                    By default, converts only Convolutional and Fully-Connected layers'
+     *                    weights.
+     *
+     * @note The shrunk model has no original float32 weights, so it can't be used
+     *       in the original Caffe framework anymore. However, the data layout
+     *       is taken from NVIDIA's Caffe fork: https://github.com/NVIDIA/caffe,
+     *       so the resulting model may be used there.
+     */
+    CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst,
+                                       const std::vector<String>& layersTypes = std::vector<String>());
+
+    /** @brief Performs non-maximum suppression given boxes and corresponding scores.
+
+     * @param bboxes a set of bounding boxes to apply NMS to.
+     * @param scores a set of corresponding confidences.
+     * @param score_threshold a threshold used to filter boxes by score.
+     * @param nms_threshold a threshold used in non maximum suppression.
+     * @param indices the kept indices of bboxes after NMS.
+     * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$.
+     * @param top_k if `>0`, keep at most @p top_k picked indices.
+     */
+    CV_EXPORTS_W void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
+                               const float score_threshold, const float nms_threshold,
+                               CV_OUT std::vector<int>& indices,
+                               const float eta = 1.f, const int top_k = 0);
+
+    CV_EXPORTS void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
+                             const float score_threshold, const float nms_threshold,
+                             CV_OUT std::vector<int>& indices,
+                             const float eta = 1.f, const int top_k = 0);
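+
+    /* A minimal NMS sketch: two heavily overlapping boxes; the lower-scored
+     * one is suppressed (thresholds are illustrative):
+     * @code
+     * std::vector<Rect> boxes;
+     * boxes.push_back(Rect(10, 10, 100, 100));
+     * boxes.push_back(Rect(12, 12, 100, 100));
+     * std::vector<float> scores;
+     * scores.push_back(0.9f);
+     * scores.push_back(0.8f);
+     * std::vector<int> keep;
+     * NMSBoxes(boxes, scores, 0.5f, 0.4f, keep);   // keep == {0}
+     * @endcode
+     */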
+
+//! @}
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+
+#include <opencv2/dnn/layer.hpp>
+#include <opencv2/dnn/dnn.inl.hpp>
+
+#endif  /* OPENCV_DNN_DNN_HPP */

+ 390 - 0
ThresholdTools/include/opencv2/dnn/dnn.inl.hpp

@@ -0,0 +1,390 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_INL_HPP
+#define OPENCV_DNN_DNN_INL_HPP
+
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+template<typename TypeIter>
+DictValue DictValue::arrayInt(TypeIter begin, int size)
+{
+    DictValue res(Param::INT, new AutoBuffer<int64, 1>(size));
+    for (int j = 0; j < size; begin++, j++)
+        (*res.pi)[j] = *begin;
+    return res;
+}
+
+template<typename TypeIter>
+DictValue DictValue::arrayReal(TypeIter begin, int size)
+{
+    DictValue res(Param::REAL, new AutoBuffer<double, 1>(size));
+    for (int j = 0; j < size; begin++, j++)
+        (*res.pd)[j] = *begin;
+    return res;
+}
+
+template<typename TypeIter>
+DictValue DictValue::arrayString(TypeIter begin, int size)
+{
+    DictValue res(Param::STRING, new AutoBuffer<String, 1>(size));
+    for (int j = 0; j < size; begin++, j++)
+        (*res.ps)[j] = *begin;
+    return res;
+}
+
+template<>
+inline DictValue DictValue::get<DictValue>(int idx) const
+{
+    CV_Assert(idx == -1);
+    return *this;
+}
+
+template<>
+inline int64 DictValue::get<int64>(int idx) const
+{
+    CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
+    idx = (idx == -1) ? 0 : idx;
+
+    if (type == Param::INT)
+    {
+        return (*pi)[idx];
+    }
+    else if (type == Param::REAL)
+    {
+        double doubleValue = (*pd)[idx];
+
+        double fracpart, intpart;
+        fracpart = std::modf(doubleValue, &intpart);
+        CV_Assert(fracpart == 0.0);
+
+        return (int64)doubleValue;
+    }
+    else if (type == Param::STRING)
+    {
+        return std::atoi((*ps)[idx].c_str());
+    }
+    else
+    {
+        CV_Assert(isInt() || isReal() || isString());
+        return 0;
+    }
+}
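+
+/* Conversion sketch: a real-valued DictValue converts to int64 only when it
+ * holds an exact integer; otherwise the fracpart assertion above fires.
+ * @code
+ * DictValue ok(2.0);
+ * int64 v = ok.get<int64>();    // v == 2
+ * // DictValue(2.5).get<int64>() would fail CV_Assert(fracpart == 0.0)
+ * @endcode
+ */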
+
+template<>
+inline int DictValue::get<int>(int idx) const
+{
+    return (int)get<int64>(idx);
+}
+
+inline int DictValue::getIntValue(int idx) const
+{
+    return (int)get<int64>(idx);
+}
+
+template<>
+inline unsigned DictValue::get<unsigned>(int idx) const
+{
+    return (unsigned)get<int64>(idx);
+}
+
+template<>
+inline bool DictValue::get<bool>(int idx) const
+{
+    return (get<int64>(idx) != 0);
+}
+
+template<>
+inline double DictValue::get<double>(int idx) const
+{
+    CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
+    idx = (idx == -1) ? 0 : idx;
+
+    if (type == Param::REAL)
+    {
+        return (*pd)[idx];
+    }
+    else if (type == Param::INT)
+    {
+        return (double)(*pi)[idx];
+    }
+    else if (type == Param::STRING)
+    {
+        return std::atof((*ps)[idx].c_str());
+    }
+    else
+    {
+        CV_Assert(isReal() || isInt() || isString());
+        return 0;
+    }
+}
+
+inline double DictValue::getRealValue(int idx) const
+{
+    return get<double>(idx);
+}
+
+template<>
+inline float DictValue::get<float>(int idx) const
+{
+    return (float)get<double>(idx);
+}
+
+template<>
+inline String DictValue::get<String>(int idx) const
+{
+    CV_Assert(isString());
+    CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
+    return (*ps)[(idx == -1) ? 0 : idx];
+}
+
+
+inline String DictValue::getStringValue(int idx) const
+{
+    return get<String>(idx);
+}
+
+inline void DictValue::release()
+{
+    switch (type)
+    {
+    case Param::INT:
+        delete pi;
+        break;
+    case Param::STRING:
+        delete ps;
+        break;
+    case Param::REAL:
+        delete pd;
+        break;
+    }
+}
+
+inline DictValue::~DictValue()
+{
+    release();
+}
+
+inline DictValue & DictValue::operator=(const DictValue &r)
+{
+    if (&r == this)
+        return *this;
+
+    if (r.type == Param::INT)
+    {
+        AutoBuffer<int64, 1> *tmp = new AutoBuffer<int64, 1>(*r.pi);
+        release();
+        pi = tmp;
+    }
+    else if (r.type == Param::STRING)
+    {
+        AutoBuffer<String, 1> *tmp = new AutoBuffer<String, 1>(*r.ps);
+        release();
+        ps = tmp;
+    }
+    else if (r.type == Param::REAL)
+    {
+        AutoBuffer<double, 1> *tmp = new AutoBuffer<double, 1>(*r.pd);
+        release();
+        pd = tmp;
+    }
+
+    type = r.type;
+
+    return *this;
+}
+
+inline DictValue::DictValue(const DictValue &r)
+{
+    type = r.type;
+
+    if (r.type == Param::INT)
+        pi = new AutoBuffer<int64, 1>(*r.pi);
+    else if (r.type == Param::STRING)
+        ps = new AutoBuffer<String, 1>(*r.ps);
+    else if (r.type == Param::REAL)
+        pd = new AutoBuffer<double, 1>(*r.pd);
+}
+
+inline bool DictValue::isString() const
+{
+    return (type == Param::STRING);
+}
+
+inline bool DictValue::isInt() const
+{
+    return (type == Param::INT);
+}
+
+inline bool DictValue::isReal() const
+{
+    return (type == Param::REAL || type == Param::INT);
+}
+
+inline int DictValue::size() const
+{
+    switch (type)
+    {
+    case Param::INT:
+        return (int)pi->size();
+    case Param::STRING:
+        return (int)ps->size();
+    case Param::REAL:
+        return (int)pd->size();
+    }
+#ifdef __OPENCV_BUILD
+    CV_Error(Error::StsInternal, "");
+#else
+    CV_ErrorNoReturn(Error::StsInternal, "");
+#endif
+}
+
+inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
+{
+    int i;
+
+    if (dictv.isInt())
+    {
+        for (i = 0; i < dictv.size() - 1; i++)
+            stream << dictv.get<int64>(i) << ", ";
+        stream << dictv.get<int64>(i);
+    }
+    else if (dictv.isReal())
+    {
+        for (i = 0; i < dictv.size() - 1; i++)
+            stream << dictv.get<double>(i) << ", ";
+        stream << dictv.get<double>(i);
+    }
+    else if (dictv.isString())
+    {
+        for (i = 0; i < dictv.size() - 1; i++)
+            stream << "\"" << dictv.get<String>(i) << "\", ";
+        stream << dictv.get<String>(i);
+    }
+
+    return stream;
+}
+
+/////////////////////////////////////////////////////////////////
+
+inline bool Dict::has(const String &key) const
+{
+    return dict.count(key) != 0;
+}
+
+inline DictValue *Dict::ptr(const String &key)
+{
+    _Dict::iterator i = dict.find(key);
+    return (i == dict.end()) ? NULL : &i->second;
+}
+
+inline const DictValue *Dict::ptr(const String &key) const
+{
+    _Dict::const_iterator i = dict.find(key);
+    return (i == dict.end()) ? NULL : &i->second;
+}
+
+inline const DictValue &Dict::get(const String &key) const
+{
+    _Dict::const_iterator i = dict.find(key);
+    if (i == dict.end())
+        CV_Error(Error::StsObjectNotFound, "Required argument \"" + key + "\" not found in dictionary");
+    return i->second;
+}
+
+template <typename T>
+inline T Dict::get(const String &key) const
+{
+    return this->get(key).get<T>();
+}
+
+template <typename T>
+inline T Dict::get(const String &key, const T &defaultValue) const
+{
+    _Dict::const_iterator i = dict.find(key);
+
+    if (i != dict.end())
+        return i->second.get<T>();
+    else
+        return defaultValue;
+}
+
+template<typename T>
+inline const T &Dict::set(const String &key, const T &value)
+{
+    _Dict::iterator i = dict.find(key);
+
+    if (i != dict.end())
+        i->second = DictValue(value);
+    else
+        dict.insert(std::make_pair(key, DictValue(value)));
+
+    return value;
+}
+
+inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
+{
+    Dict::_Dict::const_iterator it;
+    for (it = dict.dict.begin(); it != dict.dict.end(); it++)
+        stream << it->first << " : " << it->second << "\n";
+
+    return stream;
+}
+
+inline std::map<String, DictValue>::const_iterator Dict::begin() const
+{
+    return dict.begin();
+}
+
+inline std::map<String, DictValue>::const_iterator Dict::end() const
+{
+    return dict.end();
+}
+
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+
+#endif
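+
+// A minimal usage sketch of Dict / DictValue (not part of the upstream header;
+// key names like "kernel_size" are illustrative). Guarded with #if 0 so it
+// never compiles into the header.
+#if 0
+void dictValueSketch()
+{
+    cv::dnn::Dict d;
+    d.set("kernel_size", 3);                 // stored as Param::INT
+    d.set("scale", 0.5);                     // stored as Param::REAL
+    int k = d.get<int>("kernel_size");       // 3
+    double s = d.get<double>("scale");       // 0.5; an INT entry would convert too
+    // REAL -> int64 only succeeds for integral values (see the modf check in
+    // DictValue::get<int64> above); d.get<int>("scale") would assert here.
+    int stride = d.get<int>("stride", 1);    // missing key -> default value
+    CV_Assert(k == 3 && s == 0.5 && stride == 1);
+}
+#endif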

+ 78 - 0
ThresholdTools/include/opencv2/dnn/layer.details.hpp

@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+#ifndef OPENCV_DNN_LAYER_DETAILS_HPP
+#define OPENCV_DNN_LAYER_DETAILS_HPP
+
+#include <opencv2/dnn/layer.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+/** @brief Registers layer constructor in runtime.
+*   @param type string, containing type name of the layer.
+*   @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
+*   @details This macro must be placed inside the function code.
+*/
+#define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \
+    cv::dnn::LayerFactory::registerLayer(#type, constructorFunc);
+
+/** @brief Registers layer class in runtime.
+ *  @param type string, containing type name of the layer.
+ *  @param class C++ class, derived from Layer.
+ *  @details This macro must be placed inside the function code.
+ */
+#define CV_DNN_REGISTER_LAYER_CLASS(type, class) \
+    cv::dnn::LayerFactory::registerLayer(#type, cv::dnn::details::_layerDynamicRegisterer<class>);
+
+/** @brief Registers layer constructor on module load time.
+*   @param type string, containing type name of the layer.
+*   @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
+*   @details This macro must be placed outside the function code.
+*/
+#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \
+static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc);
+
+/** @brief Registers layer class on module load time.
+ *  @param type string, containing type name of the layer.
+ *  @param class C++ class, derived from Layer.
+ *  @details This macro must be placed outside the function code.
+ */
+#define CV_DNN_REGISTER_LAYER_CLASS_STATIC(type, class)                         \
+Ptr<Layer> __LayerStaticRegisterer_func_##type(LayerParams &params) \
+    { return Ptr<Layer>(new class(params)); }                       \
+static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStaticRegisterer_func_##type);
+
+namespace details {
+
+template<typename LayerClass>
+Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
+{
+    return Ptr<Layer>(LayerClass::create(params));
+}
+
+// allows automatic registration of the created layer at module load time
+class _LayerStaticRegisterer
+{
+    String type;
+public:
+
+    _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor)
+    {
+        this->type = layerType;
+        LayerFactory::registerLayer(layerType, layerConstructor);
+    }
+
+    ~_LayerStaticRegisterer()
+    {
+        LayerFactory::unregisterLayer(type);
+    }
+};
+
+} // namespace
+CV__DNN_EXPERIMENTAL_NS_END
+}} // namespace
+
+#endif
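+
+// A minimal registration sketch (hypothetical: MyLayer is not an OpenCV class,
+// and overrides of Layer's required virtual methods are omitted for brevity).
+#if 0
+class MyLayer : public cv::dnn::Layer
+{
+public:
+    MyLayer(const cv::dnn::LayerParams &params) { setParamsFrom(params); }
+    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams &params)
+    {
+        return cv::Ptr<cv::dnn::Layer>(new MyLayer(params));
+    }
+    // ... forward() etc. would be overridden here ...
+};
+
+void registerMyLayer()
+{
+    // Runtime registration; must run before a net referencing "MyType" is built:
+    CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer)
+}
+// Or, at file scope, register on module load:
+// CV_DNN_REGISTER_LAYER_CLASS_STATIC(MyType, MyLayer)
+#endif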

+ 85 - 0
ThresholdTools/include/opencv2/dnn/layer.hpp

@@ -0,0 +1,85 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_LAYER_HPP
+#define OPENCV_DNN_LAYER_HPP
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+//!
+//! @defgroup dnnLayerFactory Utilities for New Layers Registration
+//! @{
+
+/** @brief %Layer factory allows creating instances of registered layers. */
+class CV_EXPORTS LayerFactory
+{
+public:
+
+    //! Each Layer class must provide this function to the factory
+    typedef Ptr<Layer>(*Constructor)(LayerParams &params);
+
+    //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe.
+    static void registerLayer(const String &type, Constructor constructor);
+
+    //! Unregisters registered layer with specified type name. Thread-safe.
+    static void unregisterLayer(const String &type);
+
+    /** @brief Creates instance of registered layer.
+     *  @param type type name of the layer to create.
+     *  @param params parameters which will be used for layer initialization.
+     *  @note Thread-safe.
+     */
+    static Ptr<Layer> createLayerInstance(const String &type, LayerParams& params);
+
+private:
+    LayerFactory();
+};
+
+//! @}
+//! @}
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+#endif
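+
+// A minimal sketch of creating a registered layer by type name (assumes the
+// built-in "ReLU" layer is registered, as it is once the dnn module is loaded;
+// the "negative_slope" parameter name is an assumption about that layer).
+#if 0
+void createLayerSketch()
+{
+    cv::dnn::LayerParams params;
+    params.set("negative_slope", 0.1);
+    cv::Ptr<cv::dnn::Layer> relu =
+        cv::dnn::LayerFactory::createLayerInstance("ReLU", params);
+    CV_Assert(!relu.empty());
+}
+#endif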

+ 211 - 0
ThresholdTools/include/opencv2/dnn/shape_utils.hpp

@@ -0,0 +1,211 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_DNN_DNN_SHAPE_UTILS_HPP
+#define OPENCV_DNN_DNN_SHAPE_UTILS_HPP
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/types_c.h>
+#include <ostream>
+
+namespace cv {
+namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+//Useful shortcut
+inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
+{
+    return s << "[" << r.start << ", " << r.end << ")";
+}
+
+//Slicing
+
+struct _Range : public cv::Range
+{
+    _Range(const Range &r) : cv::Range(r) {}
+    _Range(int start_, int size_ = 1) : cv::Range(start_, start_ + size_) {}
+};
+
+static inline Mat slice(const Mat &m, const _Range &r0)
+{
+    Range ranges[CV_MAX_DIM];
+    for (int i = 1; i < m.dims; i++)
+        ranges[i] = Range::all();
+    ranges[0] = r0;
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
+{
+    CV_Assert(m.dims >= 2);
+    Range ranges[CV_MAX_DIM];
+    for (int i = 2; i < m.dims; i++)
+        ranges[i] = Range::all();
+    ranges[0] = r0;
+    ranges[1] = r1;
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
+{
+    CV_Assert(m.dims >= 3);
+    Range ranges[CV_MAX_DIM];
+    for (int i = 3; i < m.dims; i++)
+        ranges[i] = Range::all();
+    ranges[0] = r0;
+    ranges[1] = r1;
+    ranges[2] = r2;
+    return m(&ranges[0]);
+}
+
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
+{
+    CV_Assert(m.dims >= 4);
+    Range ranges[CV_MAX_DIM];
+    for (int i = 4; i < m.dims; i++)
+        ranges[i] = Range::all();
+    ranges[0] = r0;
+    ranges[1] = r1;
+    ranges[2] = r2;
+    ranges[3] = r3;
+    return m(&ranges[0]);
+}
+
+static inline Mat getPlane(const Mat &m, int n, int cn)
+{
+    CV_Assert(m.dims > 2);
+    int sz[CV_MAX_DIM];
+    for(int i = 2; i < m.dims; i++)
+    {
+        sz[i-2] = m.size.p[i];
+    }
+    return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));
+}
+
+static inline MatShape shape(const int* dims, const int n)
+{
+    MatShape shape;
+    shape.assign(dims, dims + n);
+    return shape;
+}
+
+static inline MatShape shape(const Mat& mat)
+{
+    return shape(mat.size.p, mat.dims);
+}
+
+static inline MatShape shape(const MatSize& sz)
+{
+    return shape(sz.p, sz.dims());
+}
+
+static inline MatShape shape(const UMat& mat)
+{
+    return shape(mat.size.p, mat.dims);
+}
+
+namespace {inline bool is_neg(int i) { return i < 0; }}
+
+static inline MatShape shape(int a0, int a1=-1, int a2=-1, int a3=-1)
+{
+    int dims[] = {a0, a1, a2, a3};
+    MatShape s = shape(dims, 4);
+    s.erase(std::remove_if(s.begin(), s.end(), is_neg), s.end());
+    return s;
+}
+
+static inline int total(const MatShape& shape, int start = -1, int end = -1)
+{
+    if (start == -1) start = 0;
+    if (end == -1) end = (int)shape.size();
+
+    if (shape.empty())
+        return 0;
+
+    int elems = 1;
+    CV_Assert(start <= (int)shape.size() && end <= (int)shape.size() &&
+              start <= end);
+    for(int i = start; i < end; i++)
+    {
+        elems *= shape[i];
+    }
+    return elems;
+}
+
+static inline MatShape concat(const MatShape& a, const MatShape& b)
+{
+    MatShape c = a;
+    c.insert(c.end(), b.begin(), b.end());
+
+    return c;
+}
+
+inline void print(const MatShape& shape, const String& name = "")
+{
+    printf("%s: [", name.c_str());
+    size_t i, n = shape.size();
+    for( i = 0; i < n; i++ )
+        printf(" %d", shape[i]);
+    printf(" ]\n");
+}
+
+inline int clamp(int ax, int dims)
+{
+    return ax < 0 ? ax + dims : ax;
+}
+
+inline int clamp(int ax, const MatShape& shape)
+{
+    return clamp(ax, (int)shape.size());
+}
+
+inline Range clamp(const Range& r, int axisSize)
+{
+    Range clamped(std::max(r.start, 0),
+                  r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);
+    CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);
+    return clamped;
+}
+
+CV__DNN_EXPERIMENTAL_NS_END
+}
+}
+#endif
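+
+// A minimal sketch of the shape helpers above (the blob dimensions are
+// illustrative):
+#if 0
+void shapeUtilsSketch()
+{
+    int dims[] = {1, 3, 224, 224};                       // an NCHW blob
+    cv::Mat blob(4, dims, CV_32F, cv::Scalar(0));
+    cv::dnn::MatShape s = cv::dnn::shape(blob);          // {1, 3, 224, 224}
+    CV_Assert(cv::dnn::total(s) == 1*3*224*224);
+    CV_Assert(cv::dnn::total(s, 2) == 224*224);          // axes 2..end
+    cv::Mat plane = cv::dnn::getPlane(blob, 0, 0);       // 224x224, CV_32F
+    // Take batch item 0 and channels [1, 3):
+    cv::Mat chans = cv::dnn::slice(blob, cv::dnn::_Range(0), cv::dnn::_Range(1, 2));
+}
+#endif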

+ 1424 - 0
ThresholdTools/include/opencv2/features2d.hpp

@@ -0,0 +1,1424 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_FEATURES_2D_HPP
+#define OPENCV_FEATURES_2D_HPP
+
+#include "opencv2/opencv_modules.hpp"
+#include "opencv2/core.hpp"
+
+#ifdef HAVE_OPENCV_FLANN
+#include "opencv2/flann/miniflann.hpp"
+#endif
+
+/**
+  @defgroup features2d 2D Features Framework
+  @{
+    @defgroup features2d_main Feature Detection and Description
+    @defgroup features2d_match Descriptor Matchers
+
+Matchers of keypoint descriptors in OpenCV have wrappers with a common interface that enables you to
+easily switch between different algorithms solving the same problem. This section is devoted to
+matching descriptors that are represented as vectors in a multidimensional space. All objects that
+implement vector descriptor matchers inherit the DescriptorMatcher interface.
+
+@note
+   -   An example explaining keypoint matching can be found at
+        opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
+    -   An example on descriptor matching evaluation can be found at
+        opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
+    -   An example on one to many image matching can be found at
+        opencv_source_code/samples/cpp/matching_to_many_images.cpp
+
+    @defgroup features2d_draw Drawing Function of Keypoints and Matches
+    @defgroup features2d_category Object Categorization
+
+This section describes approaches based on local 2D features and used to categorize objects.
+
+@note
+   -   A complete Bag-Of-Words sample can be found at
+        opencv_source_code/samples/cpp/bagofwords_classification.cpp
+    -   (Python) An example using the features2D framework to perform object categorization can be
+        found at opencv_source_code/samples/python/find_obj.py
+
+  @}
+ */
+
+namespace cv
+{
+
+//! @addtogroup features2d
+//! @{
+
+// //! writes vector of keypoints to the file storage
+// CV_EXPORTS void write(FileStorage& fs, const String& name, const std::vector<KeyPoint>& keypoints);
+// //! reads vector of keypoints from the specified file storage node
+// CV_EXPORTS void read(const FileNode& node, CV_OUT std::vector<KeyPoint>& keypoints);
+
+/** @brief A class that filters a vector of keypoints.
+
+ Because it is currently difficult to provide a convenient interface for all usage scenarios of
+ the keypoints filter class, it has only the few static methods that are needed for now.
+ */
+class CV_EXPORTS KeyPointsFilter
+{
+public:
+    KeyPointsFilter(){}
+
+    /*
+     * Remove keypoints within borderPixels of an image edge.
+     */
+    static void runByImageBorder( std::vector<KeyPoint>& keypoints, Size imageSize, int borderSize );
+    /*
+     * Remove keypoints of sizes out of range.
+     */
+    static void runByKeypointSize( std::vector<KeyPoint>& keypoints, float minSize,
+                                   float maxSize=FLT_MAX );
+    /*
+     * Remove keypoints from some image by mask for pixels of this image.
+     */
+    static void runByPixelsMask( std::vector<KeyPoint>& keypoints, const Mat& mask );
+    /*
+     * Remove duplicated keypoints.
+     */
+    static void removeDuplicated( std::vector<KeyPoint>& keypoints );
+    /*
+     * Remove duplicated keypoints and sort the remaining keypoints
+     */
+    static void removeDuplicatedSorted( std::vector<KeyPoint>& keypoints );
+
+    /*
+     * Retain the specified number of the best keypoints (according to the response)
+     */
+    static void retainBest( std::vector<KeyPoint>& keypoints, int npoints );
+};
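+
+// A minimal filtering sketch (the border size and keypoint budget are
+// illustrative):
+#if 0
+void keyPointsFilterSketch(std::vector<cv::KeyPoint>& keypoints, cv::Size imageSize)
+{
+    cv::KeyPointsFilter::runByImageBorder(keypoints, imageSize, 16); // drop near-border points
+    cv::KeyPointsFilter::removeDuplicatedSorted(keypoints);          // dedupe and sort
+    cv::KeyPointsFilter::retainBest(keypoints, 500);                 // keep the 500 strongest
+}
+#endif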
+
+
+/************************************ Base Classes ************************************/
+
+/** @brief Abstract base class for 2D image feature detectors and descriptor extractors
+*/
+class CV_EXPORTS_W Feature2D : public virtual Algorithm
+{
+public:
+    virtual ~Feature2D();
+
+    /** @brief Detects keypoints in an image (first variant) or image set (second variant).
+
+    @param image Image.
+    @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set
+    of keypoints detected in images[i] .
+    @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer
+    matrix with non-zero values in the region of interest.
+     */
+    CV_WRAP virtual void detect( InputArray image,
+                                 CV_OUT std::vector<KeyPoint>& keypoints,
+                                 InputArray mask=noArray() );
+
+    /** @overload
+    @param images Image set.
+    @param keypoints The detected keypoints. In the second variant of the method keypoints[i] is a set
+    of keypoints detected in images[i] .
+    @param masks Masks for each input image specifying where to look for keypoints (optional).
+    masks[i] is a mask for images[i].
+    */
+    CV_WRAP virtual void detect( InputArrayOfArrays images,
+                         CV_OUT std::vector<std::vector<KeyPoint> >& keypoints,
+                         InputArrayOfArrays masks=noArray() );
+
+    /** @brief Computes the descriptors for a set of keypoints detected in an image (first variant) or image set
+    (second variant).
+
+    @param image Image.
+    @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be
+    computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint
+    with several dominant orientations (for each orientation).
+    @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are
+    the descriptors computed for keypoints[i]. Row j of descriptors (or descriptors[i]) is the
+    descriptor for the j-th keypoint.
+     */
+    CV_WRAP virtual void compute( InputArray image,
+                                  CV_OUT CV_IN_OUT std::vector<KeyPoint>& keypoints,
+                                  OutputArray descriptors );
+
+    /** @overload
+
+    @param images Image set.
+    @param keypoints Input collection of keypoints. Keypoints for which a descriptor cannot be
+    computed are removed. Sometimes new keypoints can be added, for example: SIFT duplicates keypoint
+    with several dominant orientations (for each orientation).
+    @param descriptors Computed descriptors. In the second variant of the method descriptors[i] are
+    the descriptors computed for keypoints[i]. Row j of descriptors (or descriptors[i]) is the
+    descriptor for the j-th keypoint.
+    */
+    CV_WRAP virtual void compute( InputArrayOfArrays images,
+                          CV_OUT CV_IN_OUT std::vector<std::vector<KeyPoint> >& keypoints,
+                          OutputArrayOfArrays descriptors );
+
+    /** Detects keypoints and computes the descriptors */
+    CV_WRAP virtual void detectAndCompute( InputArray image, InputArray mask,
+                                           CV_OUT std::vector<KeyPoint>& keypoints,
+                                           OutputArray descriptors,
+                                           bool useProvidedKeypoints=false );
+
+    CV_WRAP virtual int descriptorSize() const;
+    CV_WRAP virtual int descriptorType() const;
+    CV_WRAP virtual int defaultNorm() const;
+
+    CV_WRAP void write( const String& fileName ) const;
+
+    CV_WRAP void read( const String& fileName );
+
+    virtual void write( FileStorage&) const CV_OVERRIDE;
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP virtual void read( const FileNode&) CV_OVERRIDE;
+
+    //! Return true if detector object is empty
+    CV_WRAP virtual bool empty() const CV_OVERRIDE;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP inline void write(const Ptr<FileStorage>& fs, const String& name = String()) const { Algorithm::write(fs, name); }
+};
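+
+// A minimal detect/compute sketch through the common Feature2D interface
+// (ORB is just one concrete choice; any Feature2D behaves the same way):
+#if 0
+void feature2dSketch(const cv::Mat& img)
+{
+    cv::Ptr<cv::Feature2D> f2d = cv::ORB::create();
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    // One pass over the image; prefer this to detect() + compute() when both
+    // keypoints and descriptors are needed (see the AKAZE note below).
+    f2d->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
+}
+#endif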
+
+/** Feature detectors in OpenCV have wrappers with a common interface that enables you to easily switch
+between different algorithms solving the same problem. All objects that implement keypoint detectors
+inherit the FeatureDetector interface. */
+typedef Feature2D FeatureDetector;
+
+/** Extractors of keypoint descriptors in OpenCV have wrappers with a common interface that enables you
+to easily switch between different algorithms solving the same problem. This section is devoted to
+computing descriptors represented as vectors in a multidimensional space. All objects that implement
+the vector descriptor extractors inherit the DescriptorExtractor interface.
+ */
+typedef Feature2D DescriptorExtractor;
+
+//! @addtogroup features2d_main
+//! @{
+
+/** @brief Class implementing the BRISK keypoint detector and descriptor extractor, described in @cite LCS11 .
+ */
+class CV_EXPORTS_W BRISK : public Feature2D
+{
+public:
+    /** @brief The BRISK constructor
+
+    @param thresh AGAST detection threshold score.
+    @param octaves detection octaves. Use 0 to do single scale.
+    @param patternScale apply this scale to the pattern used for sampling the neighbourhood of a
+    keypoint.
+     */
+    CV_WRAP static Ptr<BRISK> create(int thresh=30, int octaves=3, float patternScale=1.0f);
+
+    /** @brief The BRISK constructor for a custom pattern
+
+    @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for
+    keypoint scale 1).
+    @param numberList defines the number of sampling points on the sampling circle. Must be the same
+    size as radiusList.
+    @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint
+    scale 1).
+    @param dMin threshold for the long pairings used for orientation determination (in pixels for
+    keypoint scale 1).
+    @param indexChange index remapping of the bits. */
+    CV_WRAP static Ptr<BRISK> create(const std::vector<float> &radiusList, const std::vector<int> &numberList,
+        float dMax=5.85f, float dMin=8.2f, const std::vector<int>& indexChange=std::vector<int>());
+
+    /** @brief The BRISK constructor for a custom pattern, detection threshold and octaves
+
+    @param thresh AGAST detection threshold score.
+    @param octaves detection octaves. Use 0 to do single scale.
+    @param radiusList defines the radii (in pixels) where the samples around a keypoint are taken (for
+    keypoint scale 1).
+    @param numberList defines the number of sampling points on the sampling circle. Must be the same
+    size as radiusList.
+    @param dMax threshold for the short pairings used for descriptor formation (in pixels for keypoint
+    scale 1).
+    @param dMin threshold for the long pairings used for orientation determination (in pixels for
+    keypoint scale 1).
+    @param indexChange index remapping of the bits. */
+    CV_WRAP static Ptr<BRISK> create(int thresh, int octaves, const std::vector<float> &radiusList,
+        const std::vector<int> &numberList, float dMax=5.85f, float dMin=8.2f,
+        const std::vector<int>& indexChange=std::vector<int>());
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
+
+described in @cite RRKB11 . The algorithm uses FAST in pyramids to detect stable keypoints, selects
+the strongest features using FAST or Harris response, finds their orientation using first-order
+moments and computes the descriptors using BRIEF (where the coordinates of random point pairs (or
+k-tuples) are rotated according to the measured orientation).
+ */
+class CV_EXPORTS_W ORB : public Feature2D
+{
+public:
+    enum { kBytes = 32, HARRIS_SCORE=0, FAST_SCORE=1 };
+
+    /** @brief The ORB constructor
+
+    @param nfeatures The maximum number of features to retain.
+    @param scaleFactor Pyramid decimation ratio, greater than 1. scaleFactor==2 means the classical
+    pyramid, where each next level has 4x fewer pixels than the previous, but such a big scale factor
+    will degrade feature matching scores dramatically. On the other hand, a scale factor too close
+    to 1 means that covering a certain scale range requires more pyramid levels, so speed
+    will suffer.
+    @param nlevels The number of pyramid levels. The smallest level will have linear size equal to
+    input_image_linear_size/pow(scaleFactor, nlevels - firstLevel).
+    @param edgeThreshold This is the size of the border where the features are not detected. It
+    should roughly match the patchSize parameter.
+    @param firstLevel The level of pyramid to put source image to. Previous layers are filled
+    with upscaled source image.
+    @param WTA_K The number of points that produce each element of the oriented BRIEF descriptor. The
+    default value 2 means the BRIEF where we take a random point pair and compare their brightnesses,
+    so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3
+    random points (of course, those point coordinates are random, but they are generated from the
+    pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel
+    rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such
+    output will occupy 2 bits, and therefore it will need a special variant of Hamming distance,
+    denoted as NORM_HAMMING2 (2 bits per bin). When WTA_K=4, we take 4 random points to compute each
+    bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
+    @param scoreType The default HARRIS_SCORE means that Harris algorithm is used to rank features
+    (the score is written to KeyPoint::response and is used to retain the best nfeatures features);
+    FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints,
+    but it is a little faster to compute.
+    @param patchSize size of the patch used by the oriented BRIEF descriptor. Of course, on smaller
+    pyramid layers the perceived image area covered by a feature will be larger.
+    @param fastThreshold Threshold passed to the FAST detector on each pyramid layer.
+     */
+    CV_WRAP static Ptr<ORB> create(int nfeatures=500, float scaleFactor=1.2f, int nlevels=8, int edgeThreshold=31,
+        int firstLevel=0, int WTA_K=2, int scoreType=ORB::HARRIS_SCORE, int patchSize=31, int fastThreshold=20);
+
+    CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0;
+    CV_WRAP virtual int getMaxFeatures() const = 0;
+
+    CV_WRAP virtual void setScaleFactor(double scaleFactor) = 0;
+    CV_WRAP virtual double getScaleFactor() const = 0;
+
+    CV_WRAP virtual void setNLevels(int nlevels) = 0;
+    CV_WRAP virtual int getNLevels() const = 0;
+
+    CV_WRAP virtual void setEdgeThreshold(int edgeThreshold) = 0;
+    CV_WRAP virtual int getEdgeThreshold() const = 0;
+
+    CV_WRAP virtual void setFirstLevel(int firstLevel) = 0;
+    CV_WRAP virtual int getFirstLevel() const = 0;
+
+    CV_WRAP virtual void setWTA_K(int wta_k) = 0;
+    CV_WRAP virtual int getWTA_K() const = 0;
+
+    CV_WRAP virtual void setScoreType(int scoreType) = 0;
+    CV_WRAP virtual int getScoreType() const = 0;
+
+    CV_WRAP virtual void setPatchSize(int patchSize) = 0;
+    CV_WRAP virtual int getPatchSize() const = 0;
+
+    CV_WRAP virtual void setFastThreshold(int fastThreshold) = 0;
+    CV_WRAP virtual int getFastThreshold() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
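+
+// A minimal ORB configuration sketch (the parameter values are illustrative):
+#if 0
+void orbSketch()
+{
+    cv::Ptr<cv::ORB> orb = cv::ORB::create(1000 /*nfeatures*/, 1.2f /*scaleFactor*/, 8 /*nlevels*/);
+    orb->setFastThreshold(10);               // lower threshold -> more keypoints
+    orb->setScoreType(cv::ORB::FAST_SCORE);  // slightly less stable, faster ranking
+    // ORB descriptors are binary; match them with NORM_HAMMING
+    // (NORM_HAMMING2 when WTA_K is 3 or 4).
+}
+#endif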
+
+/** @brief Maximally stable extremal region extractor
+
+The class encapsulates all the parameters of the %MSER extraction algorithm (see [wiki
+article](http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions)).
+
+- there are two different implementations of %MSER: one for grey images, one for color images
+
+- the grey image algorithm is taken from: @cite nister2008linear ; the paper claims to be faster
+than the union-find method; it actually gets 1.5~2m/s on my centrino L7200 1.2GHz laptop.
+
+- the color image algorithm is taken from: @cite forssen2007maximally ; it should be much slower
+than the grey image method (3~4 times); the chi_table.h file is taken directly from the paper's
+source code, which is distributed under GPL.
+
+- (Python) A complete example showing the use of the %MSER detector can be found at samples/python/mser.py
+*/
+class CV_EXPORTS_W MSER : public Feature2D
+{
+public:
+    /** @brief Full constructor for the %MSER detector
+
+    @param _delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$
+    @param _min_area prune regions whose area is smaller than minArea
+    @param _max_area prune regions whose area is bigger than maxArea
+    @param _max_variation prune regions that have a size similar to their children
+    @param _min_diversity for color image, trace back to cut off mser with diversity less than min_diversity
+    @param _max_evolution  for color image, the evolution steps
+    @param _area_threshold for color image, the area threshold that causes re-initialization
+    @param _min_margin for color image, ignore regions with too small a margin
+    @param _edge_blur_size for color image, the aperture size for edge blur
+     */
+    CV_WRAP static Ptr<MSER> create( int _delta=5, int _min_area=60, int _max_area=14400,
+          double _max_variation=0.25, double _min_diversity=.2,
+          int _max_evolution=200, double _area_threshold=1.01,
+          double _min_margin=0.003, int _edge_blur_size=5 );
+
+    /** @brief Detect %MSER regions
+
+    @param image input image (8UC1, 8UC3 or 8UC4; must be greater than or equal to 3x3)
+    @param msers resulting list of point sets
+    @param bboxes resulting bounding boxes
+    */
+    CV_WRAP virtual void detectRegions( InputArray image,
+                                        CV_OUT std::vector<std::vector<Point> >& msers,
+                                        CV_OUT std::vector<Rect>& bboxes ) = 0;
+
+    CV_WRAP virtual void setDelta(int delta) = 0;
+    CV_WRAP virtual int getDelta() const = 0;
+
+    CV_WRAP virtual void setMinArea(int minArea) = 0;
+    CV_WRAP virtual int getMinArea() const = 0;
+
+    CV_WRAP virtual void setMaxArea(int maxArea) = 0;
+    CV_WRAP virtual int getMaxArea() const = 0;
+
+    CV_WRAP virtual void setPass2Only(bool f) = 0;
+    CV_WRAP virtual bool getPass2Only() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
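+
+// A minimal MSER sketch (an 8-bit input image is assumed):
+#if 0
+void mserSketch(const cv::Mat& gray)
+{
+    cv::Ptr<cv::MSER> mser = cv::MSER::create();  // default delta/minArea/maxArea
+    std::vector<std::vector<cv::Point> > regions;
+    std::vector<cv::Rect> bboxes;
+    mser->detectRegions(gray, regions, bboxes);   // one point set + box per region
+}
+#endif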
+
+/** @overload */
+CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
+                      int threshold, bool nonmaxSuppression=true );
+
+/** @brief Detects corners using the FAST algorithm
+
+@param image grayscale image where keypoints (corners) are detected.
+@param keypoints keypoints detected on the image.
+@param threshold threshold on difference between intensity of the central pixel and pixels of a
+circle around this pixel.
+@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners
+(keypoints).
+@param type one of the three neighborhoods as defined in the paper:
+FastFeatureDetector::TYPE_9_16, FastFeatureDetector::TYPE_7_12,
+FastFeatureDetector::TYPE_5_8
+
+Detects corners using the FAST algorithm by @cite Rosten06 .
+
+@note In Python API, types are given as cv2.FAST_FEATURE_DETECTOR_TYPE_5_8,
+cv2.FAST_FEATURE_DETECTOR_TYPE_7_12 and cv2.FAST_FEATURE_DETECTOR_TYPE_9_16. For corner
+detection, use cv2.FAST.detect() method.
+ */
+CV_EXPORTS void FAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
+                      int threshold, bool nonmaxSuppression, int type );
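+
+// A minimal FAST sketch (a grayscale input image is assumed; the threshold
+// value is illustrative):
+#if 0
+void fastSketch(const cv::Mat& gray)
+{
+    std::vector<cv::KeyPoint> corners;
+    cv::FAST(gray, corners, 20 /*threshold*/, true /*nonmaxSuppression*/);
+}
+#endif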
+
+//! @} features2d_main
+
+//! @addtogroup features2d_main
+//! @{
+
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
+class CV_EXPORTS_W FastFeatureDetector : public Feature2D
+{
+public:
+    enum
+    {
+        TYPE_5_8 = 0, TYPE_7_12 = 1, TYPE_9_16 = 2,
+        THRESHOLD = 10000, NONMAX_SUPPRESSION=10001, FAST_N=10002,
+    };
+
+    CV_WRAP static Ptr<FastFeatureDetector> create( int threshold=10,
+                                                    bool nonmaxSuppression=true,
+                                                    int type=FastFeatureDetector::TYPE_9_16 );
+
+    CV_WRAP virtual void setThreshold(int threshold) = 0;
+    CV_WRAP virtual int getThreshold() const = 0;
+
+    CV_WRAP virtual void setNonmaxSuppression(bool f) = 0;
+    CV_WRAP virtual bool getNonmaxSuppression() const = 0;
+
+    CV_WRAP virtual void setType(int type) = 0;
+    CV_WRAP virtual int getType() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+/** @overload */
+CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
+                      int threshold, bool nonmaxSuppression=true );
+
+/** @brief Detects corners using the AGAST algorithm
+
+@param image grayscale image where keypoints (corners) are detected.
+@param keypoints keypoints detected on the image.
+@param threshold threshold on difference between intensity of the central pixel and pixels of a
+circle around this pixel.
+@param nonmaxSuppression if true, non-maximum suppression is applied to detected corners
+(keypoints).
+@param type one of the four neighborhoods as defined in the paper:
+AgastFeatureDetector::AGAST_5_8, AgastFeatureDetector::AGAST_7_12d,
+AgastFeatureDetector::AGAST_7_12s, AgastFeatureDetector::OAST_9_16
+
+For non-Intel platforms, there is a tree-optimised variant of AGAST with the same numerical results.
+The 32-bit binary tree tables were generated automatically from the original code using a perl script.
+The perl script and examples of tree generation are placed in the features2d/doc folder.
+Detects corners using the AGAST algorithm by @cite mair2010_agast .
+
+ */
+CV_EXPORTS void AGAST( InputArray image, CV_OUT std::vector<KeyPoint>& keypoints,
+                      int threshold, bool nonmaxSuppression, int type );
+//! @} features2d_main
+
+//! @addtogroup features2d_main
+//! @{
+
+/** @brief Wrapping class for feature detection using the AGAST method.
+ */
+class CV_EXPORTS_W AgastFeatureDetector : public Feature2D
+{
+public:
+    enum
+    {
+        AGAST_5_8 = 0, AGAST_7_12d = 1, AGAST_7_12s = 2, OAST_9_16 = 3,
+        THRESHOLD = 10000, NONMAX_SUPPRESSION = 10001,
+    };
+
+    CV_WRAP static Ptr<AgastFeatureDetector> create( int threshold=10,
+                                                     bool nonmaxSuppression=true,
+                                                     int type=AgastFeatureDetector::OAST_9_16 );
+
+    CV_WRAP virtual void setThreshold(int threshold) = 0;
+    CV_WRAP virtual int getThreshold() const = 0;
+
+    CV_WRAP virtual void setNonmaxSuppression(bool f) = 0;
+    CV_WRAP virtual bool getNonmaxSuppression() const = 0;
+
+    CV_WRAP virtual void setType(int type) = 0;
+    CV_WRAP virtual int getType() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+/** @brief Wrapping class for feature detection using the goodFeaturesToTrack function.
+ */
+class CV_EXPORTS_W GFTTDetector : public Feature2D
+{
+public:
+    CV_WRAP static Ptr<GFTTDetector> create( int maxCorners=1000, double qualityLevel=0.01, double minDistance=1,
+                                             int blockSize=3, bool useHarrisDetector=false, double k=0.04 );
+    CV_WRAP static Ptr<GFTTDetector> create( int maxCorners, double qualityLevel, double minDistance,
+                                             int blockSize, int gradiantSize, bool useHarrisDetector=false, double k=0.04 );
+    CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0;
+    CV_WRAP virtual int getMaxFeatures() const = 0;
+
+    CV_WRAP virtual void setQualityLevel(double qlevel) = 0;
+    CV_WRAP virtual double getQualityLevel() const = 0;
+
+    CV_WRAP virtual void setMinDistance(double minDistance) = 0;
+    CV_WRAP virtual double getMinDistance() const = 0;
+
+    CV_WRAP virtual void setBlockSize(int blockSize) = 0;
+    CV_WRAP virtual int getBlockSize() const = 0;
+
+    CV_WRAP virtual void setHarrisDetector(bool val) = 0;
+    CV_WRAP virtual bool getHarrisDetector() const = 0;
+
+    CV_WRAP virtual void setK(double k) = 0;
+    CV_WRAP virtual double getK() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+/** @brief Class for extracting blobs from an image.
+
+The class implements a simple algorithm for extracting blobs from an image:
+
+1.  Convert the source image to binary images by applying thresholding with several thresholds from
+    minThreshold (inclusive) to maxThreshold (exclusive) with distance thresholdStep between
+    neighboring thresholds.
+2.  Extract connected components from every binary image by findContours and calculate their
+    centers.
+3.  Group centers from several binary images by their coordinates. Close centers form one group that
+    corresponds to one blob, which is controlled by the minDistBetweenBlobs parameter.
+4.  From the groups, estimate the final centers of blobs and their radii, and return them as the
+    locations and sizes of keypoints.
+
+This class performs several filtering steps on the returned blobs. You should set filterBy\* to
+true/false to turn the corresponding filter on/off. Available filters:
+
+-   **By color**. This filter compares the intensity of a binary image at the center of a blob to
+blobColor. If they differ, the blob is filtered out. Use blobColor = 0 to extract dark blobs
+and blobColor = 255 to extract light blobs.
+-   **By area**. Extracted blobs have an area between minArea (inclusive) and maxArea (exclusive).
+-   **By circularity**. Extracted blobs have circularity
+(\f$\frac{4*\pi*Area}{perimeter * perimeter}\f$) between minCircularity (inclusive) and
+maxCircularity (exclusive).
+-   **By ratio of the minimum inertia to maximum inertia**. Extracted blobs have this ratio
+between minInertiaRatio (inclusive) and maxInertiaRatio (exclusive).
+-   **By convexity**. Extracted blobs have convexity (area / area of blob convex hull) between
+minConvexity (inclusive) and maxConvexity (exclusive).
+
+Default values of parameters are tuned to extract dark circular blobs.
+ */
+class CV_EXPORTS_W SimpleBlobDetector : public Feature2D
+{
+public:
+  struct CV_EXPORTS_W_SIMPLE Params
+  {
+      CV_WRAP Params();
+      CV_PROP_RW float thresholdStep;
+      CV_PROP_RW float minThreshold;
+      CV_PROP_RW float maxThreshold;
+      CV_PROP_RW size_t minRepeatability;
+      CV_PROP_RW float minDistBetweenBlobs;
+
+      CV_PROP_RW bool filterByColor;
+      CV_PROP_RW uchar blobColor;
+
+      CV_PROP_RW bool filterByArea;
+      CV_PROP_RW float minArea, maxArea;
+
+      CV_PROP_RW bool filterByCircularity;
+      CV_PROP_RW float minCircularity, maxCircularity;
+
+      CV_PROP_RW bool filterByInertia;
+      CV_PROP_RW float minInertiaRatio, maxInertiaRatio;
+
+      CV_PROP_RW bool filterByConvexity;
+      CV_PROP_RW float minConvexity, maxConvexity;
+
+      void read( const FileNode& fn );
+      void write( FileStorage& fs ) const;
+  };
+
+  CV_WRAP static Ptr<SimpleBlobDetector>
+    create(const SimpleBlobDetector::Params &parameters = SimpleBlobDetector::Params());
+  CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
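+
+// A minimal blob-detector sketch (the area and color values are illustrative):
+#if 0
+void blobSketch(const cv::Mat& img)
+{
+    cv::SimpleBlobDetector::Params p;
+    p.filterByArea = true;
+    p.minArea = 100.0f;                  // inclusive
+    p.maxArea = 5000.0f;                 // exclusive
+    p.filterByColor = true;
+    p.blobColor = 255;                   // extract light blobs
+    cv::Ptr<cv::SimpleBlobDetector> det = cv::SimpleBlobDetector::create(p);
+    std::vector<cv::KeyPoint> blobs;     // blob centers/sizes come back as keypoints
+    det->detect(img, blobs);
+}
+#endif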
+
+//! @} features2d_main
+
+//! @addtogroup features2d_main
+//! @{
+
+/** @brief Class implementing the KAZE keypoint detector and descriptor extractor, described in @cite ABD12 .
+
+@note AKAZE descriptor can only be used with KAZE or AKAZE keypoints. [ABD12]: KAZE Features. Pablo
+F. Alcantarilla, Adrien Bartoli and Andrew J. Davison. In European Conference on Computer Vision
+(ECCV), Florence, Italy, October 2012.
+*/
+class CV_EXPORTS_W KAZE : public Feature2D
+{
+public:
+    enum
+    {
+        DIFF_PM_G1 = 0,
+        DIFF_PM_G2 = 1,
+        DIFF_WEICKERT = 2,
+        DIFF_CHARBONNIER = 3
+    };
+
+    /** @brief The KAZE constructor
+
+    @param extended Set to enable extraction of extended (128-byte) descriptor.
+    @param upright Set to enable use of upright descriptors (non rotation-invariant).
+    @param threshold Detector response threshold to accept point
+    @param nOctaves Maximum octave evolution of the image
+    @param nOctaveLayers Default number of sublevels per scale level
+    @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
+    DIFF_CHARBONNIER
+     */
+    CV_WRAP static Ptr<KAZE> create(bool extended=false, bool upright=false,
+                                    float threshold = 0.001f,
+                                    int nOctaves = 4, int nOctaveLayers = 4,
+                                    int diffusivity = KAZE::DIFF_PM_G2);
+
+    CV_WRAP virtual void setExtended(bool extended) = 0;
+    CV_WRAP virtual bool getExtended() const = 0;
+
+    CV_WRAP virtual void setUpright(bool upright) = 0;
+    CV_WRAP virtual bool getUpright() const = 0;
+
+    CV_WRAP virtual void setThreshold(double threshold) = 0;
+    CV_WRAP virtual double getThreshold() const = 0;
+
+    CV_WRAP virtual void setNOctaves(int octaves) = 0;
+    CV_WRAP virtual int getNOctaves() const = 0;
+
+    CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0;
+    CV_WRAP virtual int getNOctaveLayers() const = 0;
+
+    CV_WRAP virtual void setDiffusivity(int diff) = 0;
+    CV_WRAP virtual int getDiffusivity() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+/** @brief Class implementing the AKAZE keypoint detector and descriptor extractor, described in @cite ANB13.
+
+@details AKAZE descriptors can only be used with KAZE or AKAZE keypoints. This class is thread-safe.
+
+@note When you need descriptors, use Feature2D::detectAndCompute, which
+provides better performance. When using Feature2D::detect followed by
+Feature2D::compute, the scale space pyramid is computed twice.
+
+@note AKAZE implements T-API. When image is passed as UMat some parts of the algorithm
+will use OpenCL.
+
+@note [ANB13] Fast Explicit Diffusion for Accelerated Features in Nonlinear
+Scale Spaces. Pablo F. Alcantarilla, Jesús Nuevo and Adrien Bartoli. In
+British Machine Vision Conference (BMVC), Bristol, UK, September 2013.
+
+*/
+class CV_EXPORTS_W AKAZE : public Feature2D
+{
+public:
+    // AKAZE descriptor type
+    enum
+    {
+        DESCRIPTOR_KAZE_UPRIGHT = 2, ///< Upright descriptors, not invariant to rotation
+        DESCRIPTOR_KAZE = 3,
+        DESCRIPTOR_MLDB_UPRIGHT = 4, ///< Upright descriptors, not invariant to rotation
+        DESCRIPTOR_MLDB = 5
+    };
+
+    /** @brief The AKAZE constructor
+
+    @param descriptor_type Type of the extracted descriptor: DESCRIPTOR_KAZE,
+    DESCRIPTOR_KAZE_UPRIGHT, DESCRIPTOR_MLDB or DESCRIPTOR_MLDB_UPRIGHT.
+    @param descriptor_size Size of the descriptor in bits. 0 -\> Full size
+    @param descriptor_channels Number of channels in the descriptor (1, 2, 3)
+    @param threshold Detector response threshold to accept point
+    @param nOctaves Maximum octave evolution of the image
+    @param nOctaveLayers Default number of sublevels per scale level
+    @param diffusivity Diffusivity type. DIFF_PM_G1, DIFF_PM_G2, DIFF_WEICKERT or
+    DIFF_CHARBONNIER
+     */
+    CV_WRAP static Ptr<AKAZE> create(int descriptor_type=AKAZE::DESCRIPTOR_MLDB,
+                                     int descriptor_size = 0, int descriptor_channels = 3,
+                                     float threshold = 0.001f, int nOctaves = 4,
+                                     int nOctaveLayers = 4, int diffusivity = KAZE::DIFF_PM_G2);
+
+    CV_WRAP virtual void setDescriptorType(int dtype) = 0;
+    CV_WRAP virtual int getDescriptorType() const = 0;
+
+    CV_WRAP virtual void setDescriptorSize(int dsize) = 0;
+    CV_WRAP virtual int getDescriptorSize() const = 0;
+
+    CV_WRAP virtual void setDescriptorChannels(int dch) = 0;
+    CV_WRAP virtual int getDescriptorChannels() const = 0;
+
+    CV_WRAP virtual void setThreshold(double threshold) = 0;
+    CV_WRAP virtual double getThreshold() const = 0;
+
+    CV_WRAP virtual void setNOctaves(int octaves) = 0;
+    CV_WRAP virtual int getNOctaves() const = 0;
+
+    CV_WRAP virtual void setNOctaveLayers(int octaveLayers) = 0;
+    CV_WRAP virtual int getNOctaveLayers() const = 0;
+
+    CV_WRAP virtual void setDiffusivity(int diff) = 0;
+    CV_WRAP virtual int getDiffusivity() const = 0;
+    CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
+};
+
+//! @} features2d_main
+
+/****************************************************************************************\
+*                                      Distance                                          *
+\****************************************************************************************/
+
+template<typename T>
+struct CV_EXPORTS Accumulator
+{
+    typedef T Type;
+};
+
+template<> struct Accumulator<unsigned char>  { typedef float Type; };
+template<> struct Accumulator<unsigned short> { typedef float Type; };
+template<> struct Accumulator<char>   { typedef float Type; };
+template<> struct Accumulator<short>  { typedef float Type; };
+
+/*
+ * Squared Euclidean distance functor
+ */
+template<class T>
+struct CV_EXPORTS SL2
+{
+    enum { normType = NORM_L2SQR };
+    typedef T ValueType;
+    typedef typename Accumulator<T>::Type ResultType;
+
+    ResultType operator()( const T* a, const T* b, int size ) const
+    {
+        return normL2Sqr<ValueType, ResultType>(a, b, size);
+    }
+};
+
+/*
+ * Euclidean distance functor
+ */
+template<class T>
+struct L2
+{
+    enum { normType = NORM_L2 };
+    typedef T ValueType;
+    typedef typename Accumulator<T>::Type ResultType;
+
+    ResultType operator()( const T* a, const T* b, int size ) const
+    {
+        return (ResultType)std::sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+    }
+};
+
+/*
+ * Manhattan distance (city block distance) functor
+ */
+template<class T>
+struct L1
+{
+    enum { normType = NORM_L1 };
+    typedef T ValueType;
+    typedef typename Accumulator<T>::Type ResultType;
+
+    ResultType operator()( const T* a, const T* b, int size ) const
+    {
+        return normL1<ValueType, ResultType>(a, b, size);
+    }
+};
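+
+// A minimal sketch of the distance functors (float descriptors assumed):
+#if 0
+float distanceSketch(const float* a, const float* b, int n)
+{
+    cv::SL2<float> sl2;   // squared Euclidean: cheaper when only ranking matters
+    cv::L2<float> l2;     // Euclidean: l2(a,b,n) == sqrt(sl2(a,b,n)) up to rounding
+    float d1 = sl2(a, b, n);
+    float d2 = l2(a, b, n);
+    return d2 * d2 - d1;  // ~0
+}
+#endif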
+
+/****************************************************************************************\
+*                                  DescriptorMatcher                                     *
+\****************************************************************************************/
+
+//! @addtogroup features2d_match
+//! @{
+
+/** @brief Abstract base class for matching keypoint descriptors.
+
+It has two groups of match methods: for matching descriptors of an image with another image or with
+an image set.
+ */
+class CV_EXPORTS_W DescriptorMatcher : public Algorithm
+{
+public:
+    enum
+    {
+        FLANNBASED            = 1,
+        BRUTEFORCE            = 2,
+        BRUTEFORCE_L1         = 3,
+        BRUTEFORCE_HAMMING    = 4,
+        BRUTEFORCE_HAMMINGLUT = 5,
+        BRUTEFORCE_SL2        = 6
+    };
+    virtual ~DescriptorMatcher();
+
+    /** @brief Adds descriptors to train a CPU (trainDescCollection) or GPU (utrainDescCollection)
+    descriptor collection.
+
+    If the collection is not empty, the new descriptors are added to existing train descriptors.
+
+    @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
+    train image.
+     */
+    CV_WRAP virtual void add( InputArrayOfArrays descriptors );
+
+    /** @brief Returns a constant reference to the train descriptor collection trainDescCollection.
+     */
+    CV_WRAP const std::vector<Mat>& getTrainDescriptors() const;
+
+    /** @brief Clears the train descriptor collections.
+     */
+    CV_WRAP virtual void clear() CV_OVERRIDE;
+
+    /** @brief Returns true if there are no train descriptors in both collections.
+     */
+    CV_WRAP virtual bool empty() const CV_OVERRIDE;
+
+    /** @brief Returns true if the descriptor matcher supports masking permissible matches.
+     */
+    CV_WRAP virtual bool isMaskSupported() const = 0;
+
+    /** @brief Trains a descriptor matcher
+
+    Trains a descriptor matcher (for example, the flann index). In all matching methods, the method
+    train() is run every time before matching. Some descriptor matchers (for example, BruteForceMatcher)
+    have an empty implementation of this method. Other matchers really train their inner structures (for
+    example, FlannBasedMatcher trains flann::Index ).
+     */
+    CV_WRAP virtual void train();
+
+    /** @brief Finds the best match for each descriptor from a query set.
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches. If a query descriptor is masked out in mask , no match is added for this
+    descriptor. So, matches size may be smaller than the query descriptors count.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+
+    In the first variant of this method, the train descriptors are passed as an input argument. In the
+    second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
+    used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
+    matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
+    mask.at\<uchar\>(i,j) is non-zero.
+     */
+    CV_WRAP void match( InputArray queryDescriptors, InputArray trainDescriptors,
+                CV_OUT std::vector<DMatch>& matches, InputArray mask=noArray() ) const;
+
+    /** @brief Finds the k best matches for each descriptor from a query set.
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
+    @param k Count of best matches found per each query descriptor, or fewer if a query descriptor
+    has fewer than k possible matches in total.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+
+    These extended variants of DescriptorMatcher::match methods find several best matches for each query
+    descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match
+    for the details about query and train descriptors.
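+
+    A minimal sketch of the common ratio-test filtering on top of knnMatch ("query" and "train"
+    are hypothetical CV_32F descriptor matrices, shown for illustration):
+    @code
+    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce");
+    std::vector<std::vector<DMatch> > knn;
+    matcher->knnMatch(query, train, knn, 2); // two best matches per query descriptor
+    std::vector<DMatch> good;
+    for (size_t i = 0; i < knn.size(); i++)
+        if (knn[i].size() == 2 && knn[i][0].distance < 0.75f * knn[i][1].distance)
+            good.push_back(knn[i][0]); // keep only clearly unambiguous matches
+    @endcode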
+     */
+    CV_WRAP void knnMatch( InputArray queryDescriptors, InputArray trainDescriptors,
+                   CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
+                   InputArray mask=noArray(), bool compactResult=false ) const;
+
+    /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance.
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Found matches.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+    @param maxDistance Threshold for the distance between matched descriptors. Distance here means
+    metric distance (e.g. Hamming distance), not the distance between coordinates (which is
+    measured in pixels)!
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+
+    For each query descriptor, the methods find such training descriptors that the distance between the
+    query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
+    returned in the distance increasing order.
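+
+    A minimal sketch (assumes "queryDesc" and "trainDesc" are hypothetical CV_8U binary
+    descriptor matrices, e.g. from ORB, so distances are Hamming distances):
+    @code
+    BFMatcher matcher(NORM_HAMMING);
+    std::vector<std::vector<DMatch> > matches;
+    matcher.radiusMatch(queryDesc, trainDesc, matches, 40.0f); // all matches within distance 40
+    @endcode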
+     */
+    CV_WRAP void radiusMatch( InputArray queryDescriptors, InputArray trainDescriptors,
+                      CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
+                      InputArray mask=noArray(), bool compactResult=false ) const;
+
+    /** @overload
+    @param queryDescriptors Query set of descriptors.
+    @param matches Matches. If a query descriptor is masked out in mask , no match is added for this
+    descriptor. So, matches size may be smaller than the query descriptors count.
+    @param masks Set of masks. Each masks[i] specifies permissible matches between the input query
+    descriptors and stored train descriptors from the i-th image trainDescCollection[i].
+    */
+    CV_WRAP void match( InputArray queryDescriptors, CV_OUT std::vector<DMatch>& matches,
+                        InputArrayOfArrays masks=noArray() );
+    /** @overload
+    @param queryDescriptors Query set of descriptors.
+    @param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
+    @param k Count of best matches found per each query descriptor, or fewer if a query descriptor
+    has fewer than k possible matches in total.
+    @param masks Set of masks. Each masks[i] specifies permissible matches between the input query
+    descriptors and stored train descriptors from the i-th image trainDescCollection[i].
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+    */
+    CV_WRAP void knnMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, int k,
+                           InputArrayOfArrays masks=noArray(), bool compactResult=false );
+    /** @overload
+    @param queryDescriptors Query set of descriptors.
+    @param matches Found matches.
+    @param maxDistance Threshold for the distance between matched descriptors. Distance here means
+    metric distance (e.g. Hamming distance), not the distance between coordinates (which is
+    measured in pixels)!
+    @param masks Set of masks. Each masks[i] specifies permissible matches between the input query
+    descriptors and stored train descriptors from the i-th image trainDescCollection[i].
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+    */
+    CV_WRAP void radiusMatch( InputArray queryDescriptors, CV_OUT std::vector<std::vector<DMatch> >& matches, float maxDistance,
+                      InputArrayOfArrays masks=noArray(), bool compactResult=false );
+
+
+    CV_WRAP void write( const String& fileName ) const
+    {
+        FileStorage fs(fileName, FileStorage::WRITE);
+        write(fs);
+    }
+
+    CV_WRAP void read( const String& fileName )
+    {
+        FileStorage fs(fileName, FileStorage::READ);
+        read(fs.root());
+    }
+    // Reads matcher object from a file node
+    // see corresponding cv::Algorithm method
+    CV_WRAP virtual void read( const FileNode& ) CV_OVERRIDE;
+    // Writes matcher object to a file storage
+    virtual void write( FileStorage& ) const CV_OVERRIDE;
+
+    /** @brief Clones the matcher.
+
+    @param emptyTrainData If emptyTrainData is false, the method creates a deep copy of the object,
+    that is, copies both parameters and train data. If emptyTrainData is true, the method creates an
+    object copy with the current parameters but with empty train data.
+     */
+    CV_WRAP virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const = 0;
+
+    /** @brief Creates a descriptor matcher of a given type with the default parameters (using default
+    constructor).
+
+    @param descriptorMatcherType Descriptor matcher type. Now the following matcher types are
+    supported:
+    -   `BruteForce` (it uses L2)
+    -   `BruteForce-L1`
+    -   `BruteForce-Hamming`
+    -   `BruteForce-Hamming(2)`
+    -   `FlannBased`
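+
+    For example (a minimal sketch):
+    @code
+    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
+    @endcode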
+     */
+    CV_WRAP static Ptr<DescriptorMatcher> create( const String& descriptorMatcherType );
+
+    CV_WRAP static Ptr<DescriptorMatcher> create( int matcherType );
+
+
+    // see corresponding cv::Algorithm method
+    CV_WRAP inline void write(const Ptr<FileStorage>& fs, const String& name = String()) const { Algorithm::write(fs, name); }
+
+protected:
+    /**
+     * Class to work with descriptors from several images as with one merged matrix.
+     * It is used e.g. in FlannBasedMatcher.
+     */
+    class CV_EXPORTS DescriptorCollection
+    {
+    public:
+        DescriptorCollection();
+        DescriptorCollection( const DescriptorCollection& collection );
+        virtual ~DescriptorCollection();
+
+        // Vector of matrices "descriptors" will be merged to one matrix "mergedDescriptors" here.
+        void set( const std::vector<Mat>& descriptors );
+        virtual void clear();
+
+        const Mat& getDescriptors() const;
+        const Mat getDescriptor( int imgIdx, int localDescIdx ) const;
+        const Mat getDescriptor( int globalDescIdx ) const;
+        void getLocalIdx( int globalDescIdx, int& imgIdx, int& localDescIdx ) const;
+
+        int size() const;
+
+    protected:
+        Mat mergedDescriptors;
+        std::vector<int> startIdxs;
+    };
+
+    //! The matching is actually implemented only by the following two methods, which assume
+    //! that the class object has already been trained. Public match methods call these methods
+    //! after calling train().
+    virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0;
+    virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) = 0;
+
+    static bool isPossibleMatch( InputArray mask, int queryIdx, int trainIdx );
+    static bool isMaskedOut( InputArrayOfArrays masks, int queryIdx );
+
+    static Mat clone_op( Mat m ) { return m.clone(); }
+    void checkMasks( InputArrayOfArrays masks, int queryDescriptorsCount ) const;
+
+    //! Collection of descriptors from train images.
+    std::vector<Mat> trainDescCollection;
+    std::vector<UMat> utrainDescCollection;
+};
+
+/** @brief Brute-force descriptor matcher.
+
+For each descriptor in the first set, this matcher finds the closest descriptor in the second set
+by trying each one. This descriptor matcher supports masking permissible matches of descriptor
+sets.
+ */
+class CV_EXPORTS_W BFMatcher : public DescriptorMatcher
+{
+public:
+    /** @brief Brute-force matcher constructor (obsolete). Please use BFMatcher::create()
+    */
+    CV_WRAP BFMatcher( int normType=NORM_L2, bool crossCheck=false );
+
+    virtual ~BFMatcher() {}
+
+    virtual bool isMaskSupported() const CV_OVERRIDE { return true; }
+
+    /** @brief Brute-force matcher create method.
+    @param normType One of NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are
+    preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and
+    BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 (see ORB::ORB constructor
+    description).
+    @param crossCheck If it is false, this will be the default BFMatcher behaviour when it finds the
+    k nearest neighbors for each query descriptor. If crossCheck==true, then the knnMatch() method
+    with k=1 will only return pairs (i,j) such that for the i-th query descriptor the j-th descriptor
+    in the matcher's collection is the nearest and vice versa, i.e. the BFMatcher will only return
+    consistent pairs. This technique usually produces the best results with a minimal number of
+    outliers when there are enough matches. It is an alternative to the ratio test used by D. Lowe
+    in the SIFT paper.
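+
+    A minimal sketch for binary descriptors ("queryDesc" and "trainDesc" are hypothetical
+    CV_8U ORB descriptor matrices, shown for illustration):
+    @code
+    Ptr<BFMatcher> matcher = BFMatcher::create(NORM_HAMMING, true); // cross-check enabled
+    std::vector<DMatch> matches;
+    matcher->match(queryDesc, trainDesc, matches); // only mutually-nearest pairs survive
+    @endcode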
+     */
+    CV_WRAP static Ptr<BFMatcher> create( int normType=NORM_L2, bool crossCheck=false );
+
+    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE;
+protected:
+    virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
+    virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
+
+    int normType;
+    bool crossCheck;
+};
+
+#if defined(HAVE_OPENCV_FLANN) || defined(CV_DOXYGEN)
+
+/** @brief Flann-based descriptor matcher.
+
+This matcher trains cv::flann::Index on a train descriptor collection and calls its nearest search
+methods to find the best matches. So, this matcher may be faster when matching a large train
+collection than the brute force matcher. FlannBasedMatcher does not support masking permissible
+matches of descriptor sets because flann::Index does not support this.
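+
+A minimal sketch ("queryDesc" and "trainDesc" are hypothetical CV_32F descriptor matrices,
+e.g. from a float-valued extractor):
+@code
+FlannBasedMatcher matcher; // default KD-tree index
+std::vector<DMatch> matches;
+matcher.match(queryDesc, trainDesc, matches);
+@endcode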
+ */
+class CV_EXPORTS_W FlannBasedMatcher : public DescriptorMatcher
+{
+public:
+    CV_WRAP FlannBasedMatcher( const Ptr<flann::IndexParams>& indexParams=makePtr<flann::KDTreeIndexParams>(),
+                       const Ptr<flann::SearchParams>& searchParams=makePtr<flann::SearchParams>() );
+
+    virtual void add( InputArrayOfArrays descriptors ) CV_OVERRIDE;
+    virtual void clear() CV_OVERRIDE;
+
+    // Reads matcher object from a file node
+    virtual void read( const FileNode& ) CV_OVERRIDE;
+    // Writes matcher object to a file storage
+    virtual void write( FileStorage& ) const CV_OVERRIDE;
+
+    virtual void train() CV_OVERRIDE;
+    virtual bool isMaskSupported() const CV_OVERRIDE;
+
+    CV_WRAP static Ptr<FlannBasedMatcher> create();
+
+    virtual Ptr<DescriptorMatcher> clone( bool emptyTrainData=false ) const CV_OVERRIDE;
+protected:
+    static void convertToDMatches( const DescriptorCollection& descriptors,
+                                   const Mat& indices, const Mat& distances,
+                                   std::vector<std::vector<DMatch> >& matches );
+
+    virtual void knnMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, int k,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
+    virtual void radiusMatchImpl( InputArray queryDescriptors, std::vector<std::vector<DMatch> >& matches, float maxDistance,
+        InputArrayOfArrays masks=noArray(), bool compactResult=false ) CV_OVERRIDE;
+
+    Ptr<flann::IndexParams> indexParams;
+    Ptr<flann::SearchParams> searchParams;
+    Ptr<flann::Index> flannIndex;
+
+    DescriptorCollection mergedDescriptors;
+    int addedDescCount;
+};
+
+#endif
+
+//! @} features2d_match
+
+/****************************************************************************************\
+*                                   Drawing functions                                    *
+\****************************************************************************************/
+
+//! @addtogroup features2d_draw
+//! @{
+
+struct CV_EXPORTS DrawMatchesFlags
+{
+    enum{ DEFAULT = 0, //!< Output image matrix will be created (Mat::create),
+                       //!< i.e. existing memory of output image may be reused.
+                       //!< Two source images, matches, and single keypoints will be drawn.
+                       //!< For each keypoint only the center point will be drawn (without
+                       //!< the circle around keypoint with keypoint size and orientation).
+          DRAW_OVER_OUTIMG = 1, //!< Output image matrix will not be created (Mat::create).
+                                //!< Matches will be drawn on existing content of output image.
+          NOT_DRAW_SINGLE_POINTS = 2, //!< Single keypoints will not be drawn.
+          DRAW_RICH_KEYPOINTS = 4 //!< For each keypoint the circle around keypoint with keypoint size and
+                                  //!< orientation will be drawn.
+        };
+};
+
+/** @brief Draws keypoints.
+
+@param image Source image.
+@param keypoints Keypoints from the source image.
+@param outImage Output image. Its content depends on the flags value defining what is drawn in the
+output image. See possible flags bit values below.
+@param color Color of keypoints.
+@param flags Flags setting drawing features. Possible flags bit values are defined by
+DrawMatchesFlags. See details above in drawMatches .
+
+@note
+In the Python API, the flags are exposed as cv2.DRAW_MATCHES_FLAGS_DEFAULT,
+cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS, cv2.DRAW_MATCHES_FLAGS_DRAW_OVER_OUTIMG, and
+cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS.
+ */
+CV_EXPORTS_W void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, InputOutputArray outImage,
+                               const Scalar& color=Scalar::all(-1), int flags=DrawMatchesFlags::DEFAULT );
+
+/** @brief Draws the found matches of keypoints from two images.
+
+@param img1 First source image.
+@param keypoints1 Keypoints from the first source image.
+@param img2 Second source image.
+@param keypoints2 Keypoints from the second source image.
+@param matches1to2 Matches from the first image to the second one, which means that keypoints1[i]
+has a corresponding point in keypoints2[matches1to2[i].trainIdx].
+@param outImg Output image. Its content depends on the flags value defining what is drawn in the
+output image. See possible flags bit values below.
+@param matchColor Color of matches (lines and connected keypoints). If matchColor==Scalar::all(-1)
+, the color is generated randomly.
+@param singlePointColor Color of single keypoints (circles), that is, keypoints that do not
+have matches. If singlePointColor==Scalar::all(-1) , the color is generated randomly.
+@param matchesMask Mask determining which matches are drawn. If the mask is empty, all matches are
+drawn.
+@param flags Flags setting drawing features. Possible flags bit values are defined by
+DrawMatchesFlags.
+
+This function draws matches of keypoints from two images in the output image. Match is a line
+connecting two keypoints (circles). See cv::DrawMatchesFlags.
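+
+A minimal sketch (assumes the keypoints and matches were computed beforehand):
+@code
+Mat outImg;
+drawMatches(img1, keypoints1, img2, keypoints2, matches1to2, outImg);
+@endcode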
+ */
+CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector<KeyPoint>& keypoints1,
+                             InputArray img2, const std::vector<KeyPoint>& keypoints2,
+                             const std::vector<DMatch>& matches1to2, InputOutputArray outImg,
+                             const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
+                             const std::vector<char>& matchesMask=std::vector<char>(), int flags=DrawMatchesFlags::DEFAULT );
+
+/** @overload */
+CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector<KeyPoint>& keypoints1,
+                             InputArray img2, const std::vector<KeyPoint>& keypoints2,
+                             const std::vector<std::vector<DMatch> >& matches1to2, InputOutputArray outImg,
+                             const Scalar& matchColor=Scalar::all(-1), const Scalar& singlePointColor=Scalar::all(-1),
+                             const std::vector<std::vector<char> >& matchesMask=std::vector<std::vector<char> >(), int flags=DrawMatchesFlags::DEFAULT );
+
+//! @} features2d_draw
+
+/****************************************************************************************\
+*   Functions to evaluate the feature detectors and [generic] descriptor extractors      *
+\****************************************************************************************/
+
+CV_EXPORTS void evaluateFeatureDetector( const Mat& img1, const Mat& img2, const Mat& H1to2,
+                                         std::vector<KeyPoint>* keypoints1, std::vector<KeyPoint>* keypoints2,
+                                         float& repeatability, int& correspCount,
+                                         const Ptr<FeatureDetector>& fdetector=Ptr<FeatureDetector>() );
+
+CV_EXPORTS void computeRecallPrecisionCurve( const std::vector<std::vector<DMatch> >& matches1to2,
+                                             const std::vector<std::vector<uchar> >& correctMatches1to2Mask,
+                                             std::vector<Point2f>& recallPrecisionCurve );
+
+CV_EXPORTS float getRecall( const std::vector<Point2f>& recallPrecisionCurve, float l_precision );
+CV_EXPORTS int getNearestPoint( const std::vector<Point2f>& recallPrecisionCurve, float l_precision );
+
+/****************************************************************************************\
+*                                     Bag of visual words                                *
+\****************************************************************************************/
+
+//! @addtogroup features2d_category
+//! @{
+
+/** @brief Abstract base class for training the *bag of visual words* vocabulary from a set of descriptors.
+
+For details, see, for example, *Visual Categorization with Bags of Keypoints* by Gabriella Csurka,
+Christopher R. Dance, Lixin Fan, Jutta Willamowski, Cedric Bray, 2004.
+ */
+class CV_EXPORTS_W BOWTrainer
+{
+public:
+    BOWTrainer();
+    virtual ~BOWTrainer();
+
+    /** @brief Adds descriptors to a training set.
+
+    @param descriptors Descriptors to add to a training set. Each row of the descriptors matrix is a
+    descriptor.
+
+    The training set is clustered using the cluster method to construct the vocabulary.
+     */
+    CV_WRAP void add( const Mat& descriptors );
+
+    /** @brief Returns a training set of descriptors.
+    */
+    CV_WRAP const std::vector<Mat>& getDescriptors() const;
+
+    /** @brief Returns the count of all descriptors stored in the training set.
+    */
+    CV_WRAP int descriptorsCount() const;
+
+    CV_WRAP virtual void clear();
+
+    /** @overload */
+    CV_WRAP virtual Mat cluster() const = 0;
+
+    /** @brief Clusters train descriptors.
+
+    @param descriptors Descriptors to cluster. Each row of the descriptors matrix is a descriptor.
+    Descriptors are not added to the inner train descriptor set.
+
+    The vocabulary consists of cluster centers. So, this method returns the vocabulary. In the first
+    variant of the method, train descriptors stored in the object are clustered. In the second variant,
+    input descriptors are clustered.
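+
+    A minimal sketch using the BOWKMeansTrainer inheritor ("descs1" and "descs2" are
+    hypothetical CV_32F descriptor matrices, one row per descriptor):
+    @code
+    BOWKMeansTrainer trainer(100); // 100 visual words
+    trainer.add(descs1);
+    trainer.add(descs2);
+    Mat vocabulary = trainer.cluster(); // rows are cluster centers
+    @endcode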
+     */
+    CV_WRAP virtual Mat cluster( const Mat& descriptors ) const = 0;
+
+protected:
+    std::vector<Mat> descriptors;
+    int size;
+};
+
+/** @brief kmeans-based class to train a visual vocabulary using the *bag of visual words* approach.
+ */
+class CV_EXPORTS_W BOWKMeansTrainer : public BOWTrainer
+{
+public:
+    /** @brief The constructor.
+
+    @see cv::kmeans
+    */
+    CV_WRAP BOWKMeansTrainer( int clusterCount, const TermCriteria& termcrit=TermCriteria(),
+                      int attempts=3, int flags=KMEANS_PP_CENTERS );
+    virtual ~BOWKMeansTrainer();
+
+    // Returns trained vocabulary (i.e. cluster centers).
+    CV_WRAP virtual Mat cluster() const CV_OVERRIDE;
+    CV_WRAP virtual Mat cluster( const Mat& descriptors ) const CV_OVERRIDE;
+
+protected:
+
+    int clusterCount;
+    TermCriteria termcrit;
+    int attempts;
+    int flags;
+};
+
+/** @brief Class to compute an image descriptor using the *bag of visual words*.
+
+Such a computation consists of the following steps:
+
+1.  Compute descriptors for a given image and its keypoints set.
+2.  Find the nearest visual words from the vocabulary for each keypoint descriptor.
+3.  Compute the bag-of-words image descriptor as a normalized histogram of vocabulary words
+encountered in the image. The i-th bin of the histogram is the frequency of the i-th vocabulary
+word in the given image.
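+
+A minimal sketch of this pipeline (the detector choice and the pre-trained "vocabulary" matrix
+are assumptions for illustration):
+@code
+Ptr<Feature2D> kaze = KAZE::create(); // produces float descriptors
+BOWImgDescriptorExtractor bowExtractor(kaze, DescriptorMatcher::create("FlannBased"));
+bowExtractor.setVocabulary(vocabulary); // e.g. trained with BOWKMeansTrainer
+std::vector<KeyPoint> keypoints;
+kaze->detect(image, keypoints);
+Mat bowDescriptor;
+bowExtractor.compute(image, keypoints, bowDescriptor); // normalized word histogram
+@endcode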
+ */
+class CV_EXPORTS_W BOWImgDescriptorExtractor
+{
+public:
+    /** @brief The constructor.
+
+    @param dextractor Descriptor extractor that is used to compute descriptors for an input image and
+    its keypoints.
+    @param dmatcher Descriptor matcher that is used to find the nearest word of the trained vocabulary
+    for each keypoint descriptor of the image.
+     */
+    CV_WRAP BOWImgDescriptorExtractor( const Ptr<DescriptorExtractor>& dextractor,
+                               const Ptr<DescriptorMatcher>& dmatcher );
+    /** @overload */
+    BOWImgDescriptorExtractor( const Ptr<DescriptorMatcher>& dmatcher );
+    virtual ~BOWImgDescriptorExtractor();
+
+    /** @brief Sets a visual vocabulary.
+
+    @param vocabulary Vocabulary (can be trained using the inheritor of BOWTrainer ). Each row of the
+    vocabulary is a visual word (cluster center).
+     */
+    CV_WRAP void setVocabulary( const Mat& vocabulary );
+
+    /** @brief Returns the set vocabulary.
+    */
+    CV_WRAP const Mat& getVocabulary() const;
+
+    /** @brief Computes an image descriptor using the set visual vocabulary.
+
+    @param image Image, for which the descriptor is computed.
+    @param keypoints Keypoints detected in the input image.
+    @param imgDescriptor Computed output image descriptor.
+    @param pointIdxsOfClusters Indices of keypoints that belong to each cluster: pointIdxsOfClusters[i]
+    holds the indices of the keypoints assigned to the i-th cluster (vocabulary word). Returned only
+    if the pointer is non-zero.
+    @param descriptors Descriptors of the image keypoints. Returned only if the pointer is non-zero.
+     */
+    void compute( InputArray image, std::vector<KeyPoint>& keypoints, OutputArray imgDescriptor,
+                  std::vector<std::vector<int> >* pointIdxsOfClusters=0, Mat* descriptors=0 );
+    /** @overload
+    @param keypointDescriptors Computed descriptors to match with vocabulary.
+    @param imgDescriptor Computed output image descriptor.
+    @param pointIdxsOfClusters Indices of keypoints that belong to each cluster: pointIdxsOfClusters[i]
+    holds the indices of the keypoints assigned to the i-th cluster (vocabulary word). Returned only
+    if the pointer is non-zero.
+    */
+    void compute( InputArray keypointDescriptors, OutputArray imgDescriptor,
+                  std::vector<std::vector<int> >* pointIdxsOfClusters=0 );
+    // compute() is not constant because DescriptorMatcher::match is not constant
+
+    CV_WRAP_AS(compute) void compute2( const Mat& image, std::vector<KeyPoint>& keypoints, CV_OUT Mat& imgDescriptor )
+    { compute(image,keypoints,imgDescriptor); }
+
+    /** @brief Returns an image descriptor size if the vocabulary is set. Otherwise, it returns 0.
+    */
+    CV_WRAP int descriptorSize() const;
+
+    /** @brief Returns an image descriptor type.
+     */
+    CV_WRAP int descriptorType() const;
+
+protected:
+    Mat vocabulary;
+    Ptr<DescriptorExtractor> dextractor;
+    Ptr<DescriptorMatcher> dmatcher;
+};
+
+//! @} features2d_category
+
+//! @} features2d
+
+} /* namespace cv */
+
+#endif

+ 48 - 0
ThresholdTools/include/opencv2/features2d/features2d.hpp

@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/features2d.hpp"

+ 33 - 0
ThresholdTools/include/opencv2/features2d/hal/interface.h

@@ -0,0 +1,33 @@
+#ifndef OPENCV_FEATURE2D_HAL_INTERFACE_H
+#define OPENCV_FEATURE2D_HAL_INTERFACE_H
+
+#include "opencv2/core/cvdef.h"
+//! @addtogroup features2d_hal_interface
+//! @{
+
+//! @name Fast feature detector types
+//! @sa cv::FastFeatureDetector
+//! @{
+#define CV_HAL_TYPE_5_8  0
+#define CV_HAL_TYPE_7_12 1
+#define CV_HAL_TYPE_9_16 2
+//! @}
+
+//! @name Key point
+//! @sa cv::KeyPoint
+//! @{
+struct CV_EXPORTS cvhalKeyPoint
+{
+    float x;
+    float y;
+    float size;
+    float angle;
+    float response;
+    int octave;
+    int class_id;
+};
+//! @}
+
+//! @}
+
+#endif

+ 531 - 0
ThresholdTools/include/opencv2/flann.hpp

@@ -0,0 +1,531 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_FLANN_HPP
+#define OPENCV_FLANN_HPP
+
+#include "opencv2/core.hpp"
+#include "opencv2/flann/miniflann.hpp"
+#include "opencv2/flann/flann_base.hpp"
+
+/**
+@defgroup flann Clustering and Search in Multi-Dimensional Spaces
+
+This section documents OpenCV's interface to the FLANN library. FLANN (Fast Library for Approximate
+Nearest Neighbors) is a library that contains a collection of algorithms optimized for fast nearest
+neighbor search in large datasets and for high dimensional features. More information about FLANN
+can be found in @cite Muja2009 .
+*/
+
+namespace cvflann
+{
+    CV_EXPORTS flann_distance_t flann_distance_type();
+    CV_DEPRECATED CV_EXPORTS void set_distance_type(flann_distance_t distance_type, int order);
+}
+
+
+namespace cv
+{
+namespace flann
+{
+
+
+//! @addtogroup flann
+//! @{
+
+template <typename T> struct CvType {};
+template <> struct CvType<unsigned char> { static int type() { return CV_8U; } };
+template <> struct CvType<char> { static int type() { return CV_8S; } };
+template <> struct CvType<unsigned short> { static int type() { return CV_16U; } };
+template <> struct CvType<short> { static int type() { return CV_16S; } };
+template <> struct CvType<int> { static int type() { return CV_32S; } };
+template <> struct CvType<float> { static int type() { return CV_32F; } };
+template <> struct CvType<double> { static int type() { return CV_64F; } };
+
+
+// bring the flann parameters into this namespace
+using ::cvflann::get_param;
+using ::cvflann::print_params;
+
+// bring the flann distances into this namespace
+using ::cvflann::L2_Simple;
+using ::cvflann::L2;
+using ::cvflann::L1;
+using ::cvflann::MinkowskiDistance;
+using ::cvflann::MaxDistance;
+using ::cvflann::HammingLUT;
+using ::cvflann::Hamming;
+using ::cvflann::Hamming2;
+using ::cvflann::HistIntersectionDistance;
+using ::cvflann::HellingerDistance;
+using ::cvflann::ChiSquareDistance;
+using ::cvflann::KL_Divergence;
+
+
+/** @brief The FLANN nearest neighbor index class. This class is templated with the type of elements for which
+the index is built.
+ */
+template <typename Distance>
+class GenericIndex
+{
+public:
+        typedef typename Distance::ElementType ElementType;
+        typedef typename Distance::ResultType DistanceType;
+
+        /** @brief Constructs a nearest neighbor search index for a given dataset.
+
+        @param features Matrix containing the features (points) to index. The size of the matrix is
+        num_features x feature_dimensionality and the data type of the elements in the matrix must
+        coincide with the type of the index.
+        @param params Structure containing the index parameters. The type of index that will be
+        constructed depends on the type of this parameter. See the description.
+        @param distance The distance functor used to compare features.
+
+        The method constructs a fast search structure from a set of features using the specified algorithm
+        with specified parameters, as defined by params. params is a reference to one of the following class
+        IndexParams descendants:
+
+        - **LinearIndexParams** When passing an object of this type, the index will perform a linear,
+        brute-force search. :
+        @code
+        struct LinearIndexParams : public IndexParams
+        {
+        };
+        @endcode
+        - **KDTreeIndexParams** When passing an object of this type the index constructed will consist of
+        a set of randomized kd-trees which will be searched in parallel. :
+        @code
+        struct KDTreeIndexParams : public IndexParams
+        {
+            KDTreeIndexParams( int trees = 4 );
+        };
+        @endcode
+        - **KMeansIndexParams** When passing an object of this type the index constructed will be a
+        hierarchical k-means tree. :
+        @code
+        struct KMeansIndexParams : public IndexParams
+        {
+            KMeansIndexParams(
+                int branching = 32,
+                int iterations = 11,
+                flann_centers_init_t centers_init = CENTERS_RANDOM,
+                float cb_index = 0.2 );
+        };
+        @endcode
+        - **CompositeIndexParams** When using a parameters object of this type the index created
+        combines the randomized kd-trees and the hierarchical k-means tree. :
+        @code
+        struct CompositeIndexParams : public IndexParams
+        {
+            CompositeIndexParams(
+                int trees = 4,
+                int branching = 32,
+                int iterations = 11,
+                flann_centers_init_t centers_init = CENTERS_RANDOM,
+                float cb_index = 0.2 );
+        };
+        @endcode
+        - **LshIndexParams** When using a parameters object of this type the index created uses
+        multi-probe LSH (by Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search
+        by Qin Lv, William Josephson, Zhe Wang, Moses Charikar, Kai Li., Proceedings of the 33rd
+        International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007) :
+        @code
+        struct LshIndexParams : public IndexParams
+        {
+            LshIndexParams(
+                unsigned int table_number,
+                unsigned int key_size,
+                unsigned int multi_probe_level );
+        };
+        @endcode
+        - **AutotunedIndexParams** When passing an object of this type the index created is
+        automatically tuned to offer the best performance, by choosing the optimal index type
+        (randomized kd-trees, hierarchical kmeans, linear) and parameters for the dataset provided. :
+        @code
+        struct AutotunedIndexParams : public IndexParams
+        {
+            AutotunedIndexParams(
+                float target_precision = 0.9,
+                float build_weight = 0.01,
+                float memory_weight = 0,
+                float sample_fraction = 0.1 );
+        };
+        @endcode
+        - **SavedIndexParams** This object type is used for loading a previously saved index from the
+        disk. :
+        @code
+        struct SavedIndexParams : public IndexParams
+        {
+            SavedIndexParams( String filename );
+        };
+        @endcode
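+
+        A minimal usage sketch (assumes "features" is a hypothetical CV_32F matrix of points):
+        @code
+        cv::flann::GenericIndex< cvflann::L2<float> > index(features, cvflann::KDTreeIndexParams(4));
+        std::vector<float> query(features.cols, 0.f); // hypothetical query point
+        std::vector<int> indices(5);
+        std::vector<float> dists(5);
+        index.knnSearch(query, indices, dists, 5, cvflann::SearchParams(32));
+        @endcode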
+         */
+        GenericIndex(const Mat& features, const ::cvflann::IndexParams& params, Distance distance = Distance());
+
+        ~GenericIndex();
+
+        /** @brief Performs a K-nearest neighbor search for a given query point using the index.
+
+        @param query The query point
+        @param indices Vector that will contain the indices of the K-nearest neighbors found. It must have
+        at least knn size.
+        @param dists Vector that will contain the distances to the K-nearest neighbors found. It must have
+        at least knn size.
+        @param knn Number of nearest neighbors to search for.
+        @param params SearchParams
+         */
+        void knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices,
+                       std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& params);
+        void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& params);
+
+        int radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices,
+                         std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& params);
+        int radiusSearch(const Mat& query, Mat& indices, Mat& dists,
+                         DistanceType radius, const ::cvflann::SearchParams& params);
+
+        void save(String filename) { nnIndex->save(filename); }
+
+        int veclen() const { return nnIndex->veclen(); }
+
+        int size() const { return nnIndex->size(); }
+
+        ::cvflann::IndexParams getParameters() { return nnIndex->getParameters(); }
+
+        CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters() { return nnIndex->getIndexParameters(); }
+
+private:
+        ::cvflann::Index<Distance>* nnIndex;
+};
+
+//! @cond IGNORED
+
+#define FLANN_DISTANCE_CHECK \
+    if ( ::cvflann::flann_distance_type() != cvflann::FLANN_DIST_L2) { \
+        printf("[WARNING] You are using cv::flann::Index (or cv::flann::GenericIndex) and have also changed "\
+        "the distance using cvflann::set_distance_type. This is no longer working as expected "\
+        "(cv::flann::Index always uses L2). You should create the index templated on the distance, "\
+        "for example for L1 distance use: GenericIndex< L1<float> > \n"); \
+    }
+
+
+template <typename Distance>
+GenericIndex<Distance>::GenericIndex(const Mat& dataset, const ::cvflann::IndexParams& params, Distance distance)
+{
+    CV_Assert(dataset.type() == CvType<ElementType>::type());
+    CV_Assert(dataset.isContinuous());
+    ::cvflann::Matrix<ElementType> m_dataset((ElementType*)dataset.ptr<ElementType>(0), dataset.rows, dataset.cols);
+
+    nnIndex = new ::cvflann::Index<Distance>(m_dataset, params, distance);
+
+    FLANN_DISTANCE_CHECK
+
+    nnIndex->buildIndex();
+}
+
+template <typename Distance>
+GenericIndex<Distance>::~GenericIndex()
+{
+    delete nnIndex;
+}
+
+template <typename Distance>
+void GenericIndex<Distance>::knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& searchParams)
+{
+    ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+    ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+    ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+
+    FLANN_DISTANCE_CHECK
+
+    nnIndex->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
+}
+
+
+template <typename Distance>
+void GenericIndex<Distance>::knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams)
+{
+    CV_Assert(queries.type() == CvType<ElementType>::type());
+    CV_Assert(queries.isContinuous());
+    ::cvflann::Matrix<ElementType> m_queries((ElementType*)queries.ptr<ElementType>(0), queries.rows, queries.cols);
+
+    CV_Assert(indices.type() == CV_32S);
+    CV_Assert(indices.isContinuous());
+    ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+
+    CV_Assert(dists.type() == CvType<DistanceType>::type());
+    CV_Assert(dists.isContinuous());
+    ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+
+    FLANN_DISTANCE_CHECK
+
+    nnIndex->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
+}
+
+template <typename Distance>
+int GenericIndex<Distance>::radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+{
+    ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+    ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+    ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+
+    FLANN_DISTANCE_CHECK
+
+    return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+}
+
+template <typename Distance>
+int GenericIndex<Distance>::radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+{
+    CV_Assert(query.type() == CvType<ElementType>::type());
+    CV_Assert(query.isContinuous());
+    ::cvflann::Matrix<ElementType> m_query((ElementType*)query.ptr<ElementType>(0), query.rows, query.cols);
+
+    CV_Assert(indices.type() == CV_32S);
+    CV_Assert(indices.isContinuous());
+    ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+
+    CV_Assert(dists.type() == CvType<DistanceType>::type());
+    CV_Assert(dists.isContinuous());
+    ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+
+    FLANN_DISTANCE_CHECK
+
+    return nnIndex->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+}
+
+//! @endcond
+
+/**
+ * @deprecated Use GenericIndex class instead
+ */
+template <typename T>
+class Index_
+{
+public:
+    typedef typename L2<T>::ElementType ElementType;
+    typedef typename L2<T>::ResultType DistanceType;
+
+    CV_DEPRECATED Index_(const Mat& dataset, const ::cvflann::IndexParams& params)
+    {
+        printf("[WARNING] The cv::flann::Index_<T> class is deperecated, use cv::flann::GenericIndex<Distance> instead\n");
+
+        CV_Assert(dataset.type() == CvType<ElementType>::type());
+        CV_Assert(dataset.isContinuous());
+        ::cvflann::Matrix<ElementType> m_dataset((ElementType*)dataset.ptr<ElementType>(0), dataset.rows, dataset.cols);
+
+        if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) {
+            nnIndex_L1 = NULL;
+            nnIndex_L2 = new ::cvflann::Index< L2<ElementType> >(m_dataset, params);
+        }
+        else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) {
+            nnIndex_L1 = new ::cvflann::Index< L1<ElementType> >(m_dataset, params);
+            nnIndex_L2 = NULL;
+        }
+        else {
+            printf("[ERROR] cv::flann::Index_<T> only provides backwards compatibility for the L1 and L2 distances. "
+                   "For other distance types you must use cv::flann::GenericIndex<Distance>\n");
+            CV_Assert(0);
+        }
+        if (nnIndex_L1) nnIndex_L1->buildIndex();
+        if (nnIndex_L2) nnIndex_L2->buildIndex();
+    }
+    CV_DEPRECATED ~Index_()
+    {
+        if (nnIndex_L1) delete nnIndex_L1;
+        if (nnIndex_L2) delete nnIndex_L2;
+    }
+
+    CV_DEPRECATED void knnSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, int knn, const ::cvflann::SearchParams& searchParams)
+    {
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+        ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+        ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+
+        if (nnIndex_L1) nnIndex_L1->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
+        if (nnIndex_L2) nnIndex_L2->knnSearch(m_query,m_indices,m_dists,knn,searchParams);
+    }
+    CV_DEPRECATED void knnSearch(const Mat& queries, Mat& indices, Mat& dists, int knn, const ::cvflann::SearchParams& searchParams)
+    {
+        CV_Assert(queries.type() == CvType<ElementType>::type());
+        CV_Assert(queries.isContinuous());
+        ::cvflann::Matrix<ElementType> m_queries((ElementType*)queries.ptr<ElementType>(0), queries.rows, queries.cols);
+
+        CV_Assert(indices.type() == CV_32S);
+        CV_Assert(indices.isContinuous());
+        ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+
+        CV_Assert(dists.type() == CvType<DistanceType>::type());
+        CV_Assert(dists.isContinuous());
+        ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+
+        if (nnIndex_L1) nnIndex_L1->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
+        if (nnIndex_L2) nnIndex_L2->knnSearch(m_queries,m_indices,m_dists,knn, searchParams);
+    }
+
+    CV_DEPRECATED int radiusSearch(const std::vector<ElementType>& query, std::vector<int>& indices, std::vector<DistanceType>& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+    {
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)&query[0], 1, query.size());
+        ::cvflann::Matrix<int> m_indices(&indices[0], 1, indices.size());
+        ::cvflann::Matrix<DistanceType> m_dists(&dists[0], 1, dists.size());
+
+        if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        return -1; // unreachable: the constructor guarantees one of the two indices exists
+    }
+
+    CV_DEPRECATED int radiusSearch(const Mat& query, Mat& indices, Mat& dists, DistanceType radius, const ::cvflann::SearchParams& searchParams)
+    {
+        CV_Assert(query.type() == CvType<ElementType>::type());
+        CV_Assert(query.isContinuous());
+        ::cvflann::Matrix<ElementType> m_query((ElementType*)query.ptr<ElementType>(0), query.rows, query.cols);
+
+        CV_Assert(indices.type() == CV_32S);
+        CV_Assert(indices.isContinuous());
+        ::cvflann::Matrix<int> m_indices((int*)indices.ptr<int>(0), indices.rows, indices.cols);
+
+        CV_Assert(dists.type() == CvType<DistanceType>::type());
+        CV_Assert(dists.isContinuous());
+        ::cvflann::Matrix<DistanceType> m_dists((DistanceType*)dists.ptr<DistanceType>(0), dists.rows, dists.cols);
+
+        if (nnIndex_L1) return nnIndex_L1->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        if (nnIndex_L2) return nnIndex_L2->radiusSearch(m_query,m_indices,m_dists,radius,searchParams);
+        return -1; // unreachable: the constructor guarantees one of the two indices exists
+    }
+
+    CV_DEPRECATED void save(String filename)
+    {
+        if (nnIndex_L1) nnIndex_L1->save(filename);
+        if (nnIndex_L2) nnIndex_L2->save(filename);
+    }
+
+    CV_DEPRECATED int veclen() const
+    {
+        if (nnIndex_L1) return nnIndex_L1->veclen();
+        if (nnIndex_L2) return nnIndex_L2->veclen();
+        return 0; // unreachable: the constructor guarantees one of the two indices exists
+    }
+
+    CV_DEPRECATED int size() const
+    {
+        if (nnIndex_L1) return nnIndex_L1->size();
+        if (nnIndex_L2) return nnIndex_L2->size();
+        return 0; // unreachable: the constructor guarantees one of the two indices exists
+    }
+
+    CV_DEPRECATED ::cvflann::IndexParams getParameters()
+    {
+        if (nnIndex_L1) return nnIndex_L1->getParameters();
+        if (nnIndex_L2) return nnIndex_L2->getParameters();
+        return ::cvflann::IndexParams(); // unreachable: one of the two indices always exists
+    }
+
+    CV_DEPRECATED const ::cvflann::IndexParams* getIndexParameters()
+    {
+        if (nnIndex_L1) return nnIndex_L1->getIndexParameters();
+        if (nnIndex_L2) return nnIndex_L2->getIndexParameters();
+        return NULL; // unreachable: one of the two indices always exists
+    }
+
+private:
+    // providing backwards compatibility for L2 and L1 distances (most common)
+    ::cvflann::Index< L2<ElementType> >* nnIndex_L2;
+    ::cvflann::Index< L1<ElementType> >* nnIndex_L1;
+};
+
+
+/** @brief Clusters features using hierarchical k-means algorithm.
+
+@param features The points to be clustered. The matrix must have elements of type
+Distance::ElementType.
+@param centers The centers of the clusters obtained. The matrix must have type
+Distance::ResultType. The number of rows in this matrix represents the number of clusters desired,
+however, because of the way the cut in the hierarchical tree is chosen, the number of clusters
+computed will be the highest number of the form (branching-1)\*k+1 that's lower than the number of
+clusters desired, where branching is the tree's branching factor (see description of the
+KMeansIndexParams).
+@param params Parameters used in the construction of the hierarchical k-means tree.
+@param d Distance to be used for clustering.
+
+The method clusters the given feature vectors by constructing a hierarchical k-means tree and
+choosing a cut in the tree that minimizes the cluster's variance. It returns the number of clusters
+found.
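+
+A minimal usage sketch (assumes "features" is a hypothetical CV_32F matrix of points):
+@code
+cv::Mat centers(100, features.cols, CV_32F); // ask for up to 100 clusters
+int found = cv::flann::hierarchicalClustering< cvflann::L2<float> >(
+        features, centers, cvflann::KMeansIndexParams());
+centers = centers.rowRange(0, found); // actual count has the form (branching-1)*k+1
+@endcode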
+ */
+template <typename Distance>
+int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params,
+                           Distance d = Distance())
+{
+    typedef typename Distance::ElementType ElementType;
+    typedef typename Distance::ResultType DistanceType;
+
+    CV_Assert(features.type() == CvType<ElementType>::type());
+    CV_Assert(features.isContinuous());
+    ::cvflann::Matrix<ElementType> m_features((ElementType*)features.ptr<ElementType>(0), features.rows, features.cols);
+
+    CV_Assert(centers.type() == CvType<DistanceType>::type());
+    CV_Assert(centers.isContinuous());
+    ::cvflann::Matrix<DistanceType> m_centers((DistanceType*)centers.ptr<DistanceType>(0), centers.rows, centers.cols);
+
+    return ::cvflann::hierarchicalClustering<Distance>(m_features, m_centers, params, d);
+}
+
+/** @deprecated
+*/
+template <typename ELEM_TYPE, typename DIST_TYPE>
+CV_DEPRECATED int hierarchicalClustering(const Mat& features, Mat& centers, const ::cvflann::KMeansIndexParams& params)
+{
+    printf("[WARNING] cv::flann::hierarchicalClustering<ELEM_TYPE,DIST_TYPE> is deprecated, use "
+        "cv::flann::hierarchicalClustering<Distance> instead\n");
+
+    if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L2 ) {
+        return hierarchicalClustering< L2<ELEM_TYPE> >(features, centers, params);
+    }
+    else if ( ::cvflann::flann_distance_type() == cvflann::FLANN_DIST_L1 ) {
+        return hierarchicalClustering< L1<ELEM_TYPE> >(features, centers, params);
+    }
+    else {
+        printf("[ERROR] cv::flann::hierarchicalClustering<ELEM_TYPE,DIST_TYPE> only provides backwards "
+        "compatibility for the L1 and L2 distances. "
+        "For other distance types you must use cv::flann::hierarchicalClustering<Distance>\n");
+        CV_Assert(0);
+    }
+}
+
+//! @} flann
+
+} } // namespace cv::flann
+
+#endif

+ 155 - 0
ThresholdTools/include/opencv2/flann/all_indices.h

@@ -0,0 +1,155 @@
+/***********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
+ * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************/
+
+
+#ifndef OPENCV_FLANN_ALL_INDICES_H_
+#define OPENCV_FLANN_ALL_INDICES_H_
+
+#include "general.h"
+
+#include "nn_index.h"
+#include "kdtree_index.h"
+#include "kdtree_single_index.h"
+#include "kmeans_index.h"
+#include "composite_index.h"
+#include "linear_index.h"
+#include "hierarchical_clustering_index.h"
+#include "lsh_index.h"
+#include "autotuned_index.h"
+
+
+namespace cvflann
+{
+
+template<typename KDTreeCapability, typename VectorSpace, typename Distance>
+struct index_creator
+{
+    static NNIndex<Distance>* create(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance)
+    {
+        flann_algorithm_t index_type = get_param<flann_algorithm_t>(params, "algorithm");
+
+        NNIndex<Distance>* nnIndex;
+        switch (index_type) {
+        case FLANN_INDEX_LINEAR:
+            nnIndex = new LinearIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_KDTREE_SINGLE:
+            nnIndex = new KDTreeSingleIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_KDTREE:
+            nnIndex = new KDTreeIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_KMEANS:
+            nnIndex = new KMeansIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_COMPOSITE:
+            nnIndex = new CompositeIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_AUTOTUNED:
+            nnIndex = new AutotunedIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_HIERARCHICAL:
+            nnIndex = new HierarchicalClusteringIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_LSH:
+            nnIndex = new LshIndex<Distance>(dataset, params, distance);
+            break;
+        default:
+            throw FLANNException("Unknown index type");
+        }
+
+        return nnIndex;
+    }
+};
+
+template<typename VectorSpace, typename Distance>
+struct index_creator<False,VectorSpace,Distance>
+{
+    static NNIndex<Distance>* create(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance)
+    {
+        flann_algorithm_t index_type = get_param<flann_algorithm_t>(params, "algorithm");
+
+        NNIndex<Distance>* nnIndex;
+        switch (index_type) {
+        case FLANN_INDEX_LINEAR:
+            nnIndex = new LinearIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_KMEANS:
+            nnIndex = new KMeansIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_HIERARCHICAL:
+            nnIndex = new HierarchicalClusteringIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_LSH:
+            nnIndex = new LshIndex<Distance>(dataset, params, distance);
+            break;
+        default:
+            throw FLANNException("Unknown index type");
+        }
+
+        return nnIndex;
+    }
+};
+
+template<typename Distance>
+struct index_creator<False,False,Distance>
+{
+    static NNIndex<Distance>* create(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance)
+    {
+        flann_algorithm_t index_type = get_param<flann_algorithm_t>(params, "algorithm");
+
+        NNIndex<Distance>* nnIndex;
+        switch (index_type) {
+        case FLANN_INDEX_LINEAR:
+            nnIndex = new LinearIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_HIERARCHICAL:
+            nnIndex = new HierarchicalClusteringIndex<Distance>(dataset, params, distance);
+            break;
+        case FLANN_INDEX_LSH:
+            nnIndex = new LshIndex<Distance>(dataset, params, distance);
+            break;
+        default:
+            throw FLANNException("Unknown index type");
+        }
+
+        return nnIndex;
+    }
+};
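+
+// The three index_creator variants narrow the legal index types by distance
+// traits: kd-tree, kd-tree-single, composite and autotuned indexes require
+// is_kdtree_distance; k-means additionally works for any vector-space
+// distance; linear, hierarchical-clustering and LSH accept any distance.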
+
+template<typename Distance>
+NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance)
+{
+    return index_creator<typename Distance::is_kdtree_distance,
+                         typename Distance::is_vector_space_distance,
+                         Distance>::create(dataset, params,distance);
+}
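+
+// A minimal usage sketch (not part of the upstream header), assuming 'data'
+// points at rows*cols floats and using the L2<float> distance from dist.h:
+//
+//   cvflann::Matrix<float> dataset(data, rows, cols);
+//   cvflann::KDTreeIndexParams params(4);   // 4 randomized kd-trees
+//   cvflann::NNIndex<cvflann::L2<float> >* index =
+//       cvflann::create_index_by_type(dataset, params, cvflann::L2<float>());
+//   index->buildIndex();
+//   // ... knnSearch / radiusSearch on the index ...
+//   delete index;   // the caller owns the returned pointer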
+
+}
+
+#endif /* OPENCV_FLANN_ALL_INDICES_H_ */

+ 192 - 0
ThresholdTools/include/opencv2/flann/allocator.h

@@ -0,0 +1,192 @@
+/***********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
+ * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
+ *
+ * THE BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************/
+
+#ifndef OPENCV_FLANN_ALLOCATOR_H_
+#define OPENCV_FLANN_ALLOCATOR_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+
+
+namespace cvflann
+{
+
+/**
+ * Allocates (using C's malloc) a generic type T.
+ *
+ * Params:
+ *     count = number of instances to allocate.
+ * Returns: pointer (of type T*) to memory buffer
+ */
+template <typename T>
+T* allocate(size_t count = 1)
+{
+    T* mem = (T*) ::malloc(sizeof(T)*count);
+    return mem;
+}
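+
+// Note that malloc returns uninitialized storage: no constructors run, so
+// this is only suitable for POD-like types. A minimal sketch:
+//
+//   float* buf = cvflann::allocate<float>(128);  // 128 uninitialized floats
+//   // ... use buf ...
+//   ::free(buf);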
+
+
+/**
+ * Pooled storage allocator
+ *
+ * The following routines allow for the efficient allocation of storage in
+ * small chunks from a specified pool.  Rather than allowing each structure
+ * to be freed individually, an entire pool of storage is freed at once.
+ * This method has two advantages over just using malloc() and free().  First,
+ * it is far more efficient for allocating small objects, as there is
+ * no overhead for remembering all the information needed to free each
+ * object or consolidating fragmented memory.  Second, the decision about
+ * how long to keep an object is made at the time of allocation, and there
+ * is no need to track down all the objects to free them.
+ *
+ */
+
+const size_t WORDSIZE = 16;
+const size_t BLOCKSIZE = 8192;
+
+class PooledAllocator
+{
+    /* We maintain memory alignment to word boundaries by requiring that all
+        allocations be in multiples of the machine word size. */
+    /* WORDSIZE: size of machine word in bytes. Must be a power of 2. */
+    /* BLOCKSIZE: minimum number of bytes requested at a time from the system.
+        Must be a multiple of WORDSIZE. */
+
+
+    int     remaining;  /* Number of bytes left in current block of storage. */
+    void*   base;     /* Pointer to base of current block of storage. */
+    void*   loc;      /* Current location in block to next allocate memory. */
+    int     blocksize;
+
+
+public:
+    int     usedMemory;
+    int     wastedMemory;
+
+    /**
+        Default constructor. Initializes a new pool.
+     */
+    PooledAllocator(int blockSize = BLOCKSIZE)
+    {
+        blocksize = blockSize;
+        remaining = 0;
+        base = NULL;
+        loc = NULL;
+
+        usedMemory = 0;
+        wastedMemory = 0;
+    }
+
+    /**
+     * Destructor. Frees all the memory allocated in this pool.
+     */
+    ~PooledAllocator()
+    {
+        void* prev;
+
+        while (base != NULL) {
+            prev = *((void**) base); /* Get pointer to prev block. */
+            ::free(base);
+            base = prev;
+        }
+    }
+
+    /**
+     * Returns a pointer to a piece of new memory of the given size in bytes
+     * allocated from the pool.
+     */
+    void* allocateMemory(int size)
+    {
+        int blockSize;
+
+        /* Round size up to a multiple of WORDSIZE. The expression below only
+            works when WORDSIZE is a power of 2: adding WORDSIZE-1 and then
+            masking off the low bits rounds up to the next multiple.
+         */
+        size = (size + (WORDSIZE - 1)) & ~(WORDSIZE - 1);
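+        /* Example: with WORDSIZE == 16, a request of 21 bytes becomes
+            (21 + 15) & ~15 == 32. */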
+
+        /* Check whether a new block must be allocated.  Note that the first word
+            of a block is reserved for a pointer to the previous block.
+         */
+        if (size > remaining) {
+
+            wastedMemory += remaining;
+
+            /* Allocate new storage. */
+            blockSize = (size + sizeof(void*) + (WORDSIZE-1) > BLOCKSIZE) ?
+                        size + sizeof(void*) + (WORDSIZE-1) : BLOCKSIZE;
+
+            // use the standard C malloc to allocate memory
+            void* m = ::malloc(blockSize);
+            if (!m) {
+                fprintf(stderr,"Failed to allocate memory.\n");
+                return NULL;
+            }
+
+            /* Fill first word of new block with pointer to previous block. */
+            ((void**) m)[0] = base;
+            base = m;
+
+            int shift = 0;
+            //int shift = (WORDSIZE - ( (((size_t)m) + sizeof(void*)) & (WORDSIZE-1))) & (WORDSIZE-1);
+
+            remaining = blockSize - sizeof(void*) - shift;
+            loc = ((char*)m + sizeof(void*) + shift);
+        }
+        void* rloc = loc;
+        loc = (char*)loc + size;
+        remaining -= size;
+
+        usedMemory += size;
+
+        return rloc;
+    }
+
+    /**
+     * Allocates (using this pool) a generic type T.
+     *
+     * Params:
+     *     count = number of instances to allocate.
+     * Returns: pointer (of type T*) to memory buffer
+     */
+    template <typename T>
+    T* allocate(size_t count = 1)
+    {
+        T* mem = (T*) this->allocateMemory((int)(sizeof(T)*count));
+        return mem;
+    }
+
+private:
+    PooledAllocator(const PooledAllocator &); // copy disabled
+    PooledAllocator& operator=(const PooledAllocator &); // assign disabled
+};
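+
+// A minimal usage sketch: many small nodes are carved out of a handful of
+// malloc'd blocks, and everything is released at once when the pool dies.
+//
+//   {
+//       cvflann::PooledAllocator pool;
+//       int*    a = pool.allocate<int>(10);   // 10 ints from the pool
+//       double* b = pool.allocate<double>();  // one double, same pool
+//       // no individual free: ~PooledAllocator releases all blocks
+//   }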
+
+}
+
+#endif //OPENCV_FLANN_ALLOCATOR_H_

+ 330 - 0
ThresholdTools/include/opencv2/flann/any.h

@@ -0,0 +1,330 @@
+#ifndef OPENCV_FLANN_ANY_H_
+#define OPENCV_FLANN_ANY_H_
+/*
+ * (C) Copyright Christopher Diggins 2005-2011
+ * (C) Copyright Pablo Aguilar 2005
+ * (C) Copyright Kevlin Henney 2001
+ *
+ * Distributed under the Boost Software License, Version 1.0. (See
+ * accompanying file LICENSE_1_0.txt or copy at
+ * http://www.boost.org/LICENSE_1_0.txt
+ *
+ * Adapted for FLANN by Marius Muja
+ */
+
+#include "defines.h"
+#include <stdexcept>
+#include <ostream>
+#include <typeinfo>
+
+namespace cvflann
+{
+
+namespace anyimpl
+{
+
+struct bad_any_cast
+{
+};
+
+struct empty_any
+{
+};
+
+inline std::ostream& operator <<(std::ostream& out, const empty_any&)
+{
+    out << "[empty_any]";
+    return out;
+}
+
+struct base_any_policy
+{
+    virtual void static_delete(void** x) = 0;
+    virtual void copy_from_value(void const* src, void** dest) = 0;
+    virtual void clone(void* const* src, void** dest) = 0;
+    virtual void move(void* const* src, void** dest) = 0;
+    virtual void* get_value(void** src) = 0;
+    virtual const void* get_value(void* const * src) = 0;
+    virtual ::size_t get_size() = 0;
+    virtual const std::type_info& type() = 0;
+    virtual void print(std::ostream& out, void* const* src) = 0;
+    virtual ~base_any_policy() {}
+};
+
+template<typename T>
+struct typed_base_any_policy : base_any_policy
+{
+    virtual ::size_t get_size() CV_OVERRIDE { return sizeof(T); }
+    virtual const std::type_info& type() CV_OVERRIDE { return typeid(T); }
+
+};
+
+template<typename T>
+struct small_any_policy CV_FINAL : typed_base_any_policy<T>
+{
+    virtual void static_delete(void**) CV_OVERRIDE { }
+    virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE
+    {
+        new (dest) T(* reinterpret_cast<T const*>(src));
+    }
+    virtual void clone(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; }
+    virtual void move(void* const* src, void** dest) CV_OVERRIDE { *dest = *src; }
+    virtual void* get_value(void** src) CV_OVERRIDE { return reinterpret_cast<void*>(src); }
+    virtual const void* get_value(void* const * src) CV_OVERRIDE { return reinterpret_cast<const void*>(src); }
+    virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast<T const*>(src); }
+};
+
+template<typename T>
+struct big_any_policy CV_FINAL : typed_base_any_policy<T>
+{
+    virtual void static_delete(void** x) CV_OVERRIDE
+    {
+        if (* x) delete (* reinterpret_cast<T**>(x));
+        *x = NULL;
+    }
+    virtual void copy_from_value(void const* src, void** dest) CV_OVERRIDE
+    {
+        *dest = new T(*reinterpret_cast<T const*>(src));
+    }
+    virtual void clone(void* const* src, void** dest) CV_OVERRIDE
+    {
+        *dest = new T(**reinterpret_cast<T* const*>(src));
+    }
+    virtual void move(void* const* src, void** dest) CV_OVERRIDE
+    {
+        (*reinterpret_cast<T**>(dest))->~T();
+        **reinterpret_cast<T**>(dest) = **reinterpret_cast<T* const*>(src);
+    }
+    virtual void* get_value(void** src) CV_OVERRIDE { return *src; }
+    virtual const void* get_value(void* const * src) CV_OVERRIDE { return *src; }
+    virtual void print(std::ostream& out, void* const* src) CV_OVERRIDE { out << *reinterpret_cast<T const*>(*src); }
+};
+
+template<> inline void big_any_policy<flann_centers_init_t>::print(std::ostream& out, void* const* src)
+{
+    out << int(*reinterpret_cast<flann_centers_init_t const*>(*src));
+}
+
+template<> inline void big_any_policy<flann_algorithm_t>::print(std::ostream& out, void* const* src)
+{
+    out << int(*reinterpret_cast<flann_algorithm_t const*>(*src));
+}
+
+template<> inline void big_any_policy<cv::String>::print(std::ostream& out, void* const* src)
+{
+    out << (*reinterpret_cast<cv::String const*>(*src)).c_str();
+}
+
+template<typename T>
+struct choose_policy
+{
+    typedef big_any_policy<T> type;
+};
+
+template<typename T>
+struct choose_policy<T*>
+{
+    typedef small_any_policy<T*> type;
+};
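+
+// Pointers (and the SMALL_POLICY types listed below) fit into the any's
+// internal void* slot and are stored in place; every other type goes through
+// big_any_policy, which heap-allocates a copy.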
+
+struct any;
+
+/// Choosing a policy for 'any' itself is illegal and should never happen;
+/// the void typedef forces a compile-time error if it is ever attempted.
+template<>
+struct choose_policy<any>
+{
+    typedef void type;
+};
+
+/// Specializations for small types.
+#define SMALL_POLICY(TYPE) \
+    template<> \
+    struct choose_policy<TYPE> { typedef small_any_policy<TYPE> type; \
+    }
+
+SMALL_POLICY(signed char);
+SMALL_POLICY(unsigned char);
+SMALL_POLICY(signed short);
+SMALL_POLICY(unsigned short);
+SMALL_POLICY(signed int);
+SMALL_POLICY(unsigned int);
+SMALL_POLICY(signed long);
+SMALL_POLICY(unsigned long);
+SMALL_POLICY(float);
+SMALL_POLICY(bool);
+
+#undef SMALL_POLICY
+
+template <typename T>
+class SinglePolicy
+{
+    SinglePolicy();
+    SinglePolicy(const SinglePolicy& other);
+    SinglePolicy& operator=(const SinglePolicy& other);
+
+public:
+    static base_any_policy* get_policy();
+
+private:
+    static typename choose_policy<T>::type policy;
+};
+
+template <typename T>
+typename choose_policy<T>::type SinglePolicy<T>::policy;
+
+/// This function will return a different policy for each type.
+template <typename T>
+inline base_any_policy* SinglePolicy<T>::get_policy() { return &policy; }
+
+} // namespace anyimpl
+
+struct any
+{
+private:
+    // fields
+    anyimpl::base_any_policy* policy;
+    void* object;
+
+public:
+    /// Initializing constructor.
+    template <typename T>
+    any(const T& x)
+        : policy(anyimpl::SinglePolicy<anyimpl::empty_any>::get_policy()), object(NULL)
+    {
+        assign(x);
+    }
+
+    /// Empty constructor.
+    any()
+        : policy(anyimpl::SinglePolicy<anyimpl::empty_any>::get_policy()), object(NULL)
+    { }
+
+    /// Special initializing constructor for string literals.
+    any(const char* x)
+        : policy(anyimpl::SinglePolicy<anyimpl::empty_any>::get_policy()), object(NULL)
+    {
+        assign(x);
+    }
+
+    /// Copy constructor.
+    any(const any& x)
+        : policy(anyimpl::SinglePolicy<anyimpl::empty_any>::get_policy()), object(NULL)
+    {
+        assign(x);
+    }
+
+    /// Destructor.
+    ~any()
+    {
+        policy->static_delete(&object);
+    }
+
+    /// Assignment function from another any.
+    any& assign(const any& x)
+    {
+        reset();
+        policy = x.policy;
+        policy->clone(&x.object, &object);
+        return *this;
+    }
+
+    /// Assignment function.
+    template <typename T>
+    any& assign(const T& x)
+    {
+        reset();
+        policy = anyimpl::SinglePolicy<T>::get_policy();
+        policy->copy_from_value(&x, &object);
+        return *this;
+    }
+
+    /// Assignment operator.
+    template<typename T>
+    any& operator=(const T& x)
+    {
+        return assign(x);
+    }
+
+    /// Copy-assignment operator. The templated version above is not selected
+    /// for 'any' arguments, so an explicit overload is required here.
+    any& operator=(const any& x)
+    {
+        return assign(x);
+    }
+
+    /// Assignment operator, specialized for literal strings.
+    /// They have types like const char[6], which don't work as expected.
+    any& operator=(const char* x)
+    {
+        return assign(x);
+    }
+
+    /// Utility functions
+    any& swap(any& x)
+    {
+        std::swap(policy, x.policy);
+        std::swap(object, x.object);
+        return *this;
+    }
+
+    /// Cast operator. You can only cast to the original type.
+    template<typename T>
+    T& cast()
+    {
+        if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
+        T* r = reinterpret_cast<T*>(policy->get_value(&object));
+        return *r;
+    }
+
+    /// Cast operator. You can only cast to the original type.
+    template<typename T>
+    const T& cast() const
+    {
+        if (policy->type() != typeid(T)) throw anyimpl::bad_any_cast();
+        const T* r = reinterpret_cast<const T*>(policy->get_value(&object));
+        return *r;
+    }
+
+    /// Returns true if the any contains no value.
+    bool empty() const
+    {
+        return policy->type() == typeid(anyimpl::empty_any);
+    }
+
+    /// Frees any allocated memory, and sets the value to NULL.
+    void reset()
+    {
+        policy->static_delete(&object);
+        policy = anyimpl::SinglePolicy<anyimpl::empty_any>::get_policy();
+    }
+
+    /// Returns true if the two types are the same.
+    bool compatible(const any& x) const
+    {
+        return policy->type() == x.policy->type();
+    }
+
+    /// Returns true if the stored type matches T.
+    template<typename T>
+    bool has_type()
+    {
+        return policy->type() == typeid(T);
+    }
+
+    const std::type_info& type() const
+    {
+        return policy->type();
+    }
+
+    friend std::ostream& operator <<(std::ostream& out, const any& any_val);
+};
+
+inline std::ostream& operator <<(std::ostream& out, const any& any_val)
+{
+    any_val.policy->print(out,&any_val.object);
+    return out;
+}
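+
+// A minimal usage sketch: an any holds one value of an arbitrary type and
+// can only be cast back to that exact type.
+//
+//   cvflann::any a = 42;                 // stores an int (small-type policy)
+//   int n = a.cast<int>();               // OK
+//   // a.cast<float>();                  // would throw anyimpl::bad_any_cast
+//   a = "kdtree";                        // rebinds to const char*
+//   bool ok = a.has_type<const char*>(); // true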
+
+}
+
+#endif // OPENCV_FLANN_ANY_H_

Too many files were changed in this diff, so some files are not shown.