all_layers.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14. // Third party copyrights are property of their respective owners.
  15. //
  16. // Redistribution and use in source and binary forms, with or without modification,
  17. // are permitted provided that the following conditions are met:
  18. //
  19. // * Redistribution's of source code must retain the above copyright notice,
  20. // this list of conditions and the following disclaimer.
  21. //
  22. // * Redistribution's in binary form must reproduce the above copyright notice,
  23. // this list of conditions and the following disclaimer in the documentation
  24. // and/or other materials provided with the distribution.
  25. //
  26. // * The name of the copyright holders may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // This software is provided by the copyright holders and contributors "as is" and
  30. // any express or implied warranties, including, but not limited to, the implied
  31. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32. // In no event shall the Intel Corporation or contributors be liable for any direct,
  33. // indirect, incidental, special, exemplary, or consequential damages
  34. // (including, but not limited to, procurement of substitute goods or services;
  35. // loss of use, data, or profits; or business interruption) however caused
  36. // and on any theory of liability, whether in contract, strict liability,
  37. // or tort (including negligence or otherwise) arising in any way out of
  38. // the use of this software, even if advised of the possibility of such damage.
  39. //
  40. //M*/
  41. #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
  42. #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
  43. #include <opencv2/dnn.hpp>
  44. namespace cv {
  45. namespace dnn {
  46. CV__DNN_EXPERIMENTAL_NS_BEGIN
  47. //! @addtogroup dnn
  48. //! @{
  49. /** @defgroup dnnLayerList Partial List of Implemented Layers
  50. @{
  This subsection of dnn module contains information about built-in layers and their descriptions.
  Classes listed here, in fact, provide C++ API for creating instances of built-in layers.
  In addition to this way of layers instantiation, there is a more common factory API (see @ref dnnLayerFactory), it allows to create layers dynamically (by name) and register new ones.
  You can use both API, but factory API is less convenient for native C++ programming and basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
  Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
  In particular, the following layers and Caffe @ref Importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
  57. - Convolution
  58. - Deconvolution
  59. - Pooling
  60. - InnerProduct
  61. - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
  62. - Softmax
  63. - Reshape, Flatten, Slice, Split
  64. - LRN
  65. - MVN
  66. - Dropout (since it does nothing on forward pass -))
  67. */
  68. class CV_EXPORTS BlankLayer : public Layer
  69. {
  70. public:
  71. static Ptr<BlankLayer> create(const LayerParams &params);
  72. };
  73. //! LSTM recurrent layer
  74. class CV_EXPORTS LSTMLayer : public Layer
  75. {
  76. public:
  77. /** Creates instance of LSTM layer */
  78. static Ptr<LSTMLayer> create(const LayerParams& params);
  79. /** @deprecated Use LayerParams::blobs instead.
  80. @brief Set trained weights for LSTM layer.
  81. LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
  82. Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
  83. Than current output and current cell state is computed as follows:
  84. @f{eqnarray*}{
  85. h_t &= o_t \odot tanh(c_t), \\
  86. c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
  87. @f}
  88. where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned wights.
  89. Gates are computed as follows:
  90. @f{eqnarray*}{
  91. i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
  92. f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
  93. o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
  94. g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
  95. @f}
  96. where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  97. @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  98. For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
  99. (i.e. @f$W_x@f$ is vertical contacentaion of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
  100. The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
  101. and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.
  102. @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_h @f$)
  103. @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_x @f$)
  104. @param b is bias vector (i.e. according to abovemtioned notation is @f$ b @f$)
  105. */
  106. CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
  107. /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
  108. * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
  109. * where `Wh` is parameter from setWeights().
  110. */
  111. virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
  112. /** @deprecated Use flag `produce_cell_output` in LayerParams.
  113. * @brief Specifies either interpet first dimension of input blob as timestamp dimenion either as sample.
  114. *
  115. * If flag is set to true then shape of input blob will be interpeted as [`T`, `N`, `[data dims]`] where `T` specifies number of timpestamps, `N` is number of independent streams.
  116. * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
  117. *
  118. * If flag is set to false then shape of input blob will be interpeted as [`N`, `[data dims]`].
  119. * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
  120. */
  121. CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
  122. /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
  123. * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
  124. * @details Shape of the second output is the same as first output.
  125. */
  126. CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
  127. /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
  128. * @param input should contain packed values @f$x_t@f$
  129. * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
  130. *
  131. * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
  132. * where `T` specifies number of timpestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
  133. *
  134. * If setUseTimstampsDim() is set to fase then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
  135. * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
  136. */
  137. int inputNameToIndex(String inputName);
  138. int outputNameToIndex(String outputName);
  139. };
  140. /** @brief Classical recurrent layer
  141. Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
  142. - input: should contain packed input @f$x_t@f$.
  143. - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
  144. input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
  145. output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
  146. If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
  147. */
  148. class CV_EXPORTS RNNLayer : public Layer
  149. {
  150. public:
  151. /** Creates instance of RNNLayer */
  152. static Ptr<RNNLayer> create(const LayerParams& params);
  153. /** Setups learned weights.
  154. Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
  155. @f{eqnarray*}{
  156. h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
  157. o_t &= tanh&(W_{ho} h_t + b_o),
  158. @f}
  159. @param Wxh is @f$ W_{xh} @f$ matrix
  160. @param bh is @f$ b_{h} @f$ vector
  161. @param Whh is @f$ W_{hh} @f$ matrix
  162. @param Who is @f$ W_{xo} @f$ matrix
  163. @param bo is @f$ b_{o} @f$ vector
  164. */
  165. virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
  166. /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
  167. * @details Shape of the second output is the same as first output.
  168. */
  169. virtual void setProduceHiddenOutput(bool produce = false) = 0;
  170. };
  171. class CV_EXPORTS BaseConvolutionLayer : public Layer
  172. {
  173. public:
  174. Size kernel, stride, pad, dilation, adjustPad;
  175. String padMode;
  176. int numOutput;
  177. };
  178. class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
  179. {
  180. public:
  181. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  182. };
  183. class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
  184. {
  185. public:
  186. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  187. };
  188. class CV_EXPORTS LRNLayer : public Layer
  189. {
  190. public:
  191. enum Type
  192. {
  193. CHANNEL_NRM,
  194. SPATIAL_NRM
  195. };
  196. int type;
  197. int size;
  198. float alpha, beta, bias;
  199. bool normBySize;
  200. static Ptr<LRNLayer> create(const LayerParams& params);
  201. };
  202. class CV_EXPORTS PoolingLayer : public Layer
  203. {
  204. public:
  205. enum Type
  206. {
  207. MAX,
  208. AVE,
  209. STOCHASTIC
  210. };
  211. int type;
  212. Size kernel, stride, pad;
  213. bool globalPooling;
  214. bool computeMaxIdx;
  215. String padMode;
  216. bool ceilMode;
  217. static Ptr<PoolingLayer> create(const LayerParams& params);
  218. };
  219. class CV_EXPORTS SoftmaxLayer : public Layer
  220. {
  221. public:
  222. bool logSoftMax;
  223. static Ptr<SoftmaxLayer> create(const LayerParams& params);
  224. };
  225. class CV_EXPORTS InnerProductLayer : public Layer
  226. {
  227. public:
  228. int axis;
  229. static Ptr<InnerProductLayer> create(const LayerParams& params);
  230. };
  231. class CV_EXPORTS MVNLayer : public Layer
  232. {
  233. public:
  234. float eps;
  235. bool normVariance, acrossChannels;
  236. static Ptr<MVNLayer> create(const LayerParams& params);
  237. };
  238. /* Reshaping */
  239. class CV_EXPORTS ReshapeLayer : public Layer
  240. {
  241. public:
  242. MatShape newShapeDesc;
  243. Range newShapeRange;
  244. static Ptr<ReshapeLayer> create(const LayerParams& params);
  245. };
  246. class CV_EXPORTS FlattenLayer : public Layer
  247. {
  248. public:
  249. static Ptr<FlattenLayer> create(const LayerParams &params);
  250. };
  251. class CV_EXPORTS ConcatLayer : public Layer
  252. {
  253. public:
  254. int axis;
  255. /**
  256. * @brief Add zero padding in case of concatenation of blobs with different
  257. * spatial sizes.
  258. *
  259. * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
  260. */
  261. bool padding;
  262. static Ptr<ConcatLayer> create(const LayerParams &params);
  263. };
  264. class CV_EXPORTS SplitLayer : public Layer
  265. {
  266. public:
  267. int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
  268. static Ptr<SplitLayer> create(const LayerParams &params);
  269. };
  270. /**
  271. * Slice layer has several modes:
  272. * 1. Caffe mode
  273. * @param[in] axis Axis of split operation
  274. * @param[in] slice_point Array of split points
  275. *
  276. * Number of output blobs equals to number of split points plus one. The
  277. * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
  278. * the second output blob is a slice of input from @p slice_point[0] to
  279. * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
  280. * input from @p slice_point[-1] up to the end of @p axis size.
  281. *
  282. * 2. TensorFlow mode
  283. * @param begin Vector of start indices
  284. * @param size Vector of sizes
  285. *
  286. * More convinient numpy-like slice. One and only output blob
  287. * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
  288. *
  289. * 3. Torch mode
  290. * @param axis Axis of split operation
  291. *
  292. * Split input blob on the equal parts by @p axis.
  293. */
  294. class CV_EXPORTS SliceLayer : public Layer
  295. {
  296. public:
  297. /**
  298. * @brief Vector of slice ranges.
  299. *
  300. * The first dimension equals number of output blobs.
  301. * Inner vector has slice ranges for the first number of input dimensions.
  302. */
  303. std::vector<std::vector<Range> > sliceRanges;
  304. int axis;
  305. static Ptr<SliceLayer> create(const LayerParams &params);
  306. };
  307. class CV_EXPORTS PermuteLayer : public Layer
  308. {
  309. public:
  310. static Ptr<PermuteLayer> create(const LayerParams& params);
  311. };
  312. /**
  313. * @brief Adds extra values for specific axes.
  314. * @param paddings Vector of paddings in format
  315. * @code
  316. * [ pad_before, pad_after, // [0]th dimension
  317. * pad_before, pad_after, // [1]st dimension
  318. * ...
  319. * pad_before, pad_after ] // [n]th dimension
  320. * @endcode
  321. * that represents number of padded values at every dimension
  322. * starting from the first one. The rest of dimensions won't
  323. * be padded.
  324. * @param value Value to be padded. Defaults to zero.
  325. * @param input_dims Torch's parameter. If @p input_dims is not equal to the
  326. * actual input dimensionality then the `[0]th` dimension
  327. * is considered as a batch dimension and @p paddings are shifted
  328. * to a one dimension. Defaults to `-1` that means padding
  329. * corresponding to @p paddings.
  330. */
  331. class CV_EXPORTS PaddingLayer : public Layer
  332. {
  333. public:
  334. static Ptr<PaddingLayer> create(const LayerParams& params);
  335. };
  336. /* Activations */
  337. class CV_EXPORTS ActivationLayer : public Layer
  338. {
  339. public:
  340. virtual void forwardSlice(const float* src, float* dst, int len,
  341. size_t outPlaneSize, int cn0, int cn1) const = 0;
  342. };
  343. class CV_EXPORTS ReLULayer : public ActivationLayer
  344. {
  345. public:
  346. float negativeSlope;
  347. static Ptr<ReLULayer> create(const LayerParams &params);
  348. };
  349. class CV_EXPORTS ReLU6Layer : public ActivationLayer
  350. {
  351. public:
  352. static Ptr<ReLU6Layer> create(const LayerParams &params);
  353. };
  354. class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
  355. {
  356. public:
  357. static Ptr<Layer> create(const LayerParams& params);
  358. };
  359. class CV_EXPORTS ELULayer : public ActivationLayer
  360. {
  361. public:
  362. static Ptr<ELULayer> create(const LayerParams &params);
  363. };
  364. class CV_EXPORTS TanHLayer : public ActivationLayer
  365. {
  366. public:
  367. static Ptr<TanHLayer> create(const LayerParams &params);
  368. };
  369. class CV_EXPORTS SigmoidLayer : public ActivationLayer
  370. {
  371. public:
  372. static Ptr<SigmoidLayer> create(const LayerParams &params);
  373. };
  374. class CV_EXPORTS BNLLLayer : public ActivationLayer
  375. {
  376. public:
  377. static Ptr<BNLLLayer> create(const LayerParams &params);
  378. };
  379. class CV_EXPORTS AbsLayer : public ActivationLayer
  380. {
  381. public:
  382. static Ptr<AbsLayer> create(const LayerParams &params);
  383. };
  384. class CV_EXPORTS PowerLayer : public ActivationLayer
  385. {
  386. public:
  387. float power, scale, shift;
  388. static Ptr<PowerLayer> create(const LayerParams &params);
  389. };
  390. /* Layers used in semantic segmentation */
  391. class CV_EXPORTS CropLayer : public Layer
  392. {
  393. public:
  394. int startAxis;
  395. std::vector<int> offset;
  396. static Ptr<CropLayer> create(const LayerParams &params);
  397. };
  398. class CV_EXPORTS EltwiseLayer : public Layer
  399. {
  400. public:
  401. enum EltwiseOp
  402. {
  403. PROD = 0,
  404. SUM = 1,
  405. MAX = 2,
  406. };
  407. static Ptr<EltwiseLayer> create(const LayerParams &params);
  408. };
  409. class CV_EXPORTS BatchNormLayer : public Layer
  410. {
  411. public:
  412. bool hasWeights, hasBias;
  413. float epsilon;
  414. virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
  415. static Ptr<BatchNormLayer> create(const LayerParams &params);
  416. };
  417. class CV_EXPORTS MaxUnpoolLayer : public Layer
  418. {
  419. public:
  420. Size poolKernel;
  421. Size poolPad;
  422. Size poolStride;
  423. static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
  424. };
  425. class CV_EXPORTS ScaleLayer : public Layer
  426. {
  427. public:
  428. bool hasBias;
  429. static Ptr<ScaleLayer> create(const LayerParams& params);
  430. };
  431. class CV_EXPORTS ShiftLayer : public Layer
  432. {
  433. public:
  434. static Ptr<ShiftLayer> create(const LayerParams& params);
  435. };
  436. class CV_EXPORTS PriorBoxLayer : public Layer
  437. {
  438. public:
  439. static Ptr<PriorBoxLayer> create(const LayerParams& params);
  440. };
  441. class CV_EXPORTS ReorgLayer : public Layer
  442. {
  443. public:
  444. static Ptr<ReorgLayer> create(const LayerParams& params);
  445. };
  446. class CV_EXPORTS RegionLayer : public Layer
  447. {
  448. public:
  449. static Ptr<RegionLayer> create(const LayerParams& params);
  450. };
  451. class CV_EXPORTS DetectionOutputLayer : public Layer
  452. {
  453. public:
  454. static Ptr<DetectionOutputLayer> create(const LayerParams& params);
  455. };
  456. /**
  457. * @brief \f$ L_p \f$ - normalization layer.
  458. * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ -
  459. * normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one.
  460. * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
  461. * @param across_spatial If true, normalize an input across all non-batch dimensions.
  462. * Otherwise normalize an every channel separately.
  463. *
  464. * Across spatial:
  465. * @f[
  466. * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
  467. * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
  468. * @f]
  469. *
  470. * Channel wise normalization:
  471. * @f[
  472. * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
  473. * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
  474. * @f]
  475. *
  476. * Where `x, y` - spatial cooridnates, `c` - channel.
  477. *
  478. * An every sample in the batch is normalized separately. Optionally,
  479. * output is scaled by the trained parameters.
  480. */
  481. class NormalizeBBoxLayer : public Layer
  482. {
  483. public:
  484. float pnorm, epsilon;
  485. bool acrossSpatial;
  486. static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
  487. };
  488. /**
  489. * @brief Resize input 4-dimensional blob by nearest neghbor strategy.
  490. *
  491. * Layer is used to support TensorFlow's resize_nearest_neighbor op.
  492. */
  493. class CV_EXPORTS ResizeNearestNeighborLayer : public Layer
  494. {
  495. public:
  496. static Ptr<ResizeNearestNeighborLayer> create(const LayerParams& params);
  497. };
  498. //! @}
  499. //! @}
  500. CV__DNN_EXPERIMENTAL_NS_END
  501. }
  502. }
  503. #endif