LCOV - code coverage report
Current view: top level - ASM_SSSE3 - av1_inv_txfm_ssse3.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 32 35 91.4 %
Date: 2019-11-25 17:38:06 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : #ifndef AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
      12             : #define AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
      13             : 
      14             : #include <emmintrin.h>  // SSE2
      15             : #include <tmmintrin.h>  // SSSE3
      16             : 
      17             : #ifdef __cplusplus
      18             : extern "C" {
      19             : #endif
      20             : // out0 = _mm_mulhrs_epi16(in, w0 * 8) and out1 = _mm_mulhrs_epi16(in, w1 * 8); w0/w1/in are parenthesized so compound argument expressions are scaled as a whole.
      21             : #define btf_16_ssse3(w0, w1, in, out0, out1)      \
      22             :   do {                                            \
      23             :     const __m128i _w0 = _mm_set1_epi16((w0) * 8); \
      24             :     const __m128i _w1 = _mm_set1_epi16((w1) * 8); \
      25             :     const __m128i _in = (in);                     \
      26             :     out0 = _mm_mulhrs_epi16(_in, _w0);            \
      27             :     out1 = _mm_mulhrs_epi16(_in, _w1);            \
      28             :   } while (0)
      29             : // In-place butterfly: in0 <- _mm_adds_epi16(in0, in1), in1 <- _mm_subs_epi16(in0, in1), both computed from copies of the original inputs; in0/in1 must be assignable.
      30             : #define btf_16_adds_subs_sse2(in0, in1) \
      31             :   do {                                  \
      32             :     const __m128i _in0 = in0;           \
      33             :     const __m128i _in1 = in1;           \
      34             :     in0 = _mm_adds_epi16(_in0, _in1);   \
      35             :     in1 = _mm_subs_epi16(_in0, _in1);   \
      36             :   } while (0)
      37             : // Same results as btf_16_adds_subs_sse2 (in0 <- sum, in1 <- in0 - in1, from copied inputs); only the order of the two assignments differs (in1 is written first).
      38             : #define btf_16_subs_adds_sse2(in0, in1) \
      39             :   do {                                  \
      40             :     const __m128i _in0 = in0;           \
      41             :     const __m128i _in1 = in1;           \
      42             :     in1 = _mm_subs_epi16(_in0, _in1);   \
      43             :     in0 = _mm_adds_epi16(_in0, _in1);   \
      44             :   } while (0)
      45             : // Out-of-place butterfly: out0 <- _mm_adds_epi16(in0, in1), out1 <- _mm_subs_epi16(in0, in1); inputs are copied first, so out0/out1 may name the same objects as in0/in1.
      46             : #define btf_16_adds_subs_out_sse2(out0, out1, in0, in1) \
      47             :   do {                                                  \
      48             :     const __m128i _in0 = in0;                           \
      49             :     const __m128i _in1 = in1;                           \
      50             :     out0 = _mm_adds_epi16(_in0, _in1);                  \
      51             :     out1 = _mm_subs_epi16(_in0, _in1);                  \
      52             :   } while (0)
      53             : // Rescales in[0..size-1] in place by the sign of bit: bit < 0 multiplies by 2^(15 + bit) via _mm_mulhrs_epi16, bit > 0 shifts left by bit, bit == 0 leaves the data unchanged.
      54    65251900 :     static INLINE void round_shift_16bit_ssse3(__m128i *in, int32_t size, int32_t bit) {
      55    65251900 :         if (bit < 0) {  // per Intel's definition of _mm_mulhrs_epi16 this amounts to a rounding right shift by -bit
      56    65252900 :             const __m128i scale = _mm_set1_epi16(1 << (15 + bit));  // NOTE(review): shift count is negative if bit < -15 — confirm callers never pass that
      57   608051000 :             for (int32_t i = 0; i < size; ++i)
      58  1085600000 :                 in[i] = _mm_mulhrs_epi16(in[i], scale);
      59             :         }
      60           0 :         else if (bit > 0) {  // plain per-lane left shift, no rounding involved
      61           0 :             for (int32_t i = 0; i < size; ++i)
      62           0 :                 in[i] = _mm_slli_epi16(in[i], bit);
      63             :         }
      64    65251900 :     }
      65             : 
      66             :     // 1D itx types. Three distinct values are defined; ITX_TYPES_1D is their count.
      67             :     typedef enum ATTRIBUTE_PACKED {
      68             :         IDCT_1D,
      69             :         IADST_1D,
      70             :         IFLIPADST_1D = IADST_1D,  // alias: shares the value of IADST_1D, so it adds no new enumerator value
      71             :         IIDENTITY_1D,  // continues from the alias, i.e. IADST_1D + 1
      72             :         ITX_TYPES_1D,
      73             :     } ITX_TYPE_1D;
      74             : // Maps a tx-type index (16 initializers, array sized TX_TYPES) to a 1D itx type; presumably the vertical 1D kernel of each 2D transform type — TODO confirm against the TxType enum order.
      75             :     static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
      76             :       IDCT_1D,      IADST_1D,     IDCT_1D,      IADST_1D,
      77             :       IFLIPADST_1D, IDCT_1D,      IFLIPADST_1D, IADST_1D,
      78             :       IFLIPADST_1D, IIDENTITY_1D, IDCT_1D,      IIDENTITY_1D,
      79             :       IADST_1D,     IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
      80             :     };
      81             : // Maps a tx-type index (16 initializers, array sized TX_TYPES) to a 1D itx type; presumably the horizontal 1D kernel of each 2D transform type — TODO confirm against the TxType enum order.
      82             :     static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
      83             :       IDCT_1D,      IDCT_1D,      IADST_1D,     IADST_1D,
      84             :       IDCT_1D,      IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
      85             :       IADST_1D,     IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
      86             :       IIDENTITY_1D, IADST_1D,     IIDENTITY_1D, IFLIPADST_1D,
      87             :     };
      88             : // 8 entries, all 0x0707. get_eobx_eoby_scan_default reads the low byte as eobx and the high byte as eoby, so every row yields eobx = eoby = 7.
      89             :     DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
      90             :       0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
      91             :     };
      92             : // 16 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: rows 0-1 give 7/7, rows 2-15 give 15/15.
      93             :     DECLARE_ALIGNED(16, static const int16_t,
      94             :     av1_eob_to_eobxy_16x16_default[16]) = {
      95             : 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
      96             : 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
      97             :     };
      98             : // 32 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: row 0 gives 7/7, rows 1-3 give 15/15, rows 4-31 give 31/31.
      99             :     DECLARE_ALIGNED(16, static const int16_t,
     100             :     av1_eob_to_eobxy_32x32_default[32]) = {
     101             : 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
     102             : 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
     103             : 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
     104             : 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
     105             :     };
     106             : // 16 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: eobx is always 7; eoby is 7 for rows 0-4 and 15 for rows 5-15.
     107             :     DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
     108             :       0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
     109             :       0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
     110             :     };
     111             : // 8 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: eoby is always 7; eobx is 7 for rows 0-1 and 15 for rows 2-7.
     112             :     DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
     113             :       0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
     114             :     };
     115             : // 32 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: rows 0-1 give 7/7, rows 2-8 give 15/15, rows 9-31 give eoby 31 with eobx 15.
     116             :     DECLARE_ALIGNED(16, static const int16_t,
     117             :     av1_eob_to_eobxy_16x32_default[32]) = {
     118             : 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
     119             : 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
     120             : 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
     121             : 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
     122             :     };
     123             : // 16 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: row 0 gives 7/7, rows 1-3 give 15/15, rows 4-15 give eoby 15 with eobx 31.
     124             :     DECLARE_ALIGNED(16, static const int16_t,
     125             :     av1_eob_to_eobxy_32x16_default[16]) = {
     126             : 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
     127             : 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
     128             :     };
     129             : // 32 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: eobx is always 7; eoby is 7 for rows 0-4, 15 for rows 5-12 and 31 for rows 13-31.
     130             :     DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
     131             :       0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
     132             :       0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
     133             :       0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
     134             :       0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
     135             :     };
     136             : // 8 entries of packed (eoby << 8 | eobx) as decoded by get_eobx_eoby_scan_default: eoby is always 7; eobx is 7 for row 0, 15 for rows 1-2 and 31 for rows 3-7.
     137             :     DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
     138             :       0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
     139             :     };
     140             : // Table of 19 pointers indexed by tx_size (see get_eobx_eoby_scan_default), selecting one of the packed eobx/eoby row tables; entries 0, 5, 6, 13 and 14 are NULL.
     141             :     DECLARE_ALIGNED(16, static const int16_t *,
     142             :     av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
     143             : NULL,  // NOTE(review): NULL slots are dereferenced unchecked by get_eobx_eoby_scan_default when eob != 1 — presumably those sizes never reach it; confirm
     144             : av1_eob_to_eobxy_8x8_default,
     145             : av1_eob_to_eobxy_16x16_default,
     146             : av1_eob_to_eobxy_32x32_default,
     147             : av1_eob_to_eobxy_32x32_default,
     148             : NULL,
     149             : NULL,
     150             : av1_eob_to_eobxy_8x16_default,
     151             : av1_eob_to_eobxy_16x8_default,
     152             : av1_eob_to_eobxy_16x32_default,
     153             : av1_eob_to_eobxy_32x16_default,
     154             : av1_eob_to_eobxy_32x32_default,
     155             : av1_eob_to_eobxy_32x32_default,
     156             : NULL,
     157             : NULL,
     158             : av1_eob_to_eobxy_8x32_default,
     159             : av1_eob_to_eobxy_32x8_default,
     160             : av1_eob_to_eobxy_16x32_default,
     161             : av1_eob_to_eobxy_32x16_default,
     162             :     };
     163             : // 32-entry step table: index 0 -> 0, 1..7 -> 1, 8..15 -> 2, 16..31 -> 3. Not referenced elsewhere in this header; its consumer is outside this view.
     164             :     static const int32_t lowbd_txfm_all_1d_zeros_idx[32] = {
     165             :       0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
     166             :       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     167             :     };
     168             : 
     169             :     // Log2 of the transform block width used for eob row computation (widths of 64 are mapped to 32, i.e. 5); indexed by tx_size.
     170             :     static const int32_t tx_size_wide_log2_eob[TX_SIZES_ALL] = {
     171             :       2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
     172             :     };
     173             : // Writes *eobx / *eoby for the given tx_size and eob: eob == 1 yields (0, 0); otherwise the row (eob - 1) >> tx_size_wide_log2_eob[tx_size] of the tx_size table is unpacked.
     174    18792940 :     static INLINE void get_eobx_eoby_scan_default(int32_t *eobx, int32_t *eoby,
     175             :         TxSize tx_size, int32_t eob) {
     176    18792940 :         if (eob == 1) {  // single-coefficient case: no table lookup
     177        2765 :             *eobx = 0;
     178        2765 :             *eoby = 0;
     179        2765 :             return;
     180             :         }
     181             : 
     182    18790230 :         const int32_t tx_w_log2 = tx_size_wide_log2_eob[tx_size];
     183    18790230 :         const int32_t eob_row = (eob - 1) >> tx_w_log2;
     184    18790230 :         const int32_t eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];  // NOTE(review): table pointer is NULL for tx_size 0, 5, 6, 13, 14 and eob_row is not range-checked — confirm callers guarantee both
     185    18790230 :         *eobx = eobxy & 0xFF;  // low byte
     186    18790230 :         *eoby = eobxy >> 8;  // high byte
     187             :     }
     188             : // Read-only step table: index 0 -> 0, 1..7 -> 7, 8..15 -> 15, 16..31 -> 31; only read by the get_eobx_eoby_scan_*_identity helpers in this header, hence const like the other tables.
     189             :     static const int32_t eob_fill[32] = {
     190             :       0,  7,  7,  7,  7,  7,  7,  7,  15, 15, 15, 15, 15, 15, 15, 15,
     191             :       31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
     192             :     };
     193             : // With e = eob - 1 and w = AOMMIN(32, tx_size_wide[tx_size]) (AOMMIN presumably min): *eobx = w - 1 if e >= w - 1, else eob_fill[e]; *eoby = eob_fill[e / w].
     194     1131751 :     static INLINE void get_eobx_eoby_scan_h_identity(int32_t *eobx, int32_t *eoby,
     195             :         TxSize tx_size, int32_t eob) {
     196     1131751 :         eob -= 1;  // switch to a zero-based position
     197     1131751 :         const int32_t txfm_size_col = tx_size_wide[tx_size];
     198     1131751 :         const int32_t eobx_max = AOMMIN(32, txfm_size_col) - 1;
     199     1131751 :         *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];  // NOTE(review): eob == 0 on entry would index eob_fill[-1] — confirm callers pass eob >= 1
     200     1131751 :         const int32_t temp_eoby = eob / (eobx_max + 1);
     201     1131751 :         assert(temp_eoby < 32);  // eob_fill has 32 entries
     202     1131751 :         *eoby = eob_fill[temp_eoby];
     203     1131751 :     }
     204             : // With e = eob - 1 and h = AOMMIN(32, tx_size_high[tx_size]) (AOMMIN presumably min): *eobx = e / h; *eoby = h - 1 if e >= h - 1, else eob_fill[e].
     205     1262287 :     static INLINE void get_eobx_eoby_scan_v_identity(int32_t *eobx, int32_t *eoby,
     206             :         TxSize tx_size, int32_t eob) {
     207     1262287 :         eob -= 1;  // switch to a zero-based position
     208     1262287 :         const int32_t txfm_size_row = tx_size_high[tx_size];
     209     1262287 :         const int32_t eoby_max = AOMMIN(32, txfm_size_row) - 1;
     210     1262287 :         *eobx = eob / (eoby_max + 1);  // NOTE(review): raw quotient, not mapped through eob_fill as *eoby is in get_eobx_eoby_scan_h_identity — confirm the asymmetry is intended
     211     1262287 :         *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];  // NOTE(review): eob == 0 on entry would index eob_fill[-1] — confirm callers pass eob >= 1
     212     1262287 :     }
     213             : // Function-pointer type: takes a read-only __m128i input array, a writable __m128i output array and an int8_t cos_bit; returns nothing.
     214             :     typedef void(*transform_1d_ssse3)(const __m128i *input, __m128i *output,
     215             :         int8_t cos_bit);
     216             : // Declaration only; the definition is not in this header. NOTE(review): the _r/_w suffixes suggest separate read and write pixel buffers, each with its own stride — confirm against the implementation.
     217             :     void eb_av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input,
     218             :         uint8_t *output_r, int32_t stride_r,
     219             :         uint8_t *output_w, int32_t stride_w,
     220             :         TxType tx_type,
     221             :         TxSize tx_size, int32_t eob);
     222             : #ifdef __cplusplus
     223             : }  // extern "C"
     224             : #endif
     225             : 
     226             : #endif  // AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_

Generated by: LCOV version 1.14