LCOV - code coverage report
Current view: top level - ASM_SSE2 - EbTransforms_Intrinsic_SSE2.c
Test: coverage.info
Date: 2019-11-25 17:38:06

                   Hit   Total   Coverage
Lines:               0      44      0.0 %
Functions:           0       1      0.0 %

          Line data    Source code
       1             : /*
       2             : * Copyright(c) 2019 Intel Corporation
       3             : * SPDX-License-Identifier: BSD-2-Clause-Patent
       4             : */
       5             : 
       6             : #include "EbDefinitions.h"
       7             : #include "EbIntrinMacros16bit_SSE2.h"
       8             : #include <emmintrin.h>
       9             : 
      10             : /*****************************
      11             : * Defines
      12             : *****************************/
      13             : 
      14             : #define MACRO_TRANS_2MAC_NO_SAVE(XMM_1, XMM_2, XMM_3, XMM_4, XMM_OFFSET, OFFSET1, OFFSET2, SHIFT)\
      15             :     XMM_3 = _mm_load_si128((__m128i *)(TransformAsmConst + OFFSET1));\
      16             :     XMM_4 = _mm_load_si128((__m128i *)(TransformAsmConst + OFFSET2));\
      17             :     XMM_3 = _mm_madd_epi16(XMM_3, XMM_1);\
      18             :     XMM_4 = _mm_madd_epi16(XMM_4, XMM_2);\
      19             :     XMM_3 = _mm_srai_epi32(_mm_add_epi32(XMM_4, _mm_add_epi32(XMM_3, XMM_OFFSET)), SHIFT);\
      20             :     XMM_3 = _mm_packs_epi32(XMM_3, XMM_3);
      21             : 
      22             : #define MACRO_TRANS_2MAC(XMM_1, XMM_2, XMM_3, XMM_4, XMM_OFFSET, OFFSET1, OFFSET2, SHIFT, OFFSET3)\
      23             :     MACRO_TRANS_2MAC_NO_SAVE(XMM_1, XMM_2, XMM_3, XMM_4, XMM_OFFSET, OFFSET1, OFFSET2, SHIFT)\
      24             :     _mm_storel_epi64((__m128i *)(transform_coefficients+OFFSET3), XMM_3);
      25             : 
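/*
 * A minimal scalar sketch of what one 32-bit lane of MACRO_TRANS_2MAC_NO_SAVE
 * computes, assuming only the documented semantics of _mm_madd_epi16
 * (multiply adjacent signed 16-bit pairs and sum each pair into a 32-bit
 * lane), _mm_srai_epi32 (arithmetic right shift) and _mm_packs_epi32 (signed
 * saturation to 16 bits). The function name and parameters are illustrative;
 * nothing below exists in this file.
 */
static inline int16_t trans_2mac_lane_sketch(
    int16_t x1_lo, int16_t x1_hi,   /* one 16-bit pair of XMM_1 */
    int16_t x2_lo, int16_t x2_hi,   /* the matching pair of XMM_2 */
    int16_t c1_lo, int16_t c1_hi,   /* pair loaded from TransformAsmConst + OFFSET1 */
    int16_t c2_lo, int16_t c2_hi,   /* pair loaded from TransformAsmConst + OFFSET2 */
    int32_t offset, int shift)      /* rounding lane of XMM_OFFSET and SHIFT */
{
    int32_t acc = (int32_t)c1_lo * x1_lo + (int32_t)c1_hi * x1_hi   /* madd with XMM_1 */
                + (int32_t)c2_lo * x2_lo + (int32_t)c2_hi * x2_hi   /* madd with XMM_2 */
                + offset;
    acc >>= shift;                                                  /* _mm_srai_epi32 */
    if (acc >  32767) acc =  32767;                                 /* _mm_packs_epi32 */
    if (acc < -32768) acc = -32768;
    return (int16_t)acc;
}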
      26             : #define TRANS8x8_OFFSET_83_36    0
      27             : #define TRANS8x8_OFFSET_36_N83  (8 + TRANS8x8_OFFSET_83_36)
      28             : #define TRANS8x8_OFFSET_89_75   (8 + TRANS8x8_OFFSET_36_N83)
      29             : #define TRANS8x8_OFFSET_50_18   (8 + TRANS8x8_OFFSET_89_75)
      30             : #define TRANS8x8_OFFSET_75_N18  (8 + TRANS8x8_OFFSET_50_18)
      31             : #define TRANS8x8_OFFSET_N89_N50 (8 + TRANS8x8_OFFSET_75_N18)
      32             : #define TRANS8x8_OFFSET_50_N89  (8 + TRANS8x8_OFFSET_N89_N50)
      33             : #define TRANS8x8_OFFSET_18_75   (8 + TRANS8x8_OFFSET_50_N89)
      34             : #define TRANS8x8_OFFSET_18_N50  (8 + TRANS8x8_OFFSET_18_75)
      35             : #define TRANS8x8_OFFSET_75_N89  (8 + TRANS8x8_OFFSET_18_N50)
      36             : #define TRANS8x8_OFFSET_256     (8 + TRANS8x8_OFFSET_75_N89)
      37             : #define TRANS8x8_OFFSET_64_64   (8 + TRANS8x8_OFFSET_256)
      38             : #define TRANS8x8_OFFSET_N18_N50 (8 + TRANS8x8_OFFSET_64_64)
      39             : #define TRANS8x8_OFFSET_N75_N89 (8 + TRANS8x8_OFFSET_N18_N50)
      40             : #define TRANS8x8_OFFSET_N36_N83 (8 + TRANS8x8_OFFSET_N75_N89)
      41             : #define TRANS8x8_OFFSET_N83_N36 (8 + TRANS8x8_OFFSET_N36_N83)
      42             : #define TRANS8x8_OFFSET_36_83   (8 + TRANS8x8_OFFSET_N83_N36)
      43             : #define TRANS8x8_OFFSET_50_89   (8 + TRANS8x8_OFFSET_36_83)
      44             : #define TRANS8x8_OFFSET_18_N75  (8 + TRANS8x8_OFFSET_50_89)
      45             : #define TRANS8x8_OFFSET_N64_64  (8 + TRANS8x8_OFFSET_18_N75)
      46             : #define TRANS8x8_OFFSET_64_N64  (8 + TRANS8x8_OFFSET_N64_64)
      47             : #define TRANS8x8_OFFSET_N75_N18 (8 + TRANS8x8_OFFSET_64_N64)
      48             : #define TRANS8x8_OFFSET_89_N50  (8 + TRANS8x8_OFFSET_N75_N18)
      49             : #define TRANS8x8_OFFSET_83_N36  (8 + TRANS8x8_OFFSET_89_N50)
      50             : #define TRANS8x8_OFFSET_N36_83  (8 + TRANS8x8_OFFSET_83_N36)
      51             : #define TRANS8x8_OFFSET_N83_36  (8 + TRANS8x8_OFFSET_N36_83)
      52             : #define TRANS8x8_OFFSET_89_N75  (8 + TRANS8x8_OFFSET_N83_36)
      53             : #define TRANS8x8_OFFSET_50_N18  (8 + TRANS8x8_OFFSET_89_N75)
      54             : 
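/*
 * The offsets above index an int16_t coefficient table (the macros call it
 * TransformAsmConst; it is not defined in this file). Consecutive names are
 * 8 elements apart, i.e. exactly one 16-byte __m128i row, so each name
 * selects one aligned row for _mm_load_si128(), and the name appears to spell
 * out the coefficient pair repeated across that row (e.g.
 * TRANS8x8_OFFSET_89_75 -> the pair {89, 75}). A C11 sanity sketch:
 */
_Static_assert(TRANS8x8_OFFSET_36_N83 - TRANS8x8_OFFSET_83_36 == 8,
               "named offsets advance by one 8 x int16_t row");
_Static_assert(8 * sizeof(int16_t) == sizeof(__m128i),
               "a row of 8 int16_t is one 16-byte vector");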
      55             : #define MACRO_CALC_EVEN_ODD(XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8)\
      56             :     even0 = _mm_add_epi16(XMM1, XMM8);\
      57             :     even1 = _mm_add_epi16(XMM2, XMM7);\
      58             :     even2 = _mm_add_epi16(XMM3, XMM6);\
      59             :     even3 = _mm_add_epi16(XMM4, XMM5);\
      60             :     odd0 = _mm_sub_epi16(XMM1, XMM8);\
      61             :     odd1 = _mm_sub_epi16(XMM2, XMM7);\
      62             :     odd2 = _mm_sub_epi16(XMM3, XMM6);\
      63             :     odd3 = _mm_sub_epi16(XMM4, XMM5);
      64             : 
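/*
 * MACRO_CALC_EVEN_ODD is the first butterfly stage of an 8-point transform:
 * assuming XMM1..XMM8 hold the eight inputs x[0]..x[7], each 16-bit lane gets
 * even[i] = x[i] + x[7 - i] and odd[i] = x[i] - x[7 - i] for i = 0..3.
 * A scalar sketch of the same step (the array form is illustrative; the macro
 * itself works on eight separate __m128i registers):
 */
static inline void calc_even_odd_sketch(const int16_t x[8],
                                        int16_t even[4], int16_t odd[4])
{
    for (int i = 0; i < 4; i++) {
        even[i] = (int16_t)(x[i] + x[7 - i]);  /* _mm_add_epi16 pairing */
        odd[i]  = (int16_t)(x[i] - x[7 - i]);  /* _mm_sub_epi16 pairing */
    }
}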
      65             : #define MACRO_TRANS_4MAC_NO_SAVE(XMM1, XMM2, XMM3, XMM4, XMM_RET, XMM_OFFSET, MEM, OFFSET1, OFFSET2, SHIFT)\
      66             :     XMM_RET = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_madd_epi16(XMM1, _mm_load_si128((__m128i*)(MEM+OFFSET1))),\
      67             :                                                                          _mm_madd_epi16(XMM3, _mm_load_si128((__m128i*)(MEM+OFFSET2)))), XMM_OFFSET), SHIFT),\
      68             :                               _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_madd_epi16(XMM2, _mm_load_si128((__m128i*)(MEM+OFFSET1))),\
      69             :                                                                          _mm_madd_epi16(XMM4, _mm_load_si128((__m128i*)(MEM+OFFSET2)))), XMM_OFFSET), SHIFT));
      70             : 
      71             : #define MACRO_TRANS_8MAC(XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM_OFST, MEM, OFST1, OFST2, OFST3, OFST4, SHIFT, INSTR, DST, OFST5)\
      72             :     sum1 = _mm_add_epi32(_mm_madd_epi16(XMM1, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM2, _mm_loadu_si128((__m128i *)(MEM + OFST2))));\
      73             :     sum2 = _mm_add_epi32(_mm_madd_epi16(XMM3, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM4, _mm_loadu_si128((__m128i *)(MEM + OFST4))));\
      74             :     sum1 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum1, sum2)), SHIFT);\
      75             :     sum3 = _mm_add_epi32(_mm_madd_epi16(XMM5, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM6, _mm_loadu_si128((__m128i *)(MEM + OFST2))));\
      76             :     sum4 = _mm_add_epi32(_mm_madd_epi16(XMM7, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM8, _mm_loadu_si128((__m128i *)(MEM + OFST4))));\
      77             :     sum3 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum3, sum4)), SHIFT);\
      78             :     sum = _mm_packs_epi32(sum1, sum3);\
      79             :     INSTR((__m128i *)(DST + OFST5), sum);
      80             : 
      81             : #define MACRO_TRANS_8MAC_PF_N2(XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM_OFST, MEM, OFST1, OFST2, OFST3, OFST4, SHIFT, INSTR, DST, OFST5)\
      82             :     sum1 = _mm_add_epi32(_mm_madd_epi16(XMM1, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM2, _mm_loadu_si128((__m128i *)(MEM + OFST2))));\
      83             :     sum2 = _mm_add_epi32(_mm_madd_epi16(XMM3, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM4, _mm_loadu_si128((__m128i *)(MEM + OFST4))));\
      84             :     sum1 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum1, sum2)), SHIFT);\
      85             :     /*sum3 = _mm_add_epi32(_mm_madd_epi16(XMM5, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM6, _mm_loadu_si128((__m128i *)(MEM + OFST2))));*/\
      86             :     /*sum4 = _mm_add_epi32(_mm_madd_epi16(XMM7, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM8, _mm_loadu_si128((__m128i *)(MEM + OFST4))));*/\
      87             :     /*sum3 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum3, sum4)), SHIFT);*/\
      88             :     /*sum = _mm_packs_epi32(sum1, sum3);*/\
      89             :     sum = _mm_packs_epi32(sum1, sum1);\
      90             :     INSTR((__m128i *)(DST + OFST5), sum);
      91             : #define MACRO_TRANS_8MAC_PF_N4(XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM_OFST, MEM, OFST1, OFST2, OFST3, OFST4, SHIFT, INSTR, DST, OFST5)\
      92             :     sum1 = _mm_add_epi32(_mm_madd_epi16(XMM1, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM2, _mm_loadu_si128((__m128i *)(MEM + OFST2))));\
      93             :     sum2 = _mm_add_epi32(_mm_madd_epi16(XMM3, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM4, _mm_loadu_si128((__m128i *)(MEM + OFST4))));\
      94             :     sum1 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum1, sum2)), SHIFT);\
      95             :     /*sum3 = _mm_add_epi32(_mm_madd_epi16(XMM5, _mm_loadu_si128((__m128i *)(MEM + OFST1))), _mm_madd_epi16(XMM6, _mm_loadu_si128((__m128i *)(MEM + OFST2))));*/\
      96             :     /*sum4 = _mm_add_epi32(_mm_madd_epi16(XMM7, _mm_loadu_si128((__m128i *)(MEM + OFST3))), _mm_madd_epi16(XMM8, _mm_loadu_si128((__m128i *)(MEM + OFST4))));*/\
      97             :     /*sum3 = _mm_srai_epi32(_mm_add_epi32(XMM_OFST, _mm_add_epi32(sum3, sum4)), SHIFT);*/\
      98             :     /*sum = _mm_packs_epi32(sum1, sum3);*/\
      99             :     sum = _mm_packs_epi32(sum1, sum1);\
     100             :     INSTR((__m128i *)(DST + OFST5), sum);
     101             : 
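/*
 * The _PF_N2 and _PF_N4 variants above keep only the first half of the work
 * done by MACRO_TRANS_8MAC: the sum3/sum4 multiply-accumulates for the upper
 * four outputs are commented out and the final pack duplicates sum1,
 *
 *     sum = _mm_packs_epi32(sum1, sum1);   // full macro: _mm_packs_epi32(sum1, sum3)
 *
 * so only the low four 16-bit results of each store are meaningful. "PF"
 * presumably stands for the same "Pfreq" (partial-frequency) pruning used by
 * the transpose helper at the end of this file; the caller is assumed to zero
 * or ignore the coefficients that are no longer computed.
 */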
     102             : #ifdef __GNUC__
     103             : #ifndef __cplusplus
     104             : __attribute__((visibility("hidden")))
     105             : #endif
     106             : #endif
     107             : EB_ALIGN(16) const int16_t dst_transform_asm_const_sse2[] = {
     108             :     1, 0, 1, 0, 1, 0, 1, 0,
     109             :     29, 55, 29, 55, 29, 55, 29, 55,
     110             :     74, 84, 74, 84, 74, 84, 74, 84,
     111             :     84, -29, 84, -29, 84, -29, 84, -29,
     112             :     -74, 55, -74, 55, -74, 55, -74, 55,
     113             :     55, -84, 55, -84, 55, -84, 55, -84,
     114             :     74, -29, 74, -29, 74, -29, 74, -29,
     115             :     37, 37, 37, 37, 37, 37, 37, 37,
     116             :     74, 74, 74, 74, 74, 74, 74, 74,
     117             :     0, -37, 0, -37, 0, -37, 0, -37,
     118             :     0, -74, 0, -74, 0, -74, 0, -74,
     119             :     //74,    0,   74,    0,   74,    0,   74,    0,
     120             :     //55,  -29,   55,  -29,   55,  -29,   55,  -29,
     121             : };
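/*
 * The pairs above appear to be drawn from the 4-point DST-VII basis that HEVC
 * uses for 4x4 intra luma blocks, {29, 55, 74, 84}, pre-interleaved so that a
 * single aligned load gives _mm_madd_epi16 the same coefficient pair in every
 * lane. For reference, a plain scalar forward 4-point DST (a sketch, not code
 * from this file; the rounding offset and shift depend on the pass and bit
 * depth, so they are left as parameters):
 */
static inline void fwd_dst4_sketch(const int16_t src[4], int16_t dst[4],
                                   int32_t add, int shift)
{
    static const int16_t T[4][4] = {
        { 29,  55,  74,  84 },
        { 74,  74,   0, -74 },
        { 84, -29, -74,  55 },
        { 55, -84,  74, -29 },
    };
    for (int i = 0; i < 4; i++) {
        int32_t sum = add;
        for (int j = 0; j < 4; j++)
            sum += (int32_t)T[i][j] * src[j];
        dst[i] = (int16_t)(sum >> shift);   /* callers saturate as needed */
    }
}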
     122             : 
     123             : #ifdef __GNUC__
     124             : #ifndef __cplusplus
     125             : __attribute__((visibility("hidden")))
     126             : #endif
     127             : #endif
     128             : EB_ALIGN(16) const int16_t inv_transform_asm_const_sse2[] = {
     129             :     2, 0, 2, 0, 2, 0, 2, 0,
     130             :     4, 0, 4, 0, 4, 0, 4, 0,
     131             :     8, 0, 8, 0, 8, 0, 8, 0,
     132             :     9, 0, 9, 0, 9, 0, 9, 0,
     133             :     64, 0, 64, 0, 64, 0, 64, 0,
     134             :     256, 0, 256, 0, 256, 0, 256, 0,
     135             :     512, 0, 512, 0, 512, 0, 512, 0,
     136             :     1024, 0, 1024, 0, 1024, 0, 1024, 0,
     137             :     2048, 0, 2048, 0, 2048, 0, 2048, 0,
     138             :     7, 0, 0, 0, 0, 0, 0, 0,
     139             :     12, 0, 0, 0, 0, 0, 0, 0,
     140             :     64, 64, 64, 64, 64, 64, 64, 64,
     141             :     90, 57, 90, 57, 90, 57, 90, 57,
     142             :     89, 50, 89, 50, 89, 50, 89, 50,
     143             :     87, 43, 87, 43, 87, 43, 87, 43,
     144             :     83, 36, 83, 36, 83, 36, 83, 36,
     145             :     80, 25, 80, 25, 80, 25, 80, 25,
     146             :     75, 18, 75, 18, 75, 18, 75, 18,
     147             :     70, 9, 70, 9, 70, 9, 70, 9,
     148             :     64, -64, 64, -64, 64, -64, 64, -64,
     149             :     87, -80, 87, -80, 87, -80, 87, -80,
     150             :     75, -89, 75, -89, 75, -89, 75, -89,
     151             :     57, -90, 57, -90, 57, -90, 57, -90,
     152             :     36, -83, 36, -83, 36, -83, 36, -83,
     153             :     9, -70, 9, -70, 9, -70, 9, -70,
     154             :     -18, -50, -18, -50, -18, -50, -18, -50,
     155             :     -43, -25, -43, -25, -43, -25, -43, -25,
     156             :     80, -25, 80, -25, 80, -25, 80, -25,
     157             :     50, 18, 50, 18, 50, 18, 50, 18,
     158             :     9, 57, 9, 57, 9, 57, 9, 57,
     159             :     -36, 83, -36, 83, -36, 83, -36, 83,
     160             :     -70, 90, -70, 90, -70, 90, -70, 90,
     161             :     -89, 75, -89, 75, -89, 75, -89, 75,
     162             :     -87, 43, -87, 43, -87, 43, -87, 43,
     163             :     70, 90, 70, 90, 70, 90, 70, 90,
     164             :     18, 75, 18, 75, 18, 75, 18, 75,
     165             :     -43, 25, -43, 25, -43, 25, -43, 25,
     166             :     -83, -36, -83, -36, -83, -36, -83, -36,
     167             :     -87, -80, -87, -80, -87, -80, -87, -80,
     168             :     -50, -89, -50, -89, -50, -89, -50, -89,
     169             :     9, -57, 9, -57, 9, -57, 9, -57,
     170             :     57, -9, 57, -9, 57, -9, 57, -9,
     171             :     -18, -75, -18, -75, -18, -75, -18, -75,
     172             :     -80, -87, -80, -87, -80, -87, -80, -87,
     173             :     -25, 43, -25, 43, -25, 43, -25, 43,
     174             :     50, 89, 50, 89, 50, 89, 50, 89,
     175             :     90, 70, 90, 70, 90, 70, 90, 70,
     176             :     43, -87, 43, -87, 43, -87, 43, -87,
     177             :     -50, -18, -50, -18, -50, -18, -50, -18,
     178             :     -90, 70, -90, 70, -90, 70, -90, 70,
     179             :     57, 9, 57, 9, 57, 9, 57, 9,
     180             :     89, -75, 89, -75, 89, -75, 89, -75,
     181             :     25, -80, 25, -80, 25, -80, 25, -80,
     182             :     25, 43, 25, 43, 25, 43, 25, 43,
     183             :     -75, 89, -75, 89, -75, 89, -75, 89,
     184             :     -70, 9, -70, 9, -70, 9, -70, 9,
     185             :     90, -57, 90, -57, 90, -57, 90, -57,
     186             :     18, 50, 18, 50, 18, 50, 18, 50,
     187             :     -80, 87, -80, 87, -80, 87, -80, 87,
     188             :     9, 70, 9, 70, 9, 70, 9, 70,
     189             :     -89, -50, -89, -50, -89, -50, -89, -50,
     190             :     -25, -80, -25, -80, -25, -80, -25, -80,
     191             :     43, 87, 43, 87, 43, 87, 43, 87,
     192             :     -75, -18, -75, -18, -75, -18, -75, -18,
     193             :     -57, -90, -57, -90, -57, -90, -57, -90,
     194             :     -9, -70, -9, -70, -9, -70, -9, -70,
     195             :     25, 80, 25, 80, 25, 80, 25, 80,
     196             :     -43, -87, -43, -87, -43, -87, -43, -87,
     197             :     57, 90, 57, 90, 57, 90, 57, 90,
     198             :     -25, -43, -25, -43, -25, -43, -25, -43,
     199             :     70, -9, 70, -9, 70, -9, 70, -9,
     200             :     -90, 57, -90, 57, -90, 57, -90, 57,
     201             :     80, -87, 80, -87, 80, -87, 80, -87,
     202             :     -43, 87, -43, 87, -43, 87, -43, 87,
     203             :     90, -70, 90, -70, 90, -70, 90, -70,
     204             :     -57, -9, -57, -9, -57, -9, -57, -9,
     205             :     -25, 80, -25, 80, -25, 80, -25, 80,
     206             :     -57, 9, -57, 9, -57, 9, -57, 9,
     207             :     80, 87, 80, 87, 80, 87, 80, 87,
     208             :     25, -43, 25, -43, 25, -43, 25, -43,
     209             :     -90, -70, -90, -70, -90, -70, -90, -70,
     210             :     -70, -90, -70, -90, -70, -90, -70, -90,
     211             :     43, -25, 43, -25, 43, -25, 43, -25,
     212             :     87, 80, 87, 80, 87, 80, 87, 80,
     213             :     -9, 57, -9, 57, -9, 57, -9, 57,
     214             :     -80, 25, -80, 25, -80, 25, -80, 25,
     215             :     -9, -57, -9, -57, -9, -57, -9, -57,
     216             :     70, -90, 70, -90, 70, -90, 70, -90,
     217             :     87, -43, 87, -43, 87, -43, 87, -43,
     218             :     -87, 80, -87, 80, -87, 80, -87, 80,
     219             :     -57, 90, -57, 90, -57, 90, -57, 90,
     220             :     -9, 70, -9, 70, -9, 70, -9, 70,
     221             :     43, 25, 43, 25, 43, 25, 43, 25,
     222             :     -90, -57, -90, -57, -90, -57, -90, -57,
     223             :     -87, -43, -87, -43, -87, -43, -87, -43,
     224             :     -80, -25, -80, -25, -80, -25, -80, -25,
     225             :     -70, -9, -70, -9, -70, -9, -70, -9,
     226             :     90, 61, 90, 61, 90, 61, 90, 61,
     227             :     90, 54, 90, 54, 90, 54, 90, 54,
     228             :     88, 46, 88, 46, 88, 46, 88, 46,
     229             :     85, 38, 85, 38, 85, 38, 85, 38,
     230             :     82, 31, 82, 31, 82, 31, 82, 31,
     231             :     78, 22, 78, 22, 78, 22, 78, 22,
     232             :     73, 13, 73, 13, 73, 13, 73, 13,
     233             :     67, 4, 67, 4, 67, 4, 67, 4,
     234             :     90, -73, 90, -73, 90, -73, 90, -73,
     235             :     82, -85, 82, -85, 82, -85, 82, -85,
     236             :     67, -90, 67, -90, 67, -90, 67, -90,
     237             :     46, -88, 46, -88, 46, -88, 46, -88,
     238             :     22, -78, 22, -78, 22, -78, 22, -78,
     239             :     -4, -61, -4, -61, -4, -61, -4, -61,
     240             :     -31, -38, -31, -38, -31, -38, -31, -38,
     241             :     -54, -13, -54, -13, -54, -13, -54, -13,
     242             :     88, -46, 88, -46, 88, -46, 88, -46,
     243             :     67, -4, 67, -4, 67, -4, 67, -4,
     244             :     31, 38, 31, 38, 31, 38, 31, 38,
     245             :     -13, 73, -13, 73, -13, 73, -13, 73,
     246             :     -54, 90, -54, 90, -54, 90, -54, 90,
     247             :     -82, 85, -82, 85, -82, 85, -82, 85,
     248             :     -90, 61, -90, 61, -90, 61, -90, 61,
     249             :     -78, 22, -78, 22, -78, 22, -78, 22,
     250             :     85, 82, 85, 82, 85, 82, 85, 82,
     251             :     46, 88, 46, 88, 46, 88, 46, 88,
     252             :     -13, 54, -13, 54, -13, 54, -13, 54,
     253             :     -67, -4, -67, -4, -67, -4, -67, -4,
     254             :     -90, -61, -90, -61, -90, -61, -90, -61,
     255             :     -73, -90, -73, -90, -73, -90, -73, -90,
     256             :     -22, -78, -22, -78, -22, -78, -22, -78,
     257             :     38, -31, 38, -31, 38, -31, 38, -31,
     258             :     22, -46, 22, -46, 22, -46, 22, -46,
     259             :     -54, -90, -54, -90, -54, -90, -54, -90,
     260             :     -90, -67, -90, -67, -90, -67, -90, -67,
     261             :     -61, 4, -61, 4, -61, 4, -61, 4,
     262             :     13, 73, 13, 73, 13, 73, 13, 73,
     263             :     78, 88, 78, 88, 78, 88, 78, 88,
     264             :     78, -88, 78, -88, 78, -88, 78, -88,
     265             :     -82, 31, -82, 31, -82, 31, -82, 31,
     266             :     -73, 90, -73, 90, -73, 90, -73, 90,
     267             :     13, 54, 13, 54, 13, 54, 13, 54,
     268             :     85, -38, 85, -38, 85, -38, 85, -38,
     269             :     -22, -46, -22, -46, -22, -46, -22, -46,
     270             :     73, -13, 73, -13, 73, -13, 73, -13,
     271             :     -31, 82, -31, 82, -31, 82, -31, 82,
     272             :     -38, 85, -38, 85, -38, 85, -38, 85,
     273             :     -90, 54, -90, 54, -90, 54, -90, 54,
     274             :     67, 90, 67, 90, 67, 90, 67, 90,
     275             :     -54, 13, -54, 13, -54, 13, -54, 13,
     276             :     -78, -88, -78, -88, -78, -88, -78, -88,
     277             :     -22, 46, -22, 46, -22, 46, -22, 46,
     278             :     -90, -73, -90, -73, -90, -73, -90, -73,
     279             :     4, -61, 4, -61, 4, -61, 4, -61,
     280             :     61, -4, 61, -4, 61, -4, 61, -4,
     281             :     -46, 22, -46, 22, -46, 22, -46, 22,
     282             :     82, 85, 82, 85, 82, 85, 82, 85,
     283             :     31, -38, 31, -38, 31, -38, 31, -38,
     284             :     -88, -78, -88, -78, -88, -78, -88, -78,
     285             :     90, 67, 90, 67, 90, 67, 90, 67,
     286             :     54, -90, 54, -90, 54, -90, 54, -90,
     287             :     -85, 38, -85, 38, -85, 38, -85, 38,
     288             :     -4, 67, -4, 67, -4, 67, -4, 67,
     289             :     88, -78, 88, -78, 88, -78, 88, -78,
     290             :     -46, -22, -46, -22, -46, -22, -46, -22,
     291             :     -61, 90, -61, 90, -61, 90, -61, 90,
     292             :     82, -31, 82, -31, 82, -31, 82, -31,
     293             :     13, -73, 13, -73, 13, -73, 13, -73,
     294             :     46, 22, 46, 22, 46, 22, 46, 22,
     295             :     -90, 67, -90, 67, -90, 67, -90, 67,
     296             :     38, -85, 38, -85, 38, -85, 38, -85,
     297             :     54, 13, 54, 13, 54, 13, 54, 13,
     298             :     -90, 73, -90, 73, -90, 73, -90, 73,
     299             :     31, -82, 31, -82, 31, -82, 31, -82,
     300             :     61, 4, 61, 4, 61, 4, 61, 4,
     301             :     -88, 78, -88, 78, -88, 78, -88, 78,
     302             :     38, 85, 38, 85, 38, 85, 38, 85,
     303             :     -4, 61, -4, 61, -4, 61, -4, 61,
     304             :     -67, -90, -67, -90, -67, -90, -67, -90,
     305             :     -31, -82, -31, -82, -31, -82, -31, -82,
     306             :     -78, -22, -78, -22, -78, -22, -78, -22,
     307             :     90, 73, 90, 73, 90, 73, 90, 73,
     308             :     -61, -90, -61, -90, -61, -90, -61, -90,
     309             :     4, 67, 4, 67, 4, 67, 4, 67,
     310             :     54, -13, 54, -13, 54, -13, 54, -13,
     311             :     -88, -46, -88, -46, -88, -46, -88, -46,
     312             :     85, -82, 85, -82, 85, -82, 85, -82,
     313             :     -38, -31, -38, -31, -38, -31, -38, -31,
     314             :     -13, -73, -13, -73, -13, -73, -13, -73,
     315             :     22, 78, 22, 78, 22, 78, 22, 78,
     316             :     -46, -88, -46, -88, -46, -88, -46, -88,
     317             :     54, 90, 54, 90, 54, 90, 54, 90
     318             : };
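/*
 * Layout note (a reading of the data, not a statement from the source): the
 * leading single-value rows such as {2, 0, 2, 0, ...} through {2048, 0, ...}
 * read as one 32-bit constant per lane when loaded as __m128i, i.e.
 * ready-made rounding offsets for the various shift amounts, while the later
 * rows hold interleaved coefficient pairs (e.g. {64, 64}, {90, 57},
 * {89, 50}, ...) for _mm_madd_epi16 in the inverse-transform butterflies.
 */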
     319             : 
     320             : #ifdef __GNUC__
     321             : #ifndef __cplusplus
     322             : __attribute__((visibility("hidden")))
     323             : #endif
     324             : #endif
     325             : EB_ALIGN(16) const int16_t inv_dst_transform_asm_const_sse2[] = {
     326             :     64, 0, 64, 0, 64, 0, 64, 0,
     327             :     29, 84, 29, 84, 29, 84, 29, 84,
     328             :     74, 55, 74, 55, 74, 55, 74, 55,
     329             :     55, -29, 55, -29, 55, -29, 55, -29,
     330             :     74, -84, 74, -84, 74, -84, 74, -84,
     331             :     74, -74, 74, -74, 74, -74, 74, -74,
     332             :     0, 74, 0, 74, 0, 74, 0, 74,
     333             :     84, 55, 84, 55, 84, 55, 84, 55,
     334             :     -74, -29, -74, -29, -74, -29, -74, -29,
     335             : };
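/*
 * Reading of the table above (not stated in the source): the leading
 * {64, 0, ...} row loads as the 32-bit constant 64 per lane, presumably the
 * +64 rounding offset of a shift-by-7 pass, and the remaining pairs
 * interleave the transposed (inverse) 4-point DST basis two taps at a time
 * for _mm_madd_epi16 -- e.g. {29, 84} and {74, 55} are the 1st/3rd and
 * 2nd/4th taps of the first inverse basis vector {29, 74, 84, 55}.
 */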
     336             : 
     337             : // Coefficients for inverse 32-point transform
     338             : EB_EXTERN const int16_t coeff_tbl2[48 * 8] =
     339             : {
     340             :     64, 89, 64, 75, 64, 50, 64, 18, 64, -18, 64, -50, 64, -75, 64, -89,
     341             :     83, 75, 36, -18, -36, -89, -83, -50, -83, 50, -36, 89, 36, 18, 83, -75,
     342             :     64, 50, -64, -89, -64, 18, 64, 75, 64, -75, -64, -18, -64, 89, 64, -50,
     343             :     36, 18, -83, -50, 83, 75, -36, -89, -36, 89, 83, -75, -83, 50, 36, -18,
     344             :     90, 87, 87, 57, 80, 9, 70, -43, 57, -80, 43, -90, 25, -70, 9, -25,
     345             :     80, 70, 9, -43, -70, -87, -87, 9, -25, 90, 57, 25, 90, -80, 43, -57,
     346             :     57, 43, -80, -90, -25, 57, 90, 25, -9, -87, -87, 70, 43, 9, 70, -80,
     347             :     25, 9, -70, -25, 90, 43, -80, -57, 43, 70, 9, -80, -57, 87, 87, -90,
     348             :     90, 90, 90, 82, 88, 67, 85, 46, 82, 22, 78, -4, 73, -31, 67, -54,
     349             :     61, -73, 54, -85, 46, -90, 38, -88, 31, -78, 22, -61, 13, -38, 4, -13,
     350             :     88, 85, 67, 46, 31, -13, -13, -67, -54, -90, -82, -73, -90, -22, -78, 38,
     351             :     -46, 82, -4, 88, 38, 54, 73, -4, 90, -61, 85, -90, 61, -78, 22, -31,
     352             :     82, 78, 22, -4, -54, -82, -90, -73, -61, 13, 13, 85, 78, 67, 85, -22,
     353             :     31, -88, -46, -61, -90, 31, -67, 90, 4, 54, 73, -38, 88, -90, 38, -46,
     354             :     73, 67, -31, -54, -90, -78, -22, 38, 78, 85, 67, -22, -38, -90, -90, 4,
     355             :     -13, 90, 82, 13, 61, -88, -46, -31, -88, 82, -4, 46, 85, -73, 54, -61,
     356             :     61, 54, -73, -85, -46, -4, 82, 88, 31, -46, -88, -61, -13, 82, 90, 13,
     357             :     -4, -90, -90, 38, 22, 67, 85, -78, -38, -22, -78, 90, 54, -31, 67, -73,
     358             :     46, 38, -90, -88, 38, 73, 54, -4, -90, -67, 31, 90, 61, -46, -88, -31,
     359             :     22, 85, 67, -78, -85, 13, 13, 61, 73, -90, -82, 54, 4, 22, 78, -82,
     360             :     31, 22, -78, -61, 90, 85, -61, -90, 4, 73, 54, -38, -88, -4, 82, 46,
     361             :     -38, -78, -22, 90, 73, -82, -90, 54, 67, -13, -13, -31, -46, 67, 85, -88,
     362             :     13, 4, -38, -13, 61, 22, -78, -31, 88, 38, -90, -46, 85, 54, -73, -61,
     363             :     54, 67, -31, -73, 4, 78, 22, -82, -46, 85, 67, -88, -82, 90, 90, -90
     364             : };
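/*
 * Layout note (a reading of the table, not a statement from the source): each
 * 16-bit pair in coeff_tbl2 is meant for _mm_madd_epi16 against a register
 * holding two interleaved input coefficients, so one 32-bit lane yields
 *
 *     lane = (int32_t)c0 * x0 + (int32_t)c1 * x1;
 *
 * one partial butterfly sum of the inverse transform. The first row pairs the
 * even coefficient 64 with the odd 8-point coefficients
 * {89, 75, 50, 18, -18, -50, -75, -89}; later rows cover the 16- and 32-point
 * stages (90, 87, 80, ... and 90, 88, 85, ...).
 */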
     365             : 
     366             : #ifdef __GNUC__
     367             : #ifndef __cplusplus
     368             : __attribute__((visibility("hidden")))
     369             : #endif
     370             : #endif
     371             : EB_EXTERN const int16_t coeff_tbl[48 * 8] =
     372             : {
     373             :     64, 64, 89, 75, 83, 36, 75, -18, 64, -64, 50, -89, 36, -83, 18, -50,
     374             :     64, 64, 50, 18, -36, -83, -89, -50, -64, 64, 18, 75, 83, -36, 75, -89,
     375             :     64, 64, -18, -50, -83, -36, 50, 89, 64, -64, -75, -18, -36, 83, 89, -75,
     376             :     64, 64, -75, -89, 36, 83, 18, -75, -64, 64, 89, -50, -83, 36, 50, -18,
     377             :     90, 87, 87, 57, 80, 9, 70, -43, 57, -80, 43, -90, 25, -70, 9, -25,
     378             :     80, 70, 9, -43, -70, -87, -87, 9, -25, 90, 57, 25, 90, -80, 43, -57,
     379             :     57, 43, -80, -90, -25, 57, 90, 25, -9, -87, -87, 70, 43, 9, 70, -80,
     380             :     25, 9, -70, -25, 90, 43, -80, -57, 43, 70, 9, -80, -57, 87, 87, -90,
     381             :     90, 90, 90, 82, 88, 67, 85, 46, 82, 22, 78, -4, 73, -31, 67, -54,
     382             :     61, -73, 54, -85, 46, -90, 38, -88, 31, -78, 22, -61, 13, -38, 4, -13,
     383             :     88, 85, 67, 46, 31, -13, -13, -67, -54, -90, -82, -73, -90, -22, -78, 38,
     384             :     -46, 82, -4, 88, 38, 54, 73, -4, 90, -61, 85, -90, 61, -78, 22, -31,
     385             :     82, 78, 22, -4, -54, -82, -90, -73, -61, 13, 13, 85, 78, 67, 85, -22,
     386             :     31, -88, -46, -61, -90, 31, -67, 90, 4, 54, 73, -38, 88, -90, 38, -46,
     387             :     73, 67, -31, -54, -90, -78, -22, 38, 78, 85, 67, -22, -38, -90, -90, 4,
     388             :     -13, 90, 82, 13, 61, -88, -46, -31, -88, 82, -4, 46, 85, -73, 54, -61,
     389             :     61, 54, -73, -85, -46, -4, 82, 88, 31, -46, -88, -61, -13, 82, 90, 13,
     390             :     -4, -90, -90, 38, 22, 67, 85, -78, -38, -22, -78, 90, 54, -31, 67, -73,
     391             :     46, 38, -90, -88, 38, 73, 54, -4, -90, -67, 31, 90, 61, -46, -88, -31,
     392             :     22, 85, 67, -78, -85, 13, 13, 61, 73, -90, -82, 54, 4, 22, 78, -82,
     393             :     31, 22, -78, -61, 90, 85, -61, -90, 4, 73, 54, -38, -88, -4, 82, 46,
     394             :     -38, -78, -22, 90, 73, -82, -90, 54, 67, -13, -13, -31, -46, 67, 85, -88,
     395             :     13, 4, -38, -13, 61, 22, -78, -31, 88, 38, -90, -46, 85, 54, -73, -61,
     396             :     54, 67, -31, -73, 4, 78, 22, -82, -46, 85, 67, -88, -82, 90, 90, -90
     397             : };
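/*
 * coeff_tbl carries no comment in the source. Its last twenty rows coincide
 * with those of coeff_tbl2 above, while its first four rows read as the
 * 8-point forward DCT matrix split into column pairs -- {64, 64}, {89, 75},
 * {83, 36}, {75, -18}, ... are the first two entries of each forward basis
 * row -- so this table appears to serve the forward-transform path, with each
 * pair again feeding one _mm_madd_epi16 against a pair of input samples.
 */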
     398             : 
     399           0 : void PfreqTranspose32Type1_SSE2(
     400             :     int16_t *src,
     401             :     uint32_t  src_stride,
     402             :     int16_t *dst,
     403             :     uint32_t  dst_stride)
     404             : {
     405             :     uint32_t i, j;
     406           0 :     for (i = 0; i < 2; i++)
     407             :     {
     408           0 :         for (j = 0; j < 2; j++)
     409             :         {
     410             :             __m128i a0, a1, a2, a3, a4, a5, a6, a7;
     411             :             __m128i b0, b1, b2, b3, b4, b5, b6, b7;
     412             : 
     413           0 :             a0 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 0)*src_stride + 8 * j));
     414           0 :             a1 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 1)*src_stride + 8 * j));
     415           0 :             a2 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 2)*src_stride + 8 * j));
     416           0 :             a3 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 3)*src_stride + 8 * j));
     417           0 :             a4 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 4)*src_stride + 8 * j));
     418           0 :             a5 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 5)*src_stride + 8 * j));
     419           0 :             a6 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 6)*src_stride + 8 * j));
     420           0 :             a7 = _mm_loadu_si128((const __m128i *)(src + (8 * i + 7)*src_stride + 8 * j));
     421             : 
     422           0 :             b0 = _mm_unpacklo_epi16(a0, a4);
     423           0 :             b1 = _mm_unpacklo_epi16(a1, a5);
     424           0 :             b2 = _mm_unpacklo_epi16(a2, a6);
     425           0 :             b3 = _mm_unpacklo_epi16(a3, a7);
     426           0 :             b4 = _mm_unpackhi_epi16(a0, a4);
     427           0 :             b5 = _mm_unpackhi_epi16(a1, a5);
     428           0 :             b6 = _mm_unpackhi_epi16(a2, a6);
     429           0 :             b7 = _mm_unpackhi_epi16(a3, a7);
     430             : 
     431           0 :             a0 = _mm_unpacklo_epi16(b0, b2);
     432           0 :             a1 = _mm_unpacklo_epi16(b1, b3);
     433           0 :             a2 = _mm_unpackhi_epi16(b0, b2);
     434           0 :             a3 = _mm_unpackhi_epi16(b1, b3);
     435           0 :             a4 = _mm_unpacklo_epi16(b4, b6);
     436           0 :             a5 = _mm_unpacklo_epi16(b5, b7);
     437           0 :             a6 = _mm_unpackhi_epi16(b4, b6);
     438           0 :             a7 = _mm_unpackhi_epi16(b5, b7);
     439             : 
     440           0 :             b0 = _mm_unpacklo_epi16(a0, a1);
     441           0 :             b1 = _mm_unpackhi_epi16(a0, a1);
     442           0 :             b2 = _mm_unpacklo_epi16(a2, a3);
     443           0 :             b3 = _mm_unpackhi_epi16(a2, a3);
     444           0 :             b4 = _mm_unpacklo_epi16(a4, a5);
     445           0 :             b5 = _mm_unpackhi_epi16(a4, a5);
     446           0 :             b6 = _mm_unpacklo_epi16(a6, a7);
     447           0 :             b7 = _mm_unpackhi_epi16(a6, a7);
     448             : 
     449           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 0)*dst_stride + 8 * i), b0);
     450           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 1)*dst_stride + 8 * i), b1);
     451           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 2)*dst_stride + 8 * i), b2);
     452           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 3)*dst_stride + 8 * i), b3);
     453           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 4)*dst_stride + 8 * i), b4);
     454           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 5)*dst_stride + 8 * i), b5);
     455           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 6)*dst_stride + 8 * i), b6);
     456           0 :             _mm_storeu_si128((__m128i *)(dst + (8 * j + 7)*dst_stride + 8 * i), b7);
     457             :         }
     458             :     }
     459           0 : }
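/*
 * PfreqTranspose32Type1_SSE2 above transposes the top-left 16x16 region of a
 * block (2x2 tiles of 8x8, tile indices swapped on store), using the usual
 * three-level _mm_unpacklo_epi16/_mm_unpackhi_epi16 8x8 transpose. A scalar
 * reference with the same observable effect, handy for unit-testing the SSE2
 * path against random data (a sketch; the function name is illustrative and
 * the 16x16 extent is inferred from the loop bounds -- the "Pfreq" naming
 * suggests only the low-frequency quadrant is needed here):
 */
static void pfreq_transpose16x16_ref(const int16_t *src, uint32_t src_stride,
                                     int16_t *dst, uint32_t dst_stride)
{
    for (uint32_t r = 0; r < 16; r++)
        for (uint32_t c = 0; c < 16; c++)
            dst[c * dst_stride + r] = src[r * src_stride + c];
}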

Generated by: LCOV version 1.14