tests/checkasm/hpeldsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include <assert.h>
  20 #include <stddef.h>
  21
  22 #include "checkasm.h"
  23 #include "libavutil/intreadwrite.h"
  24 #include "libavutil/macros.h"
  25 #include "libavutil/mem_internal.h"
  26 #include "libavcodec/avcodec.h"
  27 #include "libavcodec/hpeldsp.h"
  28
  29 #define MAX_BLOCK_SIZE 16
  30 #define MAX_HEIGHT     16
  31 #define MAX_STRIDE     64
  32 // BUF_SIZE is bigger than necessary in order to test strides > block width.
  33 #define BUF_SIZE ((MAX_HEIGHT - 1) * MAX_STRIDE + MAX_BLOCK_SIZE)
  34 // Due to hpel interpolation the input needs to have one more line than
  35 // the output and the last line needs one more element.
  36 // The input is not subject to alignment requirements; making the input buffer
  37 // bigger (by MAX_BLOCK_SIZE - 1) allows us to use a random misalignment.
  38 #define INPUT_BUF_SIZE (MAX_HEIGHT * MAX_STRIDE + MAX_BLOCK_SIZE + 1 + (MAX_BLOCK_SIZE - 1))
  39
  40 #define randomize_buffers(buf0, buf1)                      \
  41     do {                                                   \
  42         static_assert(sizeof(buf0) == sizeof(buf1), "Incompatible buffers"); \
  43         static_assert(!(sizeof(buf0) % 4), "Tail handling needed"); \
  44         static_assert(sizeof(buf0[0]) == 1 && sizeof(buf1[0]) == 1, \
  45                       "Pointer arithmetic needs to be adapted"); \
  46         for (size_t k = 0; k < sizeof(buf0); k += 4) {     \
  47             uint32_t r = rnd();                            \
  48             AV_WN32A(buf0 + k, r);                         \
  49             AV_WN32A(buf1 + k, r);                         \
  50         }                                                  \
  51     } while (0)
  52
  53
  54 void checkasm_check_hpeldsp(void)
  55 {
  56     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf0)[INPUT_BUF_SIZE];
  57     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf1)[INPUT_BUF_SIZE];
  58     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf0)[BUF_SIZE];
  59     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf1)[BUF_SIZE];
  60     HpelDSPContext hdsp;
  61     static const struct {
  62         const char *name;
  63         size_t offset;
  64         unsigned nb_blocksizes;
  65     } tests[] = {
  66 #define TEST(NAME, NB) { .name = #NAME, .offset = offsetof(HpelDSPContext, NAME), .nb_blocksizes = NB }
  67         TEST(put_pixels_tab, 4),
  68         TEST(avg_pixels_tab, 4),
  69         TEST(put_no_rnd_pixels_tab, 2), // put_no_rnd_pixels_tab only has two usable blocksizes
  70         TEST(avg_no_rnd_pixels_tab, 1),
  71     };
  72     declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h);
  73
  74     ff_hpeldsp_init(&hdsp, AV_CODEC_FLAG_BITEXACT);
  75
  76     for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
  77         op_pixels_func (*func_tab)[4] = (op_pixels_func (*)[4])((char*)&hdsp + tests[i].offset);
  78         for (unsigned j = 0; j < tests[i].nb_blocksizes; ++j) {
  79             const unsigned blocksize = MAX_BLOCK_SIZE >> j;
  80             // h must always be a multiple of four, except when width is two or four.
  81             const unsigned h_mult = blocksize <= 4 ? 2 : 4;
  82
  83             for (unsigned dxy = 0; dxy < 4; ++dxy) {
  84                 if (check_func(func_tab[j][dxy], "%s[%u][%u]", tests[i].name, j, dxy)) {
  85                     // Don't always use output that is 16-aligned.
  86                     size_t dst_offset = (rnd() % (MAX_BLOCK_SIZE / blocksize)) * blocksize;
  87                     size_t src_offset = rnd() % MAX_BLOCK_SIZE;
  88                     ptrdiff_t stride  = (rnd() % (MAX_STRIDE / blocksize) + 1) * blocksize;
  89                     const uint8_t *src0 = srcbuf0 + src_offset, *src1 = srcbuf1 + src_offset;
  90                     uint8_t *dst0 = dstbuf0 + dst_offset, *dst1 = dstbuf1 + dst_offset;
  91
  92                     // Always use the same height for each test, so that comparisons of benchmarks
  93                     // from different instruction sets are meaningful.
  94                     static int saved_heights[FF_ARRAY_ELEMS(tests)][4][4];
  95                     int h = saved_heights[i][j][dxy];
  96                     if (!h)
  97                         saved_heights[i][j][dxy] = h = (rnd() % (MAX_HEIGHT / h_mult) + 1) * h_mult;
  98
  99                     if (rnd() & 1) {
 100                         // Flip stride.
 101                         dst1  += (h - 1) * stride;
 102                         dst0  += (h - 1) * stride;
 103                         // Due to interpolation potentially h + 1 lines are read
 104                         // from src, hence h * stride.
 105                         src0  += h * stride;
 106                         src1  += h * stride;
 107                         stride = -stride;
 108                     }
 109
 110                     randomize_buffers(srcbuf0, srcbuf1);
 111                     randomize_buffers(dstbuf0, dstbuf1);
 112                     call_ref(dst0, src0, stride, h);
 113                     call_new(dst1, src1, stride, h);
 114                     if (memcmp(srcbuf0, srcbuf1, sizeof(srcbuf0)) || memcmp(dstbuf0, dstbuf1, sizeof(dstbuf0)))
 115                         fail();
 116                     bench_new(dst0, src0, stride, h);
 117                 }
 118             }
 119         }
 120     }
 121 }