tests/checkasm/qpeldsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include <assert.h>
  20 #include <stddef.h>
  21
  22 #include "checkasm.h"
  23 #include "libavutil/intreadwrite.h"
  24 #include "libavutil/macros.h"
  25 #include "libavutil/mem_internal.h"
  26 #include "libavcodec/qpeldsp.h"
  27
  28 #define MAX_BLOCK_SIZE 16
  29 #define MAX_STRIDE     64
  30 // BUF_SIZE is bigger than necessary in order to test strides > block width.
  31 #define BUF_SIZE ((MAX_BLOCK_SIZE - 1) * MAX_STRIDE + MAX_BLOCK_SIZE)
  32 // Due to qpel interpolation the input needs to have one more line than
  33 // the output and the last line needs one more element.
  34 // The input is not subject to alignment requirements; making the input buffer
  35 // bigger (by MAX_BLOCK_SIZE - 1) allows us to use a random misalignment.
  36 #define INPUT_BUF_SIZE (MAX_BLOCK_SIZE * MAX_STRIDE + MAX_BLOCK_SIZE + 1 + (MAX_BLOCK_SIZE - 1))
  37
  38 #define randomize_buffers(buf0, buf1)                      \
  39     do {                                                   \
  40         static_assert(sizeof(buf0) == sizeof(buf1), "Incompatible buffers"); \
  41         static_assert(!(sizeof(buf0) % 4), "Tail handling needed"); \
  42         static_assert(sizeof(buf0[0]) == 1 && sizeof(buf1[0]) == 1, \
  43                       "Pointer arithmetic needs to be adapted"); \
  44         for (size_t k = 0; k < sizeof(buf0); k += 4) {     \
  45             uint32_t r = rnd();                            \
  46             AV_WN32A(buf0 + k, r);                         \
  47             AV_WN32A(buf1 + k, r);                         \
  48         }                                                  \
  49     } while (0)
  50
  51
  52 void checkasm_check_qpeldsp(void)
  53 {
  54     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf0)[INPUT_BUF_SIZE];
  55     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf1)[INPUT_BUF_SIZE];
  56     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf0)[BUF_SIZE];
  57     DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf1)[BUF_SIZE];
  58     QpelDSPContext qdsp;
  59     static const struct {
  60         const char *name;
  61         size_t offset;
  62     } tests[] = {
  63 #define TEST(NAME) { .name = #NAME, .offset = offsetof(QpelDSPContext, NAME) }
  64         TEST(put_qpel_pixels_tab),
  65         TEST(avg_qpel_pixels_tab),
  66         TEST(put_no_rnd_qpel_pixels_tab),
  67     };
  68     declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
  69
  70     ff_qpeldsp_init(&qdsp);
  71
  72     for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
  73         qpel_mc_func (*func_tab)[16] = (qpel_mc_func (*)[16])((char*)&qdsp + tests[i].offset);
  74         for (unsigned j = 0; j < 2; ++j) {
  75             const unsigned blocksize = MAX_BLOCK_SIZE >> j;
  76
  77             for (unsigned dxy = 0; dxy < 16; ++dxy) {
  78                 if (check_func(func_tab[j][dxy], "%s[%u][%u]", tests[i].name, j, dxy)) {
  79                     // Don't always use output that is 16-aligned.
  80                     size_t dst_offset = (rnd() % (MAX_BLOCK_SIZE / blocksize)) * blocksize;
  81                     size_t src_offset = rnd() % MAX_BLOCK_SIZE;
  82                     ptrdiff_t stride  = (rnd() % (MAX_STRIDE / blocksize) + 1) * blocksize;
  83                     const uint8_t *src0 = srcbuf0 + src_offset, *src1 = srcbuf1 + src_offset;
  84                     uint8_t *dst0 = dstbuf0 + dst_offset, *dst1 = dstbuf1 + dst_offset;
  85
  86                     if (rnd() & 1) {
  87                         // Flip stride.
  88                         dst1  += (blocksize - 1) * stride;
  89                         dst0  += (blocksize - 1) * stride;
  90                         // Due to interpolation potentially blocksize + 1 lines are read
  91                         // from src, hence blocksize * stride.
  92                         src0  += blocksize * stride;
  93                         src1  += blocksize * stride;
  94                         stride = -stride;
  95                     }
  96
  97                     randomize_buffers(srcbuf0, srcbuf1);
  98                     randomize_buffers(dstbuf0, dstbuf1);
  99                     call_ref(dst0, src0, stride);
 100                     call_new(dst1, src1, stride);
 101                     if (memcmp(srcbuf0, srcbuf1, sizeof(srcbuf0)) || memcmp(dstbuf0, dstbuf1, sizeof(dstbuf0)))
 102                         fail();
 103                     bench_new(dst0, src0, stride);
 104                 }
 105             }
 106         }
 107     }
 108 }