2 * Assembly testing and benchmarking tool
3 * Copyright (c) 2015 Henrik Gramner
4 * Copyright (c) 2008 Loren Merritt
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * Copyright © 2018, VideoLAN and dav1d authors
23 * Copyright © 2018, Two Orioles, LLC
24 * All rights reserved.
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions are met:
29 * 1. Redistributions of source code must retain the above copyright notice, this
30 * list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright notice,
33 * this list of conditions and the following disclaimer in the documentation
34 * and/or other materials provided with the distribution.
36 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
40 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
41 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 #include "config_components.h"
52 # define _GNU_SOURCE // for syscall (performance monitoring API), strsignal()
61 #include "libavutil/avassert.h"
62 #include "libavutil/common.h"
63 #include "libavutil/cpu.h"
64 #include "libavutil/intfloat.h"
65 #include "libavutil/random_seed.h"
71 #include <sys/prctl.h>
74 #if defined(_WIN32) && !defined(SIGBUS)
75 /* non-standard, use the same value as mingw-w64 */
79 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
81 #define COLOR_RED FOREGROUND_RED
82 #define COLOR_GREEN FOREGROUND_GREEN
83 #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
87 #define COLOR_YELLOW 3
99 #include "libavutil/aarch64/cpu.h"
101 #include "libavutil/riscv/cpu.h"
104 #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
105 #include "libavutil/arm/cpu.h"
107 void (*checkasm_checked_call
)(void *func
, int dummy
, ...) = checkasm_checked_call_novfp
;
110 /* Trade-off between speed and accuracy */
111 uint64_t bench_runs
= 1U << 10;
113 /* List of tests to invoke */
114 static const struct {
119 #if CONFIG_AAC_DECODER
120 { "aacpsdsp", checkasm_check_aacpsdsp
},
121 { "sbrdsp", checkasm_check_sbrdsp
},
123 #if CONFIG_AAC_ENCODER
124 { "aacencdsp", checkasm_check_aacencdsp
},
127 { "ac3dsp", checkasm_check_ac3dsp
},
129 #if CONFIG_ALAC_DECODER
130 { "alacdsp", checkasm_check_alacdsp
},
132 #if CONFIG_APV_DECODER
133 { "apv_dsp", checkasm_check_apv_dsp
},
136 { "audiodsp", checkasm_check_audiodsp
},
139 { "blockdsp", checkasm_check_blockdsp
},
142 { "bswapdsp", checkasm_check_bswapdsp
},
144 #if CONFIG_CAVS_DECODER
145 { "cavsdsp", checkasm_check_cavsdsp
},
147 #if CONFIG_DCA_DECODER
148 { "dcadsp", checkasm_check_dcadsp
},
149 { "synth_filter", checkasm_check_synth_filter
},
151 #if CONFIG_DIRAC_DECODER
152 { "diracdsp", checkasm_check_diracdsp
},
154 #if CONFIG_EXR_DECODER
155 { "exrdsp", checkasm_check_exrdsp
},
158 { "fdctdsp", checkasm_check_fdctdsp
},
160 #if CONFIG_FLAC_DECODER
161 { "flacdsp", checkasm_check_flacdsp
},
163 #if CONFIG_FMTCONVERT
164 { "fmtconvert", checkasm_check_fmtconvert
},
167 { "g722dsp", checkasm_check_g722dsp
},
170 { "h263dsp", checkasm_check_h263dsp
},
172 #if CONFIG_H264CHROMA
173 { "h264chroma", checkasm_check_h264chroma
},
176 { "h264dsp", checkasm_check_h264dsp
},
179 { "h264pred", checkasm_check_h264pred
},
182 { "h264qpel", checkasm_check_h264qpel
},
184 #if CONFIG_HEVC_DECODER
185 { "hevc_add_res", checkasm_check_hevc_add_res
},
186 { "hevc_deblock", checkasm_check_hevc_deblock
},
187 { "hevc_idct", checkasm_check_hevc_idct
},
188 { "hevc_pel", checkasm_check_hevc_pel
},
189 { "hevc_sao", checkasm_check_hevc_sao
},
192 { "hpeldsp", checkasm_check_hpeldsp
},
194 #if CONFIG_HUFFYUV_DECODER
195 { "huffyuvdsp", checkasm_check_huffyuvdsp
},
198 { "idctdsp", checkasm_check_idctdsp
},
200 #if CONFIG_JPEG2000_DECODER
201 { "jpeg2000dsp", checkasm_check_jpeg2000dsp
},
204 { "llauddsp", checkasm_check_llauddsp
},
206 #if CONFIG_HUFFYUVDSP
207 { "llviddsp", checkasm_check_llviddsp
},
209 #if CONFIG_LLVIDENCDSP
210 { "llviddspenc", checkasm_check_llviddspenc
},
213 { "lpc", checkasm_check_lpc
},
216 { "motion", checkasm_check_motion
},
218 #if CONFIG_MPEGVIDEOENCDSP
219 { "mpegvideoencdsp", checkasm_check_mpegvideoencdsp
},
221 #if CONFIG_OPUS_DECODER
222 { "opusdsp", checkasm_check_opusdsp
},
224 #if CONFIG_PIXBLOCKDSP
225 { "pixblockdsp", checkasm_check_pixblockdsp
},
228 { "qpeldsp", checkasm_check_qpeldsp
},
231 { "rv34dsp", checkasm_check_rv34dsp
},
233 #if CONFIG_RV40_DECODER
234 { "rv40dsp", checkasm_check_rv40dsp
},
236 #if CONFIG_SVQ1_ENCODER
237 { "svq1enc", checkasm_check_svq1enc
},
239 #if CONFIG_TAK_DECODER
240 { "takdsp", checkasm_check_takdsp
},
242 #if CONFIG_UTVIDEO_DECODER
243 { "utvideodsp", checkasm_check_utvideodsp
},
245 #if CONFIG_V210_DECODER
246 { "v210dec", checkasm_check_v210dec
},
248 #if CONFIG_V210_ENCODER
249 { "v210enc", checkasm_check_v210enc
},
252 { "vc1dsp", checkasm_check_vc1dsp
},
255 { "vp3dsp", checkasm_check_vp3dsp
},
257 #if CONFIG_VP6_DECODER
258 { "vp6dsp", checkasm_check_vp6dsp
},
261 { "vp8dsp", checkasm_check_vp8dsp
},
263 #if CONFIG_VP9_DECODER
264 { "vp9dsp", checkasm_check_vp9dsp
},
267 { "videodsp", checkasm_check_videodsp
},
269 #if CONFIG_VORBIS_DECODER
270 { "vorbisdsp", checkasm_check_vorbisdsp
},
272 #if CONFIG_VVC_DECODER
273 { "vvc_alf", checkasm_check_vvc_alf
},
274 { "vvc_mc", checkasm_check_vvc_mc
},
275 { "vvc_sao", checkasm_check_vvc_sao
},
280 { "scene_sad", checkasm_check_scene_sad
},
282 #if CONFIG_AFIR_FILTER
283 { "af_afir", checkasm_check_afir
},
285 #if CONFIG_BLACKDETECT_FILTER
286 { "vf_blackdetect", checkasm_check_blackdetect
},
288 #if CONFIG_BLEND_FILTER
289 { "vf_blend", checkasm_check_blend
},
291 #if CONFIG_BWDIF_FILTER
292 { "vf_bwdif", checkasm_check_vf_bwdif
},
294 #if CONFIG_COLORDETECT_FILTER
295 { "vf_colordetect", checkasm_check_colordetect
},
297 #if CONFIG_COLORSPACE_FILTER
298 { "vf_colorspace", checkasm_check_colorspace
},
301 { "vf_eq", checkasm_check_vf_eq
},
303 #if CONFIG_FSPP_FILTER
304 { "vf_fspp", checkasm_check_vf_fspp
},
306 #if CONFIG_GBLUR_FILTER
307 { "vf_gblur", checkasm_check_vf_gblur
},
309 #if CONFIG_HFLIP_FILTER
310 { "vf_hflip", checkasm_check_vf_hflip
},
312 #if CONFIG_IDET_FILTER
313 { "vf_idet", checkasm_check_idet
},
315 #if CONFIG_NLMEANS_FILTER
316 { "vf_nlmeans", checkasm_check_nlmeans
},
318 #if CONFIG_THRESHOLD_FILTER
319 { "vf_threshold", checkasm_check_vf_threshold
},
321 #if CONFIG_SOBEL_FILTER
322 { "vf_sobel", checkasm_check_vf_sobel
},
326 { "sw_gbrp", checkasm_check_sw_gbrp
},
327 { "sw_range_convert", checkasm_check_sw_range_convert
},
328 { "sw_rgb", checkasm_check_sw_rgb
},
329 { "sw_scale", checkasm_check_sw_scale
},
330 { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb
},
331 { "sw_yuv2yuv", checkasm_check_sw_yuv2yuv
},
332 { "sw_ops", checkasm_check_sw_ops
},
335 { "aes", checkasm_check_aes
},
336 { "fixed_dsp", checkasm_check_fixed_dsp
},
337 { "float_dsp", checkasm_check_float_dsp
},
338 { "lls", checkasm_check_lls
},
339 { "av_tx", checkasm_check_av_tx
},
344 /* List of cpu flags to check */
345 static const struct {
351 { "ARMV8", "armv8", AV_CPU_FLAG_ARMV8
},
352 { "NEON", "neon", AV_CPU_FLAG_NEON
},
353 { "DOTPROD", "dotprod", AV_CPU_FLAG_DOTPROD
},
354 { "I8MM", "i8mm", AV_CPU_FLAG_I8MM
},
355 { "SVE", "sve", AV_CPU_FLAG_SVE
},
356 { "SVE2", "sve2", AV_CPU_FLAG_SVE2
},
358 { "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE
},
359 { "ARMV6", "armv6", AV_CPU_FLAG_ARMV6
},
360 { "ARMV6T2", "armv6t2", AV_CPU_FLAG_ARMV6T2
},
361 { "VFP", "vfp", AV_CPU_FLAG_VFP
},
362 { "VFP_VM", "vfp_vm", AV_CPU_FLAG_VFP_VM
},
363 { "VFPV3", "vfp3", AV_CPU_FLAG_VFPV3
},
364 { "NEON", "neon", AV_CPU_FLAG_NEON
},
366 { "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC
},
367 { "VSX", "vsx", AV_CPU_FLAG_VSX
},
368 { "POWER8", "power8", AV_CPU_FLAG_POWER8
},
370 { "RVI", "rvi", AV_CPU_FLAG_RVI
},
371 { "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED
},
372 { "RV_zbb", "rvb_b", AV_CPU_FLAG_RVB_BASIC
},
373 { "RVB", "rvb", AV_CPU_FLAG_RVB
},
374 { "RV_zve32x","rvv_i32", AV_CPU_FLAG_RVV_I32
},
375 { "RV_zve32f","rvv_f32", AV_CPU_FLAG_RVV_F32
},
376 { "RV_zve64x","rvv_i64", AV_CPU_FLAG_RVV_I64
},
377 { "RV_zve64d","rvv_f64", AV_CPU_FLAG_RVV_F64
},
378 { "RV_zvbb", "rv_zvbb", AV_CPU_FLAG_RV_ZVBB
},
380 { "MMI", "mmi", AV_CPU_FLAG_MMI
},
381 { "MSA", "msa", AV_CPU_FLAG_MSA
},
383 { "MMX", "mmx", AV_CPU_FLAG_MMX
|AV_CPU_FLAG_CMOV
},
384 { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT
},
385 { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW
},
386 { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT
},
387 { "SSE", "sse", AV_CPU_FLAG_SSE
},
388 { "SSE2", "sse2", AV_CPU_FLAG_SSE2
|AV_CPU_FLAG_SSE2SLOW
},
389 { "SSE3", "sse3", AV_CPU_FLAG_SSE3
|AV_CPU_FLAG_SSE3SLOW
},
390 { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3
|AV_CPU_FLAG_ATOM
},
391 { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4
},
392 { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42
},
393 { "AES-NI", "aesni", AV_CPU_FLAG_AESNI
},
394 { "AVX", "avx", AV_CPU_FLAG_AVX
},
395 { "XOP", "xop", AV_CPU_FLAG_XOP
},
396 { "FMA3", "fma3", AV_CPU_FLAG_FMA3
},
397 { "FMA4", "fma4", AV_CPU_FLAG_FMA4
},
398 { "AVX2", "avx2", AV_CPU_FLAG_AVX2
},
399 { "AVX-512", "avx512", AV_CPU_FLAG_AVX512
},
400 { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL
},
402 { "LSX", "lsx", AV_CPU_FLAG_LSX
},
403 { "LASX", "lasx", AV_CPU_FLAG_LASX
},
405 { "SIMD128", "simd128", AV_CPU_FLAG_SIMD128
},
410 typedef struct CheckasmFuncVersion
{
411 struct CheckasmFuncVersion
*next
;
416 } CheckasmFuncVersion
;
418 /* Binary search tree node */
419 typedef struct CheckasmFunc
{
420 struct CheckasmFunc
*child
[2];
421 CheckasmFuncVersion versions
;
422 uint8_t color
; /* 0 = red, 1 = black */
429 CheckasmFunc
*current_func
;
430 CheckasmFuncVersion
*current_func_ver
;
431 const char *current_test_name
;
432 const char *bench_pattern
;
433 int bench_pattern_len
;
442 const char *cpu_flag_name
;
443 const char *test_pattern
;
447 volatile sig_atomic_t catch_signals
;
453 /* float compare support code */
454 static int is_negative(union av_intfloat32 u
)
459 int float_near_ulp(float a
, float b
, unsigned max_ulp
)
461 union av_intfloat32 x
, y
;
466 if (is_negative(x
) != is_negative(y
)) {
467 // handle -0.0 == +0.0
471 if (llabs((int64_t)x
.i
- y
.i
) <= max_ulp
)
477 int float_near_ulp_array(const float *a
, const float *b
, unsigned max_ulp
,
482 for (i
= 0; i
< len
; i
++) {
483 if (!float_near_ulp(a
[i
], b
[i
], max_ulp
))
489 int float_near_abs_eps(float a
, float b
, float eps
)
491 float abs_diff
= fabsf(a
- b
);
495 fprintf(stderr
, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a
, b
, abs_diff
, eps
);
500 int float_near_abs_eps_array(const float *a
, const float *b
, float eps
,
505 for (i
= 0; i
< len
; i
++) {
506 if (!float_near_abs_eps(a
[i
], b
[i
], eps
))
512 int float_near_abs_eps_ulp(float a
, float b
, float eps
, unsigned max_ulp
)
514 return float_near_ulp(a
, b
, max_ulp
) || float_near_abs_eps(a
, b
, eps
);
517 int float_near_abs_eps_array_ulp(const float *a
, const float *b
, float eps
,
518 unsigned max_ulp
, unsigned len
)
522 for (i
= 0; i
< len
; i
++) {
523 if (!float_near_abs_eps_ulp(a
[i
], b
[i
], eps
, max_ulp
))
529 int double_near_abs_eps(double a
, double b
, double eps
)
531 double abs_diff
= fabs(a
- b
);
533 return abs_diff
< eps
;
536 int double_near_abs_eps_array(const double *a
, const double *b
, double eps
,
541 for (i
= 0; i
< len
; i
++) {
542 if (!double_near_abs_eps(a
[i
], b
[i
], eps
))
548 /* Print colored text to stderr if the terminal supports it */
549 static void color_printf(int color
, const char *fmt
, ...)
551 static int use_color
= -1;
554 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
556 static WORD org_attributes
;
559 CONSOLE_SCREEN_BUFFER_INFO con_info
;
560 con
= GetStdHandle(STD_ERROR_HANDLE
);
561 if (con
&& con
!= INVALID_HANDLE_VALUE
&& GetConsoleScreenBufferInfo(con
, &con_info
)) {
562 org_attributes
= con_info
.wAttributes
;
568 SetConsoleTextAttribute(con
, (org_attributes
& 0xfff0) | (color
& 0x0f));
571 const char *term
= getenv("TERM");
572 use_color
= term
&& strcmp(term
, "dumb") && isatty(2);
575 fprintf(stderr
, "\x1b[%d;3%dm", (color
& 0x08) >> 3, color
& 0x07);
579 vfprintf(stderr
, fmt
, arg
);
583 #if HAVE_SETCONSOLETEXTATTRIBUTE && HAVE_GETSTDHANDLE
584 SetConsoleTextAttribute(con
, org_attributes
);
586 fprintf(stderr
, "\x1b[0m");
591 /* Deallocate a tree */
592 static void destroy_func_tree(CheckasmFunc
*f
)
595 CheckasmFuncVersion
*v
= f
->versions
.next
;
597 CheckasmFuncVersion
*next
= v
->next
;
602 destroy_func_tree(f
->child
[0]);
603 destroy_func_tree(f
->child
[1]);
608 /* Allocate a zero-initialized block, clean up and exit on failure */
609 static void *checkasm_malloc(size_t size
)
611 void *ptr
= calloc(1, size
);
613 fprintf(stderr
, "checkasm: malloc failed\n");
614 destroy_func_tree(state
.funcs
);
620 /* Get the suffix of the specified cpu flag */
621 static const char *cpu_suffix(int cpu
)
623 int i
= FF_ARRAY_ELEMS(cpus
);
626 if (cpu
& cpus
[i
].flag
)
627 return cpus
[i
].suffix
;
632 static int cmp_nop(const void *a
, const void *b
)
634 return *(const uint16_t*)a
- *(const uint16_t*)b
;
637 /* Measure the overhead of the timing code (in decicycles) */
638 static int measure_nop_time(void)
640 uint16_t nops
[10000];
642 av_unused
const int sysfd
= state
.sysfd
;
645 for (i
= 0; i
< 10000; i
++) {
651 qsort(nops
, 10000, sizeof(uint16_t), cmp_nop
);
652 for (i
= 2500; i
< 7500; i
++)
655 return nop_sum
/ 500;
658 static inline double avg_cycles_per_call(const CheckasmPerf
*const p
)
661 const double cycles
= (double)(10 * p
->cycles
) / p
->iterations
- state
.nop_time
;
663 return cycles
/ 32.0; /* 32 calls per iteration */
668 /* Print benchmark results */
669 static void print_benchs(CheckasmFunc
*f
)
672 CheckasmFuncVersion
*v
= &f
->versions
;
673 const CheckasmPerf
*p
= &v
->perf
;
674 const double baseline
= avg_cycles_per_call(p
);
677 print_benchs(f
->child
[0]);
682 decicycles
= avg_cycles_per_call(p
);
683 if (state
.csv
|| state
.tsv
) {
684 const char sep
= state
.csv
? ',' : '\t';
685 printf("%s%c%s%c%.1f\n", f
->name
, sep
,
686 cpu_suffix(v
->cpu
), sep
,
689 const int pad_length
= 10 + 50 -
690 printf("%s_%s:", f
->name
, cpu_suffix(v
->cpu
));
691 const double ratio
= decicycles
?
692 baseline
/ decicycles
: 0.0;
693 printf("%*.1f (%5.2fx)\n", FFMAX(pad_length
, 0),
694 decicycles
/ 10.0, ratio
);
697 } while ((v
= v
->next
));
699 print_benchs(f
->child
[1]);
703 /* ASCIIbetical sort except preserving natural order for numbers */
704 static int cmp_func_names(const char *a
, const char *b
)
706 const char *start
= a
;
707 int ascii_diff
, digit_diff
;
709 for (; !(ascii_diff
= *(const unsigned char*)a
- *(const unsigned char*)b
) && *a
; a
++, b
++);
710 for (; av_isdigit(*a
) && av_isdigit(*b
); a
++, b
++);
712 if (a
> start
&& av_isdigit(a
[-1]) && (digit_diff
= av_isdigit(*a
) - av_isdigit(*b
)))
718 /* Perform a tree rotation in the specified direction and return the new root */
719 static CheckasmFunc
*rotate_tree(CheckasmFunc
*f
, int dir
)
721 CheckasmFunc
*r
= f
->child
[dir
^1];
722 f
->child
[dir
^1] = r
->child
[dir
];
729 #define is_red(f) ((f) && !(f)->color)
731 /* Balance a left-leaning red-black tree at the specified node */
732 static void balance_tree(CheckasmFunc
**root
)
734 CheckasmFunc
*f
= *root
;
736 if (is_red(f
->child
[0]) && is_red(f
->child
[1])) {
738 f
->child
[0]->color
= f
->child
[1]->color
= 1;
741 if (!is_red(f
->child
[0]) && is_red(f
->child
[1]))
742 *root
= rotate_tree(f
, 0); /* Rotate left */
743 else if (is_red(f
->child
[0]) && is_red(f
->child
[0]->child
[0]))
744 *root
= rotate_tree(f
, 1); /* Rotate right */
747 /* Get a node with the specified name, creating it if it doesn't exist */
748 static CheckasmFunc
*get_func(CheckasmFunc
**root
, const char *name
)
750 CheckasmFunc
*f
= *root
;
753 /* Search the tree for a matching node */
754 int cmp
= cmp_func_names(name
, f
->name
);
756 f
= get_func(&f
->child
[cmp
> 0], name
);
758 /* Rebalance the tree on the way up if a new node was inserted */
759 if (!f
->versions
.func
)
763 /* Allocate and insert a new node into the tree */
764 int name_length
= strlen(name
);
765 f
= *root
= checkasm_malloc(sizeof(CheckasmFunc
) + name_length
);
766 memcpy(f
->name
, name
, name_length
+ 1);
772 checkasm_context checkasm_context_buf
;
774 /* Crash handling: attempt to catch crashes and handle them
775 * gracefully instead of just aborting abruptly. */
777 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
778 static LONG NTAPI
signal_handler(EXCEPTION_POINTERS
*e
) {
781 if (!state
.catch_signals
)
782 return EXCEPTION_CONTINUE_SEARCH
;
784 switch (e
->ExceptionRecord
->ExceptionCode
) {
785 case EXCEPTION_FLT_DIVIDE_BY_ZERO
:
786 case EXCEPTION_INT_DIVIDE_BY_ZERO
:
789 case EXCEPTION_ILLEGAL_INSTRUCTION
:
790 case EXCEPTION_PRIV_INSTRUCTION
:
793 case EXCEPTION_ACCESS_VIOLATION
:
794 case EXCEPTION_ARRAY_BOUNDS_EXCEEDED
:
795 case EXCEPTION_DATATYPE_MISALIGNMENT
:
796 case EXCEPTION_STACK_OVERFLOW
:
799 case EXCEPTION_IN_PAGE_ERROR
:
803 return EXCEPTION_CONTINUE_SEARCH
;
805 state
.catch_signals
= 0;
806 checkasm_load_context(s
);
807 return EXCEPTION_CONTINUE_EXECUTION
; /* never reached, but shuts up gcc */
810 #elif !defined(_WASI_EMULATED_SIGNAL)
811 static void signal_handler(int s
);
813 static const struct sigaction signal_handler_act
= {
814 .sa_handler
= signal_handler
,
815 .sa_flags
= SA_RESETHAND
,
818 static void signal_handler(int s
) {
819 if (state
.catch_signals
) {
820 state
.catch_signals
= 0;
821 sigaction(s
, &signal_handler_act
, NULL
);
822 checkasm_load_context(s
);
827 /* Compares a string with a wildcard pattern. */
828 static int wildstrcmp(const char *str
, const char *pattern
)
830 const char *wild
= strchr(pattern
, '*');
832 const size_t len
= wild
- pattern
;
833 if (strncmp(str
, pattern
, len
)) return 1;
834 while (*++wild
== '*');
835 if (!*wild
) return 0;
837 while (*str
&& wildstrcmp(str
, wild
)) str
++;
840 return strcmp(str
, pattern
);
843 /* Perform tests and benchmarks for the specified cpu flag if supported by the host */
844 static void check_cpu_flag(const char *name
, int flag
)
846 int old_cpu_flag
= state
.cpu_flag
;
848 flag
|= old_cpu_flag
;
849 av_force_cpu_flags(-1);
850 state
.cpu_flag
= flag
& av_get_cpu_flags();
851 av_force_cpu_flags(state
.cpu_flag
);
853 if (!flag
|| state
.cpu_flag
!= old_cpu_flag
) {
856 state
.cpu_flag_name
= name
;
857 for (i
= 0; tests
[i
].func
; i
++) {
858 if (state
.test_pattern
&& wildstrcmp(tests
[i
].name
, state
.test_pattern
))
860 state
.current_test_name
= tests
[i
].name
;
866 /* Print the name of the current CPU flag, but only do it once */
867 static void print_cpu_name(void)
869 if (state
.cpu_flag_name
) {
870 color_printf(COLOR_YELLOW
, "%s:\n", state
.cpu_flag_name
);
871 state
.cpu_flag_name
= NULL
;
875 #if CONFIG_LINUX_PERF
876 static int bench_init_linux(void)
878 struct perf_event_attr attr
= {
879 .type
= PERF_TYPE_HARDWARE
,
880 .size
= sizeof(struct perf_event_attr
),
881 .config
= PERF_COUNT_HW_CPU_CYCLES
,
882 .disabled
= 1, // start counting only on demand
890 fprintf(stderr
, "benchmarking with Linux Perf Monitoring API\n");
892 state
.sysfd
= syscall(__NR_perf_event_open
, &attr
, 0, -1, -1, 0);
893 if (state
.sysfd
== -1) {
894 perror("perf_event_open");
899 #elif CONFIG_MACOS_KPERF
900 static int bench_init_kperf(void)
906 static int bench_init_ffmpeg(void)
909 if (!checkasm_save_context()) {
910 checkasm_set_signal_handler_state(1);
912 checkasm_set_signal_handler_state(0);
914 fprintf(stderr
, "checkasm: unable to execute platform specific timer\n");
917 fprintf(stderr
, "benchmarking with native FFmpeg timers\n");
920 fprintf(stderr
, "checkasm: --bench is not supported on your system\n");
926 static int bench_init(void)
928 #if CONFIG_LINUX_PERF
929 int ret
= bench_init_linux();
930 #elif CONFIG_MACOS_KPERF
931 int ret
= bench_init_kperf();
933 int ret
= bench_init_ffmpeg();
938 state
.nop_time
= measure_nop_time();
939 fprintf(stderr
, "nop: %d.%d\n", state
.nop_time
/10, state
.nop_time
%10);
943 static void bench_uninit(void)
945 #if CONFIG_LINUX_PERF
950 static int usage(const char *path
)
953 "Usage: %s [options...] [seed]\n"
954 " --test=<pattern> Run specific test.\n"
955 " --bench Run benchmark.\n"
956 " --csv, --tsv Output results in rows of comma or tab separated values.\n"
957 " --runs=<ptwo> Manual number of benchmark iterations to run 2**<ptwo>.\n"
958 " --verbose Increase verbosity.\n",
963 int main(int argc
, char *argv
[])
965 unsigned int seed
= av_get_random_seed();
967 char arch_info_buf
[50] = "";
970 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
971 AddVectoredExceptionHandler(0, signal_handler
);
973 #elif !defined(_WASI_EMULATED_SIGNAL)
974 sigaction(SIGBUS
, &signal_handler_act
, NULL
);
975 sigaction(SIGFPE
, &signal_handler_act
, NULL
);
976 sigaction(SIGILL
, &signal_handler_act
, NULL
);
977 sigaction(SIGSEGV
, &signal_handler_act
, NULL
);
979 #if HAVE_PRCTL && defined(PR_SET_UNALIGN)
980 prctl(PR_SET_UNALIGN
, PR_UNALIGN_SIGBUS
);
982 #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
983 if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
984 checkasm_checked_call
= checkasm_checked_call_vfp
;
987 if (!tests
[0].func
|| !cpus
[0].flag
) {
988 fprintf(stderr
, "checkasm: no tests to perform\n");
992 for (i
= 1; i
< argc
; i
++) {
993 const char *arg
= argv
[i
];
997 if (!strncmp(arg
, "--bench", 7)) {
998 if (bench_init() < 0)
1000 if (arg
[7] == '=') {
1001 state
.bench_pattern
= arg
+ 8;
1002 state
.bench_pattern_len
= strlen(state
.bench_pattern
);
1004 state
.bench_pattern
= "*";
1005 } else if (!strncmp(arg
, "--test=", 7)) {
1006 state
.test_pattern
= arg
+ 7;
1007 } else if (!strcmp(arg
, "--csv")) {
1008 state
.csv
= 1; state
.tsv
= 0;
1009 } else if (!strcmp(arg
, "--tsv")) {
1010 state
.csv
= 0; state
.tsv
= 1;
1011 } else if (!strcmp(arg
, "--verbose") || !strcmp(arg
, "-v")) {
1013 } else if (!strncmp(arg
, "--runs=", 7)) {
1014 l
= strtoul(arg
+ 7, &end
, 10);
1017 fprintf(stderr
, "checkasm: error: runs exponent must be within the range 0 <= 30\n");
1020 bench_runs
= 1U << l
;
1022 return usage(argv
[0]);
1024 } else if ((l
= strtoul(arg
, &end
, 10)) <= UINT_MAX
&&
1028 return usage(argv
[0]);
1032 #if ARCH_AARCH64 && HAVE_SVE
1033 if (have_sve(av_get_cpu_flags()))
1034 snprintf(arch_info_buf
, sizeof(arch_info_buf
),
1035 "SVE %d bits, ", 8 * ff_aarch64_sve_length());
1036 #elif ARCH_RISCV && HAVE_RVV
1037 if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32
)
1038 snprintf(arch_info_buf
, sizeof (arch_info_buf
),
1039 "%zu-bit vectors, ", 8 * ff_get_rv_vlenb());
1041 fprintf(stderr
, "checkasm: %susing random seed %u\n", arch_info_buf
, seed
);
1042 av_lfg_init(&checkasm_lfg
, seed
);
1044 if (state
.bench_pattern
)
1045 fprintf(stderr
, "checkasm: bench runs %" PRIu64
" (1 << %i)\n", bench_runs
, av_log2(bench_runs
));
1047 check_cpu_flag(NULL
, 0);
1048 for (i
= 0; cpus
[i
].flag
; i
++)
1049 check_cpu_flag(cpus
[i
].name
, cpus
[i
].flag
);
1051 if (state
.num_failed
) {
1052 fprintf(stderr
, "checkasm: %d of %d tests have failed\n", state
.num_failed
, state
.num_checked
);
1055 fprintf(stderr
, "checkasm: all %d tests passed\n", state
.num_checked
);
1056 if (state
.bench_pattern
) {
1057 print_benchs(state
.funcs
);
1061 destroy_func_tree(state
.funcs
);
1066 /* Decide whether or not the specified function needs to be tested and
1067 * allocate/initialize data structures if needed. Returns a pointer to a
1068 * reference function if the function should be tested, otherwise NULL */
1069 void *checkasm_check_func(void *func
, const char *name
, ...)
1073 CheckasmFuncVersion
*v
;
1077 va_start(arg
, name
);
1078 name_length
= vsnprintf(name_buf
, sizeof(name_buf
), name
, arg
);
1081 if (!func
|| name_length
<= 0 || name_length
>= sizeof(name_buf
))
1084 state
.current_func
= get_func(&state
.funcs
, name_buf
);
1085 state
.funcs
->color
= 1;
1086 v
= &state
.current_func
->versions
;
1089 CheckasmFuncVersion
*prev
;
1091 /* Only test functions that haven't already been tested */
1092 if (v
->func
== func
)
1099 } while ((v
= v
->next
));
1101 v
= prev
->next
= checkasm_malloc(sizeof(CheckasmFuncVersion
));
1106 v
->cpu
= state
.cpu_flag
;
1107 state
.current_func_ver
= v
;
1110 state
.num_checked
++;
1115 /* Decide whether or not the current function needs to be benchmarked */
1116 int checkasm_bench_func(void)
1118 return !state
.num_failed
&& state
.bench_pattern
&&
1119 !wildstrcmp(state
.current_func
->name
, state
.bench_pattern
);
1122 /* Indicate that the current test has failed, return whether verbose printing
1124 int checkasm_fail_func(const char *msg
, ...)
1126 if (state
.current_func_ver
&& state
.current_func_ver
->cpu
&&
1127 state
.current_func_ver
->ok
)
1132 fprintf(stderr
, " %s_%s (", state
.current_func
->name
, cpu_suffix(state
.current_func_ver
->cpu
));
1134 vfprintf(stderr
, msg
, arg
);
1136 fprintf(stderr
, ")\n");
1138 state
.current_func_ver
->ok
= 0;
1141 return state
.verbose
;
1144 void checkasm_set_signal_handler_state(int enabled
) {
1145 state
.catch_signals
= enabled
;
1148 int checkasm_handle_signal(int s
) {
1151 checkasm_fail_func("fatal signal %d: %s", s
, strsignal(s
));
1153 checkasm_fail_func(s
== SIGFPE
? "fatal arithmetic error" :
1154 s
== SIGILL
? "illegal instruction" :
1155 s
== SIGBUS
? "bus error" :
1156 "segmentation fault");
1162 /* Get the benchmark context of the current function */
1163 CheckasmPerf
*checkasm_get_perf_context(void)
1165 CheckasmPerf
*perf
= &state
.current_func_ver
->perf
;
1166 memset(perf
, 0, sizeof(*perf
));
1167 perf
->sysfd
= state
.sysfd
;
1171 /* Print the outcome of all tests performed since the last time this function was called */
1172 void checkasm_report(const char *name
, ...)
1174 static int prev_checked
, prev_failed
, max_length
;
1176 if (state
.num_checked
> prev_checked
) {
1177 int pad_length
= max_length
+ 4;
1181 pad_length
-= fprintf(stderr
, " - %s.", state
.current_test_name
);
1182 va_start(arg
, name
);
1183 pad_length
-= vfprintf(stderr
, name
, arg
);
1185 fprintf(stderr
, "%*c", FFMAX(pad_length
, 0) + 2, '[');
1187 if (state
.num_failed
== prev_failed
)
1188 color_printf(COLOR_GREEN
, "OK");
1190 color_printf(COLOR_RED
, "FAILED");
1191 fprintf(stderr
, "]\n");
1193 prev_checked
= state
.num_checked
;
1194 prev_failed
= state
.num_failed
;
1195 } else if (!state
.cpu_flag
) {
1196 /* Calculate the amount of padding required to make the output vertically aligned */
1197 int length
= strlen(state
.current_test_name
);
1200 va_start(arg
, name
);
1201 length
+= vsnprintf(NULL
, 0, name
, arg
);
1204 if (length
> max_length
)
1205 max_length
= length
;
1209 static int check_err(const char *file
, int line
,
1210 const char *name
, int w
, int h
,
1215 if (!checkasm_fail_func("%s:%d", file
, line
))
1218 fprintf(stderr
, "%s (%dx%d):\n", name
, w
, h
);
1222 #define DEF_CHECKASM_CHECK_BODY(compare, type, fmt) \
1224 int64_t aligned_w = (w - 1LL + align_w) & ~(align_w - 1); \
1225 int64_t aligned_h = (h - 1LL + align_h) & ~(align_h - 1); \
1228 av_assert0(aligned_w == (int32_t)aligned_w);\
1229 av_assert0(aligned_h == (int32_t)aligned_h);\
1230 stride1 /= sizeof(*buf1); \
1231 stride2 /= sizeof(*buf2); \
1232 for (y = 0; y < h; y++) \
1233 if (!compare(&buf1[y*stride1], &buf2[y*stride2], w)) \
1236 if (check_err(file, line, name, w, h, &err)) \
1238 for (y = 0; y < h; y++) { \
1239 for (int x = 0; x < w; x++) \
1240 fprintf(stderr, " " fmt, buf1[x]); \
1241 fprintf(stderr, " "); \
1242 for (int x = 0; x < w; x++) \
1243 fprintf(stderr, " " fmt, buf2[x]); \
1244 fprintf(stderr, " "); \
1245 for (int x = 0; x < w; x++) \
1246 fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
1249 fprintf(stderr, "\n"); \
1251 buf1 -= h*stride1; \
1252 buf2 -= h*stride2; \
1254 for (y = -padding; y < 0; y++) \
1255 if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
1257 if (check_err(file, line, name, w, h, &err)) \
1259 fprintf(stderr, " overwrite above\n"); \
1262 for (y = aligned_h; y < aligned_h + padding; y++) \
1263 if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
1265 if (check_err(file, line, name, w, h, &err)) \
1267 fprintf(stderr, " overwrite below\n"); \
1270 for (y = 0; y < h; y++) \
1271 if (!compare(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
1273 if (check_err(file, line, name, w, h, &err)) \
1275 fprintf(stderr, " overwrite left\n"); \
1278 for (y = 0; y < h; y++) \
1279 if (!compare(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
1281 if (check_err(file, line, name, w, h, &err)) \
1283 fprintf(stderr, " overwrite right\n"); \
1289 #define cmp_int(a, b, len) (!memcmp(a, b, (len) * sizeof(*(a))))
1290 #define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
1291 int checkasm_check_##type(const char *file, int line, \
1292 const type *buf1, ptrdiff_t stride1, \
1293 const type *buf2, ptrdiff_t stride2, \
1294 int w, int h, const char *name, \
1295 int align_w, int align_h, \
1298 DEF_CHECKASM_CHECK_BODY(cmp_int, type, fmt); \
1301 DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
1302 DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
1303 DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
1304 DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
1305 DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
1307 int checkasm_check_float_ulp(const char *file
, int line
,
1308 const float *buf1
, ptrdiff_t stride1
,
1309 const float *buf2
, ptrdiff_t stride2
,
1310 int w
, int h
, const char *name
,
1311 unsigned max_ulp
, int align_w
, int align_h
,
1314 #define cmp_float(a, b, len) float_near_ulp_array(a, b, max_ulp, len)
1315 DEF_CHECKASM_CHECK_BODY(cmp_float
, float, "%g");