2 * Copyright (C) 2025 Niklas Haas
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 #include "libavutil/avassert.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/refstruct.h"
27 #include "libswscale/ops.h"
28 #include "libswscale/ops_internal.h"
/**
 * Format a string into a temporary buffer and return it.
 *
 * FMT() wraps tprintf() with an anonymous 256-byte compound-literal buffer,
 * giving a printf-style expression usable inline (e.g. as a test name).
 * The returned pointer is only valid for the lifetime of the enclosing
 * full expression/block, as it points into the compound literal.
 *
 * NOTE(review): the extracted source was garbled; the standard va_list
 * boilerplate (va_start/va_end/return) has been restored around the
 * visible vsnprintf(buf, size, fmt, ap) call.
 */
#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)
static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    vsnprintf(buf, size, fmt, ap); /* always NUL-terminates for size > 0 */
    va_end(ap);
    return buf;
}
55 static int rw_pixel_bits(const SwsOp
*op
)
57 const int elems
= op
->rw
.packed
? op
->rw
.elems
: 1;
58 const int size
= ff_sws_pixel_type_size(op
->type
);
59 const int bits
= 8 >> op
->rw
.frac
;
60 av_assert1(bits
>= 1);
61 return elems
* size
* bits
;
/**
 * Generate a random *normal* float (rejects zero, denormals, NaN and inf),
 * by type-punning random 32-bit patterns through a union until one
 * classifies as normal.
 *
 * NOTE(review): reconstructed from a garbled extraction — the do/while
 * skeleton and final return are restored around the visible
 * `while (!isnormal(x.f));` condition.
 */
static float rndf(void)
{
    union { uint32_t u; float f; } x;
    do {
        x.u = rnd();
    } while (!isnormal(x.f));
    return x.f;
}
/**
 * Fill a line of floats with random values.
 *
 * With a nonzero range, values are uniform in [0, range] (scaled raw
 * 32-bit randoms); with range == 0, arbitrary normal floats are used.
 */
static void fill32f(float *line, int num, unsigned range)
{
    const float norm = (float) range / UINT32_MAX;
    for (int n = 0; n < num; n++) {
        if (range)
            line[n] = norm * rnd();
        else
            line[n] = rndf();
    }
}
/**
 * Fill a line of 32-bit words with random values, bounded to
 * [0, range] when range is nonzero and below UINT_MAX; otherwise
 * raw 32-bit randoms are stored unmodified.
 */
static void fill32(uint32_t *line, int num, unsigned range)
{
    const bool bounded = range && range < UINT_MAX;
    for (int idx = 0; idx < num; idx++) {
        const uint32_t bits = rnd();
        line[idx] = bounded ? bits % (range + 1) : bits;
    }
}
/**
 * Fill a line of 16-bit words with random values in [0, range].
 *
 * Full-range fills are delegated to fill32() (two pixels per word) as a
 * fast path; otherwise each element is reduced modulo (range + 1).
 *
 * NOTE(review): reconstructed from a garbled extraction — the visible
 * fragments show the fill32() fast path followed by the per-element
 * modulo loop; the exact branch condition (assumed
 * `!range || range == UINT16_MAX`) should be confirmed against upstream.
 */
static void fill16(uint16_t *line, int num, unsigned range)
{
    if (!range || range == UINT16_MAX) {
        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 1), 0);
    } else {
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
/**
 * Fill a line of bytes with random values in [0, range].
 *
 * Full-range fills are delegated to fill32() (four pixels per word) as a
 * fast path; otherwise each element is reduced modulo (range + 1).
 *
 * NOTE(review): reconstructed from a garbled extraction — the visible
 * fragments show the fill32() fast path followed by the per-element
 * modulo loop; the exact branch condition (assumed
 * `!range || range == UINT8_MAX`) should be confirmed against upstream.
 */
static void fill8(uint8_t *line, int num, unsigned range)
{
    if (!range || range == UINT8_MAX) {
        fill32((uint32_t *) line, AV_CEIL_RSHIFT(num, 2), 0);
    } else {
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
/*
 * Core test driver: compiles one op list with both the C reference backend
 * ("c") and the first available asm backend, fills the source planes with
 * random data appropriate for the read op's pixel type, runs both compiled
 * functions over identical inputs, and compares the written planes with
 * checkasm_check() at the write op's element size. Also benchmarks the new
 * implementation for packed writes.
 *
 * NOTE(review): this extraction is garbled — lines are split mid-token and
 * many original source lines are missing (e.g. the declaration of `comp`,
 * several control-flow braces and continue statements). The code below is
 * kept byte-identical to the extracted text; do NOT treat it as compilable.
 */
106 static void check_ops(const char *report
, const unsigned ranges
[NB_PLANES
],
109 SwsContext
*ctx
= sws_alloc_context();
110 SwsCompiledOp comp_ref
= {0}, comp_new
= {0};
111 const SwsOpBackend
*backend_new
= NULL
;
112 SwsOpList oplist
= { .ops
= (SwsOp
*) ops
};
113 const SwsOp
*read_op
, *write_op
;
114 static const unsigned def_ranges
[4] = {0};
118 declare_func(void, const SwsOpExec
*, const void *, int bx
, int y
, int bx_end
, int y_end
);
/* Scratch planes: generous upper bound of 4x uint32_t per pixel. */
120 DECLARE_ALIGNED_64(char, src0
)[NB_PLANES
][LINES
][PIXELS
* sizeof(uint32_t[4])];
121 DECLARE_ALIGNED_64(char, src1
)[NB_PLANES
][LINES
][PIXELS
* sizeof(uint32_t[4])];
122 DECLARE_ALIGNED_64(char, dst0
)[NB_PLANES
][LINES
][PIXELS
* sizeof(uint32_t[4])];
123 DECLARE_ALIGNED_64(char, dst1
)[NB_PLANES
][LINES
][PIXELS
* sizeof(uint32_t[4])];
127 ctx
->flags
= SWS_BITEXACT
;
/* Locate the terminating (write) op; the list is terminated by .op == 0. */
130 for (oplist
.num_ops
= 0; ops
[oplist
.num_ops
].op
; oplist
.num_ops
++)
131 write_op
= &ops
[oplist
.num_ops
];
133 const int read_size
= PIXELS
* rw_pixel_bits(read_op
) >> 3;
134 const int write_size
= PIXELS
* rw_pixel_bits(write_op
) >> 3;
/* Fill each source plane with randoms matching the read op's pixel type. */
136 for (int p
= 0; p
< NB_PLANES
; p
++) {
137 void *plane
= src0
[p
];
138 switch (read_op
->type
) {
139 case U8
: fill8(plane
, sizeof(src0
[p
]) / sizeof(uint8_t), ranges
[p
]); break;
140 case U16
: fill16(plane
, sizeof(src0
[p
]) / sizeof(uint16_t), ranges
[p
]); break;
141 case U32
: fill32(plane
, sizeof(src0
[p
]) / sizeof(uint32_t), ranges
[p
]); break;
142 case F32
: fill32f(plane
, sizeof(src0
[p
]) / sizeof(uint32_t), ranges
[p
]); break;
/* Identical inputs for ref and new; outputs zeroed before each run. */
146 memcpy(src1
, src0
, sizeof(src0
));
147 memset(dst0
, 0, sizeof(dst0
));
148 memset(dst1
, 0, sizeof(dst1
));
150 /* Compile `ops` using both the asm and c backends */
151 for (int n
= 0; ff_sws_op_backends
[n
]; n
++) {
152 const SwsOpBackend
*backend
= ff_sws_op_backends
[n
];
153 const bool is_ref
= !strcmp(backend
->name
, "c");
154 if (is_ref
|| !comp_new
.func
) {
156 int ret
= ff_sws_ops_compile_backend(ctx
, backend
, &oplist
, &comp
);
157 if (ret
== AVERROR(ENOTSUP
))
161 else if (PIXELS
% comp
.block_size
!= 0)
166 if (!comp_new
.func
) {
168 backend_new
= backend
;
173 av_assert0(comp_ref
.func
&& comp_new
.func
);
175 SwsOpExec exec
= {0};
177 exec
.height
= exec
.slice_h
= 1;
/* Per-plane strides and end-of-line bump deltas for the exec context. */
178 for (int i
= 0; i
< NB_PLANES
; i
++) {
179 exec
.in_stride
[i
] = sizeof(src0
[i
][0]);
180 exec
.out_stride
[i
] = sizeof(dst0
[i
][0]);
181 exec
.in_bump
[i
] = exec
.in_stride
[i
] - read_size
;
182 exec
.out_bump
[i
] = exec
.out_stride
[i
] - write_size
;
186 * Don't use check_func() because the actual function pointer may be a
187 * wrapper shared by multiple implementations. Instead, take a hash of both
188 * the backend pointer and the active CPU flags.
190 uintptr_t id
= (uintptr_t) backend_new
;
191 id
^= (id
<< 6) + (id
>> 2) + 0x9e3779b97f4a7c15 + comp_new
.cpu_flags
;
193 checkasm_save_context();
194 if (checkasm_check_func((void *) id
, "%s", report
)) {
195 func_new
= comp_new
.func
;
196 func_ref
= comp_ref
.func
;
/* Run the reference implementation on src0 -> dst0. */
198 exec
.block_size_in
= comp_ref
.block_size
* rw_pixel_bits(read_op
) >> 3;
199 exec
.block_size_out
= comp_ref
.block_size
* rw_pixel_bits(write_op
) >> 3;
200 for (int i
= 0; i
< NB_PLANES
; i
++) {
201 exec
.in
[i
] = (void *) src0
[i
];
202 exec
.out
[i
] = (void *) dst0
[i
];
204 call_ref(&exec
, comp_ref
.priv
, 0, 0, PIXELS
/ comp_ref
.block_size
, LINES
);
/* Run the new implementation on the identical copy src1 -> dst1. */
206 exec
.block_size_in
= comp_new
.block_size
* rw_pixel_bits(read_op
) >> 3;
207 exec
.block_size_out
= comp_new
.block_size
* rw_pixel_bits(write_op
) >> 3;
208 for (int i
= 0; i
< NB_PLANES
; i
++) {
209 exec
.in
[i
] = (void *) src1
[i
];
210 exec
.out
[i
] = (void *) dst1
[i
];
212 call_new(&exec
, comp_new
.priv
, 0, 0, PIXELS
/ comp_new
.block_size
, LINES
);
/* Compare outputs per plane at the write op's element granularity. */
214 for (int i
= 0; i
< NB_PLANES
; i
++) {
215 const char *name
= FMT("%s[%d]", report
, i
);
216 const int stride
= sizeof(dst0
[i
][0]);
218 switch (write_op
->type
) {
220 checkasm_check(uint8_t, (void *) dst0
[i
], stride
,
221 (void *) dst1
[i
], stride
,
222 write_size
, LINES
, name
);
225 checkasm_check(uint16_t, (void *) dst0
[i
], stride
,
226 (void *) dst1
[i
], stride
,
227 write_size
>> 1, LINES
, name
);
230 checkasm_check(uint32_t, (void *) dst0
[i
], stride
,
231 (void *) dst1
[i
], stride
,
232 write_size
>> 2, LINES
, name
);
235 checkasm_check(float_ulp
, (void *) dst0
[i
], stride
,
236 (void *) dst1
[i
], stride
,
237 write_size
>> 2, LINES
, name
, 0);
/* Only benchmark packed writes (presumably the interesting case). */
241 if (write_op
->rw
.packed
)
245 bench_new(&exec
, comp_new
.priv
, 0, 0, PIXELS
/ comp_new
.block_size
, LINES
);
/* Release compiled ops; avoid double-free when new == ref. */
248 if (comp_new
.func
!= comp_ref
.func
&& comp_new
.free
)
249 comp_new
.free(comp_new
.priv
);
251 comp_ref
.free(comp_ref
.priv
);
252 sws_free_context(&ctx
);
/*
 * Convenience macro family wrapping check_ops():
 *  - CHECK_RANGES:      run an op list bracketed by a generated read op and a
 *                       SWS_OP_WRITE, with explicit per-plane value ranges.
 *  - MK_RANGES:         broadcast one range to all four planes.
 *  - CHECK_RANGE:       CHECK_RANGES with a single broadcast range.
 *  - CHECK_COMMON_RANGE: exercise common planar layouts (1, 3, 4 planes and
 *                       a 4->2 variant with an extra swizzle op).
 *  - CHECK / CHECK_COMMON: the same with an unbounded (0) range.
 *
 * NOTE(review): extraction is garbled; several backslash-continued lines of
 * these macro bodies are missing. Text kept byte-identical below.
 */
255 #define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...) \
257 check_ops(NAME, RANGES, (SwsOp[]) { \
265 .op = SWS_OP_WRITE, \
272 #define MK_RANGES(R) ((const unsigned[]) { R, R, R, R })
273 #define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...) \
274 CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__)
276 #define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...) \
277 CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__); \
278 CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__); \
279 CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__); \
280 CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, { \
281 .op = SWS_OP_SWIZZLE, \
283 .swizzle = SWS_SWIZZLE(0, 3, 1, 2), \
286 #define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...) \
287 CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)
289 #define CHECK_COMMON(NAME, IN, OUT, ...) \
290 CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
/*
 * Tests pure read/write op pairs for every pixel type: N->N planar
 * read/write (all in/out element-count combinations with o <= i), packed
 * reads and writes of 1..4 elements, and fractional (sub-byte) reads and
 * writes for frac = 1..3 (4, 2, 1 bit), skipping the nonexistent 2-bit
 * packed case.
 *
 * NOTE(review): extraction is garbled; the SwsOp initializer lists passed
 * to check_ops() are missing. Text kept byte-identical below.
 */
292 static void check_read_write(void)
294 for (SwsPixelType t
= U8
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
295 const char *type
= ff_sws_pixel_type_name(t
);
296 for (int i
= 1; i
<= 4; i
++) {
297 /* Test N->N planar read/write */
298 for (int o
= 1; o
<= i
; o
++) {
299 check_ops(FMT("rw_%d_%d_%s", i
, o
, type
), NULL
, (SwsOp
[]) {
312 /* Test packed read/write */
316 check_ops(FMT("read_packed%d_%s", i
, type
), NULL
, (SwsOp
[]) {
329 check_ops(FMT("write_packed%d_%s", i
, type
), NULL
, (SwsOp
[]) {
344 /* Test fractional reads/writes */
345 for (int frac
= 1; frac
<= 3; frac
++) {
346 const int bits
= 8 >> frac
;
347 const int range
= (1 << bits
) - 1;
349 continue; /* no 2 bit packed formats currently exist */
351 check_ops(FMT("read_frac%d", frac
), NULL
, (SwsOp
[]) {
364 check_ops(FMT("write_frac%d", frac
), MK_RANGES(range
), (SwsOp
[]) {
/*
 * Tests SWS_OP_SWAP_BYTES (endianness swap) on 16- and 32-bit pixels via
 * the common plane layouts.
 *
 * NOTE(review): extraction is garbled; the remaining op-initializer fields
 * and closing braces are missing. Text kept byte-identical below.
 */
379 static void check_swap_bytes(void)
381 CHECK_COMMON("swap_bytes_16", U16
, U16
, {
382 .op
= SWS_OP_SWAP_BYTES
,
386 CHECK_COMMON("swap_bytes_32", U32
, U32
, {
387 .op
= SWS_OP_SWAP_BYTES
,
/*
 * Tests bitfield pack/unpack ops against a table of real-world bit
 * patterns (e.g. 3:3:2, 5:6:5, 2:10:10:10). For each pattern it derives
 * the component count, a printable pattern name, the total bit width, and
 * per-component value ranges, then checks packing (num planes -> 1) and
 * unpacking (1 -> num planes, input bounded to the total bit width).
 *
 * NOTE(review): extraction is garbled; the patterns struct declaration and
 * the op initializer bodies are missing. Text kept byte-identical below.
 */
392 static void check_pack_unpack(void)
398 { U8
, {{ 3, 3, 2 }}},
399 { U8
, {{ 2, 3, 3 }}},
400 { U8
, {{ 1, 2, 1 }}},
401 {U16
, {{ 5, 6, 5 }}},
402 {U16
, {{ 5, 5, 5 }}},
403 {U16
, {{ 4, 4, 4 }}},
404 {U32
, {{ 2, 10, 10, 10 }}},
405 {U32
, {{10, 10, 10, 2 }}},
408 for (int i
= 0; i
< FF_ARRAY_ELEMS(patterns
); i
++) {
409 const SwsPixelType type
= patterns
[i
].type
;
410 const SwsPackOp pack
= patterns
[i
].op
;
411 const int num
= pack
.pattern
[3] ? 4 : 3;
412 const char *pat
= FMT("%d%d%d%d", pack
.pattern
[0], pack
.pattern
[1],
413 pack
.pattern
[2], pack
.pattern
[3]);
414 const int total
= pack
.pattern
[0] + pack
.pattern
[1] +
415 pack
.pattern
[2] + pack
.pattern
[3];
416 const unsigned ranges
[4] = {
417 (1 << pack
.pattern
[0]) - 1,
418 (1 << pack
.pattern
[1]) - 1,
419 (1 << pack
.pattern
[2]) - 1,
420 (1 << pack
.pattern
[3]) - 1,
423 CHECK_RANGES(FMT("pack_%s", pat
), ranges
, num
, 1, type
, type
, {
429 CHECK_RANGE(FMT("unpack_%s", pat
), UINT32_MAX
>> (32 - total
), 1, num
, type
, type
, {
437 static AVRational
rndq(SwsPixelType t
)
439 const unsigned num
= rnd();
440 if (ff_sws_pixel_type_is_int(t
)) {
441 const unsigned mask
= UINT_MAX
>> (32 - ff_sws_pixel_type_size(t
) * 8);
442 return (AVRational
) { num
& mask
, 1 };
444 const unsigned den
= rnd();
445 return (AVRational
) { num
, den
? den
: 1 };
/*
 * Tests clear ops: for integer types, a table of SwsConst patterns covers
 * clearing alpha/luma to zero or max, chroma pairs to the mid-point
 * (1 << (bits - 1)), mixed chroma+alpha combinations, and several fully
 * random constant sets; for float types, only alpha clearing is tested
 * (float YUV doesn't exist, per the original comment).
 *
 * NOTE(review): extraction is garbled; the integer/float branch structure
 * and the CHECK op initializer bodies are missing. Text kept byte-identical.
 */
449 static void check_clear(void)
451 for (SwsPixelType t
= U8
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
452 const char *type
= ff_sws_pixel_type_name(t
);
453 const int bits
= ff_sws_pixel_type_size(t
) * 8;
455 /* TODO: AVRational can't fit 32 bit constants */
457 const AVRational chroma
= (AVRational
) { 1 << (bits
- 1), 1};
458 const AVRational alpha
= (AVRational
) { (1 << bits
) - 1, 1};
459 const AVRational zero
= (AVRational
) { 0, 1};
460 const AVRational none
= {0};
462 const SwsConst patterns
[] = {
464 {.q4
= { none
, none
, none
, zero
}},
465 {.q4
= { zero
, none
, none
, none
}},
467 {.q4
= { none
, none
, none
, alpha
}},
468 {.q4
= { alpha
, none
, none
, none
}},
470 {.q4
= { chroma
, chroma
, none
, none
}},
471 {.q4
= { none
, chroma
, chroma
, none
}},
472 {.q4
= { none
, none
, chroma
, chroma
}},
473 {.q4
= { chroma
, none
, chroma
, none
}},
474 {.q4
= { none
, chroma
, none
, chroma
}},
476 {.q4
= { chroma
, chroma
, none
, alpha
}},
477 {.q4
= { none
, chroma
, chroma
, alpha
}},
478 {.q4
= { alpha
, none
, chroma
, chroma
}},
479 {.q4
= { chroma
, none
, chroma
, alpha
}},
480 {.q4
= { alpha
, chroma
, none
, chroma
}},
482 {.q4
= { none
, rndq(t
), rndq(t
), rndq(t
) }},
483 {.q4
= { none
, rndq(t
), rndq(t
), rndq(t
) }},
484 {.q4
= { none
, rndq(t
), rndq(t
), rndq(t
) }},
485 {.q4
= { none
, rndq(t
), rndq(t
), rndq(t
) }},
488 for (int i
= 0; i
< FF_ARRAY_ELEMS(patterns
); i
++) {
489 CHECK(FMT("clear_pattern_%s[%d]", type
, i
), 4, 4, t
, t
, {
495 } else if (!ff_sws_pixel_type_is_int(t
)) {
496 /* Floating point YUV doesn't exist, only alpha needs to be cleared */
497 CHECK(FMT("clear_alpha_%s", type
), 4, 4, t
, t
, {
/*
 * Tests left/right shift ops by 1..8 bits for all integer pixel types of
 * at least 16 bits (starts at U16; float types are skipped).
 *
 * NOTE(review): extraction is garbled; the CHECK op initializer bodies and
 * the skip statement after the float check are missing. Text kept
 * byte-identical below.
 */
506 static void check_shift(void)
508 for (SwsPixelType t
= U16
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
509 const char *type
= ff_sws_pixel_type_name(t
);
510 if (!ff_sws_pixel_type_is_int(t
))
513 for (int shift
= 1; shift
<= 8; shift
++) {
514 CHECK_COMMON(FMT("lshift%d_%s", shift
, type
), t
, t
, {
520 CHECK_COMMON(FMT("rshift%d_%s", shift
, type
), t
, t
, {
/*
 * Tests SWS_OP_SWIZZLE for every pixel type against a static table of
 * component permutations (x, y, z, w indices per pattern).
 *
 * NOTE(review): extraction is garbled; the entire patterns table contents
 * (original lines 534-559) are missing. Text kept byte-identical below.
 */
529 static void check_swizzle(void)
531 for (SwsPixelType t
= U8
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
532 const char *type
= ff_sws_pixel_type_name(t
);
533 static const int patterns
[][4] = {
560 for (int i
= 0; i
< FF_ARRAY_ELEMS(patterns
); i
++) {
561 const int x
= patterns
[i
][0], y
= patterns
[i
][1],
562 z
= patterns
[i
][2], w
= patterns
[i
][3];
563 CHECK(FMT("swizzle_%d%d%d%d_%s", x
, y
, z
, w
, type
), 4, 4, t
, t
, {
564 .op
= SWS_OP_SWIZZLE
,
566 .swizzle
= SWS_SWIZZLE(x
, y
, z
, w
),
/*
 * Tests SWS_OP_CONVERT between every pair of pixel types: widening or
 * int->float conversions run unbounded, while narrowing or float->int
 * conversions bound the input range to what the output type can hold.
 * Also checks the special bit-replicating "expand" conversions
 * (U8 -> U16 and U8 -> U32).
 *
 * NOTE(review): extraction is garbled; same-type skip logic, .convert type
 * fields and closing braces are missing. Text kept byte-identical below.
 */
572 static void check_convert(void)
574 for (SwsPixelType i
= U8
; i
< SWS_PIXEL_TYPE_NB
; i
++) {
575 const char *itype
= ff_sws_pixel_type_name(i
);
576 const int isize
= ff_sws_pixel_type_size(i
);
577 for (SwsPixelType o
= U8
; o
< SWS_PIXEL_TYPE_NB
; o
++) {
578 const char *otype
= ff_sws_pixel_type_name(o
);
579 const int osize
= ff_sws_pixel_type_size(o
);
580 const char *name
= FMT("convert_%s_%s", itype
, otype
);
584 if (isize
< osize
|| !ff_sws_pixel_type_is_int(o
)) {
585 CHECK_COMMON(name
, i
, o
, {
586 .op
= SWS_OP_CONVERT
,
590 } else if (isize
> osize
|| !ff_sws_pixel_type_is_int(i
)) {
591 uint32_t range
= UINT32_MAX
>> (32 - osize
* 8);
592 CHECK_COMMON_RANGE(name
, range
, i
, o
, {
593 .op
= SWS_OP_CONVERT
,
601 /* Check expanding conversions */
602 CHECK_COMMON("expand16", U8
, U16
, {
603 .op
= SWS_OP_CONVERT
,
606 .convert
.expand
= true,
609 CHECK_COMMON("expand32", U8
, U32
, {
610 .op
= SWS_OP_CONVERT
,
613 .convert
.expand
= true,
/*
 * Tests dither ops for float pixel types with refcounted dither matrices
 * of every power-of-two size from 1x1 up to 256x256; matrix entries are
 * randomized (the 1x1 case uses a fixed 1/2 constant), and the matrix is
 * released again after each size is checked.
 *
 * NOTE(review): extraction is garbled; the allocation-failure check, the
 * matrix randomization loop body and the .op field of the dither op are
 * missing. Text kept byte-identical below.
 */
617 static void check_dither(void)
619 for (SwsPixelType t
= F32
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
620 const char *type
= ff_sws_pixel_type_name(t
);
621 if (ff_sws_pixel_type_is_int(t
))
624 /* Test all sizes up to 256x256 */
625 for (int size_log2
= 0; size_log2
<= 8; size_log2
++) {
626 const int size
= 1 << size_log2
;
627 AVRational
*matrix
= av_refstruct_allocz(size
* size
* sizeof(*matrix
));
634 matrix
[0] = (AVRational
) { 1, 2 };
636 for (int i
= 0; i
< size
* size
; i
++)
640 CHECK_COMMON(FMT("dither_%dx%d_%s", size
, size
, type
), t
, t
, {
643 .dither
.size_log2
= size_log2
,
644 .dither
.matrix
= matrix
,
647 av_refstruct_unref(&matrix
);
/*
 * Tests min/max clamping ops for every pixel type, using four random
 * rational constants (one per component) as the comparison values.
 *
 * NOTE(review): extraction is garbled; the .op fields (presumably
 * SWS_OP_MIN / SWS_OP_MAX — confirm upstream) and closing braces are
 * missing. Text kept byte-identical below.
 */
652 static void check_min_max(void)
654 for (SwsPixelType t
= U8
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
655 const char *type
= ff_sws_pixel_type_name(t
);
656 CHECK_COMMON(FMT("min_%s", type
), t
, t
, {
659 .c
.q4
= { rndq(t
), rndq(t
), rndq(t
), rndq(t
) },
662 CHECK_COMMON(FMT("max_%s", type
), t
, t
, {
665 .c
.q4
= { rndq(t
), rndq(t
), rndq(t
), rndq(t
) },
/*
 * Tests linear (matrix) ops for float pixel types against a table of named
 * sparsity masks (luma/alpha rows, diagonal 3/4, offset columns, full 3x3
 * and 4x4 matrices, and combinations). For each mask, coefficients inside
 * the mask are randomized; everything outside stays at the identity
 * (1 on the diagonal, 0 elsewhere).
 *
 * NOTE(review): extraction is garbled; the patterns struct declaration,
 * several loop braces and the CHECK op initializer body are missing.
 * Text kept byte-identical below.
 */
670 static void check_linear(void)
672 static const struct {
677 { "luma", SWS_MASK_LUMA
},
678 { "alpha", SWS_MASK_ALPHA
},
679 { "luma+alpha", SWS_MASK_LUMA
| SWS_MASK_ALPHA
},
682 { "row0", SWS_MASK_ROW(0) },
683 { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA
},
684 { "off3", SWS_MASK_OFF3
},
685 { "off3+alpha", SWS_MASK_OFF3
| SWS_MASK_ALPHA
},
686 { "diag3", SWS_MASK_DIAG3
},
687 { "diag4", SWS_MASK_DIAG4
},
688 { "diag3+alpha", SWS_MASK_DIAG3
| SWS_MASK_ALPHA
},
689 { "diag3+off3", SWS_MASK_DIAG3
| SWS_MASK_OFF3
},
690 { "diag3+off3+alpha", SWS_MASK_DIAG3
| SWS_MASK_OFF3
| SWS_MASK_ALPHA
},
691 { "diag4+off4", SWS_MASK_DIAG4
| SWS_MASK_OFF4
},
692 { "matrix3", SWS_MASK_MAT3
},
693 { "matrix3+off3", SWS_MASK_MAT3
| SWS_MASK_OFF3
},
694 { "matrix3+off3+alpha", SWS_MASK_MAT3
| SWS_MASK_OFF3
| SWS_MASK_ALPHA
},
695 { "matrix4", SWS_MASK_MAT4
},
696 { "matrix4+off4", SWS_MASK_MAT4
| SWS_MASK_OFF4
},
699 for (SwsPixelType t
= F32
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
700 const char *type
= ff_sws_pixel_type_name(t
);
701 if (ff_sws_pixel_type_is_int(t
))
704 for (int p
= 0; p
< FF_ARRAY_ELEMS(patterns
); p
++) {
705 const uint32_t mask
= patterns
[p
].mask
;
706 SwsLinearOp lin
= { .mask
= mask
};
708 for (int i
= 0; i
< 4; i
++) {
709 for (int j
= 0; j
< 5; j
++) {
710 if (mask
& SWS_MASK(i
, j
)) {
711 lin
.m
[i
][j
] = rndq(t
);
713 lin
.m
[i
][j
] = (AVRational
) { i
== j
, 1 };
718 CHECK(FMT("linear_%s_%s", patterns
[p
].name
, type
), 4, 4, t
, t
, {
/*
 * Tests scalar multiply ops for every pixel type. For integer types the
 * input range is bounded by max / scale (with a random scale factor masked
 * to the type's width) so the product cannot overflow the value range;
 * float types are tested unbounded.
 *
 * NOTE(review): extraction is garbled; the op initializer bodies and the
 * else branch structure are missing. Text kept byte-identical below.
 */
727 static void check_scale(void)
729 for (SwsPixelType t
= F32
; t
< SWS_PIXEL_TYPE_NB
; t
++) {
730 const char *type
= ff_sws_pixel_type_name(t
);
731 const int bits
= ff_sws_pixel_type_size(t
) * 8;
732 if (ff_sws_pixel_type_is_int(t
)) {
733 /* Ensure the result won't exceed the value range */
734 const unsigned max
= (1 << bits
) - 1;
735 const unsigned scale
= rnd() & max
;
736 const unsigned range
= max
/ (scale
? scale
: 1);
737 CHECK_COMMON_RANGE(FMT("scale_%s", type
), range
, t
, t
, {
743 CHECK_COMMON(FMT("scale_%s", type
), t
, t
, {
752 void checkasm_check_sw_ops(void)
755 report("read_write");
757 report("swap_bytes");
759 report("pack_unpack");