/*
 * default memory allocator for libavutil
 * Copyright (c) 2002 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * default memory allocator for libavutil
 */
27 #define _XOPEN_SOURCE 600
34 #include <stdatomic.h>
40 #include "attributes.h"
45 #include "intreadwrite.h"
51 #define malloc AV_JOIN(MALLOC_PREFIX, malloc)
52 #define memalign AV_JOIN(MALLOC_PREFIX, memalign)
53 #define posix_memalign AV_JOIN(MALLOC_PREFIX, posix_memalign)
54 #define realloc AV_JOIN(MALLOC_PREFIX, realloc)
55 #define free AV_JOIN(MALLOC_PREFIX, free)
57 void *malloc(size_t size
);
58 void *memalign(size_t align
, size_t size
);
59 int posix_memalign(void **ptr
, size_t align
, size_t size
);
60 void *realloc(void *ptr
, size_t size
);
63 #endif /* MALLOC_PREFIX */
/* Alignment (in bytes) of all av_malloc()/av_realloc() results: wide enough
 * for the widest SIMD register set the build supports. */
#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))

/* Byte pattern written into fresh allocations when memory poisoning is
 * enabled, so use of uninitialized memory is easier to spot. */
#define FF_MEMORY_POISON 0x2a

/* NOTE: if you want to override these functions with your own
 * implementations (not recommended) you have to link libav* as
 * dynamic libraries and remove -Wl,-Bsymbolic from the linker flags.
 * Note that this will cost performance. */

/* Upper bound on a single allocation, settable via av_max_alloc();
 * read/written with relaxed atomics since it is a simple tunable. */
static atomic_size_t max_alloc_size = INT_MAX;
76 void av_max_alloc(size_t max
){
77 atomic_store_explicit(&max_alloc_size
, max
, memory_order_relaxed
);
80 static int size_mult(size_t a
, size_t b
, size_t *r
)
84 #if (!defined(__INTEL_COMPILER) && AV_GCC_VERSION_AT_LEAST(5,1)) || AV_HAS_BUILTIN(__builtin_mul_overflow)
85 if (__builtin_mul_overflow(a
, b
, &t
))
86 return AVERROR(EINVAL
);
89 /* Hack inspired from glibc: don't try the division if nelem and elsize
90 * are both less than sqrt(SIZE_MAX). */
91 if ((a
| b
) >= ((size_t)1 << (sizeof(size_t) * 4)) && a
&& t
/ a
!= b
)
92 return AVERROR(EINVAL
);
98 void *av_malloc(size_t size
)
102 if (size
> atomic_load_explicit(&max_alloc_size
, memory_order_relaxed
))
105 #if HAVE_POSIX_MEMALIGN
106 if (size
) //OS X on SDK 10.6 has a broken posix_memalign implementation
107 if (posix_memalign(&ptr
, ALIGN
, size
))
109 #elif HAVE_ALIGNED_MALLOC
110 ptr
= _aligned_malloc(size
, ALIGN
);
113 ptr
= memalign(ALIGN
, size
);
115 ptr
= memalign(size
, ALIGN
);
118 * Indeed, we should align it:
121 * on 32 for 586, PPro - K6-III
122 * on 64 for K7 (maybe for P3 too).
123 * Because L1 and L2 caches are aligned on those values.
124 * But I don't want to code such logic here!
127 * For AVX ASM. SSE / NEON needs only 16.
128 * Why not larger? Because I did not see a difference in benchmarks ...
130 /* benchmarks with P3
131 * memalign(64) + 1 3071, 3051, 3032
132 * memalign(64) + 2 3051, 3032, 3041
133 * memalign(64) + 4 2911, 2896, 2915
134 * memalign(64) + 8 2545, 2554, 2550
135 * memalign(64) + 16 2543, 2572, 2563
136 * memalign(64) + 32 2546, 2545, 2571
137 * memalign(64) + 64 2570, 2533, 2558
139 * BTW, malloc seems to do 8-byte alignment by default here.
148 #if CONFIG_MEMORY_POISONING
150 memset(ptr
, FF_MEMORY_POISON
, size
);
155 void *av_realloc(void *ptr
, size_t size
)
158 if (size
> atomic_load_explicit(&max_alloc_size
, memory_order_relaxed
))
161 #if HAVE_ALIGNED_MALLOC
162 ret
= _aligned_realloc(ptr
, size
+ !size
, ALIGN
);
164 ret
= realloc(ptr
, size
+ !size
);
166 #if CONFIG_MEMORY_POISONING
168 memset(ret
, FF_MEMORY_POISON
, size
);
/**
 * Resize to nelem*elsize bytes; unlike av_realloc(), frees the original
 * buffer on failure (the "f" variant), so the input pointer must not be
 * reused after a NULL return.
 *
 * @param ptr    block to resize, or NULL
 * @param nelem  number of elements
 * @param elsize size of one element in bytes
 * @return new block, or NULL on overflow/allocation failure (ptr freed)
 */
void *av_realloc_f(void *ptr, size_t nelem, size_t elsize)
{
    size_t size;
    void *r;

    if (size_mult(elsize, nelem, &size)) {
        av_free(ptr);
        return NULL;
    }
    r = av_realloc(ptr, size);
    if (!r)
        av_free(ptr);
    return r;
}
/**
 * Resize through a pointer-to-pointer; on failure the pointed-to pointer
 * is freed and set to NULL.
 *
 * @param ptr  address of the pointer to resize (void* to any pointer type,
 *             copied in/out via memcpy to avoid strict-aliasing issues)
 * @param size new size in bytes; 0 frees the block
 * @return 0 on success, AVERROR(ENOMEM) on failure
 */
int av_reallocp(void *ptr, size_t size)
{
    void *val;

    if (!size) {
        av_freep(ptr);
        return 0;
    }

    memcpy(&val, ptr, sizeof(val));
    val = av_realloc(val, size);

    if (!val) {
        av_freep(ptr);
        return AVERROR(ENOMEM);
    }

    memcpy(ptr, &val, sizeof(val));
    return 0;
}
/**
 * Allocate nmemb*size bytes with overflow checking.
 *
 * @return new block, or NULL on overflow or allocation failure
 */
void *av_malloc_array(size_t nmemb, size_t size)
{
    size_t result;
    if (size_mult(nmemb, size, &result) < 0)
        return NULL;
    return av_malloc(result);
}
/**
 * Resize to nmemb*size bytes with overflow checking; semantics of the
 * underlying av_realloc() apply (original kept on failure).
 *
 * @return resized block, or NULL on overflow or allocation failure
 */
void *av_realloc_array(void *ptr, size_t nmemb, size_t size)
{
    size_t result;
    if (size_mult(nmemb, size, &result) < 0)
        return NULL;
    return av_realloc(ptr, result);
}
/**
 * Resize an array through a pointer-to-pointer with overflow checking;
 * on failure the pointed-to pointer is freed (by av_realloc_f) and the
 * NULL result is stored back.
 *
 * @param ptr   address of the pointer to resize
 * @param nmemb number of elements
 * @param size  size of one element in bytes
 * @return 0 on success, AVERROR(ENOMEM) on failure
 */
int av_reallocp_array(void *ptr, size_t nmemb, size_t size)
{
    void *val;

    memcpy(&val, ptr, sizeof(val));
    val = av_realloc_f(val, nmemb, size);
    memcpy(ptr, &val, sizeof(val));
    /* A NULL result is only an error for a non-empty request. */
    if (!val && nmemb && size)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Free a block allocated by av_malloc()/av_realloc(); NULL is a no-op.
 */
void av_free(void *ptr)
{
#if HAVE_ALIGNED_MALLOC
    /* _aligned_malloc'd memory must be released with _aligned_free. */
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}
/**
 * Free the block pointed to by *arg and set *arg to NULL (defends against
 * use-after-free). The double memcpy avoids strict-aliasing violations
 * since arg may really be a T** for any T.
 */
void av_freep(void *arg)
{
    void *val;

    memcpy(&val, arg, sizeof(val));
    memcpy(arg, &(void *){ NULL }, sizeof(val));
    av_free(val);
}
/**
 * Allocate a zero-initialized, SIMD-aligned block of size bytes.
 *
 * @return new block, or NULL on failure
 */
void *av_mallocz(size_t size)
{
    void *ptr = av_malloc(size);
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
/**
 * Allocate a zero-initialized array of nmemb elements of size bytes each,
 * with overflow checking.
 *
 * @return new block, or NULL on overflow or allocation failure
 */
void *av_calloc(size_t nmemb, size_t size)
{
    size_t result;
    if (size_mult(nmemb, size, &result) < 0)
        return NULL;
    return av_mallocz(result);
}
/**
 * Duplicate a string using the av_* allocator.
 *
 * @param s string to copy, may be NULL
 * @return newly allocated copy, or NULL if s is NULL or on failure
 */
char *av_strdup(const char *s)
{
    char *ptr = NULL;

    if (s) {
        size_t len = strlen(s) + 1;
        ptr = av_realloc(NULL, len);
        if (ptr)
            memcpy(ptr, s, len);
    }
    return ptr;
}
/**
 * Duplicate at most len bytes of a string, always NUL-terminating.
 *
 * @param s   string to copy, may be NULL
 * @param len maximum number of bytes to copy (excluding the terminator)
 * @return newly allocated copy, or NULL if s is NULL or on failure
 */
char *av_strndup(const char *s, size_t len)
{
    char *ret = NULL, *end;

    if (!s)
        return NULL;

    /* If there's a NUL within the first len bytes, shorten to it. */
    end = memchr(s, 0, len);
    if (end)
        len = end - s;

    ret = av_realloc(NULL, len + 1);
    if (!ret)
        return NULL;

    memcpy(ret, s, len);
    ret[len] = 0;
    return ret;
}
/**
 * Duplicate a buffer of size bytes using the av_* allocator.
 *
 * @param p    buffer to copy, may be NULL
 * @param size number of bytes to copy
 * @return newly allocated copy, or NULL if p is NULL or on failure
 */
void *av_memdup(const void *p, size_t size)
{
    void *ptr = NULL;

    if (p) {
        ptr = av_malloc(size);
        if (ptr)
            memcpy(ptr, p, size);
    }
    return ptr;
}
315 int av_dynarray_add_nofree(void *tab_ptr
, int *nb_ptr
, void *elem
)
318 memcpy(&tab
, tab_ptr
, sizeof(tab
));
320 FF_DYNARRAY_ADD(INT_MAX
, sizeof(*tab
), tab
, *nb_ptr
, {
322 memcpy(tab_ptr
, &tab
, sizeof(tab
));
324 return AVERROR(ENOMEM
);
329 void av_dynarray_add(void *tab_ptr
, int *nb_ptr
, void *elem
)
332 memcpy(&tab
, tab_ptr
, sizeof(tab
));
334 FF_DYNARRAY_ADD(INT_MAX
, sizeof(*tab
), tab
, *nb_ptr
, {
336 memcpy(tab_ptr
, &tab
, sizeof(tab
));
343 void *av_dynarray2_add(void **tab_ptr
, int *nb_ptr
, size_t elem_size
,
344 const uint8_t *elem_data
)
346 uint8_t *tab_elem_data
= NULL
;
348 FF_DYNARRAY_ADD(INT_MAX
, elem_size
, *tab_ptr
, *nb_ptr
, {
349 tab_elem_data
= (uint8_t *)*tab_ptr
+ (*nb_ptr
) * elem_size
;
351 memcpy(tab_elem_data
, elem_data
, elem_size
);
352 else if (CONFIG_MEMORY_POISONING
)
353 memset(tab_elem_data
, FF_MEMORY_POISON
, elem_size
);
358 return tab_elem_data
;
/* Helper for av_memcpy_backptr() with back == 2: replicate the 2 bytes
 * before dst forward through len bytes, 4 bytes at a time. */
static void fill16(uint8_t *dst, int len)
{
    uint32_t v = AV_RN16(dst - 2);

    v |= v << 16;

    while (len >= 4) {
        AV_WN32(dst, v);
        dst += 4;
        len -= 4;
    }

    /* Byte-wise tail: each byte repeats the one two positions back. */
    while (len--) {
        *dst = dst[-2];
        dst++;
    }
}
/* Helper for av_memcpy_backptr() with back == 3: replicate the 3 bytes
 * before dst. Three rotated 32-bit words (a, b, c) tile a 12-byte period
 * so the pattern can be written 4 bytes at a time in either endianness. */
static void fill24(uint8_t *dst, int len)
{
#if HAVE_BIGENDIAN
    uint32_t v = AV_RB24(dst - 3);
    uint32_t a = v << 8  | v >> 16;
    uint32_t b = v << 16 | v >> 8;
    uint32_t c = v << 24 | v;
#else
    uint32_t v = AV_RL24(dst - 3);
    uint32_t a = v       | v << 24;
    uint32_t b = v >> 8  | v << 16;
    uint32_t c = v >> 16 | v << 8;
#endif

    while (len >= 12) {
        AV_WN32(dst,     a);
        AV_WN32(dst + 4, b);
        AV_WN32(dst + 8, c);
        dst += 12;
        len -= 12;
    }

    if (len >= 4) {
        AV_WN32(dst, a);
        dst += 4;
        len -= 4;
    }

    if (len >= 4) {
        AV_WN32(dst, b);
        dst += 4;
        len -= 4;
    }

    /* Byte-wise tail: each byte repeats the one three positions back. */
    while (len--) {
        *dst = dst[-3];
        dst++;
    }
}
/* Helper for av_memcpy_backptr() with back == 4: replicate the 4 bytes
 * before dst, using 64-bit stores in 32-byte chunks when fast. */
static void fill32(uint8_t *dst, int len)
{
    uint32_t v = AV_RN32(dst - 4);

#if HAVE_FAST_64BIT
    uint64_t v2 = v + ((uint64_t)v << 32);

    while (len >= 32) {
        AV_WN64(dst,      v2);
        AV_WN64(dst +  8, v2);
        AV_WN64(dst + 16, v2);
        AV_WN64(dst + 24, v2);
        dst += 32;
        len -= 32;
    }
#endif

    while (len >= 4) {
        AV_WN32(dst, v);
        dst += 4;
        len -= 4;
    }

    /* Byte-wise tail: each byte repeats the one four positions back. */
    while (len--) {
        *dst = dst[-4];
        dst++;
    }
}
/**
 * Overlapping copy: copy cnt bytes from dst-back to dst, byte-order
 * preserving, so earlier output bytes feed later ones (LZ77-style
 * back-reference expansion). memcpy/memmove cannot be used directly
 * because src and dst overlap when back < cnt.
 *
 * @param dst  destination (and, offset by -back, source) buffer
 * @param back distance back to the source; must be > 0
 * @param cnt  number of bytes to produce
 */
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
{
    const uint8_t *src = &dst[-back];
    if (!back)
        return;

    if (back == 1) {
        /* Period 1: plain byte fill. */
        memset(dst, *src, cnt);
    } else if (back == 2) {
        fill16(dst, cnt);
    } else if (back == 3) {
        fill24(dst, cnt);
    } else if (back == 4) {
        fill32(dst, cnt);
    } else {
        if (cnt >= 16) {
            /* Doubling strategy: each memcpy makes the valid region
             * (and thus the next safe non-overlapping copy) twice as long. */
            int blocklen = back;
            while (cnt > blocklen) {
                memcpy(dst, src, blocklen);
                dst       += blocklen;
                cnt       -= blocklen;
                blocklen <<= 1;
            }
            memcpy(dst, src, cnt);
            return;
        }
        /* Short copies: unaligned 4/2/1-byte steps; safe because
         * back > 4 guarantees no overlap within each step. */
        if (cnt >= 8) {
            AV_COPY32U(dst,     src);
            AV_COPY32U(dst + 4, src + 4);
            src += 8;
            dst += 8;
            cnt -= 8;
        }
        if (cnt >= 4) {
            AV_COPY32U(dst, src);
            src += 4;
            dst += 4;
            cnt -= 4;
        }
        if (cnt >= 2) {
            AV_COPY16U(dst, src);
            src += 2;
            dst += 2;
            cnt -= 2;
        }
        if (cnt)
            *dst = *src;
    }
}
497 void *av_fast_realloc(void *ptr
, unsigned int *size
, size_t min_size
)
501 if (min_size
<= *size
)
504 max_size
= atomic_load_explicit(&max_alloc_size
, memory_order_relaxed
);
505 /* *size is an unsigned, so the real maximum is <= UINT_MAX. */
506 max_size
= FFMIN(max_size
, UINT_MAX
);
508 if (min_size
> max_size
) {
513 min_size
= FFMIN(max_size
, FFMAX(min_size
+ min_size
/ 16 + 32, min_size
));
515 ptr
= av_realloc(ptr
, min_size
);
516 /* we could set this to the unmodified min_size but this is safer
517 * if the user lost the ptr and uses NULL now
527 static inline void fast_malloc(void *ptr
, unsigned int *size
, size_t min_size
, int zero_realloc
)
532 memcpy(&val
, ptr
, sizeof(val
));
533 if (min_size
<= *size
) {
534 av_assert0(val
|| !min_size
);
538 max_size
= atomic_load_explicit(&max_alloc_size
, memory_order_relaxed
);
539 /* *size is an unsigned, so the real maximum is <= UINT_MAX. */
540 max_size
= FFMIN(max_size
, UINT_MAX
);
542 if (min_size
> max_size
) {
547 min_size
= FFMIN(max_size
, FFMAX(min_size
+ min_size
/ 16 + 32, min_size
));
549 val
= zero_realloc
? av_mallocz(min_size
) : av_malloc(min_size
);
550 memcpy(ptr
, &val
, sizeof(val
));
/**
 * Ensure *ptr holds at least min_size bytes; old contents are discarded
 * on growth. See fast_malloc() for details.
 */
void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size)
{
    fast_malloc(ptr, size, min_size, 0);
}
/**
 * Like av_fast_malloc(), but a newly grown buffer is zero-initialized.
 */
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
{
    fast_malloc(ptr, size, min_size, 1);
}
/**
 * Public wrapper around size_mult(): multiply a*b into *r with overflow
 * checking.
 *
 * @return 0 on success, AVERROR(EINVAL) on overflow
 */
int av_size_mult(size_t a, size_t b, size_t *r)
{
    return size_mult(a, b, r);
}