vulkan: fix host copy stride
[ffmpeg.git] / libavutil / float_dsp.h
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #ifndef AVUTIL_FLOAT_DSP_H
20 #define AVUTIL_FLOAT_DSP_H
21
22 #include <stddef.h>
23
/**
 * Table of function pointers for element-wise and reduction operations on
 * vectors of floats and doubles. Each member documents its own alignment and
 * length constraints.
 *
 * NOTE(review): presumably obtained from avpriv_float_dsp_alloc() and filled in
 * by the ff_float_dsp_init_*() functions declared in this header - confirm
 * against the implementation.
 */
24 typedef struct AVFloatDSPContext {
25 /**
26 * Calculate the entry-wise product of two vectors of floats and store the result in
27 * a vector of floats.
28 *
29 * @param dst output vector
30 * constraints: 32-byte aligned
31 * @param src0 first input vector
32 * constraints: 32-byte aligned
33 * @param src1 second input vector
34 * constraints: 32-byte aligned
35 * @param len number of elements in the input
36 * constraints: multiple of 16
37 */
38 void (*vector_fmul)(float *dst, const float *src0, const float *src1,
39 int len);
40
41 /**
42 * Multiply a vector of floats by a scalar float and add the result to
43 * the destination vector. Source and destination vectors must
44 * overlap exactly or not at all.
45 *
46 * @param dst result vector
47 * constraints: 32-byte aligned
48 * @param src input vector
49 * constraints: 32-byte aligned
50 * @param mul scalar value
51 * @param len length of vector
52 * constraints: multiple of 16
53 */
54 void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
55 int len);
56
57 /**
58 * Multiply a vector of doubles by a scalar double and add the result to
59 * the destination vector. Source and destination vectors must
60 * overlap exactly or not at all.
61 *
62 * @param dst result vector
63 * constraints: 32-byte aligned
64 * @param src input vector
65 * constraints: 32-byte aligned
66 * @param mul scalar value
67 * @param len length of vector
68 * constraints: multiple of 16
69 */
70 void (*vector_dmac_scalar)(double *dst, const double *src, double mul,
71 int len);
72
73 /**
74 * Multiply a vector of floats by a scalar float. Source and
75 * destination vectors must overlap exactly or not at all.
76 *
77 * @param dst result vector
78 * constraints: 16-byte aligned
79 * @param src input vector
80 * constraints: 16-byte aligned
81 * @param mul scalar value
82 * @param len length of vector
83 * constraints: multiple of 4
84 */
85 void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
86 int len);
87
88 /**
89 * Multiply a vector of doubles by a scalar double. Source and
90 * destination vectors must overlap exactly or not at all.
91 *
92 * @param dst result vector
93 * constraints: 32-byte aligned
94 * @param src input vector
95 * constraints: 32-byte aligned
96 * @param mul scalar value
97 * @param len length of vector
98 * constraints: multiple of 8
99 */
100 void (*vector_dmul_scalar)(double *dst, const double *src, double mul,
101 int len);
102
103 /**
104 * Overlap/add with window function.
105 * Used primarily by MDCT-based audio codecs.
106 * Source and destination vectors must overlap exactly or not at all.
107 *
108 * @param dst result vector
109 * constraints: 16-byte aligned
110 * @param src0 first source vector
111 * constraints: 16-byte aligned
112 * @param src1 second source vector
113 * constraints: 16-byte aligned
114 * @param win half-window vector
115 * constraints: 16-byte aligned
116 * @param len length of vector
117 * constraints: multiple of 4
118 */
119 void (*vector_fmul_window)(float *dst, const float *src0,
120 const float *src1, const float *win, int len);
121
122 /**
123 * Calculate the entry-wise product of two vectors of floats, add a third vector of
124 * floats and store the result in a vector of floats.
125 *
126 * @param dst output vector
127 * constraints: 32-byte aligned
128 * @param src0 first input vector
129 * constraints: 32-byte aligned
130 * @param src1 second input vector
131 * constraints: 32-byte aligned
132 * @param src2 third input vector
133 * constraints: 32-byte aligned
134 * @param len number of elements in the input
135 * constraints: multiple of 16
136 */
137 void (*vector_fmul_add)(float *dst, const float *src0, const float *src1,
138 const float *src2, int len);
139
140 /**
141 * Calculate the entry-wise product of two vectors of floats, and store the result
142 * in a vector of floats. The second vector of floats is iterated over
143 * in reverse order.
144 *
145 * @param dst output vector
146 * constraints: 32-byte aligned
147 * @param src0 first input vector
148 * constraints: 32-byte aligned
149 * @param src1 second input vector
150 * constraints: 32-byte aligned
151 * @param len number of elements in the input
152 * constraints: multiple of 16
153 */
154 void (*vector_fmul_reverse)(float *dst, const float *src0,
155 const float *src1, int len);
156
157 /**
158 * Calculate the sum and difference of two vectors of floats.
159 *
160 * @param v1 first input vector, sum output, 16-byte aligned
161 * @param v2 second input vector, difference output, 16-byte aligned
162 * @param len length of vectors, multiple of 4
163 */
164 void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
165
166 /**
167 * Calculate the scalar product of two vectors of floats.
168 *
169 * @param v1 first vector, 16-byte aligned
170 * @param v2 second vector, 16-byte aligned
171 * @param len length of vectors, multiple of 4
172 *
173 * @return sum of elementwise products
174 */
175 float (*scalarproduct_float)(const float *v1, const float *v2, int len);
176
177 /**
178 * Calculate the entry-wise product of two vectors of doubles and store the result in
179 * a vector of doubles.
180 *
181 * @param dst output vector
182 * constraints: 32-byte aligned
183 * @param src0 first input vector
184 * constraints: 32-byte aligned
185 * @param src1 second input vector
186 * constraints: 32-byte aligned
187 * @param len number of elements in the input
188 * constraints: multiple of 16
189 */
190 void (*vector_dmul)(double *dst, const double *src0, const double *src1,
191 int len);
192
193 /**
194 * Calculate the scalar product of two vectors of doubles.
195 *
196 * @param v1 first vector
197 * constraints: 32-byte aligned
198 * @param v2 second vector
199 * constraints: 32-byte aligned
200 * @param len length of vectors
201 * constraints: multiple of 16
202 *
203 * @return inner product of the vectors
204 */
205 double (*scalarproduct_double)(const double *v1, const double *v2,
206 size_t len);
207 } AVFloatDSPContext;
208
209 /**
210 * Return the scalar product of two vectors of floats.
211 *
212 * @param v1 first input vector
213 * @param v2 second input vector
214 * @param len number of elements in each vector
215 *
216 * @return sum of elementwise products
217 */
218 float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
219
220 /**
221 * Return the scalar product of two vectors of doubles.
222 *
223 * @param v1 first input vector
224 * @param v2 second input vector
225 * @param len number of elements in each vector
226 *
227 * @return inner product of the vectors
228 */
229 double ff_scalarproduct_double_c(const double *v1, const double *v2,
230 size_t len);
231
/*
 * Per-architecture initialization hooks. Each takes the context to operate
 * on; the PPC variant additionally takes a strict flag.
 *
 * NOTE(review): presumably each one installs architecture-optimized
 * implementations into the function pointers of fdsp, and strict has the same
 * meaning as in avpriv_float_dsp_alloc() - confirm against the arch-specific
 * sources.
 */
232 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
233 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
234 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
235 void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
236 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
237 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
238
239 /**
240 * Allocate a float DSP context.
241 *
242 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant
    *
    * @return pointer to the allocated AVFloatDSPContext
    *         NOTE(review): presumably NULL on allocation failure, and ownership
    *         presumably passes to the caller - confirm against the implementation.
243 */
244 AVFloatDSPContext *avpriv_float_dsp_alloc(int strict);
245
246 #endif /* AVUTIL_FLOAT_DSP_H */