avcodec/x86/h264_idct: Fix ff_h264_luma_dc_dequant_idct_sse2 checkasm failures
[ffmpeg.git] / libavutil / murmur3.c
1 /*
2 * Copyright (C) 2013 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include "mem.h"
25 #include "intreadwrite.h"
26 #include "murmur3.h"
27
28 typedef struct AVMurMur3 {
29 uint64_t h1, h2;
30 uint8_t state[16];
31 int state_pos;
32 uint64_t len;
33 } AVMurMur3;
34
35 AVMurMur3 *av_murmur3_alloc(void)
36 {
37 return av_mallocz(sizeof(AVMurMur3));
38 }
39
40 void av_murmur3_init_seeded(AVMurMur3 *c, uint64_t seed)
41 {
42 memset(c, 0, sizeof(*c));
43 c->h1 = c->h2 = seed;
44 }
45
46 void av_murmur3_init(AVMurMur3 *c)
47 {
48 // arbitrary random number as seed
49 av_murmur3_init_seeded(c, 0x725acc55daddca55);
50 }
51
52 static const uint64_t c1 = UINT64_C(0x87c37b91114253d5);
53 static const uint64_t c2 = UINT64_C(0x4cf5ad432745937f);
54
55 #define ROT(a, b) (((a) << (b)) | ((a) >> (64 - (b))))
56
57 static uint64_t inline get_k1(const uint8_t *src)
58 {
59 uint64_t k = AV_RL64(src);
60 k *= c1;
61 k = ROT(k, 31);
62 k *= c2;
63 return k;
64 }
65
66 static inline uint64_t get_k2(const uint8_t *src)
67 {
68 uint64_t k = AV_RL64(src + 8);
69 k *= c2;
70 k = ROT(k, 33);
71 k *= c1;
72 return k;
73 }
74
75 static inline uint64_t update_h1(uint64_t k, uint64_t h1, uint64_t h2)
76 {
77 k ^= h1;
78 k = ROT(k, 27);
79 k += h2;
80 k *= 5;
81 k += 0x52dce729;
82 return k;
83 }
84
85 static inline uint64_t update_h2(uint64_t k, uint64_t h1, uint64_t h2)
86 {
87 k ^= h2;
88 k = ROT(k, 31);
89 k += h1;
90 k *= 5;
91 k += 0x38495ab5;
92 return k;
93 }
94
95 void av_murmur3_update(AVMurMur3 *c, const uint8_t *src, size_t len)
96 {
97 const uint8_t *end;
98 uint64_t h1 = c->h1, h2 = c->h2;
99 uint64_t k1, k2;
100 if (len <= 0) return;
101 c->len += len;
102 if (c->state_pos > 0) {
103 while (c->state_pos < 16) {
104 c->state[c->state_pos++] = *src++;
105 if (--len <= 0) return;
106 }
107 c->state_pos = 0;
108 k1 = get_k1(c->state);
109 k2 = get_k2(c->state);
110 h1 = update_h1(k1, h1, h2);
111 h2 = update_h2(k2, h1, h2);
112 }
113
114 end = src + (len & ~15);
115 while (src < end) {
116 // These could be done sequentially instead
117 // of interleaved, but like this is over 10% faster
118 k1 = get_k1(src);
119 k2 = get_k2(src);
120 h1 = update_h1(k1, h1, h2);
121 h2 = update_h2(k2, h1, h2);
122 src += 16;
123 }
124 c->h1 = h1;
125 c->h2 = h2;
126
127 len &= 15;
128 if (len > 0) {
129 memcpy(c->state, src, len);
130 c->state_pos = len;
131 }
132 }
133
134 static inline uint64_t fmix(uint64_t k)
135 {
136 k ^= k >> 33;
137 k *= UINT64_C(0xff51afd7ed558ccd);
138 k ^= k >> 33;
139 k *= UINT64_C(0xc4ceb9fe1a85ec53);
140 k ^= k >> 33;
141 return k;
142 }
143
144 void av_murmur3_final(AVMurMur3 *c, uint8_t dst[16])
145 {
146 uint64_t h1 = c->h1, h2 = c->h2;
147 memset(c->state + c->state_pos, 0, sizeof(c->state) - c->state_pos);
148 h1 ^= get_k1(c->state) ^ c->len;
149 h2 ^= get_k2(c->state) ^ c->len;
150 h1 += h2;
151 h2 += h1;
152 h1 = fmix(h1);
153 h2 = fmix(h2);
154 h1 += h2;
155 h2 += h1;
156 AV_WL64(dst, h1);
157 AV_WL64(dst + 8, h2);
158 }