aboutsummaryrefslogtreecommitdiff
path: root/simd/jsimd.h
blob: 371586e64a56def86d4a6ae31f1e7e6c98e4fcda (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/*
 * simd/jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 */

/*
 * Bitmask for supported acceleration methods.
 *
 * Every non-zero value below has exactly one bit set, so the flags can be
 * OR-ed together into one mask and tested individually with '&'.
 * JSIMD_NONE is the empty mask.
 */

#define JSIMD_NONE    0x00
#define JSIMD_MMX     0x01
#define JSIMD_3DNOW   0x02
#define JSIMD_SSE     0x04
#define JSIMD_SSE2    0x08

/*
 * Short forms of external names for systems whose linkers cannot cope
 * with long external identifiers.
 *
 * When NEED_SHORT_EXTERNAL_NAMES is defined, every external symbol
 * declared in this header is mapped to a short alias.  Each long name
 * must appear exactly once on the left-hand side, and each alias must be
 * unique; otherwise two symbols would collide or one would be left
 * without a short form.
 */

#ifdef NEED_SHORT_EXTERNAL_NAMES
#define jpeg_simd_cpu_support                 jSiCpuSupport
#define jsimd_rgb_ycc_convert_mmx             jSRGBYCCM
#define jsimd_ycc_rgb_convert_mmx             jSYCCRGBM
#define jconst_rgb_ycc_convert_sse2           jSCRGBYCCS2
#define jsimd_rgb_ycc_convert_sse2            jSRGBYCCS2
#define jconst_ycc_rgb_convert_sse2           jSCYCCRGBS2
#define jsimd_ycc_rgb_convert_sse2            jSYCCRGBS2
#define jsimd_h2v2_downsample_mmx             jSDnH2V2M
#define jsimd_h2v1_downsample_mmx             jSDnH2V1M
#define jsimd_h2v2_downsample_sse2            jSDnH2V2S2
#define jsimd_h2v1_downsample_sse2            jSDnH2V1S2
#define jsimd_h2v2_upsample_mmx               jSUpH2V2M
#define jsimd_h2v1_upsample_mmx               jSUpH2V1M
#define jsimd_h2v2_fancy_upsample_mmx         jSFUpH2V2M
#define jsimd_h2v1_fancy_upsample_mmx         jSFUpH2V1M
#define jsimd_h2v2_merged_upsample_mmx        jSMUpH2V2M
#define jsimd_h2v1_merged_upsample_mmx        jSMUpH2V1M
#define jsimd_h2v2_upsample_sse2              jSUpH2V2S2
#define jsimd_h2v1_upsample_sse2              jSUpH2V1S2
#define jconst_fancy_upsample_sse2            jSCFUpS2
#define jsimd_h2v2_fancy_upsample_sse2        jSFUpH2V2S2
#define jsimd_h2v1_fancy_upsample_sse2        jSFUpH2V1S2
#define jconst_merged_upsample_sse2           jSCMUpS2
#define jsimd_h2v2_merged_upsample_sse2       jSMUpH2V2S2
#define jsimd_h2v1_merged_upsample_sse2       jSMUpH2V1S2
#define jsimd_convsamp_mmx                    jSConvM
#define jsimd_convsamp_sse2                   jSConvS2
#define jsimd_convsamp_float_3dnow            jSConvF3D
#define jsimd_convsamp_float_sse              jSConvFS
#define jsimd_convsamp_float_sse2             jSConvFS2
#define jsimd_fdct_islow_mmx                  jSFDMIS
#define jsimd_fdct_ifast_mmx                  jSFDMIF
#define jconst_fdct_islow_sse2                jSCFDS2IS
#define jsimd_fdct_islow_sse2                 jSFDS2IS
#define jconst_fdct_ifast_sse2                jSCFDS2IF
#define jsimd_fdct_ifast_sse2                 jSFDS2IF
#define jsimd_fdct_float_3dnow                jSFD3DF
#define jconst_fdct_float_sse                 jSCFDSF
#define jsimd_fdct_float_sse                  jSFDSF
#define jsimd_quantize_mmx                    jSQuantM
#define jsimd_quantize_sse2                   jSQuantS2
#define jsimd_quantize_float_3dnow            jSQuantF3D
#define jsimd_quantize_float_sse              jSQuantFS
#define jsimd_quantize_float_sse2             jSQuantFS2
#define jsimd_idct_2x2_mmx                    jSIDM22
#define jsimd_idct_4x4_mmx                    jSIDM44
#define jconst_idct_red_sse2                  jSCIDS2R
#define jsimd_idct_2x2_sse2                   jSIDS222
#define jsimd_idct_4x4_sse2                   jSIDS244
#define jsimd_idct_islow_mmx                  jSIDMIS
#define jsimd_idct_ifast_mmx                  jSIDMIF
#define jconst_idct_islow_sse2                jSCIDS2IS
#define jsimd_idct_islow_sse2                 jSIDS2IS
#define jconst_idct_ifast_sse2                jSCIDS2IF
#define jsimd_idct_ifast_sse2                 jSIDS2IF
#define jsimd_idct_float_3dnow                jSID3DF
/*
 * The two constant tables below belong to the inverse DCT (they are
 * declared as jconst_idct_float_sse / jconst_idct_float_sse2 further
 * down).  They must not reuse the jconst_fdct_* names: that would
 * redefine the forward-DCT alias above and leave the IDCT tables without
 * a short name.
 */
#define jconst_idct_float_sse                 jSCIDSF
#define jsimd_idct_float_sse                  jSIDSF
#define jconst_idct_float_sse2                jSCIDS2F
#define jsimd_idct_float_sse2                 jSIDS2F
#endif /* NEED_SHORT_EXTERNAL_NAMES */

/*
 * SIMD Ext: retrieves SIMD/CPU support information.
 *
 * NOTE(review): the unsigned result is presumably an OR of the JSIMD_*
 * flags defined in this header -- confirm against the implementation.
 */
EXTERN(unsigned int)
jpeg_simd_cpu_support JPP((void));

/*
 * SIMD Color Space Conversion
 *
 * Entry points for the rgb_ycc and ycc_rgb converters, each available
 * with an _mmx and an _sse2 suffix.  A jconst_* array is declared right
 * before the SSE2 routine of the same name.
 * NOTE(review): presumably the constant table used by that routine --
 * confirm in the corresponding SIMD source file.
 */
EXTERN(void) jsimd_rgb_ycc_convert_mmx JPP((
        JDIMENSION img_width,
        JSAMPARRAY input_buf,
        JSAMPIMAGE output_buf,
        JDIMENSION output_row,
        int num_rows));
EXTERN(void) jsimd_ycc_rgb_convert_mmx JPP((
        JDIMENSION out_width,
        JSAMPIMAGE input_buf,
        JDIMENSION input_row,
        JSAMPARRAY output_buf,
        int num_rows));

extern const int jconst_rgb_ycc_convert_sse2[];
EXTERN(void) jsimd_rgb_ycc_convert_sse2 JPP((
        JDIMENSION img_width,
        JSAMPARRAY input_buf,
        JSAMPIMAGE output_buf,
        JDIMENSION output_row,
        int num_rows));
extern const int jconst_ycc_rgb_convert_sse2[];
EXTERN(void) jsimd_ycc_rgb_convert_sse2 JPP((
        JDIMENSION out_width,
        JSAMPIMAGE input_buf,
        JDIMENSION input_row,
        JSAMPARRAY output_buf,
        int num_rows));

/*
 * SIMD Downsample
 *
 * h2v2 and h2v1 downsampling entry points, each with an _mmx and an
 * _sse2 suffix.  All four share the same parameter list.
 */
EXTERN(void) jsimd_h2v2_downsample_mmx JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));
EXTERN(void) jsimd_h2v1_downsample_mmx JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));

EXTERN(void) jsimd_h2v2_downsample_sse2 JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));
EXTERN(void) jsimd_h2v1_downsample_sse2 JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));

/*
 * SIMD Upsample
 *
 * MMX entry points, in three groups: plain upsample, fancy upsample and
 * merged upsample, each in an h2v2 and an h2v1 form.  The plain and
 * fancy groups differ only in the name of the width parameter
 * (output_width vs. downsampled_width); the merged group takes a
 * JSAMPIMAGE input and a row-group counter instead.
 */
EXTERN(void) jsimd_h2v2_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));
EXTERN(void) jsimd_h2v1_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v2_fancy_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));
EXTERN(void) jsimd_h2v1_fancy_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v2_merged_upsample_mmx JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));
EXTERN(void) jsimd_h2v1_merged_upsample_mmx JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));

/*
 * SSE2 upsampling entry points: plain, fancy and merged, each in an
 * h2v2 and an h2v1 form.  The fancy and merged groups are each preceded
 * by the declaration of a jconst_* array.
 * NOTE(review): presumably the constant table shared by the two routines
 * that follow it -- confirm in the corresponding SIMD source file.
 */
EXTERN(void) jsimd_h2v2_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));
EXTERN(void) jsimd_h2v1_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

extern const int jconst_fancy_upsample_sse2[];
EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));
EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

extern const int jconst_merged_upsample_sse2[];
EXTERN(void) jsimd_h2v2_merged_upsample_sse2 JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));
EXTERN(void) jsimd_h2v1_merged_upsample_sse2 JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));

/*
 * SIMD Sample Conversion
 *
 * The integer entry points (_mmx, _sse2) take a DCTELEM workspace; the
 * _float_ entry points (_3dnow, _sse, _sse2) take a FAST_FLOAT
 * workspace.  The other two parameters are identical across all five.
 */
EXTERN(void) jsimd_convsamp_mmx JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        DCTELEM *workspace));

EXTERN(void) jsimd_convsamp_sse2 JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        DCTELEM *workspace));

EXTERN(void) jsimd_convsamp_float_3dnow JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_convsamp_float_sse JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_convsamp_float_sse2 JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));

/*
 * SIMD Forward DCT
 *
 * The islow/ifast entry points take a DCTELEM block; the float entry
 * points take a FAST_FLOAT block.  As elsewhere in this header, each
 * jconst_* array is declared immediately before the routine that shares
 * its name.
 * NOTE(review): presumably the constant table used by that routine --
 * confirm in the corresponding SIMD source file.
 */
EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));

extern const int jconst_fdct_islow_sse2[];
EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM * data));
extern const int jconst_fdct_ifast_sse2[];
EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data));

EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));

extern const int jconst_fdct_float_sse[];
EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT * data));

/*
 * SIMD Quantization
 *
 * The integer entry points (_mmx, _sse2) take DCTELEM divisors and
 * workspace; the _float_ entry points (_3dnow, _sse, _sse2) take
 * FAST_FLOAT divisors and workspace.  All of them take a JCOEFPTR
 * coef_block as their first parameter.
 */
EXTERN(void) jsimd_quantize_mmx JPP((
        JCOEFPTR coef_block,
        DCTELEM *divisors,
        DCTELEM *workspace));

EXTERN(void) jsimd_quantize_sse2 JPP((
        JCOEFPTR coef_block,
        DCTELEM *divisors,
        DCTELEM *workspace));

EXTERN(void) jsimd_quantize_float_3dnow JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_quantize_float_sse JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_quantize_float_sse2 JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));

/*
 * SIMD Reduced Inverse DCT
 *
 * 2x2 and 4x4 entry points, each with an _mmx and an _sse2 suffix; all
 * four share one parameter list.  A single jconst_idct_red_sse2 array is
 * declared ahead of the two SSE2 routines.
 * NOTE(review): presumably a constant table shared by both SSE2
 * routines -- confirm in the corresponding SIMD source file.
 */
EXTERN(void) jsimd_idct_2x2_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
EXTERN(void) jsimd_idct_4x4_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_red_sse2[];
EXTERN(void) jsimd_idct_2x2_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
EXTERN(void) jsimd_idct_4x4_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

/*
 * SIMD Inverse DCT
 *
 * islow, ifast and float entry points.  Every routine here takes the
 * same four parameters; dct_table is passed as an untyped pointer.
 * Each jconst_* array is declared immediately before the routine that
 * shares its name.
 * NOTE(review): presumably the constant table used by that routine --
 * confirm in the corresponding SIMD source file.
 */
EXTERN(void) jsimd_idct_islow_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
EXTERN(void) jsimd_idct_ifast_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_islow_sse2[];
EXTERN(void) jsimd_idct_islow_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
extern const int jconst_idct_ifast_sse2[];
EXTERN(void) jsimd_idct_ifast_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

EXTERN(void) jsimd_idct_float_3dnow JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_float_sse[];
EXTERN(void) jsimd_idct_float_sse JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_float_sse2[];
EXTERN(void) jsimd_idct_float_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));