1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
|
/*
* simd/jsimd.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
*
*/
/*
 * Bit flags naming the supported acceleration methods.
 * Every method occupies its own bit, so several flags can be OR-ed together
 * into a single mask; JSIMD_NONE is the empty mask.
 */
#define JSIMD_NONE   0
#define JSIMD_MMX    (1 << 0)
#define JSIMD_3DNOW  (1 << 1)
#define JSIMD_SSE    (1 << 2)
#define JSIMD_SSE2   (1 << 3)
/*
 * Short forms of external names for systems with brain-damaged linkers.
 *
 * When NEED_SHORT_EXTERNAL_NAMES is defined, every external identifier
 * declared in this header is remapped to a short alias.  Each long name must
 * appear exactly once and each alias must be unique: defining the same long
 * name twice silently replaces its earlier alias and leaves another symbol
 * without one.
 */
#ifdef NEED_SHORT_EXTERNAL_NAMES

/* SIMD/CPU information */
#define jpeg_simd_cpu_support            jSiCpuSupport

/* Color space conversion */
#define jsimd_rgb_ycc_convert_mmx        jSRGBYCCM
#define jsimd_ycc_rgb_convert_mmx        jSYCCRGBM
#define jconst_rgb_ycc_convert_sse2      jSCRGBYCCS2
#define jsimd_rgb_ycc_convert_sse2       jSRGBYCCS2
#define jconst_ycc_rgb_convert_sse2      jSCYCCRGBS2
#define jsimd_ycc_rgb_convert_sse2       jSYCCRGBS2

/* Downsample */
#define jsimd_h2v2_downsample_mmx        jSDnH2V2M
#define jsimd_h2v1_downsample_mmx        jSDnH2V1M
#define jsimd_h2v2_downsample_sse2       jSDnH2V2S2
#define jsimd_h2v1_downsample_sse2       jSDnH2V1S2

/* Upsample */
#define jsimd_h2v2_upsample_mmx          jSUpH2V2M
#define jsimd_h2v1_upsample_mmx          jSUpH2V1M
#define jsimd_h2v2_fancy_upsample_mmx    jSFUpH2V2M
#define jsimd_h2v1_fancy_upsample_mmx    jSFUpH2V1M
#define jsimd_h2v2_merged_upsample_mmx   jSMUpH2V2M
#define jsimd_h2v1_merged_upsample_mmx   jSMUpH2V1M
#define jsimd_h2v2_upsample_sse2         jSUpH2V2S2
#define jsimd_h2v1_upsample_sse2         jSUpH2V1S2
#define jconst_fancy_upsample_sse2       jSCFUpS2
#define jsimd_h2v2_fancy_upsample_sse2   jSFUpH2V2S2
#define jsimd_h2v1_fancy_upsample_sse2   jSFUpH2V1S2
#define jconst_merged_upsample_sse2      jSCMUpS2
#define jsimd_h2v2_merged_upsample_sse2  jSMUpH2V2S2
#define jsimd_h2v1_merged_upsample_sse2  jSMUpH2V1S2

/* Sample conversion */
#define jsimd_convsamp_mmx               jSConvM
#define jsimd_convsamp_sse2              jSConvS2
#define jsimd_convsamp_float_3dnow       jSConvF3D
#define jsimd_convsamp_float_sse         jSConvFS
#define jsimd_convsamp_float_sse2        jSConvFS2

/* Forward DCT */
#define jsimd_fdct_islow_mmx             jSFDMIS
#define jsimd_fdct_ifast_mmx             jSFDMIF
#define jconst_fdct_islow_sse2           jSCFDS2IS
#define jsimd_fdct_islow_sse2            jSFDS2IS
#define jconst_fdct_ifast_sse2           jSCFDS2IF
#define jsimd_fdct_ifast_sse2            jSFDS2IF
#define jsimd_fdct_float_3dnow           jSFD3DF
#define jconst_fdct_float_sse            jSCFDSF
#define jsimd_fdct_float_sse             jSFDSF

/* Quantization */
#define jsimd_quantize_mmx               jSQuantM
#define jsimd_quantize_sse2              jSQuantS2
#define jsimd_quantize_float_3dnow       jSQuantF3D
#define jsimd_quantize_float_sse         jSQuantFS
#define jsimd_quantize_float_sse2        jSQuantFS2

/* Reduced inverse DCT */
#define jsimd_idct_2x2_mmx               jSIDM22
#define jsimd_idct_4x4_mmx               jSIDM44
#define jconst_idct_red_sse2             jSCIDS2R
#define jsimd_idct_2x2_sse2              jSIDS222
#define jsimd_idct_4x4_sse2              jSIDS244

/* Inverse DCT */
#define jsimd_idct_islow_mmx             jSIDMIS
#define jsimd_idct_ifast_mmx             jSIDMIF
#define jconst_idct_islow_sse2           jSCIDS2IS
#define jsimd_idct_islow_sse2            jSIDS2IS
#define jconst_idct_ifast_sse2           jSCIDS2IF
#define jsimd_idct_ifast_sse2            jSIDS2IF
#define jsimd_idct_float_3dnow           jSID3DF
/*
 * The float IDCT constant tables carry their own aliases under the
 * jconst_idct_* names, matching the extern declarations in this header and
 * keeping them distinct from the jconst_fdct_float_sse alias above.
 */
#define jconst_idct_float_sse            jSCIDSF
#define jsimd_idct_float_sse             jSIDSF
#define jconst_idct_float_sse2           jSCIDS2F
#define jsimd_idct_float_sse2            jSIDS2F

#endif /* NEED_SHORT_EXTERNAL_NAMES */
/*
 * SIMD Ext: retrieve SIMD/CPU information.
 *
 * Takes no arguments and returns an unsigned int.
 * NOTE(review): the result is presumably a combination of the JSIMD_* bits
 * defined in this header -- confirm against the implementation, which is not
 * visible from here.
 */
EXTERN(unsigned int)
jpeg_simd_cpu_support JPP((void));
/*
 * SIMD Color Space Conversion
 *
 * Two conversions are declared, each with an _mmx and an _sse2 entry point
 * that share one parameter list:
 *   rgb_ycc: (img_width, input_buf, output_buf, output_row, num_rows)
 *   ycc_rgb: (out_width, input_buf, input_row, output_buf, num_rows)
 *
 * The jconst_* objects are declared as const int arrays of unspecified
 * length; they are defined outside this header.
 * NOTE(review): each table is presumably the constant block used by the
 * routine declared right after it -- confirm against the implementation.
 */

/* rgb_ycc, MMX-suffixed entry point */
EXTERN(void) jsimd_rgb_ycc_convert_mmx JPP((
        JDIMENSION img_width,
        JSAMPARRAY input_buf,
        JSAMPIMAGE output_buf,
        JDIMENSION output_row,
        int num_rows));

/* ycc_rgb, MMX-suffixed entry point */
EXTERN(void) jsimd_ycc_rgb_convert_mmx JPP((
        JDIMENSION out_width,
        JSAMPIMAGE input_buf,
        JDIMENSION input_row,
        JSAMPARRAY output_buf,
        int num_rows));

/* rgb_ycc, SSE2-suffixed entry point and its constant table */
extern const int jconst_rgb_ycc_convert_sse2[];
EXTERN(void) jsimd_rgb_ycc_convert_sse2 JPP((
        JDIMENSION img_width,
        JSAMPARRAY input_buf,
        JSAMPIMAGE output_buf,
        JDIMENSION output_row,
        int num_rows));

/* ycc_rgb, SSE2-suffixed entry point and its constant table */
extern const int jconst_ycc_rgb_convert_sse2[];
EXTERN(void) jsimd_ycc_rgb_convert_sse2 JPP((
        JDIMENSION out_width,
        JSAMPIMAGE input_buf,
        JDIMENSION input_row,
        JSAMPARRAY output_buf,
        int num_rows));
/*
 * SIMD Downsample
 *
 * Four entry points (h2v2 and h2v1, each with an _mmx and an _sse2 suffix)
 * sharing one parameter list:
 *   (image_width, max_v_samp_factor, v_samp_factor, width_blocks,
 *    input_data, output_data)
 * Note that max_v_samp_factor is an int while v_samp_factor is a JDIMENSION.
 */

EXTERN(void) jsimd_h2v2_downsample_mmx JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));

EXTERN(void) jsimd_h2v1_downsample_mmx JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));

EXTERN(void) jsimd_h2v2_downsample_sse2 JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));

EXTERN(void) jsimd_h2v1_downsample_sse2 JPP((
        JDIMENSION image_width,
        int max_v_samp_factor,
        JDIMENSION v_samp_factor,
        JDIMENSION width_blocks,
        JSAMPARRAY input_data,
        JSAMPARRAY output_data));
/*
 * SIMD Upsample
 *
 * Three families are declared, each for h2v2 and h2v1 and each with an _mmx
 * and an _sse2 suffix:
 *   upsample:        (max_v_samp_factor, output_width,
 *                     input_data, output_data_ptr)
 *   fancy_upsample:  (max_v_samp_factor, downsampled_width,
 *                     input_data, output_data_ptr)
 *   merged_upsample: (output_width, input_buf,
 *                     in_row_group_ctr, output_buf)
 *
 * The jconst_* objects are const int arrays of unspecified length, defined
 * outside this header.
 * NOTE(review): presumably the constant tables of the SSE2 fancy/merged
 * routines declared after them -- confirm against the implementation.
 */

/* --- MMX-suffixed entry points --- */

EXTERN(void) jsimd_h2v2_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v1_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v2_fancy_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v1_fancy_upsample_mmx JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v2_merged_upsample_mmx JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));

EXTERN(void) jsimd_h2v1_merged_upsample_mmx JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));

/* --- SSE2-suffixed entry points --- */

EXTERN(void) jsimd_h2v2_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v1_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION output_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

extern const int jconst_fancy_upsample_sse2[];

EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 JPP((
        int max_v_samp_factor,
        JDIMENSION downsampled_width,
        JSAMPARRAY input_data,
        JSAMPARRAY *output_data_ptr));

extern const int jconst_merged_upsample_sse2[];

EXTERN(void) jsimd_h2v2_merged_upsample_sse2 JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));

EXTERN(void) jsimd_h2v1_merged_upsample_sse2 JPP((
        JDIMENSION output_width,
        JSAMPIMAGE input_buf,
        JDIMENSION in_row_group_ctr,
        JSAMPARRAY output_buf));
/*
 * SIMD Sample Conversion
 *
 * Every routine takes (sample_data, start_col, workspace).  The two
 * integer entry points write through a DCTELEM pointer; the three _float_
 * entry points write through a FAST_FLOAT pointer.
 */

/* DCTELEM workspace */
EXTERN(void) jsimd_convsamp_mmx JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        DCTELEM *workspace));

EXTERN(void) jsimd_convsamp_sse2 JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        DCTELEM *workspace));

/* FAST_FLOAT workspace */
EXTERN(void) jsimd_convsamp_float_3dnow JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_convsamp_float_sse JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_convsamp_float_sse2 JPP((
        JSAMPARRAY sample_data,
        JDIMENSION start_col,
        FAST_FLOAT *workspace));
/*
 * SIMD Forward DCT
 *
 * Each routine takes a single data pointer: DCTELEM * for the islow/ifast
 * entry points, FAST_FLOAT * for the float ones.
 *
 * The jconst_* objects are const int arrays of unspecified length, defined
 * outside this header; each one is listed next to the routine of the same
 * name.
 * NOTE(review): presumably each table holds that routine's constants --
 * confirm against the implementation.
 */

/* MMX-suffixed entry points */
EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM *data));
EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM *data));

/* SSE2-suffixed entry points */
extern const int jconst_fdct_islow_sse2[];
EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM *data));

extern const int jconst_fdct_ifast_sse2[];
EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM *data));

/* Floating-point entry points */
EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT *data));

extern const int jconst_fdct_float_sse[];
EXTERN(void) jsimd_fdct_float_sse JPP((FAST_FLOAT *data));
/*
 * SIMD Quantization
 *
 * Every routine takes (coef_block, divisors, workspace).  The two integer
 * entry points use DCTELEM pointers for divisors and workspace; the three
 * _float_ entry points use FAST_FLOAT pointers instead.
 */

/* DCTELEM divisors / workspace */
EXTERN(void) jsimd_quantize_mmx JPP((
        JCOEFPTR coef_block,
        DCTELEM *divisors,
        DCTELEM *workspace));

EXTERN(void) jsimd_quantize_sse2 JPP((
        JCOEFPTR coef_block,
        DCTELEM *divisors,
        DCTELEM *workspace));

/* FAST_FLOAT divisors / workspace */
EXTERN(void) jsimd_quantize_float_3dnow JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_quantize_float_sse JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));

EXTERN(void) jsimd_quantize_float_sse2 JPP((
        JCOEFPTR coef_block,
        FAST_FLOAT *divisors,
        FAST_FLOAT *workspace));
/*
 * SIMD Reduced Inverse DCT
 *
 * 2x2 and 4x4 entry points, each with an _mmx and an _sse2 suffix, all
 * taking (dct_table, coef_block, output_buf, output_col).  dct_table is an
 * untyped (void *) pointer.
 *
 * jconst_idct_red_sse2 is a const int array of unspecified length, defined
 * outside this header.
 * NOTE(review): presumably shared by both SSE2 reduced-size routines --
 * confirm against the implementation.
 */

/* MMX-suffixed entry points */
EXTERN(void) jsimd_idct_2x2_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

EXTERN(void) jsimd_idct_4x4_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

/* SSE2-suffixed entry points */
extern const int jconst_idct_red_sse2[];

EXTERN(void) jsimd_idct_2x2_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

EXTERN(void) jsimd_idct_4x4_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
/*
 * SIMD Inverse DCT
 *
 * All routines take (dct_table, coef_block, output_buf, output_col), with
 * dct_table passed as an untyped (void *) pointer.
 *
 * The jconst_* objects are const int arrays of unspecified length, defined
 * outside this header.
 * NOTE(review): presumably each table holds the constants of the routine
 * declared right after it -- confirm against the implementation.
 */

/* MMX-suffixed entry points */
EXTERN(void) jsimd_idct_islow_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

EXTERN(void) jsimd_idct_ifast_mmx JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

/* SSE2-suffixed integer entry points */
extern const int jconst_idct_islow_sse2[];
EXTERN(void) jsimd_idct_islow_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_ifast_sse2[];
EXTERN(void) jsimd_idct_ifast_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

/* Floating-point entry points */
EXTERN(void) jsimd_idct_float_3dnow JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_float_sse[];
EXTERN(void) jsimd_idct_float_sse JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));

extern const int jconst_idct_float_sse2[];
EXTERN(void) jsimd_idct_float_sse2 JPP((
        void *dct_table,
        JCOEFPTR coef_block,
        JSAMPARRAY output_buf,
        JDIMENSION output_col));
|