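/*  yuvrgb - YUV to RGB color space conversion
 *
 *  Plain C implementations for packed and planar YUV buffers, plus an
 *  MMX implementation for x86.
 */
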
#include <fvutils/color/yuvrgb.h>
#include <core/macros.h>

#include <fvutils/cpu/mmx.h>

namespace firevision {
#if 0 /* just to make auto-indenting editors happy */
}
#endif
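
/** Convert a YUV 4:1:1 packed buffer to RGB.
 * Input byte order is U Y0 Y1 V Y2 Y3, i.e. six bytes per four pixels.
 * The conversion uses BT.601 coefficients in 16.16 fixed point
 * (e.g. 76284 = round(1.164 * 65536)).
 * @param YUV source YUV 4:1:1 packed buffer
 * @param RGB destination RGB buffer (3 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */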
void
yuv411packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
                           unsigned int width, unsigned int height)
{
  register int y0, y1, y2, y3, u, v;
  register unsigned int i = 0;
  while (i < (width * height) * 3 / 2) {
    u  = YUV[i++] - 128;
    y0 = YUV[i++] - 16;
    y1 = YUV[i++] - 16;
    v  = YUV[i++] - 128;
    y2 = YUV[i++] - 16;
    y3 = YUV[i++] - 16;

    /* pixel 0 */
    *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );

    /* pixel 1 */
    *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );

    /* pixel 2 */
    *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );

    /* pixel 3 */
    *RGB++ = clip( (76284 * y3 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y3 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y3 + 132252 * u             ) >> 16 );
  }
}

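/** Convert a YUV 4:2:2 planar buffer to RGB.
 * The Y plane (width * height bytes) is followed by the U and the V
 * plane (width * height / 2 bytes each); each U/V pair is shared by
 * two horizontally adjacent pixels.
 * @param planar source YUV 4:2:2 planar buffer
 * @param RGB destination RGB buffer (3 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */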
void
yuv422planar_to_rgb_plainc(const unsigned char *planar, unsigned char *RGB,
                           unsigned int width, unsigned int height)
{
  register short y1, y2, u, v;
  register const unsigned char *yp, *up, *vp;
  register unsigned int i;

  yp = planar;
  up = planar + (width * height);
  vp = up + (width * height / 2);

  for (i = 0; i < (width * height / 2); ++i) {
    y1 = *yp++;
    y2 = *yp++;
    u  = *up++;
    v  = *vp++;

    y1 -= 16;
    y2 -= 16;
    u  -= 128;
    v  -= 128;

    /* pixel 1 */
    *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );

    /* pixel 2 */
    *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
  }
}

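/** Convert a YUV 4:2:2 packed (UYVY) buffer to RGB.
 * Input byte order is U Y0 V Y1, i.e. four bytes per two pixels.
 * @param YUV source YUV 4:2:2 packed buffer
 * @param RGB destination RGB buffer (3 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */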
void
yuv422packed_to_rgb_plainc(const unsigned char *YUV, unsigned char *RGB,
                           unsigned int width, unsigned int height)
{
  register int y0, y1, u, v;
  register unsigned int i = 0;
  /* i indexes input bytes; a 4:2:2 packed buffer holds two bytes per pixel */
  while (i < (width * height) * 2) {
    u  = YUV[i++] - 128;
    y0 = YUV[i++] - 16;
    v  = YUV[i++] - 128;
    y1 = YUV[i++] - 16;

    /* pixel 0 */
    *RGB++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );

    /* pixel 1 */
    *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
  }
}

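/** Convert a YUV 4:2:2 planar buffer to BGR.
 * Same as yuv422planar_to_rgb_plainc(), but writes the channels in
 * B G R byte order.
 * @param planar source YUV 4:2:2 planar buffer
 * @param BGR destination BGR buffer (3 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */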
void
yuv422planar_to_bgr_plainc(const unsigned char *planar, unsigned char *BGR,
                           unsigned int width, unsigned int height)
{
  register short y1, y2, u, v;
  register const unsigned char *yp, *up, *vp;
  register unsigned int i;

  yp = planar;
  up = planar + (width * height);
  vp = up + (width * height / 2);

  for (i = 0; i < (width * height / 2); ++i) {
    y1 = *yp++;
    y2 = *yp++;
    u  = *up++;
    v  = *vp++;

    y1 -= 16;
    y2 -= 16;
    u  -= 128;
    v  -= 128;

    /* pixel 1 */
    *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );

    /* pixel 2 */
    *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
  }
}

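/** Convert a YUV 4:2:2 planar buffer to RGBA.
 * Same conversion as yuv422planar_to_rgb_plainc(), but writes a fourth
 * alpha byte per pixel, fixed at 255 (fully opaque).
 * @param planar source YUV 4:2:2 planar buffer
 * @param RGB destination RGBA buffer (4 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */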
void
yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar, unsigned char *RGB,
                                      unsigned int width, unsigned int height)
{
  register short y1, y2, u, v;
  register const unsigned char *yp, *up, *vp;
  register unsigned int i;

  yp = planar;
  up = planar + (width * height);
  vp = up + (width * height / 2);

  for (i = 0; i < (width * height / 2); ++i) {
    y1 = *yp++;
    y2 = *yp++;
    u  = *up++;
    v  = *vp++;

    y1 -= 16;
    y2 -= 16;
    u  -= 128;
    v  -= 128;

    /* pixel 1 */
    *RGB++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
    *RGB++ = 255; /* opaque alpha */

    /* pixel 2 */
    *RGB++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
    *RGB++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
    *RGB++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
    *RGB++ = 255; /* opaque alpha */
  }
}

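/** Convert a YUV 4:2:2 planar buffer to BGRA.
 * Same conversion as yuv422planar_to_bgr_plainc(), but writes a fourth
 * alpha byte per pixel, fixed at 255 (fully opaque).
 * @param planar source YUV 4:2:2 planar buffer
 * @param BGR destination BGRA buffer (4 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */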
void
yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar, unsigned char *BGR,
                                      unsigned int width, unsigned int height)
{
  register short y1, y2, u, v;
  register const unsigned char *yp, *up, *vp;
  register unsigned int i;

  yp = planar;
  up = planar + (width * height);
  vp = up + (width * height / 2);

  for (i = 0; i < (width * height / 2); ++i) {
    y1 = *yp++;
    y2 = *yp++;
    u  = *up++;
    v  = *vp++;

    y1 -= 16;
    y2 -= 16;
    u  -= 128;
    v  -= 128;

    /* pixel 1 */
    *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *BGR++ = 255; /* opaque alpha */

    /* pixel 2 */
    *BGR++ = clip( (76284 * y2 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y2 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y2 + 104595 * v             ) >> 16 );
    *BGR++ = 255; /* opaque alpha */
  }
}

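/** Convert a YUV 4:2:2 packed (UYVY) buffer to BGRA.
 * Input byte order is U Y0 V Y1; output is four bytes per pixel with
 * the alpha byte fixed at 255.
 * @param YUV source YUV 4:2:2 packed buffer
 * @param BGR destination BGRA buffer (4 bytes per pixel)
 * @param width width of the image in pixels
 * @param height height of the image in pixels
 */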
void
yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV, unsigned char *BGR,
                                      unsigned int width, unsigned int height)
{
  register int y0, y1, u, v;
  register unsigned int i = 0;
  while (i < (width * height) * 2) {
    u  = YUV[i++] - 128;
    y0 = YUV[i++] - 16;
    v  = YUV[i++] - 128;
    y1 = YUV[i++] - 16;

    /* pixel 0 */
    *BGR++ = clip( (76284 * y0 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y0 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y0 + 104595 * v             ) >> 16 );
    *BGR++ = 255; /* opaque alpha */

    /* pixel 1 */
    *BGR++ = clip( (76284 * y1 + 132252 * u             ) >> 16 );
    *BGR++ = clip( (76284 * y1 -  25625 * u - 53281 * v ) >> 16 );
    *BGR++ = clip( (76284 * y1 + 104595 * v             ) >> 16 );
    *BGR++ = 255; /* opaque alpha */
  }
}

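/* MMX code path, compiled on x86 only. The constants below are the same
 * BT.601 coefficients as used above, right-shifted by RZ() so that the
 * intermediate products fit the signed 16-bit MMX multiplies. */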
#if ( \
  defined __i386__ || \
  defined __386__ || \
  defined __X86__ || \
  defined _M_IX86 || \
  defined i386)

#define CRV    104595
#define CBU    132251
#define CGU     25624
#define CGV     53280
#define YMUL    76283
#define OFF     32768
#define BITRES     16

/* calculation resolution in bits: RES = 6 gives 10.6 fixed point;
 * values above 6 can overflow the signed 16-bit MMX multiplies */
#define RES 6

#define RZ(i)   (i >> (BITRES - RES))
#define FOUR(i) {i, i, i, i}

__aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
__aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
__aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
__aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
__aligned(8) const volatile unsigned short _const_ymul [4]  = FOUR(RZ(YMUL));
__aligned(8) const volatile unsigned short _const_128  [4]  = FOUR(128);
__aligned(8) const volatile unsigned short _const_32   [4]  = FOUR(RZ(OFF));
__aligned(8) const volatile unsigned short _const_16   [4]  = FOUR(16);

#define CONST_CRVCRV *_const_crvcrv
#define CONST_CBUCBU *_const_cbucbu
#define CONST_CGUCGU *_const_cgucgu
#define CONST_CGVCGV *_const_cgvcgv
#define CONST_YMUL   *_const_ymul
#define CONST_128    *_const_128
#define CONST_32     *_const_32
#define CONST_16     *_const_16

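/** Convert a planar YUV buffer to 32-bit RGB using MMX.
 * Processes eight pixels per iteration; the width must therefore be a
 * multiple of eight. The chroma planes are addressed as (w/2) x (h/2)
 * planes shared between pairs of adjacent rows, i.e. a 4:2:0-style
 * layout. Four bytes are written per pixel, the fourth byte being zero.
 * @param yuv source planar YUV buffer
 * @param rgb destination buffer (4 bytes per pixel)
 * @param w width of the image in pixels, must be a multiple of 8
 * @param h height of the image in pixels
 */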
void
yuv411planar_to_rgb_mmx(const unsigned char *yuv, unsigned char *rgb,
                        unsigned int w, unsigned int h)
{
  unsigned int xx, yy;
  register const unsigned char *yp1, *up, *vp;
  unsigned char *dp1;

  /* plane pointers: Y plane first, then the two (w/2 x h/2) chroma planes */
  yp1 = yuv;
  up  = yuv + (w * h);
  vp  = up + ((w / 2) * (h / 2));
  dp1 = rgb;

  for (yy = 0; yy < h; yy++) {
    for (xx = 0; xx < w; xx += 8) {
      /* load eight Y samples; odd-indexed bytes end up in mm0,
       * even-indexed bytes in mm1 */
      movq_m2r(*yp1, mm0);
      movq_r2r(mm0, mm1);
      psrlw_i2r(8, mm0);
      psllw_i2r(8, mm1);
      psrlw_i2r(8, mm1);

      /* load four chroma samples from each plane, zero-extend to words */
      pxor_r2r(mm7, mm7);
      movd_m2r(*up, mm3);
      movd_m2r(*vp, mm2);
      punpcklbw_r2r(mm7, mm2);
      punpcklbw_r2r(mm7, mm3);

      /* bias: Y -= 16 */
      movq_m2r(CONST_16, mm4);
      psubsw_r2r(mm4, mm0);
      psubsw_r2r(mm4, mm1);

      /* bias: chroma -= 128 */
      movq_m2r(CONST_128, mm5);
      psubsw_r2r(mm5, mm2);
      psubsw_r2r(mm5, mm3);

      /* scale luma and build the four chroma coefficient products */
      movq_m2r(CONST_YMUL, mm4);
      pmullw_r2r(mm4, mm0);
      pmullw_r2r(mm4, mm1);

      movq_m2r(CONST_CRVCRV, mm7);
      pmullw_r2r(mm3, mm7);

      movq_m2r(CONST_CBUCBU, mm6);
      pmullw_r2r(mm2, mm6);

      movq_m2r(CONST_CGUCGU, mm5);
      pmullw_r2r(mm2, mm5);

      movq_m2r(CONST_CGVCGV, mm4);
      pmullw_r2r(mm3, mm4);

      /* first channel: Y + CRV product; shift back, saturate, pack */
      movq_r2r(mm0, mm2);
      paddsw_r2r(mm7, mm2);
      paddsw_r2r(mm1, mm7);

      psraw_i2r(RES, mm2);
      psraw_i2r(RES, mm7);
      packuswb_r2r(mm7, mm2);

      /* re-interleave the odd/even halves into pixel order */
      pxor_r2r(mm7, mm7);
      movq_r2r(mm2, mm3);
      punpckhbw_r2r(mm7, mm2);
      punpcklbw_r2r(mm3, mm7);
      por_r2r(mm7, mm2);

      /* second channel: Y - CGU product - CGV product + rounding offset */
      movq_r2r(mm0, mm3);
      psubsw_r2r(mm5, mm3);
      psubsw_r2r(mm4, mm3);
      paddsw_m2r(CONST_32, mm3);

      movq_r2r(mm1, mm7);
      psubsw_r2r(mm5, mm7);
      psubsw_r2r(mm4, mm7);
      paddsw_m2r(CONST_32, mm7);

      psraw_i2r(RES, mm3);
      psraw_i2r(RES, mm7);
      packuswb_r2r(mm7, mm3);

      pxor_r2r(mm7, mm7);
      movq_r2r(mm3, mm4);
      punpckhbw_r2r(mm7, mm3);
      punpcklbw_r2r(mm4, mm7);
      por_r2r(mm7, mm3);

      /* third channel: Y + CBU product + rounding offset */
      movq_m2r(CONST_32, mm4);
      paddsw_r2r(mm6, mm0);
      paddsw_r2r(mm6, mm1);
      paddsw_r2r(mm4, mm0);
      paddsw_r2r(mm4, mm1);
      psraw_i2r(RES, mm0);
      psraw_i2r(RES, mm1);
      packuswb_r2r(mm1, mm0);

      pxor_r2r(mm7, mm7);
      movq_r2r(mm0, mm5);
      punpckhbw_r2r(mm7, mm0);
      punpcklbw_r2r(mm5, mm7);
      por_r2r(mm7, mm0);

      /* interleave the three channels (fourth byte zero) into
       * four-byte pixels */
      pxor_r2r(mm1, mm1);
      movq_r2r(mm0, mm5);
      movq_r2r(mm3, mm6);
      movq_r2r(mm2, mm7);
      punpckhbw_r2r(mm3, mm2);
      punpcklbw_r2r(mm6, mm7);
      punpckhbw_r2r(mm1, mm0);
      punpcklbw_r2r(mm1, mm5);

      movq_r2r(mm7, mm1);
      punpckhwd_r2r(mm5, mm7);
      punpcklwd_r2r(mm5, mm1);

      movq_r2r(mm2, mm4);
      punpckhwd_r2r(mm0, mm2);
      punpcklwd_r2r(mm0, mm4);

      /* stream out eight 32-bit pixels with non-temporal stores */
      movntq_r2m(mm1, *(dp1));
      movntq_r2m(mm7, *(dp1 + 8));
      movntq_r2m(mm4, *(dp1 + 16));
      movntq_r2m(mm2, *(dp1 + 24));

      yp1 += 8;
      up  += 4;
      vp  += 4;
      dp1 += 8 * 4;
    }
    /* each chroma row is shared by two successive luma rows */
    if (yy & 0x1) {
      up -= w / 2;
      vp -= w / 2;
    }
  }
  emms();
}
#endif

} // end namespace firevision