/*****************************************************************************
 * $Id: vop-deint-bob.c,v 1.1 2004/09/18 16:46:55 alainjj Exp $
 * Program under GNU General Public License (see ../COPYING)
 * Deinterlace routines for xine by Miguel Freitas
 * based on the DScaler project sources (deinterlace.sourceforge.net)
 *****************************************************************************/
#include <stdio.h>
#include "config.h"

#ifdef ARCH_X86

#include "colorspace.h"
#include "vop.h"
#include "memcpy.h"
#include "cpu_accel.h"
extern int debug;

/*
 * Edge-adaptive "bob" deinterlacer for packed 2-bytes-per-pixel video, using
 * MMX.  (vop_deint_bob registers this routine for VIDEO_YUYV in and out.)
 *
 * Parameters:
 *   v      - not referenced by this routine.
 *   dest   - output frame; lines are width*2 bytes and stored back to back.
 *   src    - input frame with the same line size; even lines start at src,
 *            odd lines at src + 2*width, and lines of the same parity are
 *            width*4 bytes apart.
 *   width  - pixels per line.
 *   height - number of lines in the frame.
 *
 * Returns 1 unconditionally.
 *
 * Behaviour: the first two source lines are copied as-is.  After that every
 * odd source line is copied verbatim, and every even destination line is
 * rebuilt one 16-bit word at a time: the word is either the original even
 * line's word or the average of the odd lines above and below it, depending
 * on a "jaggie" metric computed from the byte YMask keeps in each word.
 *
 * NOTE(review): IsOdd is hard-coded to 1, so the even-field branch of the
 * loop and the trailing "copy last odd line" block are never executed.
 * NOTE(review): no size checks are made.  The first two lines are copied
 * before height is looked at, and only LineLength/8 whole quadwords of each
 * rebuilt line are written, so if width*2 is not a multiple of 8 the last
 * few bytes of those lines are left untouched.
 */
static int deinterlace_bob_yuv_mmx(vop2 *v,  unsigned char *dest, unsigned char *src, 
				   int width, int height) {
  int Line;
  uint64_t *YVal1;   // line above the one being rebuilt (O1 below)
  uint64_t *YVal2;   // original line at the position being rebuilt (E below)
  uint64_t *YVal3;   // line below the one being rebuilt (O2 below)
  uint64_t *Dest;    // destination of the rebuilt line
  uint8_t* pEvenLines = src;
  uint8_t* pOddLines = src+2*width;
  int LineLength = width * 2;    // bytes per line
  int SourcePitch = width * 4;   // bytes between two lines of the same parity
  int IsOdd = 1;                 // fixed: always take the odd-field path
  long EdgeDetect = 625;
  long JaggieThreshold = 73;

  int n;

  uint64_t qwEdgeDetect;
  uint64_t qwThreshold;

  // YMask keeps byte 0 of every 16-bit word (the byte used as intensity below).
  // Mask clears bit 0 of every byte so a word-wide shift right by one cannot
  // move a bit from one byte into its neighbour.
  // NOTE(review): "ub:" is the obsolete GNU designated-initializer spelling.
  static mmx_t YMask = {ub:{0xff,0,0xff,0,0xff,0,0xff,0}};
  static mmx_t Mask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}};

  // Replicate each 16-bit constant into all four words of a quadword.
  qwEdgeDetect = EdgeDetect;
  qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16);
  qwThreshold = JaggieThreshold;
  qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16);


  // copy first even line no matter what, and the first odd line if we're
  // processing an odd field.
  fast_memcpy(dest, pEvenLines, LineLength);
  if (IsOdd)
    fast_memcpy(dest + LineLength, pOddLines, LineLength);

  // From here on height counts line pairs (lines per field), not frame lines.
  height = height / 2;
  for (Line = 0; Line < height - 1; ++Line)
  {
    if (IsOdd)
    {
      // Rebuild even line Line+1 from the odd lines on either side of it.
      YVal1 = (uint64_t *)(pOddLines + Line * SourcePitch);
      YVal2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
      YVal3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch);
      Dest = (uint64_t *)(dest + (Line * 2 + 2) * LineLength);
    }
    else
    {
      // Unreachable while IsOdd is hard-coded to 1.
      YVal1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
      YVal2 = (uint64_t *)(pOddLines + Line * SourcePitch);
      YVal3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
      Dest = (uint64_t *)(dest + (Line * 2 + 1) * LineLength);
    }

    // For ease of reading, the comments below assume that we're operating on an odd
    // field (i.e., that IsOdd is true).  The exact same processing is done when we
    // operate on an even field, but the roles of the odd and even fields are reversed.
    // It's just too cumbersome to explain the algorithm in terms of "the next odd
    // line if we're doing an odd field, or the next even line if we're doing an
    // even field" etc.  So wherever you see "odd" or "even" below, keep in mind that
    // the words' meanings invert for an even field.

    // Copy the odd line to the overlay verbatim (the line right after Dest).
    fast_memcpy((char *)Dest + LineLength, YVal3, LineLength);

    // Process the line 8 bytes (four 16-bit words) per iteration.
    n = LineLength >> 3;
    while( n-- )
    {
      movq_m2r (*YVal1++, mm0);   // mm0 = O1
      movq_m2r (*YVal2++, mm1);   // mm1 = E
      movq_m2r (*YVal3++, mm2);   // mm2 = O2

      // get intensities in mm3 - 5 (one value per 16-bit word, range 0..255)
      movq_r2r ( mm0, mm3 );
      pand_m2r ( YMask, mm3 );
      movq_r2r ( mm1, mm4 );
      pand_m2r ( YMask, mm4 );
      movq_r2r ( mm2, mm5 );
      pand_m2r ( YMask, mm5 );

      // get average in mm0: every byte of O1 and O2 is halved (low bit dropped
      // first) and the halves are added, giving (O1/2 + O2/2) for all 8 bytes
      pand_m2r ( Mask, mm0 );
      pand_m2r ( Mask, mm2 );
      psrlw_i2r ( 01, mm0 );
      psrlw_i2r ( 01, mm2 );
      paddw_r2r ( mm2, mm0 );

      // work out (O1 - E) * (O2 - E) / 2 - EdgeDetect * (O1 - O2) ^ 2 >> 12
      // result will be in mm6

      // halve the intensities (now 0..127) before taking differences and products
      psrlw_i2r ( 01, mm3 );
      psrlw_i2r ( 01, mm4 );
      psrlw_i2r ( 01, mm5 );

      movq_r2r ( mm3, mm6 );
      psubw_r2r ( mm4, mm6 );	//mm6 = O1 - E

      movq_r2r ( mm5, mm7 );
      psubw_r2r ( mm4, mm7 );	//mm7 = O2 - E

      pmullw_r2r ( mm7, mm6 );		// mm6 = (O1 - E) * (O2 - E)

      movq_r2r ( mm3, mm7 );
      psubw_r2r ( mm5, mm7 );		// mm7 = (O1 - O2)
      pmullw_r2r ( mm7, mm7 );	// mm7 = (O1 - O2) ^ 2
      psrlw_i2r ( 12, mm7 );		// mm7 = (O1 - O2) ^ 2 >> 12
      pmullw_m2r ( *&qwEdgeDetect, mm7 );// mm7  = EdgeDetect * (O1 - O2) ^ 2 >> 12

      psubw_r2r ( mm7, mm6 );      // mm6 is what we want

      // mm6 = per-word mask: all ones where the metric exceeds JaggieThreshold
      pcmpgtw_m2r ( *&qwThreshold, mm6 );

      movq_r2r ( mm6, mm7 );

      // where the mask is set keep the averaged odd lines ...
      pand_r2r ( mm6, mm0 );

      // ... elsewhere keep the original even line (mm7 = ~mask & E)
      pandn_r2r ( mm1, mm7 );

      // merge the two selections and store four output words
      por_r2r ( mm0, mm7 );

      movq_r2m ( mm7, *Dest++ );
    }
  }

  // Copy last odd line if we're processing an even field.
  // (Unreachable while IsOdd is hard-coded to 1.)
  if (! IsOdd)
  {
    fast_memcpy(dest + (height * 2 - 1) * LineLength,
                      pOddLines + (height - 1) * SourcePitch,
                      LineLength);
  }

  // clear out the MMX registers ready for doing floating point
  // again
  emms();
  return 1;
}

/*
 * Filter descriptor for the MMX bob deinterlacer: YUYV in, YUYV out,
 * processed by deinterlace_bob_yuv_mmx().
 */
vop vop_deint_bob = {
  "deintbob",  /* name */
  1,           /* only 1 image is needed */
  VIDEO_YUYV,  /* input format  */
  VIDEO_YUYV,  /* output format */
  deinterlace_bob_yuv_mmx,  /* The TREATMENT function */
  NULL,        /* No reinitialization function */
  0,           /* the width of the input is equal to the output width */
  0,           /* idem for the height */
  -1,          /* the destination height is 480 for ntsc, 576 for pal */
  1            /* preferably the last treatment */
};

#endif

