From f94ced49be133b70eb1932f2d4b50fd7947b8612 Mon Sep 17 00:00:00 2001
From: Juha Jeronen <juha.jeronen@jyu.fi>
Date: Thu, 31 Mar 2011 21:48:22 +0300
Subject: [PATCH] Inverse telecine deinterlacer

Signed-off-by: Laurent Aimar <fenrir@videolan.org>
Modified-by: Laurent Aimar <fenrir@videolan.org>
---
 modules/video_filter/deinterlace.c | 2824 +++++++++++++++++++++++++++++++++++-
 src/control/video.c                |    2 +-
 src/libvlc-module.c                |    6 +-
 src/video_output/interlacing.c     |    1 +
 4 files changed, 2771 insertions(+), 62 deletions(-)

diff --git a/modules/video_filter/deinterlace.c b/modules/video_filter/deinterlace.c
index 15ae38e3c3..e9dbcd3995 100644
--- a/modules/video_filter/deinterlace.c
+++ b/modules/video_filter/deinterlace.c
@@ -5,7 +5,7 @@
  * $Id$
  *
  * Author: Sam Hocevar <sam@zoy.org>
- *         Juha Jeronen <juha.jeronen@jyu.fi> (Phosphor mode)
+ *         Juha Jeronen <juha.jeronen@jyu.fi> (Phosphor and IVTC modes)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,6 +31,7 @@
 #endif
 
 #include <assert.h>
+#include <stdint.h> /* int_fast32_t */
 
 #ifdef HAVE_ALTIVEC_H
 #   include <altivec.h>
@@ -54,6 +55,7 @@
 #define DEINTERLACE_YADIF    7
 #define DEINTERLACE_YADIF2X  8
 #define DEINTERLACE_PHOSPHOR 9
+#define DEINTERLACE_IVTC     10
 
 /*****************************************************************************
  * Module descriptor
@@ -71,10 +73,10 @@ static void Close( vlc_object_t * );
 
 static const char *const mode_list[] = {
     "discard", "blend", "mean", "bob", "linear", "x",
-    "yadif", "yadif2x", "phosphor" };
+    "yadif", "yadif2x", "phosphor", "ivtc" };
 static const char *const mode_list_text[] = {
     N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"), N_("Linear"), "X",
-    "Yadif", "Yadif (2x)", N_("Phosphor") };
+    "Yadif", "Yadif (2x)", N_("Phosphor"), N_("Film NTSC (IVTC)") };
 
 /* Tooltips drop linefeeds (at least in the Qt GUI);
    thus the space before each set of consecutive \n. */
@@ -163,6 +165,7 @@ static void RenderLinear  ( filter_t *, picture_t *, picture_t *, int );
 static void RenderX       ( picture_t *, picture_t * );
 static int  RenderYadif   ( filter_t *, picture_t *, picture_t *, int, int );
 static int  RenderPhosphor( filter_t *, picture_t *, picture_t *, int, int );
+static int  RenderIVTC    ( filter_t *, picture_t *, picture_t * );
 
 static void MergeGeneric ( void *, const void *, const void *, size_t );
 #if defined(CAN_COMPILE_C_ALTIVEC)
@@ -217,6 +220,84 @@ typedef struct
     int i_dimmer_strength;
 } phosphor_sys_t;
 
+/**
+ * Inverse telecine subsystem state.
+ * @see RenderIVTC()
+ */
+#define IVTC_NUM_FIELD_PAIRS 7
+#define IVTC_DETECTION_HISTORY_SIZE 3
+#define IVTC_LATEST (IVTC_DETECTION_HISTORY_SIZE-1)
+typedef struct
+{
+    int i_mode; /**< Detecting, hard TC, or soft TC. @see ivtc_mode */
+    int i_old_mode; /**< @see IVTCSoftTelecineDetect() */
+
+    int i_cadence_pos; /**< Cadence counter, 0..4. Runs when locked on. */
+    int i_tfd; /**< TFF or BFF telecine. Detected from the video. */
+
+    /** Raw low-level detector output.
+     *
+     *  @see IVTCLowLevelDetect()
+     */
+    int pi_scores[IVTC_NUM_FIELD_PAIRS]; /**< Interlace scores. */
+    int pi_motion[IVTC_DETECTION_HISTORY_SIZE]; /**< 8x8 blocks with motion. */
+    int pi_top_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard top field repeat. */
+    int pi_bot_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard bot field repeat. */
+
+    /** Interlace scores of outgoing frames, used for judging IVTC output
+     *  (detecting cadence breaks).
+     *
+     *  @see IVTCOutputOrDropFrame()
+     */
+    int pi_final_scores[IVTC_DETECTION_HISTORY_SIZE];
+
+    /** Cadence position detection history (in ivtc_cadence_pos format).
+     *  Contains the detected cadence position and a corresponding
+     *  reliability flag for each algorithm.
+     *
+     *  s = scores, interlace scores based algorithm, original to this filter.
+     *  v = vektor, hard field repeat based algorithm, inspired by
+     *              the TVTime/Xine IVTC filter by Billy Biggs (Vektor).
+     *
+     *  Each algorithm may also keep internal, opaque data.
+     *
+     *  @see ivtc_cadence_pos
+     *  @see IVTCCadenceDetectAlgoScores()
+     *  @see IVTCCadenceDetectAlgoVektor()
+     */
+    int  pi_s_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
+    bool pb_s_reliable[IVTC_DETECTION_HISTORY_SIZE];
+    int  pi_v_raw[IVTC_DETECTION_HISTORY_SIZE]; /**< "vektor" algo internal */
+    int  pi_v_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
+    bool pb_v_reliable[IVTC_DETECTION_HISTORY_SIZE];
+
+    /** Final result, chosen by IVTCCadenceDetectFinalize() from the results
+     *  given by the different detection algorithms.
+     *
+     *  @see IVTCCadenceDetectFinalize()
+     */
+    int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE];
+
+    /**
+     *  Set by cadence analyzer. Whether the sequence of last
+     *  IVTC_DETECTION_HISTORY_SIZE detected positions, stored in
+     *  pi_cadence_pos_history, looks like a valid telecine.
+     *
+     *  @see IVTCCadenceAnalyze()
+     */
+    bool b_sequence_valid;
+
+    /**
+     *  Set by cadence analyzer. True if detected position = "dea".
+     *  The three entries of this are used for detecting three progressive
+     *  stencil positions in a row, i.e. five progressive frames in a row;
+     *  this triggers exit from hard IVTC.
+     *
+     *  @see IVTCCadenceAnalyze()
+     */
+    bool pb_all_progressives[IVTC_DETECTION_HISTORY_SIZE];
+} ivtc_sys_t;
+
 /* Top-level subsystem state */
 #define HISTORY_SIZE (3)
 #define CUSTOM_PTS -1
@@ -241,6 +322,7 @@ struct filter_sys_t
 
     /* Algorithm-specific substructures */
     phosphor_sys_t phosphor;
+    ivtc_sys_t ivtc;
 };
 
 /*  NOTE on i_frame_offset:
@@ -365,6 +447,13 @@ static void SetFilterMethod( filter_t *p_filter, const char *psz_method, vlc_fou
         p_sys->b_half_height = false;
         p_sys->b_use_frame_history = true;
     }
+    else if( !strcmp( psz_method, "ivtc" ) )
+    {
+        p_sys->i_mode = DEINTERLACE_IVTC;
+        p_sys->b_double_rate = false;
+        p_sys->b_half_height = false;
+        p_sys->b_use_frame_history = true;
+    }
     else if( !strcmp( psz_method, "discard" ) )
     {
         const bool b_i422 = i_chroma == VLC_CODEC_I422 ||
@@ -417,6 +506,7 @@ static void GetOutputFormat( filter_t *p_filter,
         case DEINTERLACE_YADIF:
         case DEINTERLACE_YADIF2X:
         case DEINTERLACE_PHOSPHOR:
+        case DEINTERLACE_IVTC:
             p_dst->i_chroma = p_src->i_chroma;
             break;
         default:
@@ -2331,75 +2421,2681 @@ static int RenderPhosphor( filter_t *p_filter,
 }
 
 /*****************************************************************************
- * video filter2 functions
+ * Inverse telecine (IVTC) filter (a.k.a. "film mode", "3:2 reverse pulldown")
  *****************************************************************************/
-#define DEINTERLACE_DST_SIZE 3
-static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
+
+/**
+ * @fn RenderIVTC
+ * Deinterlace filter. Performs inverse telecine.
+ *
+ * Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
+ *
+ * This filter attempts to reconstruct the original film frames from an
+ * NTSC telecined signal. It is intended for 24fps progressive material
+ * that was telecined to NTSC 60i. For example, most NTSC anime DVDs
+ * are like this.
+ *
+ * @param p_filter The filter instance.
+ * @param[in] p_src Input frame.
+ * @param[out] p_dst Output frame. Must be allocated by caller.
+ * @return VLC error code (int).
+ * @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
+ * @retval VLC_EGENERIC Frame dropped as part of normal IVTC operation.
+ * @see Deinterlace()
+ * @see ComposeFrame()
+ * @see CalculateInterlaceScore()
+ * @see EstimateNumBlocksWithMotion()
+ *
+ *   Overall explanation:
+ *
+ *   This filter attempts to do in realtime what Transcode's
+ *   ivtc->decimate->32detect chain does offline. Additionally, it removes
+ *   soft telecine. It is an original design, based on some ideas from
+ *   Transcode, some from TVTime, and some original.
+ *
+ *   If the input material is pure NTSC telecined film, inverse telecine
+ *   (also known as "film mode") will (ideally) exactly recover the original
+ *   (progressive film frames. The output will run at 4/5 of the original
+ *   (framerate with no loss of information. Interlacing artifacts are removed,
+ *   and motion becomes as smooth as it was on the original film.
+ *   For soft-telecined material, on the other hand, the progressive frames
+ *   alredy exist, so only the timings are changed such that the output
+ *   becomes smooth 24fps (or would, if the output device had an infinite
+ *   framerate).
+ *
+ *   Put in simple terms, this filter is targeted for NTSC movies and
+ *   especially anime. Virtually all 1990s and early 2000s anime is
+ *   hard-telecined. Because the source material is like that,
+ *   IVTC is needed for also virtually all official R1 (US) anime DVDs.
+ *
+ *   Note that some anime from the turn of the century (e.g. Silent Mobius
+ *   and Sol Bianca) is a hybrid of telecined film and true interlaced
+ *   computer-generated effects and camera pans. In this case, applying IVTC
+ *   will effectively attempt to reconstruct the frames based on the film
+ *   component, but even if this is successful, the framerate reduction will
+ *   cause the computer-generated effects to stutter. This is mathematically
+ *   unavoidable. Instead of IVTC, a framerate doubling deinterlacer is
+ *   recommended for such material. Try "Phosphor", "Bob", or "Linear".
+ *
+ *   Fortunately, 30fps true progressive anime is on the rise (e.g. ARIA,
+ *   Black Lagoon, Galaxy Angel, Ghost in the Shell: Solid State Society,
+ *   Mai Otome, Last Exile, and Rocket Girls). This type requires no
+ *   deinterlacer at all.
+ *
+ *   Another recent trend is using 24fps computer-generated effects and
+ *   telecining them along with the cels (e.g. Kiddy Grade, Str.A.In. and
+ *   The Third: The Girl with the Blue Eye). For this group, IVTC is the
+ *   correct way to deinterlace, and works properly.
+ *
+ *   Soft telecined anime, while rare, also exists. Stellvia of the Universe
+ *   and Angel Links are examples of this. Stellvia constantly alternates
+ *   between soft and hard telecine - pure CGI sequences are soft-telecined,
+ *   while sequences incorporating cel animation are hard-telecined.
+ *   This makes it very hard for the cadence detector to lock on,
+ *   and indeed Stellvia gives some trouble for the filter.
+ *
+ *   To finish the list of different material types, Azumanga Daioh deserves
+ *   a special mention. The OP and ED sequences are both 30fps progressive,
+ *   while the episodes themselves are hard-telecined. This filter should
+ *   mostly work correctly with such material, too. (The beginning of the OP
+ *   shows some artifacts, but otherwise both the OP and ED are indeed
+ *   rendered progressive. The technical reason is that the filter has been
+ *   designed to aggressively reconstruct film frames, which helps in many
+ *   cases with hard-telecined material. In very rare cases, this approach may
+ *   go wrong, regardless of whether the input is telecined or progressive.)
+ *
+ *   Finally, note also that IVTC is the only correct way to deinterlace NTSC
+ *   telecined material. Simply applying an interpolating deinterlacing filter
+ *   (with no framerate doubling) is harmful for two reasons. First, even if
+ *   (the filter does not damage already progressive frames, it will lose half
+ *   (of the available vertical resolution of those frames that are judged
+ *   interlaced. Some algorithms combining data from multiple frames may be
+ *   able to counter this to an extent, effectively performing something akin
+ *   to the frame reconstruction part of IVTC. A more serious problem is that
+ *   any motion will stutter, because (even in the ideal case) one out of
+ *   every four film frames will be shown twice, while the other three will
+ *   be shown only once. Duplicate removal and framerate reduction - which are
+ *   part of IVTC - are also needed to properly play back telecined material
+ *   on progressive displays at a non-doubled framerate.
+ *
+ *   So, try this filter on your NTSC anime DVDs. It just might help.
+ *
+ *
+ *   Technical details:
+ *
+ *
+ *   First, NTSC hard telecine in a nutshell:
+ *
+ *   Film is commonly captured at 24 fps. The framerate must be raised from
+ *   24 fps to 59.94 fields per second, This starts by pretending that the
+ *   original framerate is 23.976 fps. When authoring, the audio can be
+ *   slowed down by 0.1% to match. Now 59.94 = 5/4 * (2*23.976), which gives
+ *   a nice ratio made out of small integers.
+ *
+ *   Thus, each group of four film frames must become five frames in the NTSC
+ *   video stream. One cannot simply repeat one frame of every four, because
+ *   this would result in jerky motion. To slightly soften the jerkiness,
+ *   the extra frame is split into two extra fields, inserted at different
+ *   times. The content of the extra fields is (in classical telecine)
+ *   duplicated as-is from existing fields.
+ *
+ *   The field duplication technique is called "3:2 pulldown". The pattern
+ *   is called the cadence. The output from 3:2 pulldown looks like this
+ *   (if the telecine is TFF, top field first):
+ *
+ *   a  b  c  d  e     Telecined frame (actual frames stored on DVD)
+ *   T1 T1 T2 T3 T4    *T*op field content
+ *   B1 B2 B3 B3 B4    *B*ottom field content
+ *
+ *   Numbers 1-4 denote the original film frames. E.g. T1 = top field of
+ *   original film frame 1. The field Tb, and one of either Bc or Bd, are
+ *   the extra fields inserted in the telecine. With exact duplication, it
+ *   of course doesn't matter whether Bc or Bd is the extra field, but
+ *   with "full field blended" material (see below) this will affect how to
+ *   correctly wxtract film frame 3.
+ *
+ *   See the following web pages for illustrations and discussion:
+ *   http://neuron2.net/LVG/telecining1.html
+ *   http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
+ *
+ *   Note that film frame 2 has been stored "half and half" into two telecined
+ *   frames (b and c). Note also that telecine produces a sequence of
+ *   3 progressive frames (d, e and a) followed by 2 interlaced frames
+ *   (b and c).
+ *
+ *   The output may also look like this (BFF telecine, bottom field first):
+ *
+ *   a' b' c' d' e'
+ *   T1 T2 T3 T3 T4
+ *   B1 B1 B2 B3 B4
+ *
+ *   Now field Bb', and one of either Tc' or Td', are the extra fields.
+ *   Again, film frame 2 is stored "half and half" (into b' and c').
+ *
+ *   Whether the pattern is like abcde or a'b'c'd'e', depends on the telecine
+ *   field dominance (TFF or BFF). This must match the video field dominance,
+ *   but is conceptually different. Importantly, there is no temporal
+ *   difference between those fields that came from the same film frame.
+ *   Also, see the section on soft telecine below.
+ *
+ *   In a hard telecine, the TFD and VFD must match for field renderers
+ *   (e.g. traditional DVD player + CRT TV) to work correctly; this should be
+ *   fairly obvious by considering the above telecine patterns and how a
+ *   field renderer displays the material (one field at a time, dominant
+ *   field first).
+ *
+ *   Note that the VFD may, *correctly*, flip mid-stream, if soft field repeats
+ *   (repeat_pict) have been used. They are commonly used in soft telecine
+ *   (see below), but also occasional lone field repeats exist in some streams,
+ *   e.g., Sol Bianca.
+ *
+ *   See e.g.
+ *   http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
+ *   for discussion. The document discusses mostly PAL, but includes some notes
+ *   on NTSC, too.
+ *
+ *   The reason for the words "classical telecine" above, when field
+ *   duplication was first mentioned, is that there exists a
+ *   "full field blended" version, where the added fields are not exact
+ *   "duplicates, but are blends of the original film frames. This is rare
+ *   in NTSC, but some material like this reportedly exists. See
+ *   http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
+ *   In these cases, the additional fields are a (probably 50%) blend of the
+ *   frames between which they have been inserted. Which one of the two
+ *   possibilites is the extra field then becomes important.
+ *   This filter does NOT support "full field blended" material.
+ *
+ *   To summarize, the 3:2 pulldown sequence produces a group of ten fields
+ *   out of every four film frames. Only eight of these fields are unique.
+ *   To remove the telecine, the duplicate fields must be removed, and the
+ *   original progressive frames restored. Additionally, the presentation
+ *   timestamps (PTS) must be adjusted, and one frame out of five (containing
+ *   no new information) dropped. The duration of each frame in the output
+ *   becomes 5/4 of that in the input, i.e. 25% longer.
+ *
+ *   Theoretically, this whole mess could be avoided by soft telecining, if the
+ *   original material is pure 24fps progressive. By using the stream flags
+ *   correctly, the original progressive frames can be stored on the DVD.
+ *   In such cases, the DVD player will apply "soft" 3:2 pulldown. See the
+ *   following section.
+ *
+ *   Also, the mess with cadence detection for hard telecine (see below) could
+ *   be avoided by using the progressive frame flag and a five-frame future
+ *   buffer, but no one ever sets the flag correctly for hard-telecined
+ *   streams. All frames are marked as interlaced, regardless of their cadence
+ *   position. This is evil, but sort-of-understandable, given that video
+ *   editors often come with "progressive" and "interlaced" editing modes,
+ *   but no separate "telecined" mode that could correctly handle this
+ *   information.
+ *
+ *   In practice, most material with its origins in Asia (including virtually
+ *   all official US (R1) anime DVDs) is hard-telecined. Combined with the
+ *   turn-of-the-century practice of rendering true interlaced effects
+ *   on top of the hard-telecined stream, we have what can only be described
+ *   as a monstrosity. Fortunately, recent material is much more consistent,
+ *   even though still almost always hard-telecined.
+ *
+ *   Finally, note that telecined video is often edited directly in interlaced
+ *   form, disregarding safe cut positions as pertains to the telecine sequence
+ *   (there are only two: between "d" and "e", or between "e" and the
+ *   (next "a"). Thus, the telecine sequence will in practice jump erratically
+ *   at cuts [**]. An aggressive detection strategy is needed to cope with
+ *   this.
+ *
+ *   [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
+ *
+ *
+ *   Note about chroma formats: 4:2:0 is very common at least on anime DVDs.
+ *   In the interlaced frames in a hard telecine, the chroma alternates
+ *   every chroma line, even if the chroma format is 4:2:0! This means that
+ *   if the interlaced picture is viewed as-is, the luma alternates every line,
+ *   while the chroma alternates only every two lines of the picture.
+ *
+ *   That is, an interlaced frame from a 4:2:0 telecine looks like this
+ *   (numbers indicate which frame the data comes from):
+ *
+ *   luma  stored 4:2:0 chroma  displayed chroma
+ *   1111  1111                 1111
+ *   2222                       1111
+ *   1111  2222                 2222
+ *   2222                       2222
+ *   ...   ...                  ...
+ *
+ *   The deinterlace filter sees the stored 4:2:0 chroma.
+ *   The "displayed chroma" is only generated later in the filter chain
+ *   (probably when YUV is converted to the display format, if the display
+ *   does not accept YUV 4:2:0 directly).
+ *
+ *
+ *   Next, how NTSC soft telecine works:
+ *
+ *   a  b  c  d     Frame index (actual frames stored on DVD)
+ *   T1 T2 T3 T4    *T*op field content
+ *   B1 B2 B3 B4    *B*ottom field content
+ *
+ *   Here the progressive frames are stored as-is. The catch is in the stream
+ *   flags. For hard telecine, which was explained above, we have
+ *   VFD = constant and nb_fields = 2, just like in a true progressive or
+ *   true interlaced stream. Soft telecine, on the other hand, looks like this:
+ *
+ *   a  b  c  d
+ *   3  2  3  2     nb_fields
+ *   T  B  B  T     *Video* field dominance (for TFF telecine)
+ *   B  T  T  B     *Video* field dominance (for BFF telecine)
+ *
+ *   Now the video field dominance flipflops every two frames!
+ *
+ *   Note that nb_fields = 3 means the frame duration will be 1.5x that of a
+ *   normal frame. Often, soft-telecined frames are correctly flagged as
+ *   progressive.
+ *
+ *   Here the telecining is expected to be done by the player, utilizing the
+ *   soft field repeat (repeat_pict) feature. This is indeed what a field
+ *   renderer (traditional interlaced equipment, or a framerate doubler)
+ *   should do with such a stream.
+ *
+ *   In the IVTC filter, our job is to even out the frame durations, but
+ *   disregard video field dominance and just pass the progressive pictures
+ *   through as-is.
+ *
+ *   Fortunately, for soft telecine to work at all, the stream flags must be
+ *   set correctly. Thus this type can be detected reliably by reading
+ *   nb_fields from three consecutive frames:
+ *
+ *   Let P = previous, C = current, N = next. If the frame to be rendered is C,
+ *   there are only three relevant nb_fields flag patterns for the three-frame
+ *   stencil concerning soft telecine:
+ *
+ *   P C N   What is happening:
+ *   2 3 2   Entering soft telecine at frame C, or running inside it already.
+ *   3 2 3   Running inside soft telecine.
+ *   3 2 2   Exiting soft telecine at frame C. C is the last frame that should
+ *           be handled as soft-telecined. (If we do timing adjustments to the
+ *           "3"s only, we can already exit soft telecine mode  when we see
+ *           this pattern.)
+ *
+ *   Note that the same stream may alternate between soft and hard telecine,
+ *   but these cannot occur at the same time. The start and end of the
+ *   soft-telecined parts can be read off the stream flags, and the rest of
+ *   the stream can be handed to the hard IVTC part of the filter for analysis.
+ *
+ *   Finally, note also that a stream may also request a lone field repeat
+ *   (a sudden "3" surrounded by "2"s). Fortunately, these can be handled as
+ *   (a two-frame soft telecine, as they match the first and third
+ *   flag patterns above.
+ *
+ *   Combinations with several "3"s in a row are not valid for soft or hard
+ *   telecine, so if they occur, the frames can be passed through as-is.
+ *
+ *
+ *   Cadence detection for hard telecine:
+ *
+ *   Consider viewing the TFF and BFF hard telecine sequences through a
+ *   three-frame stencil. Again, let P = previous, C = current, N = next.
+ *   A brief analysis leads to the following cadence tables.
+ *
+ *   PCN                 = stencil position (Previous Current Next),
+ *   Dups.               = duplicate fields,
+ *   Best field pairs... = combinations of fields which correctly reproduce
+ *                         the original progressive frames,
+ *   *                   = see timestamp considerations below for why
+ *                         this particular arrangement.
+ *
+ *   For TFF:
+ *
+ *   PCN   Dups.     Best field pairs for progressive (correct, theoretical)
+ *   abc   TP = TC   TPBP = frame 1, TCBP = frame 1, TNBC = frame 2
+ *   bcd   BC = BN   TCBP = frame 2, TNBC = frame 3, TNBN = frame 3
+ *   cde   BP = BC   TCBP = frame 3, TCBC = frame 3, TNBN = frame 4
+ *   dea   none      TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
+ *   eab   TC = TN   TPBP = frame 4, TCBC = frame 1, TNBC = frame 1
+ *
+ *   (table cont'd)
+ *   PCN   Progressive output*
+ *   abc   frame 2 = TNBC (compose TN+BC)
+ *   bcd   frame 3 = TNBN (copy N)
+ *   cde   frame 4 = TNBN (copy N)
+ *   dea   (drop)
+ *   eab   frame 1 = TCBC (copy C), or TNBC (compose TN+BC)
+ *
+ *   On the rows "dea" and "eab", frame 1 refers to a frame from the next
+ *   group of 4. "Compose TN+BC" means to construct a frame using the
+ *   top field of N, and the bottom field of C. See ComposeFrame().
+ *
+ *   For BFF, swap all B and T, and rearrange the symbol pairs to again
+ *   read "TxBx". We have:
+ *
+ *   PCN   Dups.     Best field pairs for progressive (correct, theoretical)
+ *   abc   BP = BC   TPBP = frame 1, TPBC = frame 1, TCBN = frame 2
+ *   bcd   TC = TN   TPBC = frame 2, TCBN = frame 3, TNBN = frame 3
+ *   cde   TP = TC   TPBC = frame 3, TCBC = frame 3, TNBN = frame 4
+ *   dea   none      TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
+ *   eab   BC = BN   TPBP = frame 4, TCBC = frame 1, TCBN = frame 1
+ *
+ *   (table cont'd)
+ *   PCN   Progressive output*
+ *   abc   frame 2 = TCBN (compose TC+BN)
+ *   bcd   frame 3 = TNBN (copy N)
+ *   cde   frame 4 = TNBN (copy N)
+ *   dea   (drop)
+ *   eab   frame 1 = TCBC (copy C), or TCBN (compose TC+BN)
+ *
+ *   From these cadence tables we can extract two strategies for
+ *   cadence detection. We use both.
+ *
+ *   Strategy 1: duplicated fields.
+ *
+ *   Consider that each stencil position has a unique duplicate field
+ *   condition. In one unique position, "dea", there is no match; in all
+ *   other positions, exactly one. By conservatively filtering the
+ *   possibilities based on detected hard field repeats (identical fields
+ *   in successive input frames), it is possible to gradually lock on
+ *   to the cadence. This kind of strategy is used by Vektor's classic
+ *   IVTC filter from TVTime (although there are some implementation
+ *   differences when compared to ours).
+ *
+ *   "Conservative" here means that we do not rule anything out, but start at
+ *   each stencil position by suggesting the position "dea", and then only add
+ *   to the list of possibilities based on field repeats that are detected at
+ *   the present stencil position. This estimate is then filtered by ANDing
+ *   against a shifted (time-advanced) version of the estimate from the
+ *   previous stencil position. Once the detected position becomes unique,
+ *   the filter locks on. If the new detection is inconsistent with the
+ *   previous one, the detector resets itself and starts from scratch.
+ *
+ *   The strategy is very reliable, as it only requires running (fuzzy)
+ *   duplicate field detection against the input. It is very good at staying
+ *   locked on once it acquires the cadence, and it does so correctly very
+ *   often. These are indeed characteristics that can be observed in the
+ *   behaviour of Vektor's classic filter.
+ *
+ *   Note especially that 8fps/12fps animation, common in anime, will cause
+ *   spurious hard-repeated fields. The conservative nature of the method
+ *   makes it very good at dealing with this - any spurious repeats will only
+ *   slow down the lock-on, not completely confuse it. It should also be good
+ *   at detecting the presence of a telecine, as neither true interlaced nor
+ *   true progressive material should contain any hard field repeats.
+ *   (This, however, has not been tested yet.)
+ *
+ *   The disadvantages are that at times the method may lock on slowly,
+ *   because the detection must be filtered against the history until
+ *   a unique solution is found. Resets, if they happen, will also
+ *   slow down the lock-on.
+ *
+ *   The hard duplicate detection required by this strategy can be made
+ *   data-adaptive in several ways. TVTime uses a running average of motion
+ *   scores for its history buffer. We utilize a different, original approach.
+ *   It is rare, if not nonexistent, that only one field changes between
+ *   two valid frames. Thus, if one field changes "much more" than the other
+ *   in fieldwise motion detection, the less changed one is probably a
+ *   duplicate. Importantly, this works with telecined input, too - the field
+ *   that changes "much" may be part of another film frame, while the "less"
+ *   changed one is actually a duplicate from the previous film frame.
+ *   If both fields change "about as much", then no hard field repeat
+ *   is detected.
+ *
+ *
+ *   Strategy 2: progressive/interlaced field combinations.
+ *
+ *   We can also form a second strategy, which is not as reliable in practice,
+ *   but which locks on faster. This is original to this filter.
+ *
+ *   Consider all possible field pairs from two successive frames: TCBC, TCBN,
+ *   TNBC, TNBN. After one frame, these become TPBP, TPBC, TCBP, TCBC.
+ *   These eight pairs (seven unique, disregarding the duplicate TCBC)
+ *   are the exhaustive list of possible field pairs from two successive
+ *   frames in the three-frame PCN stencil.
+ *
+ *   The field pairs can be used for cadence position detection. The above
+ *   tables list triplets of field pair combinations for each cadence position,
+ *   which should produce progressive frames. All the given triplets are unique
+ *   in each table alone, although the one at "dea" is indistinguishable from
+ *   the case of pure progressive material. It is also the only one which is
+ *   not unique across both tables.
+ *
+ *   Thus, all sequences of two neighboring triplets are unique across both
+ *   tables. (For "neighboring", each table is considered to wrap around from
+ *   "eab" back to "abc", i.e. from the last row back to the first row.)
+ *   Furthermore, each sequence of three neighboring triplets is redundantly
+ *   unique (i.e. is unique, and reduces the chance of false positives).
+ *
+ *   The important idea is: *all other* field pair combinations should produce
+ *   frames that look interlaced. This includes those combinations present in
+ *   the "wrong" (i.e. not current position) rows of the table (insofar as
+ *   those combinations are not also present in the "correct" row; by the
+ *   uniqueness property, *every* "wrong" row will always contain at least one
+ *   combination that differs from those in the "correct" row).
+ *
+ *   As for how we use these observations, we generate the artificial frames
+ *   TCBC, TCBN, TNBC and TNBN (virtually; no data is actually moved).
+ *   Two of these are just the frames C and N, which already exist; the two
+ *   others correspond to composing the given field pairs. We then compute
+ *   the interlace score for each of these frames. The interlace scores
+ *   of what are now TPBP, TPBC and TCBP, also needed, were computed by
+ *   this same mechanism during the previous input frame. These can be slided
+ *   in history and reused.
+ *
+ *   We then check, using the computed interlace scores, and taking into
+ *   account the video field dominance information (to only check valid
+ *   combinations), which field combination triplet given in the tables
+ *   produces the smallest sum of interlace scores. Unless we are at
+ *   PCN = "dea" (which could also be pure progressive!), this immediately
+ *   gives us the most likely current cadence position. Combined with a
+ *   two-step history, the sequence of three most likely positions found this
+ *   way always allows us to make a more or less reliable detection. (That is,
+ *   when a reliable detection is possible; note that if the video has no
+ *   motion at all, every detection will report the position "dea". In anime,
+ *   still shots are common. Thus we must augment this with a full-frame motion
+ *   detection that switches the detector off if no motion was detected.)
+ *
+ *   The detection seems to need four full-frame interlace analyses per frame.
+ *   Actually, three are enough, because the previous N is the new C, so we can
+ *   slide the already computed result. Also during initialization, we only
+ *   need to compute TNBN on the first frame; this has become TPBP when the
+ *   third frame is reached. Similarly, we compute TNBN, TNBC and TCBN during
+ *   the second frame (just before the filter starts), and these get slided
+ *   into TCBC, TCBP and TPBC when the third frame is reached. At that point,
+ *   initialization is complete.
+ *
+ *   Because we only compare interlace scores against each other, no threshold
+ *   is needed in the cadence detector. Thus it, trivially, adapts to the
+ *   material automatically.
+ *
+ *   The weakness of this approach is that any comb metric detects incorrectly
+ *   every now and then. Especially slow vertical camera pans often get treated
+ *   wrong, because the messed-up field combination looks less interlaced
+ *   according to the comb metric (especially in anime) than the correct one
+ *   (which contains, correctly, one-pixel thick cartoon outlines, parts of
+ *   which often perfectly horizontal).
+ *
+ *   The advantage is that this strategy catches horizontal camera pans
+ *   immediately and reliably, while the other strategy may still be trying
+ *   to lock on.
+ *
+ *
+ *   Frame reconstruction:
+ *
+ *   We utilize a hybrid approach. If a valid cadence is locked on, we use the
+ *   operation table to decide what to do. This handles those cases correctly,
+ *   which would be difficult for the interlace detector alone (e.g. vertical
+ *   camera pans). Note that the operations that must be performed for IVTC
+ *   include timestamp mangling and frame dropping, which can only be done
+ *   reliably on a valid cadence.
+ *
+ *   When the cadence fails (we detect this from a sudden upward jump in the
+ *   interlace scores of the constructed frames), we reset the "TVTime"
+ *   detector strategy and fall back to an emergency frame composer, where we
+ *   use ideas from Transcode's IVTC.
+ *
+ *   In the emergency mode, we simply output the least interlaced frame out of
+ *   the combinations TNBN, TNBC and TCBN (where only one of the last two is
+ *   tested, based on the stream TFF/BFF information). In this mode, we do not
+ *   touch the timestamps, and just pass all five frames from each group right
+ *   through. This introduces some stutter, but in practice it is often not
+ *   noticeable. This is because the kind of material that is likely to trip up
+ *   the cadence detector usually includes irregular 8fps/12fps motion. With
+ *   true 24fps motion, the cadence quickly locks on, and stays locked on.
+ *
+ *   Once the cadence locks on again, we resume normal operation based on
+ *   the operation table.
+ *
+ *
+ *   Timestamp mangling:
+ *
+ *   To make five into four we need to extend frame durations by 25%.
+ *   Consider the following diagram (times given in 90kHz ticks, rounded to
+ *   integers; this is just for illustration):
+ *
+ *   NTSC input (29.97 fps)
+ *   a       b       c       d        e        a (from next group) ...
+ *   0    3003    6006    9009    12012    15015
+ *   0      3754      7508       11261     15015
+ *   1         2         3           4         1 (from next group) ...
+ *   Film output (23.976 fps)
+ *
+ *   Three of the film frames have length 3754, and one has 3753
+ *   (it is 1/90000 sec shorter). This rounding was chosen so that the lengths
+ *   (of the group of four sum to the original 15015.
+ *
+ *   From the diagram we get these deltas for presentation timestamp adjustment
+ *   (in 90 kHz ticks, for illustration):
+ *   (1-a)   (2-b)  (3-c)   (4-d)   (skip)   (1-a) ...
+ *       0   +751   +1502   +2252   (skip)       0 ...
+ *
+ *   In fractions of (p_next->date - p_cur->date), regardless of actual
+ *   time unit, the deltas are:
+ *   (1-a)   (2-b)  (3-c)   (4-d)   (skip)   (1-a) ...
+ *       0   +0.25  +0.50   +0.75   (skip)       0 ...
+ *
+ *   This is what we actually use. In our implementation, the values are stored
+ *   multiplied by 4, as integers.
+ *
+ *   The "current" frame should be displayed at [original time + delta].
+ *   E.g., when "current" = b (i.e. PCN = abc), start displaying film frame 2
+ *   at time [original time of b + 751 ticks]. So, when we catch the cadence,
+ *   we will start mangling the timestamps according to the cadence position
+ *   of the "current" frame, using the deltas given above. This will cause
+ *   a one-time jerk, most noticeable if the cadence happens to catch at
+ *   position "d". (Alternatively, upon lock-on, we could wait until we are
+ *   at "a" before switching on IVTC, but this makes the maximal delay
+ *   [max. detection + max. wait] = 3 + 4 = 7 input frames, which comes to
+ *   [7/30 ~ 0.23 seconds instead of the 3/30 = 0.10 seconds from purely
+ *   the detection. I prefer the one-time jerk, which also happens to be
+ *   simpler to implement.)
+ *
+ *   It is clear that "e" is a safe choice for the dropped frame. This can be
+ *   seen from the timings and the cadence tables. First, consider the timings.
+ *   If we have only one future frame, "e" is the only one whose PTS, comparing
+ *   to the film frames, allows dropping it safely. To see this, consider which
+ *   film frame needs to be rendered as each new input frame arrives. Secondly,
+ *   consider the cadence tables. It is ok to drop "e", because the same
+ *   film frame "1" is available also at the next PCN position "eab".
+ *   (As a side note, it is interesting that Vektor's filter drops "b".
+ *   See the TVTime sources.)
+ *
+ *   When the filter falls out of film mode, the timestamps of the incoming
+ *   frames are left untouched. Thus, the output from this filter has a
+ *   variable framerate: 4/5 of the input framerate when IVTC is active
+ *   (whether hard or soft), and the same framerate as input when it is not
+ *   (or when in emergency mode).
+ *
+ *
+ *   For other open-source IVTC codes, which may be a useful source for ideas,
+ *   see the following:
+ *
+ *   The classic filter by Billy Biggs (Vektor). Written in 2001-2003 for
+ *   TVTime, and adapted into Xine later. In xine-lib 1.1.19, it is at
+ *   src/post/deinterlace/pulldown.*. Also needed are tvtime.*, and speedy.*.
+ *
+ *   Transcode's ivtc->decimate->32detect chain by Thanassis Tsiodras.
+ *   Written in 2002, added in Transcode 0.6.12. This probably has something
+ *   to do with the same chain in MPlayer, considering that MPlayer acquired
+ *   an IVTC filter around the same time. In Transcode 1.1.5, the IVTC part is
+ *   at filter/filter_ivtc.c. Transcode 1.1.5 sources can be downloaded from
+ *   http://developer.berlios.de/project/showfiles.php?group_id=10094
+ */
+
+/**
+ * Helper function: estimates "how much interlaced" the given field pair is.
+ *
+ * It is allowed that p_pic_top == p_pic_bottom.
+ *
+ * If p_pic_top != p_pic_bot (fields come from different pictures), you can use
+ * ComposeFrame() to actually construct the picture if needed.
+ *
+ * Number of planes, and number of lines in each plane, in p_pic_top and
+ * p_pic_bot must match. If the visible pitches differ, only the compatible
+ * (smaller) part will be tested.
+ *
+ * Luma and chroma planes are tested in the same way. This is correct for
+ * telecined input, where in the interlaced frames also chroma alternates
+ * every chroma line, even if the chroma format is 4:2:0!
+ *
+ * This is just a raw detector that produces a score. The overall score
+ * indicating a progressive or indicated frame may vary wildly, depending on
+ * the material, especially in anime. The scores should be compared to
+ * each other locally (in the temporal sense) to make meaningful decisions
+ * about progressive or interlaced frames.
+ *
+ * @param p_pic_top Picture to take the top field from.
+ * @param p_pic_bot Picture to take the bottom field from.
+ * @return Interlace score, >= 0. Higher values mean more interlaced.
+ * @retval -1 Error: incompatible input pictures.
+ * @see RenderIVTC()
+ * @see ComposeFrame()
+ */
+static int CalculateInterlaceScore( const picture_t* p_pic_top,
+                                    const picture_t* p_pic_bot )
 {
-    filter_sys_t *p_sys = p_filter->p_sys;
-    picture_t *p_dst[DEINTERLACE_DST_SIZE];
+    /*
+        We use the comb metric from the IVTC filter of Transcode 1.1.5.
+        This was found to work better for the particular purpose of IVTC
+        than RenderX()'s comb metric.
 
-    /* Request output picture */
-    p_dst[0] = filter_NewPicture( p_filter );
-    if( p_dst[0] == NULL )
-    {
-        picture_Release( p_pic );
-        return NULL;
-    }
-    picture_CopyProperties( p_dst[0], p_pic );
+        Note that we *must not* subsample at all in order to catch interlacing
+        in telecined frames with localized motion (e.g. anime with characters
+        talking, where only mouths move and everything else stays still.)
+    */
 
-    /* Any unused p_dst pointers must be NULL, because they are used to check how many output frames we have. */
-    for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
-        p_dst[i] = NULL;
+    assert( p_pic_top != NULL );
+    assert( p_pic_bot != NULL );
 
-    /* Update the input frame history, if the currently active algorithm needs it. */
-    if( p_sys->b_use_frame_history )
+    if( p_pic_top->i_planes != p_pic_bot->i_planes )
+        return -1;
+
+    unsigned u_cpu = vlc_CPU();
+
+    /* Amount of bits must be known for MMX, thus int32_t.
+       Doesn't hurt the C implementation. */
+    int32_t i_score = 0;
+
+#ifdef CAN_COMPILE_MMXEXT
+    if( u_cpu & CPU_CAPABILITY_MMXEXT )
+        pxor_r2r( mm7, mm7 ); /* we will keep score in mm7 */
+#endif
+
+    for( int i_plane = 0 ; i_plane < p_pic_top->i_planes ; ++i_plane )
     {
-        /* Duplicate the picture
-         * TODO when the vout rework is finished, picture_Hold() might be enough
-         * but becarefull, the pitches must match */
-        picture_t *p_dup = picture_NewFromFormat( &p_pic->format );
-        if( p_dup )
-            picture_Copy( p_dup, p_pic );
+        /* Sanity check */
+        if( p_pic_top->p[i_plane].i_visible_lines !=
+            p_pic_bot->p[i_plane].i_visible_lines )
+            return -1;
+
+        const int i_lasty = p_pic_top->p[i_plane].i_visible_lines-1;
+        const int w = FFMIN( p_pic_top->p[i_plane].i_visible_pitch,
+                             p_pic_bot->p[i_plane].i_visible_pitch );
+        const int wm8 = w % 8;   /* remainder */
+        const int w8  = w - wm8; /* part of width that is divisible by 8 */
+
+        /* Current line / neighbouring lines picture pointers */
+        const picture_t *cur = p_pic_bot;
+        const picture_t *ngh = p_pic_top;
+        int wc = cur->p[i_plane].i_pitch;
+        int wn = ngh->p[i_plane].i_pitch;
+
+        /* Transcode 1.1.5 only checks every other line. Checking every line
+           works better for anime, which may contain horizontal,
+           one pixel thick cartoon outlines.
+        */
+        for( int y = 1; y < i_lasty; ++y )
+        {
+            uint8_t *p_c = &cur->p[i_plane].p_pixels[y*wc];     /* this line */
+            uint8_t *p_p = &ngh->p[i_plane].p_pixels[(y-1)*wn]; /* prev line */
+            uint8_t *p_n = &ngh->p[i_plane].p_pixels[(y+1)*wn]; /* next line */
 
-        /* Slide the history */
-        if( p_sys->pp_history[0] )
-            picture_Release( p_sys->pp_history[0] );
-        for( int i = 1; i < HISTORY_SIZE; i++ )
-            p_sys->pp_history[i-1] = p_sys->pp_history[i];
-        p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
+/* Threshold (value from Transcode 1.1.5) */
+#define T 100
+#ifdef CAN_COMPILE_MMXEXT
+            /* Easy-to-read C version further below.
+
+               Assumptions: 0 < T < 127
+                            # of pixels < (2^32)/255
+               Note: calculates score * 255
+            */
+            if( u_cpu & CPU_CAPABILITY_MMXEXT )
+            {
+                static const mmx_t b0   = { .uq = 0x0000000000000000ULL };
+                static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
+                static const mmx_t bT   = { .ub = { T, T, T, T, T, T, T, T } };
+
+                for( int x = 0; x < w8; x += 8 )
+                {
+                    movq_m2r( *((int64_t*)p_c), mm0 );
+                    movq_m2r( *((int64_t*)p_p), mm1 );
+                    movq_m2r( *((int64_t*)p_n), mm2 );
+
+                    psubb_m2r( b128, mm0 );
+                    psubb_m2r( b128, mm1 );
+                    psubb_m2r( b128, mm2 );
+
+                    psubsb_r2r( mm0, mm1 );
+                    psubsb_r2r( mm0, mm2 );
+
+                    pxor_r2r( mm3, mm3 );
+                    pxor_r2r( mm4, mm4 );
+                    pxor_r2r( mm5, mm5 );
+                    pxor_r2r( mm6, mm6 );
+
+                    punpcklbw_r2r( mm1, mm3 );
+                    punpcklbw_r2r( mm2, mm4 );
+                    punpckhbw_r2r( mm1, mm5 );
+                    punpckhbw_r2r( mm2, mm6 );
+
+                    pmulhw_r2r( mm3, mm4 );
+                    pmulhw_r2r( mm5, mm6 );
+
+                    packsswb_r2r(mm4, mm6);
+                    pcmpgtb_m2r( bT, mm6 );
+                    psadbw_m2r( b0, mm6 );
+                    paddd_r2r( mm6, mm7 );
+
+                    p_c += 8;
+                    p_p += 8;
+                    p_n += 8;
+                }
+                /* Handle the width remainder if any. */
+                if( wm8 )
+                {
+                    for( int x = 0; x < wm8; ++x )
+                    {
+                        int_fast32_t C = *p_c;
+                        int_fast32_t P = *p_p;
+                        int_fast32_t N = *p_n;
+
+                        int_fast32_t comb = (P - C) * (N - C);
+                        if( comb > T )
+                            ++i_score;
+
+                        ++p_c;
+                        ++p_p;
+                        ++p_n;
+                    }
+                }
+            }
+            else
+            {
+#endif
+                for( int x = 0; x < w; ++x )
+                {
+                    /* Worst case: need 17 bits for "comb". */
+                    int_fast32_t C = *p_c;
+                    int_fast32_t P = *p_p;
+                    int_fast32_t N = *p_n;
+
+                    /* Comments in Transcode's filter_ivtc.c attribute this
+                       combing metric to Gunnar Thalin.
+
+                        The idea is that if the picture is interlaced, both
+                        expressions will have the same sign, and this comes
+                        up positive. The value T = 100 has been chosen such
+                        that a pixel difference of 10 (on average) will
+                        trigger the detector.
+                    */
+                    int_fast32_t comb = (P - C) * (N - C);
+                    if( comb > T )
+                        ++i_score;
+
+                    ++p_c;
+                    ++p_p;
+                    ++p_n;
+                }
+#ifdef CAN_COMPILE_MMXEXT
+            }
+#endif
+
+            /* Now the other field - swap current and neighbour pictures */
+            const picture_t *tmp = cur;
+            cur = ngh;
+            ngh = tmp;
+            int tmp_pitch = wc;
+            wc = wn;
+            wn = tmp_pitch;
+        }
     }
 
-    /* Slide the metadata history. */
-    for( int i = 1; i < METADATA_SIZE; i++ )
+#ifdef CAN_COMPILE_MMXEXT
+    if( u_cpu & CPU_CAPABILITY_MMXEXT )
     {
-        p_sys->meta.pi_date[i-1]            = p_sys->meta.pi_date[i];
-        p_sys->meta.pi_nb_fields[i-1]       = p_sys->meta.pi_nb_fields[i];
-        p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
+        movd_r2m( mm7, i_score );
+        emms();
+        i_score /= 255;
     }
-    /* The last element corresponds to the current input frame. */
-    p_sys->meta.pi_date[METADATA_SIZE-1]            = p_pic->date;
-    p_sys->meta.pi_nb_fields[METADATA_SIZE-1]       = p_pic->i_nb_fields;
-    p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;
+#endif
 
-    /* Remember the frame offset that we should use for this frame.
-       The value in p_sys will be updated to reflect the correct value
-       for the *next* frame when we call the renderer. */
-    int i_frame_offset = p_sys->i_frame_offset;
-    int i_meta_idx     = (METADATA_SIZE-1) - i_frame_offset;
+    return i_score;
+}
+#undef T
 
-    /* These correspond to the current *outgoing* frame. */
-    bool b_top_field_first;
-    int i_nb_fields;
-    if( i_frame_offset != CUSTOM_PTS )
-    {
-        /* Pick the correct values from the history. */
-        b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
-        i_nb_fields       = p_sys->meta.pi_nb_fields[i_meta_idx];
-    }
-    else
-    {
-        /* Framerate doublers must not request CUSTOM_PTS, as they need the original field timings,
+/**
+ * Internal helper function for EstimateNumBlocksWithMotion():
+ * estimates whether there is motion in the given 8x8 block on one plane
+ * between two images. The block as a whole and its fields are evaluated
+ * separately, and use different motion thresholds.
+ *
+ * This is a low-level function only used by EstimateNumBlocksWithMotion().
+ * There is no need to call this function manually.
+ *
+ * For interpretation of pi_top and pi_bot, it is assumed that the block
+ * starts on an even-numbered line (belonging to the top field).
+ *
+ * The b_mmx parameter avoids the need to call vlc_CPU() separately
+ * for each block.
+ *
+ * @param[in] p_pix_p Base pointer to the block in previous picture
+ * @param[in] p_pix_c Base pointer to the same block in current picture
+ * @param i_pitch_prev i_pitch of previous picture
+ * @param i_pitch_curr i_pitch of current picture
+ * @param b_mmx (vlc_CPU() & CPU_CAPABILITY_MMXEXT) or false.
+ * @param[out] pi_top 1 if top field of the block had motion, 0 if no
+ * @param[out] pi_bot 1 if bottom field of the block had motion, 0 if no
+ * @return 1 if the block had motion, 0 if no
+ * @see EstimateNumBlocksWithMotion()
+ */
+static inline int TestForMotionInBlock( uint8_t *p_pix_p, uint8_t *p_pix_c,
+                                        int i_pitch_prev, int i_pitch_curr,
+                                        bool b_mmx,
+                                        int* pi_top, int* pi_bot )
+{
+/* Pixel luma/chroma difference threshold to detect motion. */
+#define T 10
+
+    int32_t i_motion = 0;
+    int32_t i_top_motion = 0;
+    int32_t i_bot_motion = 0;
+
+/* See below for the C version to see more quickly what this does. */
+#ifdef CAN_COMPILE_MMXEXT
+    if( b_mmx )
+    {
+        static const mmx_t bT   = { .ub = { T, T, T, T, T, T, T, T } };
+        pxor_r2r( mm6, mm6 ); /* zero, used in psadbw */
+        movq_m2r( bT,  mm5 );
+
+        pxor_r2r( mm3, mm3 ); /* score (top field) */
+        pxor_r2r( mm4, mm4 ); /* score (bottom field) */
+        for( int y = 0; y < 8; y+=2 )
+        {
+            /* top field */
+            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
+            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
+            movq_r2r( mm0, mm2 );
+            psubusb_r2r( mm1, mm2 );
+            psubusb_r2r( mm0, mm1 );
+
+            pcmpgtb_r2r( mm5, mm2 );
+            pcmpgtb_r2r( mm5, mm1 );
+            psadbw_r2r(  mm6, mm2 );
+            psadbw_r2r(  mm6, mm1 );
+
+            paddd_r2r( mm2, mm1 );
+            paddd_r2r( mm1, mm3 ); /* add to top field score */
+
+            p_pix_c += i_pitch_curr;
+            p_pix_p += i_pitch_prev;
+
+            /* bottom field - handling identical to top field, except... */
+            movq_m2r( *((uint64_t*)p_pix_c), mm0 );
+            movq_m2r( *((uint64_t*)p_pix_p), mm1 );
+            movq_r2r( mm0, mm2 );
+            psubusb_r2r( mm1, mm2 );
+            psubusb_r2r( mm0, mm1 );
+
+            pcmpgtb_r2r( mm5, mm2 );
+            pcmpgtb_r2r( mm5, mm1 );
+            psadbw_r2r(  mm6, mm2 );
+            psadbw_r2r(  mm6, mm1 );
+
+            paddd_r2r( mm2, mm1 );
+            paddd_r2r( mm1, mm4 ); /* ...here we add to bottom field score */
+
+            p_pix_c += i_pitch_curr;
+            p_pix_p += i_pitch_prev;
+        }
+        movq_r2r(  mm3, mm7 ); /* score (total) */
+        paddd_r2r( mm4, mm7 );
+        movd_r2m( mm3, i_top_motion );
+        movd_r2m( mm4, i_bot_motion );
+        movd_r2m( mm7, i_motion );
+
+        /* The loop counts actual score * 255. */
+        i_top_motion /= 255;
+        i_bot_motion /= 255;
+        i_motion     /= 255;
+
+        emms();
+    }
+    else
+#endif
+    {
+        for( int y = 0; y < 8; ++y )
+        {
+            uint8_t *pc = p_pix_c;
+            uint8_t *pp = p_pix_p;
+            int score = 0;
+            for( int x = 0; x < 8; ++x )
+            {
+                int_fast16_t C = abs((*pc) - (*pp));
+                if( C > T )
+                    ++score;
+
+                ++pc;
+                ++pp;
+            }
+
+            i_motion += score;
+            if( y % 2 == 0 )
+                i_top_motion += score;
+            else
+                i_bot_motion += score;
+
+            p_pix_c += i_pitch_curr;
+            p_pix_p += i_pitch_prev;
+        }
+    }
+
+    /* Field motion thresholds.
+
+       Empirical value - works better in practice than the "4" that
+       would be consistent with the full-block threshold.
+
+       Especially the opening scene of The Third ep. 1 (just after the OP)
+       works better with this. It also fixes some talking scenes in
+       Stellvia ep. 1, where the cadence would otherwise catch on incorrectly,
+       leading to more interlacing artifacts than by just using the emergency
+       mode frame composer.
+    */
+    (*pi_top) = ( i_top_motion >= 8 );
+    (*pi_bot) = ( i_bot_motion >= 8 );
+
+    /* Full-block threshold = (8*8)/8: motion is detected if 1/8 of the block
+       changes "enough". */
+    return (i_motion >= 8);
+}
+#undef T
+
+/**
+ * Helper function: Estimates the number of 8x8 blocks which have motion
+ * between the given pictures. Needed for various detectors in RenderIVTC().
+ *
+ * Number of planes and visible lines in each plane, in the inputs must match.
+ * If the visible pitches do not match, only the compatible (smaller)
+ * part will be tested.
+ *
+ * Note that the return value is NOT simply *pi_top + *pi_bot, because
+ * the fields and the full block use different motion thresholds.
+ *
+ * It is allowed to set pi_top and pi_bot to NULL, if the caller does not want
+ * the separate field scores. This does not affect computation speed, and is
+ * only provided as a syntactic convenience.
+ *
+ * Motion in each picture plane (Y, U, V) counts separately.
+ * The sum of number of blocks with motion across all planes is returned.
+ *
+ * @param[in] p_prev Previous picture
+ * @param[in] p_curr Current picture
+ * @param[out] pi_top Number of 8x8 blocks where top field has motion.
+ * @param[out] pi_bot Number of 8x8 blocks where bottom field has motion.
+ * @return Number of 8x8 blocks that have motion.
+ * @retval -1 Error: incompatible input pictures.
+ * @see TestForMotionInBlock()
+ * @see RenderIVTC()
+ */
+static int EstimateNumBlocksWithMotion( const picture_t* p_prev,
+                                        const picture_t* p_curr,
+                                        int *pi_top, int *pi_bot)
+{
+    assert( p_prev != NULL );
+    assert( p_curr != NULL );
+
+    int i_score_top = 0;
+    int i_score_bot = 0;
+
+    if( p_prev->i_planes != p_curr->i_planes )
+        return -1;
+
+    /* We must tell our inline helper whether to use MMX acceleration. */
+#ifdef CAN_COMPILE_MMXEXT
+    bool b_mmx = ( vlc_CPU() & CPU_CAPABILITY_MMXEXT );
+#else
+    bool b_mmx = false;
+#endif
+
+    int i_score = 0;
+    for( int i_plane = 0 ; i_plane < p_prev->i_planes ; i_plane++ )
+    {
+        /* Sanity check */
+        if( p_prev->p[i_plane].i_visible_lines !=
+            p_curr->p[i_plane].i_visible_lines )
+            return -1;
+
+        const int i_pitch_prev = p_prev->p[i_plane].i_pitch;
+        const int i_pitch_curr = p_curr->p[i_plane].i_pitch;
+
+        /* Last pixels and lines (which do not make whole blocks) are ignored.
+           Shouldn't really matter for our purposes. */
+        const int i_mby = p_prev->p[i_plane].i_visible_lines / 8;
+        const int w = FFMIN( p_prev->p[i_plane].i_visible_pitch,
+                             p_curr->p[i_plane].i_visible_pitch );
+        const int i_mbx = w / 8;
+
+        for( int by = 0; by < i_mby; ++by )
+        {
+            uint8_t *p_pix_p = &p_prev->p[i_plane].p_pixels[i_pitch_prev*8*by];
+            uint8_t *p_pix_c = &p_curr->p[i_plane].p_pixels[i_pitch_curr*8*by];
+
+            for( int bx = 0; bx < i_mbx; ++bx )
+            {
+                int i_top_temp, i_bot_temp;
+                i_score += TestForMotionInBlock( p_pix_p, p_pix_c,
+                                                 i_pitch_prev, i_pitch_curr,
+                                                 b_mmx,
+                                                 &i_top_temp, &i_bot_temp );
+                i_score_top += i_top_temp;
+                i_score_bot += i_bot_temp;
+
+                p_pix_p += 8;
+                p_pix_c += 8;
+            }
+        }
+    }
+
+    if( pi_top )
+        (*pi_top) = i_score_top;
+    if( pi_bot )
+        (*pi_bot) = i_score_bot;
+
+    return i_score;
+}
+
+/* Fasten your seatbelt - lots of IVTC constants follow... */
+
+/**
+ * IVTC filter modes.
+ *
+ * Hard telecine: burned into video stream.
+ * Soft telecine: stream consists of progressive frames;
+ *                telecining handled by stream flags.
+ *
+ * @see ivtc_sys_t
+ * @see RenderIVTC()
+ */
+typedef enum { IVTC_MODE_DETECTING           = 0,
+               IVTC_MODE_TELECINED_NTSC_HARD = 1,
+               IVTC_MODE_TELECINED_NTSC_SOFT = 2 } ivtc_mode;
+
+/**
+ *  Field pair combinations from successive frames in the PCN stencil.
+ *  T = top, B = bottom, P = previous, C = current, N = next
+ *  These are used as array indices; hence the explicit numbering.
+ */
+typedef enum { FIELD_PAIR_TPBP = 0, FIELD_PAIR_TPBC = 1,
+               FIELD_PAIR_TCBP = 2, FIELD_PAIR_TCBC = 3,
+               FIELD_PAIR_TCBN = 4, FIELD_PAIR_TNBC = 5,
+               FIELD_PAIR_TNBN = 6 } ivtc_field_pair;
+
+/* Note: only valid ones count for NUM */
+#define NUM_CADENCE_POS 9
+/**
+ * Cadence positions for the PCN stencil (PCN, Previous Current Next).
+ *
+ * Note that "dea" in both cadence tables and a pure progressive signal
+ * are indistinguishable.
+ *
+ * Used as array indices except the -1.
+ *
+ * This is a combined raw position containing both i_cadence_pos
+ * and telecine field dominance.
+ * @see pi_detected_pos_to_cadence_pos
+ * @see pi_detected_pos_to_tfd
+ */
+typedef enum { CADENCE_POS_INVALID     = -1,
+               CADENCE_POS_PROGRESSIVE =  0,
+               CADENCE_POS_TFF_ABC     =  1,
+               CADENCE_POS_TFF_BCD     =  2,
+               CADENCE_POS_TFF_CDE     =  3,
+               CADENCE_POS_TFF_EAB     =  4,
+               CADENCE_POS_BFF_ABC     =  5,
+               CADENCE_POS_BFF_BCD     =  6,
+               CADENCE_POS_BFF_CDE     =  7,
+               CADENCE_POS_BFF_EAB     =  8 } ivtc_cadence_pos;
+/* First and one-past-end for TFF-only and BFF-only raw positions. */
+#define CADENCE_POS_TFF_FIRST 1
+#define CADENCE_POS_TFF_END   5
+#define CADENCE_POS_BFF_FIRST 5
+#define CADENCE_POS_BFF_END   9
+
+/**
+ * For Vektor-like cadence detector algorithm.
+ *
+ * The bitmask is stored in a word, and its layout is:
+ * blank blank BFF_CARRY BFF4 BFF3 BFF2 BFF1 BFF0   (high byte)
+ * blank blank TFF_CARRY TFF4 TFF3 TFF2 TFF1 TFF0   (low byte)
+ *
+ * This allows predicting the next position by left-shifting the previous
+ * result by one bit, copying the CARRY bits to the respective zeroth position,
+ * and ANDing with 0x1F1F.
+ *
+ * The table is indexed with a valid ivtc_cadence_pos.
+ */
+const int pi_detected_pos_to_bitmask[NUM_CADENCE_POS] = { 0x0808, /* prog. */
+                                                          0x0001, /* TFF ABC */
+                                                          0x0002, /* TFF BCD */
+                                                          0x0004, /* TFF CDE */
+                                                          0x0010, /* TFF EAB */
+                                                          0x0100, /* BFF ABC */
+                                                          0x0200, /* BFF BCD */
+                                                          0x0400, /* BFF CDE */
+                                                          0x1000, /* BFF EAB */
+                                                        };
+#define VEKTOR_CADENCE_POS_ALL 0x1F1F
+#define VEKTOR_CADENCE_POS_TFF 0x00FF
+#define VEKTOR_CADENCE_POS_BFF 0xFF00
+#define VEKTOR_CADENCE_POS_TFF_HIGH 0x0010
+#define VEKTOR_CADENCE_POS_TFF_LOW  0x0001
+#define VEKTOR_CADENCE_POS_BFF_HIGH 0x1000
+#define VEKTOR_CADENCE_POS_BFF_LOW  0x0100
+
+/* Telecine field dominance */
+typedef enum { TFD_INVALID = -1, TFD_TFF = 0, TFD_BFF = 1 } ivtc_tfd;
+
+/**
+ * Position detection table.
+ *
+ * These are the (only) field pair combinations that should give progressive
+ * frames.
+ *
+ * First index: detected pos
+ */
+static const ivtc_field_pair pi_best_field_pairs[NUM_CADENCE_POS][3] = {
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* prog. */
+
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP, FIELD_PAIR_TNBC}, /* TFF ABC */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* TFF BCD */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* TFF CDE */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* TFF EAB */
+
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC, FIELD_PAIR_TCBN}, /* BFF ABC */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* BFF BCD */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC, FIELD_PAIR_TNBN}, /* BFF CDE */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* BFF EAB */
+};
+
+/**
+ * Alternative position detection table.
+ *
+ * These field pair combinations should give only interlaced frames.
+ *
+ * Currently unused. During development it was tested that whether we detect
+ * best or worst, the resulting detected cadence positions are identical
+ * (neither strategy performs any different from the other).
+ */
+static const ivtc_field_pair pi_worst_field_pairs[NUM_CADENCE_POS][4] = {
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP,
+        FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* prog. */
+
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBC,
+        FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF ABC */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC,
+        FIELD_PAIR_TCBC, FIELD_PAIR_TCBN}, /* TFF BCD */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TPBC,
+        FIELD_PAIR_TCBN, FIELD_PAIR_TNBC}, /* TFF CDE */
+    {FIELD_PAIR_TPBC, FIELD_PAIR_TCBP,
+        FIELD_PAIR_TCBN, FIELD_PAIR_TNBN}, /* TFF EAB */
+
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TCBC,
+        FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF ABC */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP,
+        FIELD_PAIR_TCBC, FIELD_PAIR_TNBC}, /* BFF BCD */
+    {FIELD_PAIR_TPBP, FIELD_PAIR_TCBP,
+        FIELD_PAIR_TNBC, FIELD_PAIR_TCBN}, /* BFF CDE */
+    {FIELD_PAIR_TCBP, FIELD_PAIR_TPBC,
+        FIELD_PAIR_TNBC, FIELD_PAIR_TNBN}, /* BFF EAB */
+};
+
+/**
+ * Table for extracting the i_cadence_pos part of detected cadence position
+ * (ivtc_cadence_pos).
+ *
+ * The counter goes from 0 to 4, where "abc" = 0, "bcd" = 1, ...
+ *
+ * @see ivtc_cadence_pos
+ */
+static const int pi_detected_pos_to_cadence_pos[NUM_CADENCE_POS] = {
+    3, /* prog. */
+    0, /* TFF ABC */
+    1, /* TFF BCD */
+    2, /* TFF CDE */
+    4, /* TFF EAB */
+    0, /* BFF ABC */
+    1, /* BFF BCD */
+    2, /* BFF CDE */
+    4, /* BFF EAB */
+};
+
+/**
+ * Table for extracting the telecine field dominance part of detected
+ * cadence position (ivtc_cadence_pos).
+ *
+ * The position "dea" does not provide TFF/BFF information, because it is
+ * indistinguishable from progressive.
+ *
+ * @see ivtc_cadence_pos
+ */
+static const int pi_detected_pos_to_tfd[NUM_CADENCE_POS] = {
+    TFD_INVALID, /* prog. */
+    TFD_TFF, /* TFF ABC */
+    TFD_TFF, /* TFF BCD */
+    TFD_TFF, /* TFF CDE */
+    TFD_TFF, /* TFF EAB */
+    TFD_BFF, /* BFF ABC */
+    TFD_BFF, /* BFF BCD */
+    TFD_BFF, /* BFF CDE */
+    TFD_BFF, /* BFF EAB */
+};
+
+/* Valid telecine sequences (TFF and BFF). Indices: [TFD][i_cadence_pos] */
+/* Currently unused and left here for documentation only.
+   There is an easier way - just decode the i_cadence_pos part of the
+   detected position using the pi_detected_pos_to_cadence_pos table. */
+/*static const int pi_valid_cadences[2][5] = { {CADENCE_POS_TFF_ABC,
+                                             CADENCE_POS_TFF_BCD,
+                                             CADENCE_POS_TFF_CDE,
+                                             CADENCE_POS_PROGRESSIVE,
+                                             CADENCE_POS_TFF_EAB},
+
+                                             {CADENCE_POS_BFF_ABC,
+                                             CADENCE_POS_BFF_BCD,
+                                             CADENCE_POS_BFF_CDE,
+                                             CADENCE_POS_PROGRESSIVE,
+                                             CADENCE_POS_BFF_EAB},
+                                           };
+*/
+
+/**
+ * Operations needed in film frame reconstruction.
+ */
+typedef enum { IVTC_OP_DROP_FRAME,
+               IVTC_OP_COPY_N,
+               IVTC_OP_COPY_C,
+               IVTC_OP_COMPOSE_TNBC,
+               IVTC_OP_COMPOSE_TCBN } ivtc_op;
+
+/* Note: During hard IVTC, we must avoid COPY_C and do a compose instead.
+   If we COPY_C, some subtitles will flicker badly, even if we use the
+   cadence-based film frame reconstruction. Try the first scene in
+   Kanon (2006) vol. 3 to see the problem.
+
+   COPY_C can be used without problems when it is used consistently
+   (not constantly mixed in with COPY_N and compose operations),
+   for example in soft IVTC.
+*/
+/**
+ * Operation table for film frame reconstruction depending on cadence position.
+ * Indices: [TFD][i_cadence_pos]
+ * @see pi_detected_pos_to_tfd
+ * @see pi_detected_pos_to_cadence_pos
+ */
+static const ivtc_op pi_reconstruction_ops[2][5] = { /* TFF */
+                                                     {IVTC_OP_COMPOSE_TNBC,
+                                                      IVTC_OP_COPY_N,
+                                                      IVTC_OP_COPY_N,
+                                                      IVTC_OP_DROP_FRAME,
+                                                      IVTC_OP_COMPOSE_TNBC},
+
+                                                     /* BFF */
+                                                     {IVTC_OP_COMPOSE_TCBN,
+                                                      IVTC_OP_COPY_N,
+                                                      IVTC_OP_COPY_N,
+                                                      IVTC_OP_DROP_FRAME,
+                                                      IVTC_OP_COMPOSE_TCBN},
+                                                   };
+
+/**
+ * Timestamp mangling table.
+ *
+ * This is used in the 29.97 -> 23.976 fps conversion.
+ *
+ * Index: i_cadence_pos.
+ *
+ * Valid values are nonnegative. The -1 corresponds to the dropped frame
+ * and is never used, except for a debug assert.
+ *
+ * The unit of the values is 1/4 of frame duration.
+ * See the function documentation of RenderIVTC() for an explanation.
+ * @see ivtc_cadence_pos
+ * @see pi_detected_pos_to_cadence_pos
+ * @see pi_reconstruction_ops
+ * @see RenderIVTC()
+ */
+static const int pi_timestamp_deltas[5] = { 1, 2, 3, -1, 0 };
+
+/**
+ * Internal helper function for RenderIVTC(): performs initialization
+ * at the start of a new frame.
+ *
+ * In practice, this slides detector histories.
+ *
+ * This function should only perform initialization that does NOT require
+ * the input frame history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ */
+static inline void IVTCFrameInit( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+
+    /* Slide detector histories */
+    for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+    {
+        p_ivtc->pi_top_rep[i-1] = p_ivtc->pi_top_rep[i];
+        p_ivtc->pi_bot_rep[i-1] = p_ivtc->pi_bot_rep[i];
+        p_ivtc->pi_motion[i-1]  = p_ivtc->pi_motion[i];
+
+        p_ivtc->pi_s_cadence_pos[i-1] = p_ivtc->pi_s_cadence_pos[i];
+        p_ivtc->pb_s_reliable[i-1]    = p_ivtc->pb_s_reliable[i];
+        p_ivtc->pi_v_cadence_pos[i-1] = p_ivtc->pi_v_cadence_pos[i];
+        p_ivtc->pi_v_raw[i-1]         = p_ivtc->pi_v_raw[i];
+        p_ivtc->pb_v_reliable[i-1]    = p_ivtc->pb_v_reliable[i];
+
+        p_ivtc->pi_cadence_pos_history[i-1]
+                                      = p_ivtc->pi_cadence_pos_history[i];
+
+        p_ivtc->pb_all_progressives[i-1] = p_ivtc->pb_all_progressives[i];
+    }
+    /* The latest position has not been detected yet. */
+    p_ivtc->pi_s_cadence_pos[IVTC_LATEST] = CADENCE_POS_INVALID;
+    p_ivtc->pb_s_reliable[IVTC_LATEST]    = false;
+    p_ivtc->pi_v_cadence_pos[IVTC_LATEST] = CADENCE_POS_INVALID;
+    p_ivtc->pi_v_raw[IVTC_LATEST]         = VEKTOR_CADENCE_POS_ALL;
+    p_ivtc->pb_v_reliable[IVTC_LATEST]    = false;
+    p_ivtc->pi_cadence_pos_history[IVTC_LATEST] = CADENCE_POS_INVALID;
+    p_ivtc->pi_top_rep[IVTC_LATEST] =  0;
+    p_ivtc->pi_bot_rep[IVTC_LATEST] =  0;
+    p_ivtc->pi_motion[IVTC_LATEST]  = -1;
+    p_ivtc->pb_all_progressives[IVTC_LATEST] = false;
+
+    /* Slide history of field pair interlace scores */
+    p_ivtc->pi_scores[FIELD_PAIR_TPBP] = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
+    p_ivtc->pi_scores[FIELD_PAIR_TPBC] = p_ivtc->pi_scores[FIELD_PAIR_TCBN];
+    p_ivtc->pi_scores[FIELD_PAIR_TCBP] = p_ivtc->pi_scores[FIELD_PAIR_TNBC];
+    p_ivtc->pi_scores[FIELD_PAIR_TCBC] = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
+}
+
+/**
+ * Internal helper function for RenderIVTC(): computes various raw detector
+ * data at the start of a new frame.
+ *
+ * This function requires the input frame history buffer.
+ * IVTCFrameInit() must have been called first.
+ * Last two frames must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ * @see IVTCFrameInit()
+ */
+static inline void IVTCLowLevelDetect( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+    picture_t *p_curr = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+    assert( p_curr != NULL );
+
+    /* Compute interlace scores for TNBN, TNBC and TCBN.
+        Note that p_next contains TNBN. */
+    p_ivtc->pi_scores[FIELD_PAIR_TNBN] = CalculateInterlaceScore( p_next,
+                                                                  p_next );
+    p_ivtc->pi_scores[FIELD_PAIR_TNBC] = CalculateInterlaceScore( p_next,
+                                                                  p_curr );
+    p_ivtc->pi_scores[FIELD_PAIR_TCBN] = CalculateInterlaceScore( p_curr,
+                                                                  p_next );
+
+    int i_top = 0, i_bot = 0;
+    int i_motion = EstimateNumBlocksWithMotion(p_curr, p_next, &i_top, &i_bot);
+    p_ivtc->pi_motion[IVTC_LATEST] = i_motion;
+
+    /* It's very rare if nonexistent that only one field changes between
+       frames. Thus, if one field changes "clearly more" than the other,
+       we know the less changed one is a likely duplicate.
+
+       Threshold 1/2 is too low for some scenes (e.g. pan of the space junk
+       at beginning of The Third ep. 1, right after the OP). Thus, we use 2/3,
+       which seems to work.
+    */
+    p_ivtc->pi_top_rep[IVTC_LATEST] = (i_top <= 2*i_bot/3);
+    p_ivtc->pi_bot_rep[IVTC_LATEST] = (i_bot <= 2*i_top/3);
+}
+
+/**
+ * Internal helper function for RenderIVTC(): using raw detector data,
+ * detect cadence position by an interlace scores based algorithm.
+ *
+ * IVTCFrameInit() and IVTCLowLevelDetect() must have been called first.
+ * Last frame must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ * @see IVTCFrameInit()
+ * @see IVTCLowLevelDetect()
+ * @see IVTCCadenceDetectFinalize()
+ */
+static inline void IVTCCadenceDetectAlgoScores( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+
+    /* Detect likely cadence position according to the tables,
+       using the tabulated combinations of all 7 available interlace scores.
+    */
+    int pi_ivtc_scores[NUM_CADENCE_POS];
+    for( int i = 0; i < NUM_CADENCE_POS; i++ )
+        pi_ivtc_scores[i] = p_ivtc->pi_scores[ pi_best_field_pairs[i][0] ]
+                          + p_ivtc->pi_scores[ pi_best_field_pairs[i][1] ]
+                          + p_ivtc->pi_scores[ pi_best_field_pairs[i][2] ];
+    /* Find minimum */
+    int j = CADENCE_POS_PROGRESSIVE; /* valid regardless of TFD */
+    int minscore = pi_ivtc_scores[j];
+    /* Note that a TFF (respectively BFF) stream may only have TFF
+       (respectively BFF) telecine. Don't bother looking at solutions
+       we already know to be wrong. */
+    int imin = CADENCE_POS_TFF_FIRST; /* first TFF-only entry */
+    int iend = CADENCE_POS_TFF_END;   /* one past last TFF-only entry */
+    if( !p_next->b_top_field_first )
+    {
+        imin = CADENCE_POS_BFF_FIRST; /* first BFF-only entry */
+        iend = CADENCE_POS_BFF_END;   /* one past last BFF-only entry */
+    }
+    for( int i = imin; i < iend; i++ )
+    {
+        if( pi_ivtc_scores[i] < minscore )
+        {
+            minscore = pi_ivtc_scores[i];
+            j = i;
+        }
+    }
+
+    /* Now "j" contains the most likely position according to the tables,
+       accounting also for video TFF/BFF. */
+    p_ivtc->pi_s_cadence_pos[IVTC_LATEST] = j;
+
+    /* Estimate reliability of detector result.
+
+       We do this by checking if the winner is an outlier at least
+       to some extent. For anyone better versed in statistics,
+       feel free to improve this.
+    */
+
+    /* Compute sample mean with the winner included and without.
+
+       Sample mean is defined as mu = sum( x_i, i ) / N ,
+       where N is the number of samples.
+    */
+    int mean = pi_ivtc_scores[CADENCE_POS_PROGRESSIVE];
+    int mean_except_min = 0;
+    if( j != CADENCE_POS_PROGRESSIVE )
+        mean_except_min = pi_ivtc_scores[CADENCE_POS_PROGRESSIVE];
+    for( int i = imin; i < iend; i++ )
+    {
+        mean += pi_ivtc_scores[i];
+        if( i != j )
+            mean_except_min += pi_ivtc_scores[i];
+    }
+    /* iend points one past end, but progressive counts as the +1. */
+    mean /= (iend - imin + 1);
+    mean_except_min /= (iend - imin);
+
+    /* Check how much excluding the winner changes the mean. */
+    double mean_ratio = (double)mean_except_min / (double)mean;
+
+    /* Let's pretend that the detected position is a stochastic variable.
+        Compute sample variance with the winner included and without.
+
+        var = sum( (x_i - mu)^2, i ) / N ,
+
+        where mu is the sample mean.
+
+        Note that we really need int64_t; the numbers are pretty large.
+    */
+    int64_t diff = (int64_t)(pi_ivtc_scores[CADENCE_POS_PROGRESSIVE] - mean);
+    int64_t var = diff*diff;
+    int64_t var_except_min = 0;
+    if( j != CADENCE_POS_PROGRESSIVE )
+    {
+        int64_t diff_exm = (int64_t)(pi_ivtc_scores[CADENCE_POS_PROGRESSIVE]
+                                      - mean_except_min);
+        var_except_min = diff_exm*diff_exm;
+    }
+    for( int i = imin; i < iend; i++ )
+    {
+        diff = (int64_t)(pi_ivtc_scores[i] - mean);
+        var += (diff*diff);
+        if( i != j )
+        {
+            int64_t diff_exm = (int64_t)(pi_ivtc_scores[i] - mean_except_min);
+            var_except_min += (diff_exm*diff_exm);
+        }
+    }
+    /* iend points one past end, but progressive counts as the +1. */
+    var /= (uint64_t)(iend - imin + 1);
+    var_except_min /= (uint64_t)(iend - imin);
+
+    /* Extract cadence counter part of detected positions for the
+       last two frames.
+
+       Note that for the previous frame, we use the final detected cadence
+       position, which was not necessarily produced by this algorithm.
+       It is the result that was judged the most reliable.
+    */
+    int j_curr = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-1];
+    int pos_next = pi_detected_pos_to_cadence_pos[j];
+
+    /* Be optimistic when unsure. We bias the detection toward accepting
+       the next "correct" position, even if the variance check comes up bad.
+    */
+    bool b_expected = false;
+    if( j_curr != CADENCE_POS_INVALID )
+    {
+        int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
+        b_expected = (pos_next == (pos_curr + 1) % 5);
+    }
+
+    /* Use motion detect result as a final sanity check.
+       If no motion, the result from this algorithm cannot be reliable.
+    */
+    int i_blocks_with_motion = p_ivtc->pi_motion[IVTC_LATEST];
+
+    /* The numbers given here are empirical constants that have been tuned
+       through trial and error. The test material used was NTSC anime DVDs.
+
+        Easy-to-detect parts seem to give variance boosts of 40-70%, but
+        hard-to-detect parts sometimes only 18%. Anything with a smaller boost
+        in variance doesn't seem reliable for catching a new lock-on,
+
+        Additionally, it seems that if the mean changes by less than 0.5%,
+        the result is not reliable.
+
+        Note that the numbers given are only valid for the pi_best_field_pairs
+        detector strategy.
+
+        For motion detection, the detector seems good enough so that
+        we can threshold at zero.
+    */
+    bool b_result_reliable =
+      ( i_blocks_with_motion > 0      &&
+        mean_ratio           > 1.005  &&
+        ( b_expected || ( (double)var > 1.17*(double)var_except_min ) )
+      );
+    p_ivtc->pb_s_reliable[IVTC_LATEST] = b_result_reliable;
+}
+
+/**
+ * Internal helper function for RenderIVTC(): using raw detector data,
+ * detect cadence position by a hard field repeat based algorithm.
+ *
+ * This algorithm is inspired by the classic TVTime/Xine IVTC filter
+ * by Billy Biggs (Vektor); hence the name. There are however some
+ * differences between this and the TVTime/Xine filter.
+ *
+ * IVTCFrameInit() and IVTCLowLevelDetect() must have been called first.
+ * Last frame must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ * @see IVTCFrameInit()
+ * @see IVTCLowLevelDetect()
+ * @see IVTCCadenceDetectFinalize()
+ */
+static inline void IVTCCadenceDetectAlgoVektor( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+
+    /* Vektor-like cadence detection algorithm.
+
+       This is based on detecting repeated fields (by motion detection),
+       and conservatively estimating what the seen repeats could mean
+       for the cadence position.
+
+       Several possibilities are kept open until the sequence gives enough
+       information to make a unique detection. When the sequence becomes
+       inconsistent (e.g. bad cut), the detector resets itself.
+
+       The main ideas taken from Vektor's algorithm are:
+        1) conservatively using information from detected field repeats,
+        2) cadence counting the earlier detection results and combining with
+           the new detection result, and
+        3) the observation that video TFF/BFF uniquely determines TFD.
+
+        The main differences are
+        1) different motion detection (see EstimateNumBlocksWithMotion()).
+           Vektor's original estimates the average top/bottom field diff
+           over the last 3 frames, while ours uses a block-based approach
+           for diffing and just compares the field diffs of the "next" frame
+           against each other. Both approaches are adaptive, but in a
+           different way.
+        2) the specific detection logic used is a bit different (see both codes
+           for details; the original is in xine-lib, function
+           determine_pulldown_offset_short_history_new() in pulldown.c;
+           ours is obviously given below). I think this one is a bit simpler.
+    */
+
+    bool b_top_rep = p_ivtc->pi_top_rep[IVTC_LATEST];
+    bool b_bot_rep = p_ivtc->pi_bot_rep[IVTC_LATEST];
+    bool b_old_top_rep = p_ivtc->pi_top_rep[IVTC_LATEST-1];
+    bool b_old_bot_rep = p_ivtc->pi_bot_rep[IVTC_LATEST-1];
+
+    /* This is a conservative algorithm: we do not rule out possibilities
+       if repeats are *not* seen, but only *add* possibilities based on what
+       repeats *are* seen. We will do a raw detection, whose result is then
+       filtered against what we already know.
+
+       Progressive requires no repeats, so it is always a possibility.
+       Filtering will drop it out if we know that the current position
+       cannot be "dea".
+    */
+    int detected = 0;
+    detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_PROGRESSIVE ];
+
+    /* Add in other possibilities depending on field repeats seen during the
+       last three input frames (i.e. two transitions between input frames).
+       See the "Dups." column in the cadence tables.
+
+       Note that we always add and never explicitly rule anything out.
+       This is important. Otherwise full-frame repeats in the original film
+       (8fps or 12fps animation is common in anime) - causing spurious
+       field repeats - would mess up the detection. Handling that in a more
+       sophisticated way would be a nightmare - one would have to keep track
+       of full-frame repeats in the *outgoing* frames, too, and take into
+       account what would happen in the output if a particular cadence position
+       was chosen. Accounting for repeats in input frames only (i.e. limiting
+       the detection to the progressive parts of the cadence), this has been
+       tried, and found less reliable than the current, simpler strategy
+       that just ignores full-frame repeats.
+
+       Note also that we don't have to worry about getting the detection right
+       in *all* cases. It's enough if we work reliably, say, 99% of the time,
+       and the other 1% of the time just admit that we don't know the cadence
+       position. (This mostly happens after a bad cut, when the new scene has
+       "difficult" motion characteristics, such as repeated film frames.)
+
+       The alternative, "Transcode" strategy in the frame composer will catch
+       any telecined frames that slip through. Although in that case there will
+       be duplicates and the output PTSs will be wrong, this is less noticeable
+       than getting PTS jumps from an incorrectly locked-on cadence. Note that
+       it is mostly anime, and even there mostly low-motion scenes with
+       duplicate film frames that trigger the misbehavior - and in such cases
+       any slight irregularity in the output timings will go unnoticed,
+       as long as we get rid of interlacing artifacts.
+    */
+    if( b_top_rep )
+    {
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_EAB ];
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_BCD ];
+    }
+    if( b_old_top_rep )
+    {
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_ABC ];
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_CDE ];
+    }
+    if( b_bot_rep )
+    {
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_BCD ];
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_EAB ];
+    }
+    if( b_old_bot_rep )
+    {
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_TFF_CDE ];
+        detected |= pi_detected_pos_to_bitmask[ CADENCE_POS_BFF_ABC ];
+    }
+
+    /* A TFF stream may only have TFF telecine, and similarly for BFF.
+        Discard the possibility we know to be incorrect for this stream.
+        (Note that the stream may flipflop between the possibilities
+        if it contains soft-telecined sequences or lone field repeats,
+        so we must keep detecting this for each incoming frame.)
+    */
+    bool b_tff = p_next->b_top_field_first;
+    if( b_tff )
+        detected &= VEKTOR_CADENCE_POS_TFF;
+    else
+        detected &= VEKTOR_CADENCE_POS_BFF;
+
+    /* Predict possible next positions based on our last detection.
+       Begin with a shift and carry. */
+    int predicted = p_ivtc->pi_v_raw[IVTC_LATEST-1];
+    bool b_wrap_tff = false;
+    bool b_wrap_bff = false;
+    if( predicted & VEKTOR_CADENCE_POS_TFF_HIGH )
+        b_wrap_tff = true;
+    if( predicted & VEKTOR_CADENCE_POS_BFF_HIGH )
+        b_wrap_bff = true;
+    /* bump to next position and keep only valid bits */
+    predicted = (predicted << 1) & VEKTOR_CADENCE_POS_ALL;
+    /* carry */
+    if( b_wrap_tff )
+        predicted |= VEKTOR_CADENCE_POS_TFF_LOW;
+    if( b_wrap_bff )
+        predicted |= VEKTOR_CADENCE_POS_BFF_LOW;
+
+    /* Filter: narrow down possibilities based on previous detection,
+       if consistent. If not consistent, reset the detector.
+       This works better than just using the latest raw detection. */
+    if( (detected & predicted) != 0 )
+        detected = detected & predicted;
+    else
+        detected = VEKTOR_CADENCE_POS_ALL;
+
+    /* We're done. Save result to our internal storage so we can use it
+       for prediction at the next frame.
+
+       Note that the outgoing frame check in IVTCReconstructFrame()
+       has a veto right, resetting us if it determines that the cadence
+       has become broken.
+    */
+    p_ivtc->pi_v_raw[IVTC_LATEST] = detected;
+
+    /* See if the position has been detected uniquely.
+       If so, we have acquired a lock-on. */
+    ivtc_cadence_pos exact = CADENCE_POS_INVALID;
+    if( detected != 0 )
+    {
+        for( int i = 0; i < NUM_CADENCE_POS; i++ )
+        {
+            /* Note that we must use "&" instead of just equality to catch
+               the progressive case, and also not to trigger on an incomplete
+               detection. */
+            if( detected == (detected & pi_detected_pos_to_bitmask[i]) )
+            {
+                exact = i;
+                break;
+            }
+        }
+    }
+
+    /* If the result was unique, now "exact" contains the detected
+       cadence position (and otherwise CADENCE_POS_INVALID).
+
+       In practice, if the result from this algorithm is unique,
+       it is always reliable.
+    */
+    p_ivtc->pi_v_cadence_pos[IVTC_LATEST] =  exact;
+    p_ivtc->pb_v_reliable[IVTC_LATEST]    = (exact != CADENCE_POS_INVALID);
+}
+
+/**
+ * Internal helper function for RenderIVTC(): decide the final detected
+ * cadence position for the current position of the stencil,
+ * using the results of the different cadence detection algorithms.
+ *
+ * Must be called after all IVTCCadenceDetectAlgo*() functions.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ * @see IVTCCadenceDetectAlgoScores()
+ * @see IVTCCadenceDetectAlgoVektor()
+ */
+static inline void IVTCCadenceDetectFinalize( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+
+    /* In practice "vektor" is more reliable than "scores", but it may
+       take longer to lock on. Thus, we prefer "vektor" if its reliable bit
+       is set, then "scores", and finally just give up.
+
+       For progressive sequences, "vektor" outputs "3, -, 3, -, ...".
+       In this case, "scores" fills in the blanks. (This particular task
+       could also be done without another cadence detector, by just
+       detecting the alternating pattern of "3" and no result.)
+    */
+    int pos = CADENCE_POS_INVALID;
+    if( p_ivtc->pb_v_reliable[IVTC_LATEST] )
+        pos = p_ivtc->pi_v_cadence_pos[IVTC_LATEST];
+    else if( p_ivtc->pb_s_reliable[IVTC_LATEST] )
+        pos = p_ivtc->pi_s_cadence_pos[IVTC_LATEST];
+    p_ivtc->pi_cadence_pos_history[IVTC_LATEST] = pos;
+}
+
+/**
+ * Internal helper function for RenderIVTC(): using stream flags,
+ * detect soft telecine.
+ *
+ * This function is different from the other detectors; it may enter or exit
+ * IVTC_MODE_TELECINED_NTSC_SOFT, if it detects that soft telecine has just
+ * been entered or exited.
+ *
+ * Upon exit from soft telecine, the filter will resume operation in its
+ * previous mode (which it had when soft telecine was entered).
+ *
+ * Last three frames must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ */
+static inline void IVTCSoftTelecineDetect( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+    picture_t *p_prev = p_sys->pp_history[0];
+    picture_t *p_curr = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+    assert( p_curr != NULL );
+    assert( p_prev != NULL );
+
+    /* Soft telecine can be detected from the flag pattern:
+       nb_fields = 3,2,3,2,... and *video* TFF = true, false, false, true
+       (TFF telecine) or false, true, true, false (BFF telecine).
+
+       We don't particularly care which field goes first, because we're
+       constructing progressive frames, and the video FDs of successive frames
+       must in any case match any field repeats in order for field renderers
+       (such as traditional DVD player + CRT TV) to work correctly. Thus the
+       video TFF/BFF flag provides no additional useful information for us
+       on top of checking nb_fields.
+
+       Note that the only thing to *do* to soft telecine in an IVTC filter
+       is to even out the outgoing PTS diffs to 2.5 fields each, so that we get
+       a steady 24fps output. Thus, we can do this  processing even if it turns
+       out that we saw a lone field repeat (which are also sometimes used,
+       such as in the Silent Mobius OP and in Sol Bianca). We can be aggressive
+       and don't need to care about false positives - as long as we are equally
+       aggressive about dropping out of soft telecine mode the moment a "2" is
+       followed by another "2" and not a "3" as in soft TC.
+
+       Finally, we conclude that the one-frame future buffer is enough for us
+       to make soft TC decisions just in time for rendering the frame in the
+       "current" position (the flag patterns below constitute proof of this
+       property).
+
+       Soft telecine is relatively rare at least in anime, but it exists;
+       e.g. Angel Links OP, Silent Mobius, and Stellvia of the Universe have
+       sequences that are soft telecined. Stellvia, especially, alternates
+       between soft and hard telecine all the time.
+    */
+
+    /* Valid stream flag patterns for soft telecine. There are three: */
+
+    /* Entering soft telecine at frame curr, or running inside it already */
+    bool b_soft_telecine_1 = (p_prev->i_nb_fields == 2) &&
+                             (p_curr->i_nb_fields == 3) &&
+                             (p_next->i_nb_fields == 2);
+    /* Running inside soft telecine */
+    bool b_soft_telecine_2 = (p_prev->i_nb_fields == 3) &&
+                             (p_curr->i_nb_fields == 2) &&
+                             (p_next->i_nb_fields == 3);
+    /* Exiting soft telecine at frame curr (curr is the last frame
+       that should be handled as soft TC) */
+    bool b_soft_telecine_3 = (p_prev->i_nb_fields == 3) &&
+                             (p_curr->i_nb_fields == 2) &&
+                             (p_next->i_nb_fields == 2);
+
+    /* Soft telecine is very clear-cut - the moment we see or do not see
+       a valid flag pattern, we can change the filter mode.
+    */
+    if( b_soft_telecine_1 || b_soft_telecine_2 || b_soft_telecine_3 )
+    {
+        if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_SOFT )
+        {
+            msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC soft telecine "\
+                               "detected." );
+            p_ivtc->i_old_mode = p_ivtc->i_mode;
+        }
+
+        /* Valid flag pattern seen, this frame is soft telecined */
+        p_ivtc->i_mode = IVTC_MODE_TELECINED_NTSC_SOFT;
+
+        /* Only used during IVTC'ing hard telecine. */
+        p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+        p_ivtc->i_tfd         = TFD_INVALID;
+    }
+    /* Note: no flag pattern match now */
+    else if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_SOFT )
+    {
+        msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC soft telecine ended. "\
+                           "Returning to previous mode." );
+
+        /* No longer soft telecined, return filter to the mode it had earlier.
+           This is needed to fix cases where we came in from hard telecine, and
+           should go back, but can't catch a cadence in time before telecined
+           frames slip through. Kickstarting back to hard IVTC fixes the
+           problem. This happens a lot in Stellvia.
+        */
+        p_ivtc->i_mode = p_ivtc->i_old_mode;
+        p_ivtc->i_cadence_pos = 0; /* Wild guess. The film frame reconstruction
+                                      will start in emergency mode, and this
+                                      will be filled in by the detector ASAP.*/
+        /* I suppose video field dominance no longer flipflops. */
+        p_ivtc->i_tfd = p_next->b_top_field_first;
+    }
+}
+
+/**
+ * Internal helper function for RenderIVTC(): using the history of detected
+ * cadence positions, analyze the cadence and enter or exit
+ * IVTC_MODE_TELECINED_NTSC_HARD when appropriate.
+ *
+ * This also updates b_sequence_valid.
+ *
+ * Last three frames must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ */
+static void IVTCCadenceAnalyze( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+    picture_t *p_prev = p_sys->pp_history[0];
+    picture_t *p_curr = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+    assert( p_curr != NULL );
+    assert( p_prev != NULL );
+
+    /* Determine which frames in the buffer qualify for analysis.
+
+       Note that hard telecine always has nb_fields = 2 and
+       video TFF = constant (i.e. the stream flags look no different from
+       a true interlaced or true progressive stream). Basically, no one ever
+       sets the progressive frame flag for the input frames d, e, and a -
+       in practice they're all flagged as interlaced.
+
+       A frame may qualify for hard TC analysis if it has no soft field repeat
+       (i.e. it cannot be part of a soft telecine). The condition
+       nb_fields == 2 must always match.
+
+       Additionally, curr and next must have had motion with respect to the
+       previous frame, to ensure that the different field combinations have
+       produced unique pictures.
+
+       Alternatively, if there was no motion, but the cadence position was
+       reliably detected and it was the expected one, we qualify the frame
+       for analysis (mainly, for TFD voting).
+
+       We only proceed with the cadence analysis if all three frames
+       in the buffer qualify.
+    */
+
+    /* Note that these are the final detected positions
+       produced by IVTCCadenceDetectFinalize(). */
+    int j_next = p_ivtc->pi_cadence_pos_history[IVTC_LATEST];
+    int j_curr = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-1];
+    int j_prev = p_ivtc->pi_cadence_pos_history[IVTC_LATEST-2];
+
+    bool b_expected = false;
+    if( j_next != CADENCE_POS_INVALID  &&  j_curr != CADENCE_POS_INVALID )
+    {
+        int pos_next = pi_detected_pos_to_cadence_pos[j_next];
+        int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
+        b_expected = (pos_next == (pos_curr + 1) % 5);
+    }
+    bool b_old_expected  = false;
+    if( j_curr != CADENCE_POS_INVALID  &&  j_prev != CADENCE_POS_INVALID )
+    {
+        int pos_curr = pi_detected_pos_to_cadence_pos[j_curr];
+        int pos_prev = pi_detected_pos_to_cadence_pos[j_prev];
+        b_old_expected = (pos_curr == (pos_prev + 1) % 5);
+    }
+
+    int i_motion     = p_ivtc->pi_motion[IVTC_LATEST];
+    int i_old_motion = p_ivtc->pi_motion[IVTC_LATEST-1];
+
+    bool b_prev_valid  = (p_prev->i_nb_fields == 2);
+    bool b_curr_valid  = (p_curr->i_nb_fields == 2)  &&
+                         (i_old_motion > 0  ||  b_old_expected);
+    bool b_next_valid  = (p_next->i_nb_fields == 2)  &&
+                         (i_motion > 0      ||  b_expected);
+    bool b_no_invalids = (b_prev_valid && b_curr_valid && b_next_valid);
+
+    /* Final sanity check: see that the detection history has been
+       completely filled,  i.e. the latest three positions of the stencil
+       have given a result from the cadence detector.
+    */
+    if( b_no_invalids )
+    {
+        for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
+        {
+            const int i_detected_pos = p_ivtc->pi_cadence_pos_history[i];
+            if( i_detected_pos == CADENCE_POS_INVALID )
+            {
+                b_no_invalids = false;
+                break;
+            }
+        }
+    }
+
+    /* If still ok, do the analysis. */
+    p_ivtc->b_sequence_valid = false; /* needed in frame reconstruction */
+    if( b_no_invalids )
+    {
+        /* Convert the history elements to cadence position and TFD. */
+        int pi_tfd[IVTC_DETECTION_HISTORY_SIZE];
+        int pi_pos[IVTC_DETECTION_HISTORY_SIZE];
+        for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
+        {
+            const int i_detected_pos = p_ivtc->pi_cadence_pos_history[i];
+            pi_pos[i] = pi_detected_pos_to_cadence_pos[i_detected_pos];
+            pi_tfd[i] = pi_detected_pos_to_tfd[i_detected_pos];
+        }
+
+        /* See if the sequence is valid. The cadence positions must be
+           successive mod 5.  We can't say anything about TFF/BFF yet,
+           because the progressive-looking position "dea"  may be there.
+           If the sequence otherwise looks valid, we handle that last
+           by voting.
+
+           We also test for a progressive signal here, so that we know
+           when to exit IVTC_MODE_TELECINED_NTSC_HARD.
+        */
+        p_ivtc->b_sequence_valid = true;
+        bool b_all_progressive = (pi_pos[0] == 3);
+        int j = pi_pos[0];
+        for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
+        {
+            if( pi_pos[i] != (++j % 5) )
+                p_ivtc->b_sequence_valid = false;
+            if( pi_pos[i] != 3 )
+                b_all_progressive = false;
+        }
+        p_ivtc->pb_all_progressives[IVTC_LATEST] = b_all_progressive;
+
+        if( p_ivtc->b_sequence_valid )
+        {
+            /* Determine TFF/BFF. */
+            int i_vote_invalid = 0;
+            int i_vote_tff     = 0;
+            int i_vote_bff     = 0;
+            for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; ++i )
+            {
+                if( pi_tfd[i] == TFD_INVALID )
+                    i_vote_invalid++;
+                else if( pi_tfd[i] == TFD_TFF )
+                    i_vote_tff++;
+                else if( pi_tfd[i] == TFD_BFF )
+                    i_vote_bff++;
+            }
+
+            /* With three entries, two votes for any one item are enough
+               to decide this conclusively. */
+            int i_telecine_field_dominance = TFD_INVALID;
+            if( i_vote_tff >= 2)
+                i_telecine_field_dominance = TFD_TFF;
+            else if( i_vote_bff >= 2)
+                i_telecine_field_dominance = TFD_BFF;
+            /* In all other cases, "invalid" won or no winner.
+               This means no NTSC telecine detected. */
+
+            /* Lock on to the cadence if it was valid and TFF/BFF was found.
+
+               Also, aggressively update the cadence counter from the
+               lock-on data whenever we can. In practice this has been found
+               to be a reliable strategy (if the cadence detectors are
+               good enough).
+            */
+            if( i_telecine_field_dominance == TFD_TFF )
+            {
+                if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_HARD )
+                    msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC TFF "\
+                                       "hard telecine detected." );
+                p_ivtc->i_mode        = IVTC_MODE_TELECINED_NTSC_HARD;
+                p_ivtc->i_cadence_pos = pi_pos[IVTC_LATEST];
+                p_ivtc->i_tfd         = TFD_TFF;
+            }
+            else if( i_telecine_field_dominance == TFD_BFF )
+            {
+                if( p_ivtc->i_mode != IVTC_MODE_TELECINED_NTSC_HARD )
+                    msg_Dbg( p_filter, "IVTC: 3:2 pulldown: NTSC BFF "\
+                                       "hard telecine detected." );
+                p_ivtc->i_mode        = IVTC_MODE_TELECINED_NTSC_HARD;
+                p_ivtc->i_cadence_pos = pi_pos[IVTC_LATEST];
+                p_ivtc->i_tfd         = TFD_BFF;
+            }
+        }
+        /* No telecine... maybe a progressive signal? */
+        else if( b_all_progressive )
+        {
+            /* It seems that in practice, three "3"s in a row can still be
+               a fluke rather often. Four or five usually are not.
+               This fixes the Stellvia OP. */
+
+            bool b_really_all_progressive = true;
+            for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE ; i++ )
+            {
+                if( p_ivtc->pb_all_progressives[i] == false )
+                {
+                    b_really_all_progressive = false;
+                    break;
+                }
+            }
+
+            /* If we still think the signal is progressive... */
+            if( b_really_all_progressive )
+            {
+                /* ...exit film mode immediately. */
+                if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_HARD )
+                    msg_Dbg( p_filter, "IVTC: 3:2 pulldown: progressive "\
+                                       "signal detected." );
+                p_ivtc->i_mode        = IVTC_MODE_DETECTING;
+                p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+                p_ivtc->i_tfd         = TFD_INVALID;
+            }
+        }
+        /* Final missing "else": no valid NTSC telecine sequence detected.
+
+            Either there is no telecine, or the detector - although it produced
+            results - had trouble finding it. In this case we do nothing,
+            as it's not a good idea to act on unreliable data.
+        */
+    }
+}
+
+/**
+ * Internal helper function for RenderIVTC(): render or drop frame,
+ * whichever needs to be done. This also sets the output frame PTS.
+ *
+ * Last two frames must be available in the history buffer.
+ *
+ * This is an internal function only used by RenderIVTC().
+ * There is no need to call this function manually.
+ *
+ * @param p_filter The filter instance. Must be non-NULL.
+ * @param[out] p_dst Frame will be rendered here. Must be non-NULL.
+ * @return Whether a frame was constructed.
+ * @retval true Yes, output frame is in p_dst.
+ * @retval false No, this frame was dropped as part of normal IVTC operation.
+ * @see RenderIVTC()
+ */
+static bool IVTCOutputOrDropFrame( filter_t *p_filter, picture_t *p_dst )
+{
+    assert( p_filter != NULL );
+    assert( p_dst != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+    mtime_t t_final = VLC_TS_INVALID; /* for custom timestamp mangling */
+
+    picture_t *p_curr = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    assert( p_next != NULL );
+    assert( p_curr != NULL );
+
+    /* Perform IVTC if we're in film mode (either hard or soft telecine).
+
+       Note that we don't necessarily have a lock-on, even if we are in
+       IVTC_MODE_TELECINED_NTSC_HARD. We *may* be locked on, or alternatively,
+       we have seen a valid cadence some time in the past, but lock-on has
+       since been lost, and we have not seen a progressive signal after that.
+       The latter case usually results from bad cuts, which interrupt
+       the cadence.
+    */
+    int i_result_score = -1;
+    int op;
+    if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_HARD )
+    {
+        assert( p_ivtc->i_cadence_pos != CADENCE_POS_INVALID );
+        assert( p_ivtc->i_tfd != TFD_INVALID );
+
+        /* Decide what to do. The operation table is only enabled
+           if the cadence seems reliable. Otherwise we use a backup strategy.
+        */
+        if( p_ivtc->b_sequence_valid )
+        {
+            /* Pick correct operation from the operation table. */
+            op = pi_reconstruction_ops[p_ivtc->i_tfd][p_ivtc->i_cadence_pos];
+
+            if( op == IVTC_OP_DROP_FRAME )
+            {
+                /* Bump cadence counter into the next expected position */
+                p_ivtc->i_cadence_pos = ++p_ivtc->i_cadence_pos % 5;
+
+                /* Drop frame. We're done. */
+                return false;
+            }
+            /* Frame not dropped */
+            else if( p_ivtc->b_sequence_valid )
+            {
+                if( op == IVTC_OP_COPY_N )
+                    i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
+                else if( op == IVTC_OP_COPY_C )
+                    i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
+                else if( op == IVTC_OP_COMPOSE_TNBC )
+                    i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBC];
+                else if( op == IVTC_OP_COMPOSE_TCBN )
+                    i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBN];
+
+                /* Sanity check the result */
+
+                /* Compute running mean of outgoing interlace score.
+                   See below for history mechanism. */
+                int i_avg = 0;
+                for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++)
+                    i_avg += p_ivtc->pi_final_scores[i];
+                i_avg /= IVTC_DETECTION_HISTORY_SIZE;
+
+                /* Check if the score suddenly became "clearly larger".
+                   Also, filter out spurious peaks at the low end. */
+                if( i_result_score > 1000  &&  i_result_score > 2*i_avg )
+                {
+                    /* Sequence wasn't reliable after all; we'll use
+                       the Transcode strategy for this frame. */
+                    p_ivtc->b_sequence_valid = false;
+                    msg_Dbg( p_filter, "Rejected cadence-based frame "\
+                                       "construction: interlace score %d "\
+                                       "(running average %d)",
+                                       i_result_score, i_avg );
+
+                    /* We also reset the detector used in the "vektor"
+                       algorithm, as it depends on having a reliable previous
+                       position. In practice, we continue using the Transcode
+                       strategy until the cadence becomes locked on again.
+                       (At that point, b_sequence_valid will become true again,
+                       and we continue with this strategy.)
+                    */
+                    p_ivtc->pi_v_raw[IVTC_LATEST] = VEKTOR_CADENCE_POS_ALL;
+                }
+            }
+        }
+
+        /* Frame not dropped, and the cadence counter seems unreliable.
+
+            Note that this is not an "else" to the previous case. This may
+            begin with a valid sequence, and then the above logic decides
+            that it wasn't valid after all.
+        */
+        if( !p_ivtc->b_sequence_valid )
+        {
+            /* In this case, we must proceed with no cadence information.
+                We use a Transcode-like strategy.
+
+                We check which field paired with TN or BN (accounting for
+                the field dominance) gives the smallest interlace score,
+                and declare that combination the resulting progressive frame.
+
+                This strategy gives good results on average, but often fails
+                in talking scenes in anime. Those can be handled more reliably
+                with a locked-on cadence produced by the "vektor" algorithm.
+            */
+
+            int tnbn = p_ivtc->pi_scores[FIELD_PAIR_TNBN]; /* TFF and BFF */
+            int tnbc = p_ivtc->pi_scores[FIELD_PAIR_TNBC]; /* TFF only */
+            int tcbn = p_ivtc->pi_scores[FIELD_PAIR_TCBN]; /* BFF only */
+
+            if( p_next->b_top_field_first )
+            {
+                if( tnbn <= tnbc )
+                {
+                    op = IVTC_OP_COPY_N;
+                    i_result_score = tnbn;
+                }
+                else
+                {
+                    op = IVTC_OP_COMPOSE_TNBC;
+                    i_result_score = tnbc;
+                }
+            }
+            else
+            {
+                if( tnbn <= tcbn )
+                {
+                    op = IVTC_OP_COPY_N;
+                    i_result_score = tnbn;
+                }
+                else
+                {
+                    op = IVTC_OP_COMPOSE_TCBN;
+                    i_result_score = tcbn;
+                }
+            }
+        }
+
+        /* Note that we get to this point only if we didn't drop the frame.
+            Mangle the presentation timestamp to convert 29.97 -> 23.976 fps.
+        */
+        int i_timestamp_delta = pi_timestamp_deltas[p_ivtc->i_cadence_pos];
+        if( p_ivtc->b_sequence_valid )
+            assert( i_timestamp_delta >= 0 );
+
+        /* "Current" is the frame that is being extracted now. Use its original
+           timestamp as the base.
+
+           Note that this way there will be no extra delay compared to the
+           raw stream, even though we look one frame into the future.
+        */
+        if( p_ivtc->b_sequence_valid )
+        {
+            /* FIXME: use field length as measured by Deinterlace()? */
+            t_final = p_curr->date
+                    + (p_next->date - p_curr->date)*i_timestamp_delta/4;
+        }
+        else /* Do not mangle timestamps (or drop frames, either) if cadence
+                is not locked on. This causes one of five output frames - if
+                all are reconstructed correctly - to be a duplicate, but in
+                practice at least with anime (which is the kind of material
+                that tends to have this problem) this is less noticeable than
+                a sudden jump in the cadence. Especially, a consistently wrong
+                lock-on will cause a very visible stutter, which we wish
+                to avoid. */
+        {
+            t_final = p_curr->date;
+        }
+
+        /* Bump cadence counter into the next expected position. */
+        p_ivtc->i_cadence_pos = ++p_ivtc->i_cadence_pos % 5;
+    }
+    else if( p_ivtc->i_mode == IVTC_MODE_TELECINED_NTSC_SOFT )
+    {
+        /* Soft telecine. We have the progressive frames already;
+           even out PTS diffs only. */
+
+        /* Pass through the "current" frame. We must choose the frame "current"
+           in order to be able to detect soft telecine before we have to output
+           the frame. See IVTCSoftTelecineDetect(). Also, this allows
+           us to peek at the next timestamp to calculate the duration of
+           "current".
+        */
+        op = IVTC_OP_COPY_C;
+        i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TCBC];
+
+        /* Timestamp mangling for soft telecine: bump "threes" forward by
+           0.5 field durations. This is more forgiving for the renderer
+           than bumping the "twos" back (which would  require to render
+           them sooner),
+        */
+        if( p_curr->i_nb_fields == 3 )
+        {
+            /* Approximate field duration from the PTS difference.  */
+            /* FIXME: use field length as measured by Deinterlace()? */
+            mtime_t i_half_field_dur = ( (p_next->date - p_curr->date)/3 ) / 2;
+            t_final = p_curr->date + i_half_field_dur;
+        }
+        else /* Otherwise, use original PTS of the outgoing frame. */
+        {
+            t_final = p_curr->date;
+        }
+    }
+    else /* Not film mode, timestamp mangling bypassed. */
+    {
+        op = IVTC_OP_COPY_N;
+        i_result_score = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
+
+        /* Preserve original PTS (note that now, in principle,
+                                  "next" is the outgoing frame) */
+        t_final = p_next->date;
+    }
+
+    /* There is only one case where we should drop the frame,
+       and it was already handled above. */
+    assert( op != IVTC_OP_DROP_FRAME );
+
+    /* Render into p_dst according to the final operation chosen. */
+    if( op == IVTC_OP_COPY_N )
+        picture_Copy( p_dst, p_next );
+    else if( op == IVTC_OP_COPY_C )
+        picture_Copy( p_dst, p_curr );
+    else if( op == IVTC_OP_COMPOSE_TNBC )
+        ComposeFrame( p_filter, p_dst, p_next, p_curr, CC_ALTLINE );
+    else if( op == IVTC_OP_COMPOSE_TCBN )
+        ComposeFrame( p_filter, p_dst, p_curr, p_next, CC_ALTLINE );
+
+    /* Slide history of outgoing interlace scores. This must be done last,
+       and only if the frame was not dropped, so we do it here.
+
+       This is used during the reconstruction to get an idea of what is
+       (in the temporally local sense) an acceptable interlace score
+       for a correctly reconstructed frame. See above.
+    */
+    for( int i = 1; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+        p_ivtc->pi_final_scores[i-1] = p_ivtc->pi_final_scores[i];
+    p_ivtc->pi_final_scores[IVTC_LATEST] = i_result_score;
+
+    /* Note that picture_Copy() copies the PTS, too. Apply timestamp mangling
+       now, if any was needed.
+    */
+    if( t_final > VLC_TS_INVALID )
+        p_dst->date = t_final;
+
+    return true;
+}
+
+/* The top-level routine of the IVTC filter.
+
+   See the lengthy comment above for function documentation.
+*/
+static int RenderIVTC( filter_t *p_filter, picture_t *p_dst, picture_t *p_src )
+{
+    assert( p_filter != NULL );
+    assert( p_src != NULL );
+    assert( p_dst != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc  = &p_sys->ivtc;
+
+    picture_t *p_prev = p_sys->pp_history[0];
+    picture_t *p_curr = p_sys->pp_history[1];
+    picture_t *p_next = p_sys->pp_history[2];
+
+    /* If the history mechanism has failed, we have nothing to do. */
+    if( !p_next )
+        return VLC_EGENERIC;
+
+    /* Slide algorithm-specific histories */
+    IVTCFrameInit( p_filter );
+
+    /* Filter if we have all the pictures we need.
+       Note that we always have p_next at this point. */
+    if( p_prev && p_curr )
+    {
+        /* Update raw data for motion, field repeats, interlace scores... */
+        IVTCLowLevelDetect( p_filter );
+
+        /* Detect soft telecine.
+
+           Enter/exit IVTC_MODE_TELECINED_NTSC_SOFT when needed.
+        */
+        IVTCSoftTelecineDetect( p_filter );
+
+        /* Detect hard telecine.
+
+           Enter/exit IVTC_MODE_TELECINED_NTSC_HARD when needed.
+
+           If we happen to be running in IVTC_MODE_TELECINED_NTSC_SOFT,
+           we nevertheless let the algorithms see for themselves that
+           the stream is progressive. This doesn't break anything,
+           and this way the full filter state gets updated at each frame.
+
+           See the individual function docs for details.
+        */
+        IVTCCadenceDetectAlgoScores( p_filter );
+        IVTCCadenceDetectAlgoVektor( p_filter );
+        IVTCCadenceDetectFinalize( p_filter ); /* pick winner */
+        IVTCCadenceAnalyze( p_filter ); /* update filter state */
+
+        /* Now we can... */
+        bool b_have_output_frame = IVTCOutputOrDropFrame( p_filter, p_dst );
+
+        /* The next frame will get a custom timestamp, too. */
+        p_sys->i_frame_offset = CUSTOM_PTS;
+
+        if( b_have_output_frame )
+            return VLC_SUCCESS;
+        else
+            return VLC_EGENERIC; /* Signal the caller not to expect a frame */
+    }
+    else if( !p_prev && !p_curr ) /* first frame */
+    {
+        /* Render the first frame as-is, so that a picture appears immediately.
+
+           We will also do some init for the filter. This score will become
+           TPBP by the time the actual filter starts. Note that the sliding of
+           final scores only starts when the filter has started (third frame).
+        */
+        int i_score = CalculateInterlaceScore( p_next, p_next );
+        p_ivtc->pi_scores[FIELD_PAIR_TNBN] = i_score;
+        p_ivtc->pi_final_scores[0]         = i_score;
+
+        picture_Copy( p_dst, p_next );
+        return VLC_SUCCESS;
+    }
+    else /* second frame */
+    {
+        /* If the history sliding mechanism works correctly,
+           the only remaining possibility is that: */
+        assert( p_curr && !p_prev );
+
+        /* We need three frames for the detector to work, so we drop this one.
+           We will only do some initialization for the detector here. */
+
+        /* These scores will become TCBC, TCBP and TPBC when the filter starts.
+           The score for the current TCBC has already been computed at the
+           first frame, and slid into place at the start of this frame
+           (by IVTCFrameInit()).
+        */
+        p_ivtc->pi_scores[FIELD_PAIR_TNBN] =
+                                     CalculateInterlaceScore( p_next, p_next );
+        p_ivtc->pi_scores[FIELD_PAIR_TNBC] =
+                                     CalculateInterlaceScore( p_next, p_curr );
+        p_ivtc->pi_scores[FIELD_PAIR_TCBN] =
+                                     CalculateInterlaceScore( p_curr, p_next );
+
+        /* TNBN is a wild guess, but doesn't really matter */
+        p_ivtc->pi_final_scores[1] = p_ivtc->pi_scores[FIELD_PAIR_TNBN];
+
+        /* At the next frame, the filter starts. The next frame will get
+           a custom timestamp. */
+        p_sys->i_frame_offset = CUSTOM_PTS;
+
+        /* Not really an error. This is expected, but we must
+           signal the caller not to expect an output frame. */
+        return VLC_EGENERIC;
+    }
+}
+
+/**
+ * Clears the inverse telecine subsystem state.
+ *
+ * Used during initialization and uninitialization.
+ *
+ * @param p_filter The filter instance.
+ * @see RenderIVTC()
+ * @see Open()
+ * @see Flush()
+ */
+static void IVTCClearState( filter_t *p_filter )
+{
+    assert( p_filter != NULL );
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    ivtc_sys_t *p_ivtc = &p_sys->ivtc;
+
+    p_ivtc->i_cadence_pos = CADENCE_POS_INVALID;
+    p_ivtc->i_tfd         = TFD_INVALID;
+    p_ivtc->b_sequence_valid = false;
+    p_ivtc->i_mode     = IVTC_MODE_DETECTING;
+    p_ivtc->i_old_mode = IVTC_MODE_DETECTING;
+    for( int i = 0; i < IVTC_NUM_FIELD_PAIRS; i++ )
+        p_ivtc->pi_scores[i] = 0;
+    for( int i = 0; i < IVTC_DETECTION_HISTORY_SIZE; i++ )
+    {
+        p_ivtc->pi_cadence_pos_history[i] = CADENCE_POS_INVALID;
+
+        p_ivtc->pi_s_cadence_pos[i] = CADENCE_POS_INVALID;
+        p_ivtc->pb_s_reliable[i]    = false;
+        p_ivtc->pi_v_cadence_pos[i] = CADENCE_POS_INVALID;
+        p_ivtc->pb_v_reliable[i]    = false;
+
+        p_ivtc->pi_v_raw[i]         = VEKTOR_CADENCE_POS_ALL;
+
+        /* the most neutral result considering the "vektor" algorithm */
+        p_ivtc->pi_top_rep[i] = 1;
+        p_ivtc->pi_bot_rep[i] = 1;
+        p_ivtc->pi_motion[i] = -1;
+
+        p_ivtc->pb_all_progressives[i] = false;
+
+        p_ivtc->pi_final_scores[i] = 0;
+    }
+}
+
+/*****************************************************************************
+ * video filter2 functions
+ *****************************************************************************/
+#define DEINTERLACE_DST_SIZE 3
+static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
+{
+    filter_sys_t *p_sys = p_filter->p_sys;
+    picture_t *p_dst[DEINTERLACE_DST_SIZE];
+
+    /* Request output picture */
+    p_dst[0] = filter_NewPicture( p_filter );
+    if( p_dst[0] == NULL )
+    {
+        picture_Release( p_pic );
+        return NULL;
+    }
+    picture_CopyProperties( p_dst[0], p_pic );
+
+    /* Any unused p_dst pointers must be NULL, because they are used to check how many output frames we have. */
+    for( int i = 1; i < DEINTERLACE_DST_SIZE; ++i )
+        p_dst[i] = NULL;
+
+    /* Update the input frame history, if the currently active algorithm needs it. */
+    if( p_sys->b_use_frame_history )
+    {
+        /* Duplicate the picture
+         * TODO when the vout rework is finished, picture_Hold() might be enough
+         * but becarefull, the pitches must match */
+        picture_t *p_dup = picture_NewFromFormat( &p_pic->format );
+        if( p_dup )
+            picture_Copy( p_dup, p_pic );
+
+        /* Slide the history */
+        if( p_sys->pp_history[0] )
+            picture_Release( p_sys->pp_history[0] );
+        for( int i = 1; i < HISTORY_SIZE; i++ )
+            p_sys->pp_history[i-1] = p_sys->pp_history[i];
+        p_sys->pp_history[HISTORY_SIZE-1] = p_dup;
+    }
+
+    /* Slide the metadata history. */
+    for( int i = 1; i < METADATA_SIZE; i++ )
+    {
+        p_sys->meta.pi_date[i-1]            = p_sys->meta.pi_date[i];
+        p_sys->meta.pi_nb_fields[i-1]       = p_sys->meta.pi_nb_fields[i];
+        p_sys->meta.pb_top_field_first[i-1] = p_sys->meta.pb_top_field_first[i];
+    }
+    /* The last element corresponds to the current input frame. */
+    p_sys->meta.pi_date[METADATA_SIZE-1]            = p_pic->date;
+    p_sys->meta.pi_nb_fields[METADATA_SIZE-1]       = p_pic->i_nb_fields;
+    p_sys->meta.pb_top_field_first[METADATA_SIZE-1] = p_pic->b_top_field_first;
+
+    /* Remember the frame offset that we should use for this frame.
+       The value in p_sys will be updated to reflect the correct value
+       for the *next* frame when we call the renderer. */
+    int i_frame_offset = p_sys->i_frame_offset;
+    int i_meta_idx     = (METADATA_SIZE-1) - i_frame_offset;
+
+    /* These correspond to the current *outgoing* frame. */
+    bool b_top_field_first;
+    int i_nb_fields;
+    if( i_frame_offset != CUSTOM_PTS )
+    {
+        /* Pick the correct values from the history. */
+        b_top_field_first = p_sys->meta.pb_top_field_first[i_meta_idx];
+        i_nb_fields       = p_sys->meta.pi_nb_fields[i_meta_idx];
+    }
+    else
+    {
+        /* Framerate doublers must not request CUSTOM_PTS, as they need the original field timings,
            and need Deinterlace() to allocate the correct number of output frames. */
         assert( !p_sys->b_double_rate );
 
@@ -2535,6 +5231,13 @@ static picture_t *Deinterlace( filter_t *p_filter, picture_t *p_pic )
                 RenderPhosphor( p_filter, p_dst[2], p_pic, 2,
                                 !b_top_field_first );
             break;
+
+        case DEINTERLACE_IVTC:
+            /* Note: RenderIVTC will automatically drop the duplicate frames
+                     produced by IVTC. This is part of normal operation. */
+            if( RenderIVTC( p_filter, p_dst[0], p_pic ) )
+                goto drop;
+            break;
     }
 
     /* Set output timestamps, if the algorithm didn't request CUSTOM_PTS for this frame. */
@@ -2603,6 +5306,7 @@ static void Flush( filter_t *p_filter )
             picture_Release( p_sys->pp_history[i] );
         p_sys->pp_history[i] = NULL;
     }
+    IVTCClearState( p_filter );
 }
 
 static int Mouse( filter_t *p_filter,
@@ -2646,6 +5350,8 @@ static int Open( vlc_object_t *p_this )
     for( int i = 0; i < HISTORY_SIZE; i++ )
         p_sys->pp_history[i] = NULL;
 
+    IVTCClearState( p_filter );
+
 #if defined(CAN_COMPILE_C_ALTIVEC)
     if( vlc_CPU() & CPU_CAPABILITY_ALTIVEC )
     {
diff --git a/src/control/video.c b/src/control/video.c
index 24a4de9e1d..e6655b9bed 100644
--- a/src/control/video.c
+++ b/src/control/video.c
@@ -567,7 +567,7 @@ void libvlc_video_set_deinterlace( libvlc_media_player_t *p_mi,
      && strcmp (psz_mode, "discard")  && strcmp (psz_mode, "linear")
      && strcmp (psz_mode, "mean")     && strcmp (psz_mode, "x")
      && strcmp (psz_mode, "yadif")    && strcmp (psz_mode, "yadif2x")
-     && strcmp (psz_mode, "phosphor"))
+     && strcmp (psz_mode, "phosphor") && strcmp (psz_mode, "ivtc"))
         return;
 
     if (*psz_mode)
diff --git a/src/libvlc-module.c b/src/libvlc-module.c
index edc0793c7a..ba60e56365 100644
--- a/src/libvlc-module.c
+++ b/src/libvlc-module.c
@@ -477,11 +477,13 @@ static const char * const  ppsz_deinterlace_text[] = {
     "Deinterlace method to use for video processing.")
 static const char * const ppsz_deinterlace_mode[] = {
     "discard", "blend", "mean", "bob",
-    "linear", "x", "yadif", "yadif2x", "phosphor"
+    "linear", "x", "yadif", "yadif2x", "phosphor",
+    "ivtc"
 };
 static const char * const ppsz_deinterlace_mode_text[] = {
     N_("Discard"), N_("Blend"), N_("Mean"), N_("Bob"),
-    N_("Linear"), "X", "Yadif", "Yadif (2x)", N_("Phosphor")
+    N_("Linear"), "X", "Yadif", "Yadif (2x)", N_("Phosphor"),
+    N_("Film NTSC (IVTC)")
 };
 
 static const int pi_pos_values[] = { 0, 1, 2, 4, 8, 5, 6, 9, 10 };
diff --git a/src/video_output/interlacing.c b/src/video_output/interlacing.c
index c530bd7dd8..d5dc241595 100644
--- a/src/video_output/interlacing.c
+++ b/src/video_output/interlacing.c
@@ -49,6 +49,7 @@ static const char *deinterlace_modes[] = {
     "yadif",
     "yadif2x",
     "phosphor",
+    "ivtc",
     NULL
 };
 static bool DeinterlaceIsModeValid(const char *mode)
-- 
2.11.4.GIT